├── chapter05-cluster
│   ├── KH.png
│   ├── 18.20.png
│   ├── scenery.png
│   ├── Basketball.zip
│   ├── data-fenci.txt
│   ├── chapter05-cluster-06.py
│   ├── chapter05-cluster-08.py
│   ├── chapter05-cluster-02.py
│   ├── chapter05-cluster-09.py
│   ├── chapter05-cluster-01.py
│   ├── chapter05-cluster-12.py
│   ├── chapter05-cluster-10.py
│   ├── chapter05-cluster-13.py
│   ├── chapter05-cluster-14.py
│   ├── chapter05-cluster-07.py
│   ├── chapter05-cluster-03.py
│   ├── chapter05-cluster-11.py
│   ├── chapter05-cluster-16py.py
│   ├── chapter05-cluster-15.py
│   ├── chapter05-cluster-04.py
│   ├── chapter05-cluster-05.py
│   ├── data.txt
│   └── glass.csv
├── chapter06-classifier
│   ├── wine.rar
│   ├── 实验结果.xls
│   ├── chapter06-classifier-01-dtc.py
│   ├── chapter06-classifier-08-svm.py
│   ├── chapter06-classifier-05-knn.py
│   ├── chapter06-classifier-06-knn.py
│   ├── chapter06-classifier-02-dtc.py
│   ├── chapter06-classifier-04-dtc.py
│   ├── wine
│   │   ├── wine Data Set Information.txt
│   │   └── wine.txt
│   ├── chapter06-classifier-10-svm.py
│   ├── chapter06-classifier-03-dtc.py
│   ├── chapter06-classifier-09-svm.py
│   ├── chapter06-classifier-07-knn.py
│   └── chapter06-classifier-11-all.py
├── chapter08-textcluster
│   ├── 08.10.png
│   ├── 08.11.png
│   ├── 08.3.png
│   ├── 08.5.png
│   ├── 08.9.png
│   ├── test.txt
│   ├── test3.txt
│   ├── test2.txt
│   ├── result.txt
│   ├── chapter08_textcluster_02.py
│   ├── chapter08_textcluster_05.py
│   ├── chapter08_textcluster_01.py
│   ├── chapter08_textcluster_03.py
│   ├── chapter08_textcluster_06.py
│   ├── chapter08_textcluster_04.py
│   └── chapter08_textcluster_07.py
├── chapter09-TopicAnalysis
│   ├── pic.png
│   ├── test.txt
│   ├── result.png
│   ├── cloudimg.png
│   ├── result2.png
│   ├── data-fenci.txt
│   ├── lda-1.1.0-cp37-cp37m-win32.whl
│   ├── lda-1.1.0-cp37-cp37m-win_amd64.whl
│   ├── chapter09_TopicAnalysis_01.py
│   ├── chapter09_TopicAnalysis_04.py
│   ├── chapter09_TopicAnalysis_02.py
│   ├── chapter09_TopicAnalysis_06.py
│   ├── chapter09_TopicAnalysis_05.py
│   ├── chapter09_TopicAnalysis_08.py
│   ├── chapter09_TopicAnalysis_09.py
│   ├── chapter09_TopicAnalysis_07.py
│   └── chapter09_TopicAnalysis_03.py
├── chapter02-dataming-based
│   ├── data.xls
│   ├── chapter02-09-sklearn.py
│   ├── data.csv
│   ├── chapter02-01-numpy.py
│   ├── chapter02-03-numpy.py
│   ├── chapter02-05-pandas.py
│   ├── chapter02-04-numpy.py
│   ├── chapter02-06-pandas.py
│   ├── chapter02-02-numpy.py
│   ├── chapter02-07-pandas.py
│   └── chapter02-08-matplotlib.py
├── chapter04-regression
│   ├── chapter04-01.png
│   ├── chapter04-regression-01.py
│   ├── chapter04-regression-10.py
│   ├── chapter04-regression-09.py
│   ├── chapter04-regression-03.py
│   ├── chapter04-regression-12.py
│   ├── chapter04-regression-11.py
│   ├── chapter04-regression-02.py
│   ├── chapter04-regression-04.py
│   ├── chapter04-regression-05.py
│   ├── chapter04-regression-13.py
│   ├── chapter04-regression-06.py
│   ├── chapter04-regression-07.py
│   └── chapter04-regression-08.py
└── README.md

/chapter05-cluster/KH.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/eastmountyxz/Book2-Python-DataAnalysis/HEAD/chapter05-cluster/KH.png
--------------------------------------------------------------------------------
/chapter05-cluster/18.20.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/eastmountyxz/Book2-Python-DataAnalysis/HEAD/chapter05-cluster/18.20.png
--------------------------------------------------------------------------------
/chapter05-cluster/scenery.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/eastmountyxz/Book2-Python-DataAnalysis/HEAD/chapter05-cluster/scenery.png
--------------------------------------------------------------------------------
/chapter06-classifier/wine.rar:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/eastmountyxz/Book2-Python-DataAnalysis/HEAD/chapter06-classifier/wine.rar
--------------------------------------------------------------------------------
/chapter06-classifier/实验结果.xls:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/eastmountyxz/Book2-Python-DataAnalysis/HEAD/chapter06-classifier/实验结果.xls
--------------------------------------------------------------------------------
/chapter05-cluster/Basketball.zip:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/eastmountyxz/Book2-Python-DataAnalysis/HEAD/chapter05-cluster/Basketball.zip
--------------------------------------------------------------------------------
/chapter05-cluster/data-fenci.txt:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/eastmountyxz/Book2-Python-DataAnalysis/HEAD/chapter05-cluster/data-fenci.txt
--------------------------------------------------------------------------------
/chapter08-textcluster/08.10.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/eastmountyxz/Book2-Python-DataAnalysis/HEAD/chapter08-textcluster/08.10.png
--------------------------------------------------------------------------------
/chapter08-textcluster/08.11.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/eastmountyxz/Book2-Python-DataAnalysis/HEAD/chapter08-textcluster/08.11.png
--------------------------------------------------------------------------------
/chapter08-textcluster/08.3.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/eastmountyxz/Book2-Python-DataAnalysis/HEAD/chapter08-textcluster/08.3.png
--------------------------------------------------------------------------------
/chapter08-textcluster/08.5.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/eastmountyxz/Book2-Python-DataAnalysis/HEAD/chapter08-textcluster/08.5.png
--------------------------------------------------------------------------------
/chapter08-textcluster/08.9.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/eastmountyxz/Book2-Python-DataAnalysis/HEAD/chapter08-textcluster/08.9.png
--------------------------------------------------------------------------------
/chapter08-textcluster/test.txt:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/eastmountyxz/Book2-Python-DataAnalysis/HEAD/chapter08-textcluster/test.txt
--------------------------------------------------------------------------------
/chapter08-textcluster/test3.txt:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/eastmountyxz/Book2-Python-DataAnalysis/HEAD/chapter08-textcluster/test3.txt
--------------------------------------------------------------------------------
/chapter09-TopicAnalysis/pic.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/eastmountyxz/Book2-Python-DataAnalysis/HEAD/chapter09-TopicAnalysis/pic.png
--------------------------------------------------------------------------------
/chapter09-TopicAnalysis/test.txt:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/eastmountyxz/Book2-Python-DataAnalysis/HEAD/chapter09-TopicAnalysis/test.txt
--------------------------------------------------------------------------------
/chapter02-dataming-based/data.xls:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/eastmountyxz/Book2-Python-DataAnalysis/HEAD/chapter02-dataming-based/data.xls
--------------------------------------------------------------------------------
/chapter09-TopicAnalysis/result.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/eastmountyxz/Book2-Python-DataAnalysis/HEAD/chapter09-TopicAnalysis/result.png
--------------------------------------------------------------------------------
/chapter04-regression/chapter04-01.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/eastmountyxz/Book2-Python-DataAnalysis/HEAD/chapter04-regression/chapter04-01.png
--------------------------------------------------------------------------------
/chapter09-TopicAnalysis/cloudimg.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/eastmountyxz/Book2-Python-DataAnalysis/HEAD/chapter09-TopicAnalysis/cloudimg.png
--------------------------------------------------------------------------------
/chapter09-TopicAnalysis/result2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/eastmountyxz/Book2-Python-DataAnalysis/HEAD/chapter09-TopicAnalysis/result2.png
--------------------------------------------------------------------------------
/chapter09-TopicAnalysis/data-fenci.txt:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/eastmountyxz/Book2-Python-DataAnalysis/HEAD/chapter09-TopicAnalysis/data-fenci.txt
--------------------------------------------------------------------------------
/chapter06-classifier/chapter06-classifier-01-dtc.py:
--------------------------------------------------------------------------------
from sklearn.datasets import load_iris
iris = load_iris()
print(iris.data)
print(iris.target)
--------------------------------------------------------------------------------
/chapter04-regression/chapter04-regression-01.py:
--------------------------------------------------------------------------------
from sklearn import linear_model          #import the linear model module
regr = linear_model.LinearRegression()    #use linear regression
print(regr)
--------------------------------------------------------------------------------
/chapter09-TopicAnalysis/lda-1.1.0-cp37-cp37m-win32.whl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/eastmountyxz/Book2-Python-DataAnalysis/HEAD/chapter09-TopicAnalysis/lda-1.1.0-cp37-cp37m-win32.whl
--------------------------------------------------------------------------------
/chapter09-TopicAnalysis/lda-1.1.0-cp37-cp37m-win_amd64.whl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/eastmountyxz/Book2-Python-DataAnalysis/HEAD/chapter09-TopicAnalysis/lda-1.1.0-cp37-cp37m-win_amd64.whl
--------------------------------------------------------------------------------
/chapter08-textcluster/test2.txt:
--------------------------------------------------------------------------------
贵州省 位于 中国 西南地区 简称 黔 贵
走遍 神州大地 醉美 多彩 贵州
贵阳市 是 贵州省 省会 有 林城 美誉
数据分析 是 数学 计算机科学 相结合 产物
回归 聚类 分类 算法 广泛应用 数据分析
数据 爬取 数据 存储 数据分析 紧密 相关 过程
最 甜美 爱情 最 苦涩 爱情
一只 鸡蛋 可以 画 无数次 一场 爱情
真爱 往往 珍藏 最 平凡 普通 生活
--------------------------------------------------------------------------------
/chapter08-textcluster/result.txt:
--------------------------------------------------------------------------------
贵州省 位于 中国 西南地区 简称 黔 贵
走遍 神州大地 醉美 多彩 贵州
贵阳市 贵州省 省会 林城 美誉
数据分析 数学 计算机科学 相结合 产物
回归 聚类 分类 算法 广泛应用 数据分析
数据 爬取 数据 存储 数据分析 紧密 相关 过程
甜美 爱情 苦涩 爱情
一只 鸡蛋 可以 画 无数次 一场 爱情 能
真 爱 往往 珍藏 平凡 普通 生活
--------------------------------------------------------------------------------
/chapter05-cluster/chapter05-cluster-06.py:
--------------------------------------------------------------------------------
# -*- coding: utf-8 -*-
# By:Eastmount CSDN 2021-07-03
from sklearn.cluster import Birch
X = [[1,1],[2,1],[1,3],[6,6],[8,5],[7,8]]
y = [0,0,0,1,1,1]
clf = Birch(n_clusters=2)
clf.fit(X,y)
print(clf.labels_)
--------------------------------------------------------------------------------
/chapter02-dataming-based/chapter02-09-sklearn.py:
--------------------------------------------------------------------------------
#coding=utf-8
#By:Eastmount CSDN 2021-06-28
from sklearn.cluster import KMeans

X = [[1],[2],[3],[4],[5]]
y = [4,2,6,1,3]
clf = KMeans(n_clusters=2)
clf.fit(X,y)
print(clf)
print(clf.labels_)
--------------------------------------------------------------------------------
/chapter02-dataming-based/data.csv:
--------------------------------------------------------------------------------
1,235.83,324.03,478.32
2,236.27,325.63,515.45
3,238.05,328.08,517.09
4,235.9,,514.89
5,236.76,268.82,
6,,404.04,486.09
7,237.41,391.26,516.23
8,238.65,380.81,
9,237.61,388.02,435.35
10,238.03,206.43,487.675
--------------------------------------------------------------------------------
/chapter04-regression/chapter04-regression-10.py:
--------------------------------------------------------------------------------
# -*- coding: utf-8 -*-
# By:Eastmount CSDN 2021-07-03
from sklearn.linear_model import LogisticRegression   #import the logistic regression model
import numpy as np

#train_feature/label/predict_feature were undefined in the original; toy values added so the snippet runs
train_feature = np.array([[1, 2], [2, 3], [3, 1], [6, 5], [7, 8], [8, 6]])
label = np.array([0, 0, 0, 1, 1, 1])
predict_feature = np.array([[2, 2], [7, 7]])

clf = LogisticRegression()
print(clf)
clf.fit(train_feature, label)
print(clf.predict(predict_feature))
--------------------------------------------------------------------------------
/chapter02-dataming-based/chapter02-01-numpy.py:
--------------------------------------------------------------------------------
#coding=utf-8
#By:Eastmount CSDN 2021-06-28

#import the package and alias it as np
import numpy as np

#define a one-dimensional array
a = np.array([2, 0, 1, 5, 8, 3])
print('原始数据:', a)

#print the minimum, maximum and shape
print('最小值:', a.min())
print('最大值:', a.max())
print('形状', a.shape)
--------------------------------------------------------------------------------
/chapter06-classifier/chapter06-classifier-08-svm.py:
--------------------------------------------------------------------------------
import numpy as np
from sklearn.svm import SVC

X = np.array([[-1, -1], [-2, -2], [1, 3], [4, 6]])
y = np.array([1, 1, 2, 2])
clf = SVC()
clf.fit(X, y)
print(clf)
print(clf.predict([[-0.8,-1], [2,1]]))

#Output: [1, 2]
--------------------------------------------------------------------------------
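A small companion sketch (an editor addition, not part of the book code) showing how the SVC fitted in chapter06-classifier-08-svm.py can be inspected; `support_vectors_`, `decision_function` and `score` are standard scikit-learn SVC members, and the toy data repeats the example above.

import numpy as np
from sklearn.svm import SVC

# same toy data as chapter06-classifier-08-svm.py
X = np.array([[-1, -1], [-2, -2], [1, 3], [4, 6]])
y = np.array([1, 1, 2, 2])
clf = SVC(kernel='linear')        # linear kernel so the margin is easy to read
clf.fit(X, y)

print(clf.support_vectors_)       # training points that define the margin
print(clf.decision_function(X))   # signed distance of each sample from the separating plane
print(clf.score(X, y))            # mean accuracy on the training data
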
/chapter05-cluster/chapter05-cluster-08.py:
--------------------------------------------------------------------------------
# -*- coding: utf-8 -*-
# By:Eastmount CSDN 2021-07-03
import numpy as np
from sklearn.decomposition import PCA
X = np.array([[-1, -1], [-2, -1], [-3, -2], [1, 1], [2, 1], [3, 2]])
pca = PCA(n_components=2)
print(pca)
pca.fit(X)
print(pca.explained_variance_ratio_)
--------------------------------------------------------------------------------
/chapter02-dataming-based/chapter02-03-numpy.py:
--------------------------------------------------------------------------------
#coding=utf-8
#By:Eastmount CSDN 2021-06-28

#define a two-dimensional array
import numpy as np
c = np.array([[1, 2, 3, 4],[4, 5, 6, 7], [7, 8, 9, 10]])

print('形状:', c.shape)
print('获取值:', c[1][0])
print('获取某行:')
print(c[1][:])
print('获取某行并切片:')
print(c[0][:-1])
print(c[0][-1:])
--------------------------------------------------------------------------------
/chapter05-cluster/chapter05-cluster-02.py:
--------------------------------------------------------------------------------
# -*- coding: utf-8 -*-
# By:Eastmount CSDN 2021-07-03
from sklearn.cluster import KMeans
X = [[1,1],[2,1],[1,3],[6,6],[8,5],[7,8]]
y = [0,0,0,1,1,1]
clf = KMeans(n_clusters=2)
clf.fit(X,y)
print(clf)
print(clf.labels_)

import matplotlib.pyplot as plt
a = [n[0] for n in X]
b = [n[1] for n in X]
plt.scatter(a, b, c=clf.labels_)
plt.show()
--------------------------------------------------------------------------------
/chapter02-dataming-based/chapter02-05-pandas.py:
--------------------------------------------------------------------------------
#coding=utf-8
#By:Eastmount CSDN 2021-06-28
import pandas as pd

#read the data; header=None because the Excel sheet has no header row
data = pd.read_excel("data.xls", header=None)
print(data)

#number of rows
print('行数', len(data))

#total consumption of users A/B/C
print(data.sum())

#arithmetic mean of consumption of users A/B/C
mm = data.mean()   #mean(), matching the comment; the original repeated data.sum() here
print(mm)

#preview the first 5 rows
print('预览前5行数据')
print(data.head())
--------------------------------------------------------------------------------
/chapter02-dataming-based/chapter02-04-numpy.py:
--------------------------------------------------------------------------------
#coding=utf-8
#By:Eastmount CSDN 2021-06-28
import numpy as np

#call the sin function and compute 2 to the power 3
print(np.sin(np.pi/6))
print(type(np.sin(0.5)))
f = np.power(2, 3)
print(f)

#define a range
print(np.arange(0,4))
print(type(np.arange(0,4)))

#sum, mean and standard deviation
print(np.sum([1, 2, 3, 4]))
print(np.mean([4, 5, 6, 7]))
print(np.std([1, 2, 3, 2, 1, 3, 2, 0]))
--------------------------------------------------------------------------------
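data.csv above (chapter02) contains empty cells, which pandas parses as NaN. A minimal sketch, added here and not part of the original scripts, of two common ways to handle them; both `dropna` and `fillna` are standard pandas methods.

import pandas as pd

# data.csv has no header row; empty fields become NaN after parsing
data = pd.read_csv("data.csv", header=None)

print(data.dropna())             # option 1: drop rows containing any missing value
print(data.fillna(data.mean()))  # option 2: replace NaN with the column mean
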
/chapter05-cluster/chapter05-cluster-09.py:
--------------------------------------------------------------------------------
# -*- coding: utf-8 -*-
# By:Eastmount CSDN 2021-07-03
#load the dataset
#note: load_boston was removed in scikit-learn 1.2, so this script needs an older version
from sklearn.datasets import load_boston
d = load_boston()
x = d.data
y = d.target
print(x[:2])
print('形状:', x.shape)

#dimensionality reduction
import numpy as np
from sklearn.decomposition import PCA
pca = PCA(n_components=2)
newData = pca.fit_transform(x)
print('降维后数据:')
print(newData[:4])
print('形状:', newData.shape)
--------------------------------------------------------------------------------
/chapter09-TopicAnalysis/chapter09_TopicAnalysis_01.py:
--------------------------------------------------------------------------------
#coding=utf-8
#By:Eastmount CSDN
import jieba
import sys
import matplotlib.pyplot as plt
from wordcloud import WordCloud

text = open('test.txt').read()
print(type(text))
wordlist = jieba.cut(text, cut_all = True)
wl_space_split = " ".join(wordlist)
print(wl_space_split)
my_wordcloud = WordCloud().generate(wl_space_split)
plt.imshow(my_wordcloud)
plt.axis("off")
plt.show()
--------------------------------------------------------------------------------
/chapter05-cluster/chapter05-cluster-01.py:
--------------------------------------------------------------------------------
# -*- coding: utf-8 -*-
# By:Eastmount CSDN 2021-07-03
from sklearn.cluster import MiniBatchKMeans
X= [[1],[2],[3],[4],[3],[2]]
mbk = MiniBatchKMeans(init='k-means++', n_clusters=3, n_init=10)
clf = mbk.fit(X)
print(clf.labels_)
#Output: [0 2 1 1 1 2]

from sklearn.cluster import Birch
X = [[1],[2],[3],[4],[3],[2]]
clf = Birch(n_clusters=2)
clf.fit(X)
y_pred = clf.fit_predict(X)
print(clf)
print(y_pred)
#Output: [1 1 0 0 0 1]
--------------------------------------------------------------------------------
/chapter02-dataming-based/chapter02-06-pandas.py:
--------------------------------------------------------------------------------
#coding=utf-8
#By:Eastmount CSDN 2021-06-28
import pandas as pd

#read the data; header=None because the Excel sheet has no header row
data = pd.read_excel("data.xls", header=None)
print(data)

#number of rows
print('行数', len(data))

#total consumption of users A/B/C
print(data.sum())

#arithmetic mean of consumption of users A/B/C
mm = data.mean()   #mean(), matching the comment; the original repeated data.sum() here
print(mm)

#preview the first 5 rows
print('预览前5行数据')
print(data.head())

#basic descriptive statistics
print('输出数据基本统计量')
print(data.describe())
--------------------------------------------------------------------------------
/chapter06-classifier/chapter06-classifier-05-knn.py:
--------------------------------------------------------------------------------
# -*- coding: utf-8 -*-
# By:Eastmount CSDN 2021-07-06
import numpy as np
from sklearn.neighbors import KNeighborsClassifier

X = np.array([[-1,-1],[-2,-2],[1,2], [1,1],[-3,-4],[3,2]])
Y = [0,0,1,1,0,1]
x = [[4,5],[-4,-3],[2,6]]
knn = KNeighborsClassifier(n_neighbors=3, algorithm="ball_tree")
knn.fit(X,Y)
pre = knn.predict(x)
print(pre)

distances, indices = knn.kneighbors(X)
print(indices)
print(distances)
--------------------------------------------------------------------------------
/chapter08-textcluster/chapter08_textcluster_02.py:
--------------------------------------------------------------------------------
#coding=utf-8
#By:Eastmount CSDN
import os
import codecs
import jieba
import jieba.analyse

source = open("test.txt", 'r')
line = source.readline().rstrip('\n')
content = []
while line!="":
    seglist = jieba.cut(line,cut_all=False)   #accurate mode
    output = ' '.join(list(seglist))          #join the tokens with spaces
    print(output)
    content.append(output)
    line = source.readline().rstrip('\n')
else:
    source.close()
--------------------------------------------------------------------------------
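For the PCA step in chapter05-cluster-09.py above, a hedged sketch of choosing the number of components from the cumulative explained variance; `explained_variance_ratio_` is the same attribute already printed in chapter05-cluster-08.py, and the 95% threshold is an illustrative assumption.

import numpy as np
from sklearn.decomposition import PCA

# toy data reused from chapter05-cluster-08.py
X = np.array([[-1, -1], [-2, -1], [-3, -2], [1, 1], [2, 1], [3, 2]])

pca = PCA().fit(X)                      # keep all components
ratios = pca.explained_variance_ratio_
print(np.cumsum(ratios))                # cumulative variance explained

# smallest k whose cumulative ratio exceeds, say, 95%
k = int(np.searchsorted(np.cumsum(ratios), 0.95)) + 1
print('suggested n_components:', k)
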
/chapter02-dataming-based/chapter02-02-numpy.py:
--------------------------------------------------------------------------------
#coding=utf-8
#By:Eastmount CSDN 2021-06-28

#import the package and alias it as np
import numpy as np

#define a one-dimensional array
a = np.array([2, 0, 1, 5, 8, 3])
print('原始数据:', a)

#print the minimum, maximum and shape
print('最小值:', a.min())
print('最大值:', a.max())
print('形状', a.shape)

#slicing
print('切片操作:')
print(a[:-2])
print(a[-2:])
print(a[:1])

#sorting
print(type(a))
a.sort()
print('排序后:', a)
#
# 排序后: [0 1 2 3 5 8]
--------------------------------------------------------------------------------
/chapter05-cluster/chapter05-cluster-12.py:
--------------------------------------------------------------------------------
# -*- coding: utf-8 -*-
# By:Eastmount CSDN 2021-07-03
import cv2
import numpy as np
import matplotlib.pyplot as plt

#read the original image
img = cv2.imread('scenery.png')

spatialRad = 100   #spatial window radius
colorRad = 100     #color window radius
maxPyrLevel = 2    #number of pyramid levels

#mean-shift segmentation of the image
dst = cv2.pyrMeanShiftFiltering( img, spatialRad, colorRad, maxPyrLevel)

#show the images
cv2.imshow('src', img)
cv2.imshow('dst', dst)
cv2.waitKey()
cv2.destroyAllWindows()
--------------------------------------------------------------------------------
/chapter04-regression/chapter04-regression-09.py:
--------------------------------------------------------------------------------
# -*- coding: utf-8 -*-
# By:Eastmount CSDN 2021-07-03
import matplotlib.pyplot as plt
import numpy as np

def Sigmoid(x):
    return 1.0 / (1.0 + np.exp(-x))

x= np.arange(-10, 10, 0.1)
h = Sigmoid(x)   #the sigmoid function
plt.plot(x, h)
plt.axvline(0.0, color='k')   #vertical line at x=0
plt.axhspan(0.0, 1.0, facecolor='1.0', alpha=1.0, ls='dotted')
plt.axhline(y=0.5, ls='dotted', color='k')
plt.yticks([0.0, 0.5, 1.0])   #y-axis ticks
plt.ylim(-0.1, 1.1)           #y-axis range
plt.show()
--------------------------------------------------------------------------------
/chapter04-regression/chapter04-regression-03.py:
--------------------------------------------------------------------------------
# -*- coding: utf-8 -*-
# By:Eastmount CSDN 2021-07-03
from sklearn import datasets
diabetes = datasets.load_diabetes()   #load the dataset
print(diabetes.data)                  #features
print(diabetes.target)                #labels
print('总行数: ', len(diabetes.data), len(diabetes.target))
print('特征数: ', len(diabetes.data[0]))   #dimensionality of each row
print('数据类型: ', diabetes.data.shape)
print(type(diabetes.data), type(diabetes.target))
--------------------------------------------------------------------------------
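The sigmoid plotted in chapter04-regression-09.py is exactly what LogisticRegression applies to its linear score. A short added sketch (not from the book) verifying that, for a binary problem, `predict_proba` equals the sigmoid of `decision_function`.

import numpy as np
from sklearn.linear_model import LogisticRegression

X = np.array([[1.0], [2.0], [3.0], [6.0], [7.0], [8.0]])
y = np.array([0, 0, 0, 1, 1, 1])
clf = LogisticRegression().fit(X, y)

z = clf.decision_function(X)    # linear score w*x + b
p = 1.0 / (1.0 + np.exp(-z))    # sigmoid, as in chapter04-regression-09.py
print(np.allclose(p, clf.predict_proba(X)[:, 1]))   # True
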
/chapter02-dataming-based/chapter02-07-pandas.py:
--------------------------------------------------------------------------------
#coding=utf-8
#By:Eastmount CSDN 2021-06-28
from pandas import Series, DataFrame

a = Series([4, 7, -5, 3])
print('创建Series:')
print(a)

b = Series([4, 7, -5, 3], index=['d', 'b', 'a', 'c'])
print('创建带有索引的Series:')
print(b)

sdata = {'Ohio': 35000, 'Texas': 71000, 'Oregon': 16000, 'Utah': 5000}
c = Series(sdata)
print('通过传递字典创建Series:')
print(c)

states = ['California', 'Ohio', 'Oregon', 'Texas']
d = Series(sdata, index=states)
print('California没有字典为空:')
print(d)
--------------------------------------------------------------------------------
/chapter08-textcluster/chapter08_textcluster_05.py:
--------------------------------------------------------------------------------
#coding:utf-8
#By:Eastmount CSDN
from sklearn.feature_extraction.text import CountVectorizer

#read the corpus: one line is one document
corpus = []
for line in open('result.txt', 'r', encoding="utf-8").readlines():
    corpus.append(line.strip())

#convert the words in the text into a term-frequency matrix
vectorizer = CountVectorizer()

#count the occurrences of each word
X = vectorizer.fit_transform(corpus)

#get all keywords in the bag of words
word = vectorizer.get_feature_names()   #renamed to get_feature_names_out() in scikit-learn >= 1.2
for n in range(len(word)):
    print(word[n],end=" ")
print('')

#inspect the term-frequency result
print(X.toarray())
--------------------------------------------------------------------------------
/chapter08-textcluster/chapter08_textcluster_01.py:
--------------------------------------------------------------------------------
#coding=utf-8
#By:Eastmount CSDN
import jieba

text = "小杨毕业于北京理工大学,从事Python人工智能相关工作。"

#full mode
data = jieba.cut(text,cut_all=True)
print(type(data))
print(u"[全模式]: ", "/".join(data))

#accurate mode
data = jieba.cut(text,cut_all=False)
print(u"[精确模式]: ", "/".join(data))

#the default is accurate mode
data = jieba.cut(text)
print(u"[默认模式]: ", "/".join(data))

#search engine mode
data = jieba.cut_for_search(text)
print(u"[搜索引擎模式]: ", "/".join(data))

#return a list
seg_list = jieba.lcut(text, cut_all=False)
print("[返回列表]: {0}".format(seg_list))
--------------------------------------------------------------------------------
/chapter09-TopicAnalysis/chapter09_TopicAnalysis_04.py:
--------------------------------------------------------------------------------
#coding=utf-8
#By:Eastmount CSDN
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.feature_extraction.text import TfidfTransformer

#read the corpus
corpus = []
for line in open('test.txt', 'r').readlines():
    corpus.append(line.strip())

#convert the words in the text into a term-frequency matrix
vectorizer = CountVectorizer()

#count the occurrences of each word
X = vectorizer.fit_transform(corpus)

#get all keywords in the bag of words
word = vectorizer.get_feature_names()

print('特征个数:', len(word))
for n in range(len(word)):
    print(word[n],end=" ")
print('')

#inspect the term-frequency result
print(X.toarray())
--------------------------------------------------------------------------------
/chapter09-TopicAnalysis/chapter09_TopicAnalysis_02.py:
--------------------------------------------------------------------------------
#coding=utf-8
#By:Eastmount CSDN
import jieba

#full mode
text = "我来到北京清华大学"
seg_list = jieba.cut(text, cut_all=True)
print("[全模式]: ", "/ ".join(seg_list))
#[全模式]: 我 / 来到 / 北京 / 清华 / 清华大学 / 华大 /大学

#accurate mode
seg_list = jieba.cut(text, cut_all=False)
print("[精确模式]: ", "/ ".join(seg_list))
#[精确模式]: 我 / 来到 / 北京 / 清华大学

#the default is accurate mode
seg_list = jieba.cut(text)
print("[默认模式]: ", "/ ".join(seg_list))
#[默认模式]: 我 / 来到 / 北京 / 清华大学

#search engine mode
seg_list = jieba.cut_for_search(text)
print("[搜索引擎模式]: ", "/ ".join(seg_list))
#[搜索引擎模式]: 我 / 来到 / 北京 / 清华 / 华大 / 大学 / 清华大学
--------------------------------------------------------------------------------
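The CountVectorizer + TfidfTransformer pipeline used throughout chapters 08 and 09 can be collapsed into a single step with scikit-learn's TfidfVectorizer. A brief equivalent sketch, added here, assuming the same result.txt produced by chapter08_textcluster_04.py.

from sklearn.feature_extraction.text import TfidfVectorizer

corpus = []
for line in open('result.txt', 'r', encoding='utf-8').readlines():
    corpus.append(line.strip())

vectorizer = TfidfVectorizer()            # = CountVectorizer + TfidfTransformer in one step
tfidf = vectorizer.fit_transform(corpus)
print(tfidf.shape)                        # documents x vocabulary
print(tfidf.toarray())                    # the same kind of weight matrix as before
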
/chapter02-dataming-based/chapter02-08-matplotlib.py:
--------------------------------------------------------------------------------
#coding=utf-8
#By:Eastmount CSDN 2021-06-28
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

data = pd.read_csv("data.csv", header=None)
print(data)
mm = data.sum()   #column sums
print(mm[1:])     #the first column is the row index; keep the three value columns

ind = np.arange(3)   #3 users: 0 1 2
width = 0.35         #bar width
x = [u'用户A', u'用户B', u'用户C']
plt.rc('font', family='SimHei', size=13)   #font that can display the Chinese labels

#plot
plt.bar(ind, mm[1:], width, color='r', label='sum num')
plt.xlabel(u"用户")
plt.ylabel(u"消费数据")
plt.title(u"用户消费数据对比柱状图")
plt.legend()
#x tick labels, rotated 40 degrees
plt.xticks(ind+width/2, x, rotation=40)
plt.show()
--------------------------------------------------------------------------------
/chapter05-cluster/chapter05-cluster-10.py:
--------------------------------------------------------------------------------
# -*- coding: utf-8 -*-
# By:Eastmount CSDN 2021-07-03
#Step 1: load the data
import pandas as pd
glass = pd.read_csv("glass.csv")
print(glass[:4])

#Step 2: clustering
from sklearn.cluster import Birch
clf = Birch(n_clusters=3)
clf.fit(glass)
pre = clf.predict(glass)
print(pre)

#Step 3: dimensionality reduction
from sklearn.decomposition import PCA
pca = PCA(n_components=2)
newData = pca.fit_transform(glass)
print(newData[:4])
x1 = [n[0] for n in newData]
x2 = [n[1] for n in newData]

#Step 4: plotting
import matplotlib.pyplot as plt
plt.xlabel("x feature")
plt.ylabel("y feature")
plt.scatter(x1, x2, c=pre, marker='x')
plt.show()
--------------------------------------------------------------------------------
/chapter04-regression/chapter04-regression-12.py:
--------------------------------------------------------------------------------
# -*- coding: utf-8 -*-
# By:Eastmount CSDN 2021-07-03

#Step 1: import the dataset
from sklearn.datasets import load_iris
hua = load_iris()

#take the first two feature columns (sepal length and width)
x = [n[0] for n in hua.data]
y = [n[1] for n in hua.data]
import numpy as np   #convert to arrays
x = np.array(x).reshape(len(x),1)
y = np.array(y).reshape(len(y),1)

#Step 2: linear regression analysis
from sklearn.linear_model import LinearRegression
clf = LinearRegression()
clf.fit(x,y)
pre = clf.predict(x)
print(pre)

#Step 3: plotting
import matplotlib.pyplot as plt
plt.scatter(x,y,s=100)
plt.plot(x,pre,"r-",linewidth=4)
for idx, m in enumerate(x):
    plt.plot([m,m],[y[idx],pre[idx]], 'g-')
plt.show()
--------------------------------------------------------------------------------
/chapter06-classifier/chapter06-classifier-06-knn.py:
--------------------------------------------------------------------------------
# -*- coding: utf-8 -*-
import os
import numpy as np
path = "wine/wine.txt"
data = np.loadtxt(path,dtype=float,delimiter=",")
print(data)

yy, x = np.split(data, (1,), axis=1)
print(yy.shape, x.shape)
y = []
for n in yy:
    y.append(int(n))

train_data = np.concatenate((x[0:40,:], x[60:100,:], x[140:160,:]), axis = 0)   #training set
train_target = np.concatenate((y[0:40], y[60:100], y[140:160]), axis = 0)       #training labels
test_data = np.concatenate((x[40:60, :], x[100:140, :], x[160:,:]), axis = 0)   #test set
test_target = np.concatenate((y[40:60], y[100:140], y[160:]), axis = 0)         #test labels

print(train_data.shape, train_target.shape)
print(test_data.shape, test_target.shape)
--------------------------------------------------------------------------------
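chapter06-classifier-06-knn.py stops after splitting the wine data. A hedged continuation sketch, added here, that actually fits a KNN model on that split and scores it, using only calls already seen in this repository (KNeighborsClassifier, sklearn.metrics).

import numpy as np
from sklearn.neighbors import KNeighborsClassifier
from sklearn import metrics

# rebuild the same split as chapter06-classifier-06-knn.py
data = np.loadtxt("wine/wine.txt", dtype=float, delimiter=",")
yy, x = np.split(data, (1,), axis=1)
y = yy.ravel().astype(int)
train_data = np.concatenate((x[0:40], x[60:100], x[140:160]), axis=0)
train_target = np.concatenate((y[0:40], y[60:100], y[140:160]), axis=0)
test_data = np.concatenate((x[40:60], x[100:140], x[160:]), axis=0)
test_target = np.concatenate((y[40:60], y[100:140], y[160:]), axis=0)

knn = KNeighborsClassifier(n_neighbors=3)
knn.fit(train_data, train_target)
pre = knn.predict(test_data)
print(metrics.accuracy_score(test_target, pre))          # overall accuracy
print(metrics.classification_report(test_target, pre))   # precision/recall/F1 per class
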
/chapter04-regression/chapter04-regression-11.py:
--------------------------------------------------------------------------------
# -*- coding: utf-8 -*-
# By:Eastmount CSDN 2021-07-03
import matplotlib.pyplot as plt
import numpy as np
from sklearn.datasets import load_iris   #import the iris dataset

#load the dataset
iris = load_iris()
print(iris.data)     #features
print(iris.target)   #true labels

#take two feature columns
DD = iris.data
X = [x[0] for x in DD]
print(X)
Y = [x[1] for x in DD]
print(Y)

#plt.scatter(X, Y, c=iris.target, marker='x')
plt.scatter(X[:50], Y[:50], color='red', marker='o', label='setosa')              #first 50 samples
plt.scatter(X[50:100], Y[50:100], color='blue', marker='x', label='versicolor')   #middle 50
plt.scatter(X[100:], Y[100:],color='green', marker='+', label='Virginica')        #last 50 samples
plt.legend(loc=2)   #upper left corner
plt.show()
--------------------------------------------------------------------------------
/chapter06-classifier/chapter06-classifier-02-dtc.py:
--------------------------------------------------------------------------------
# -*- coding: utf-8 -*-
# By:Eastmount CSDN 2021-07-06

#import the iris dataset
from sklearn.datasets import load_iris
iris = load_iris()
print(iris.data)     #features
print(iris.target)   #true labels
print(len(iris.target))
print(iris.data.shape)   #150 samples with 4 features each

#import the decision tree classifier
from sklearn.tree import DecisionTreeClassifier
clf = DecisionTreeClassifier()
clf.fit(iris.data, iris.target)   #training
print(clf)
predicted = clf.predict(iris.data)   #prediction

#take two feature columns
X = iris.data
L1 = [x[0] for x in X]
L2 = [x[1] for x in X]

#plot
import numpy as np
import matplotlib.pyplot as plt
plt.scatter(L1, L2, c=predicted, marker='x')   #cmap=plt.cm.Paired
plt.title("DTC")
plt.show()
--------------------------------------------------------------------------------
/chapter05-cluster/chapter05-cluster-13.py:
--------------------------------------------------------------------------------
# -*- coding: utf-8 -*-
# By:Eastmount CSDN 2021-07-03
import cv2
import numpy as np
import matplotlib.pyplot as plt

#read the original image
img = cv2.imread('scenery.png')

#get the number of rows and columns
rows, cols = img.shape[:2]

#the mask must be 2 larger in each dimension and a single-channel uint8 array
mask = np.zeros([rows+2, cols+2], np.uint8)

spatialRad = 100   #spatial window radius
colorRad = 100     #color window radius
maxPyrLevel = 2    #number of pyramid levels

#mean-shift segmentation of the image
dst = cv2.pyrMeanShiftFiltering( img, spatialRad, colorRad, maxPyrLevel)

#flood-fill the segmented image
cv2.floodFill(dst, mask, (30, 30), (0, 255, 255),
              (100, 100, 100), (50, 50, 50),
              cv2.FLOODFILL_FIXED_RANGE)

#show the images
cv2.imshow('src', img)
cv2.imshow('dst', dst)
cv2.waitKey()
cv2.destroyAllWindows()
--------------------------------------------------------------------------------
/chapter09-TopicAnalysis/chapter09_TopicAnalysis_06.py:
--------------------------------------------------------------------------------
#coding=utf-8
#By:Eastmount CSDN
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.feature_extraction.text import TfidfTransformer
import lda
import numpy as np

#build the term-frequency matrix
corpus = []
for line in open('test.txt', 'r').readlines():
    corpus.append(line.strip())
vectorizer = CountVectorizer()
X = vectorizer.fit_transform(corpus)
word = vectorizer.get_feature_names()

#fit the LDA model
model = lda.LDA(n_topics=3, n_iter=500, random_state=1)
model.fit(X)

#document-topic distribution
doc_topic = model.doc_topic_
print("shape: {}".format(doc_topic.shape))
for n in range(9):
    topic_most_pr = doc_topic[n].argmax()
    print(u"文档: {} 主题: {}".format(n,topic_most_pr))
--------------------------------------------------------------------------------
/chapter08-textcluster/chapter08_textcluster_03.py:
--------------------------------------------------------------------------------
#coding=utf-8
#By:Eastmount CSDN
import os
import codecs
import jieba
import jieba.analyse

#stop-word table
stopwords = {}.fromkeys(['的', '或', '等', '是', '有', '之', '与',
                         '和', '也', '被', '吗', '于', '中', '最'])

source = open("test.txt", 'r')
line = source.readline().rstrip('\n')
content = []   #full text

while line!="":
    seglist = jieba.cut(line,cut_all=False)   #accurate mode
    final = []   #holds the tokens left after stop-word removal
    for seg in seglist:
        if seg not in stopwords:
            final.append(seg)
    output = ' '.join(list(final))   #join with spaces
    print(output)
    content.append(output)
    line = source.readline().rstrip('\n')
else:
    source.close()
--------------------------------------------------------------------------------
/chapter08-textcluster/chapter08_textcluster_06.py:
--------------------------------------------------------------------------------
#coding:utf-8
#By:Eastmount CSDN
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.feature_extraction.text import TfidfTransformer

#read the corpus
corpus = []
for line in open('result.txt', 'r', encoding="utf-8").readlines():
    corpus.append(line.strip())
vectorizer = CountVectorizer()          #convert the words into a term-frequency matrix
X = vectorizer.fit_transform(corpus)    #count the occurrences of each word
word = vectorizer.get_feature_names()   #get all keywords in the bag of words
for n in range(len(word)):
    print(word[n],end=" ")
print('')
print(X.toarray())   #inspect the term-frequency result

#compute TF-IDF values
transformer = TfidfTransformer()
print(transformer)
tfidf = transformer.fit_transform(X)   #turn the term-frequency matrix X into TF-IDF values
#inspect the data structure
print(tfidf.toarray())   #tfidf[i][j] is the tf-idf weight of word j in document i
--------------------------------------------------------------------------------
/chapter09-TopicAnalysis/chapter09_TopicAnalysis_05.py:
--------------------------------------------------------------------------------
#coding=utf-8
#By:Eastmount CSDN
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.feature_extraction.text import TfidfTransformer

#read the corpus
corpus = []
for line in open('test.txt', 'r').readlines():
    corpus.append(line.strip())

#convert the words in the text into a term-frequency matrix
vectorizer = CountVectorizer()
X = vectorizer.fit_transform(corpus)    #count the occurrences of each word
word = vectorizer.get_feature_names()   #get all keywords in the bag of words
print('特征个数:', len(word))
for n in range(len(word)):
    print(word[n],end=" ")
print('')
print(X.toarray())   #inspect the term-frequency result

#compute TF-IDF values
transformer = TfidfTransformer()
print(transformer)
tfidf = transformer.fit_transform(X)   #turn the term-frequency matrix X into TF-IDF values

#inspect the data structure and output the tf-idf weights
print(tfidf.toarray())
weight = tfidf.toarray()
--------------------------------------------------------------------------------
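Once the TF-IDF matrix exists (chapter08_textcluster_06.py above), document similarity is one matrix product away. A sketch, added here, using scikit-learn's cosine_similarity on the same result.txt corpus.

from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

corpus = [line.strip() for line in open('result.txt', 'r', encoding='utf-8')]
tfidf = TfidfVectorizer().fit_transform(corpus)

sim = cosine_similarity(tfidf)   # documents x documents similarity matrix
print(sim.round(2))              # ~1.0 on the diagonal; high off-diagonal values = similar topics
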
/chapter08-textcluster/chapter08_textcluster_04.py:
--------------------------------------------------------------------------------
# coding=utf-8
#By:Eastmount CSDN
import os
import codecs
import jieba
import jieba.analyse

#stop-word table
stopwords = {}.fromkeys(['的', '或', '等', '是', '有', '之', '与',
                         '和', '也', '被', '吗', '于', '中', '最',
                         '“', '”', '。', ',', '?', '、', ';'])

source = open("test.txt", 'r')
result = codecs.open("result.txt", 'w', 'utf-8')
line = source.readline().rstrip('\n')
content = []   #full text

while line!="":
    seglist = jieba.cut(line,cut_all=False)   #accurate mode
    final = []   #holds the tokens left after stop-word removal
    for seg in seglist:
        if seg not in stopwords:
            final.append(seg)
    output = ' '.join(list(final))   #join with spaces
    print(output)
    content.append(output)
    result.write(output + '\r\n')
    line = source.readline().rstrip('\n')
else:
    source.close()
    result.close()
--------------------------------------------------------------------------------
/chapter05-cluster/chapter05-cluster-14.py:
--------------------------------------------------------------------------------
# -*- coding: utf-8 -*-
# By:Eastmount CSDN 2021-07-03
import cv2
import numpy as np
import matplotlib.pyplot as plt

#read the original image as grayscale
img = cv2.imread('scenery.png', 0)
print(img.shape)

#get the image height and width
rows, cols = img.shape[:]

#flatten the 2D pixel grid into a 1D array
data = img.reshape((rows * cols, 1))
data = np.float32(data)

#termination criteria (type, max_iter, epsilon)
criteria = (cv2.TERM_CRITERIA_EPS +
            cv2.TERM_CRITERIA_MAX_ITER, 10, 1.0)

#initial-center flags
flags = cv2.KMEANS_RANDOM_CENTERS

#K-Means clustering into 4 clusters
compactness, labels, centers = cv2.kmeans(data, 4, None, criteria, 10, flags)

#build the final image
dst = labels.reshape((img.shape[0], img.shape[1]))

#font that can display the Chinese titles
plt.rcParams['font.sans-serif']=['SimHei']

#show the images
titles = [u'原始图像', u'聚类图像']
images = [img, dst]
for i in range(2):
    plt.subplot(1,2,i+1), plt.imshow(images[i], 'gray'),
    plt.title(titles[i])
    plt.xticks([]),plt.yticks([])
plt.show()
--------------------------------------------------------------------------------
/chapter04-regression/chapter04-regression-02.py:
--------------------------------------------------------------------------------
# -*- coding: utf-8 -*-
# By:Eastmount CSDN 2021-07-03
from sklearn import linear_model   #import the linear model module
import matplotlib.pyplot as plt
import numpy as np

#X is company cost, Y is company profit
X = [[400], [450], [486], [500], [510], [525], [540], [549], [558], [590], [610], [640], [680], [750], [900]]
Y = [[80], [89], [92], [102], [121], [160], [180], [189], [199], [203], [247], [250], [259], [289], [356]]
print('数据集X: ', X)
print('数据集Y: ', Y)

#regression training
clf = linear_model.LinearRegression()
clf.fit(X, Y)

#prediction
X2 = [[400], [750], [950]]
Y2 = clf.predict(X2)
print(Y2)
res = clf.predict(np.array([1200]).reshape(-1, 1))[0]
print('预测成本1200元的利润:$%.1f' % res)

#plot the linear regression
plt.plot(X, Y, 'ks')     #scatter plot of the training data
plt.plot(X2, Y2, 'g-')   #regression line over the prediction points
plt.show()

print('系数', clf.coef_)
print('截距', clf.intercept_)
print('评分函数', clf.score(X, Y))

'''
系数 [[ 0.62402912]]
截距 [-173.70433885]
评分函数 0.911831188777
'''
--------------------------------------------------------------------------------
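A quick arithmetic check of the fit reported in chapter04-regression-02.py: with coefficient ≈ 0.62402912 and intercept ≈ -173.70433885, the prediction for a cost of 1200 is 0.62402912 * 1200 - 173.70433885 ≈ 575.1, which is what clf.predict returns. The added sketch below verifies the equivalence programmatically.

import numpy as np
from sklearn import linear_model

X = [[400], [450], [486], [500], [510], [525], [540], [549], [558], [590], [610], [640], [680], [750], [900]]
Y = [[80], [89], [92], [102], [121], [160], [180], [189], [199], [203], [247], [250], [259], [289], [356]]

clf = linear_model.LinearRegression().fit(X, Y)
manual = clf.coef_[0][0] * 1200 + clf.intercept_[0]   # y = a*x + b by hand
model = clf.predict(np.array([[1200]]))[0][0]
print(manual, model, np.isclose(manual, model))       # identical values
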
/chapter04-regression/chapter04-regression-04.py:
--------------------------------------------------------------------------------
# -*- coding: utf-8 -*-
# By:Eastmount CSDN 2021-07-03
from sklearn import datasets
import matplotlib.pyplot as plt
from sklearn import linear_model
import numpy as np

#dataset split
diabetes = datasets.load_diabetes()                 #load the data
diabetes_x_temp = diabetes.data[:, np.newaxis, 2]   #take a single feature
diabetes_x_train = diabetes_x_temp[:-20]   #training samples
diabetes_x_test = diabetes_x_temp[-20:]    #test samples: last 20 rows
diabetes_y_train = diabetes.target[:-20]   #training labels
diabetes_y_test = diabetes.target[-20:]    #labels for comparing predictions

#regression training and prediction
clf = linear_model.LinearRegression()
clf.fit(diabetes_x_train, diabetes_y_train)   #fit on the training set
pre = clf.predict(diabetes_x_test)

#plot
plt.title(u'LinearRegression Diabetes')   #title
plt.xlabel(u'Attributes')                 #x-axis label
plt.ylabel(u'Measure of disease')         #y-axis label
plt.scatter(diabetes_x_test, diabetes_y_test, color = 'black')   #scatter plot
plt.plot(diabetes_x_test, pre, color='blue', linewidth = 2)      #fitted line
plt.show()
--------------------------------------------------------------------------------
/chapter05-cluster/chapter05-cluster-07.py:
--------------------------------------------------------------------------------
# -*- coding: utf-8 -*-
# By:Eastmount CSDN 2021-07-03
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.cluster import Birch

#load the data
glass=pd.read_csv("glass.csv")
X1 = glass.al
X2 = glass.ri
T = dict(zip(X1,X2))   #build a two-dimensional structure (note: duplicate al values collapse into one key)
X = list(map(lambda x,y: (x,y), T.keys(),T.values()))   #convert the dict into a list
y = glass.glass_type

#clustering
clf = Birch(n_clusters=3)
clf.fit(X, y)
y_pred = clf.predict(X)
print(y_pred)

#collect the points of each cluster separately
x1, y1 = [], []
x2, y2 = [], []
x3, y3 = [], []
i = 0
while i < len(X):
    if y_pred[i]==0:
        x1.append(X[i][0])
        y1.append(X[i][1])
    elif y_pred[i]==1:
        x2.append(X[i][0])
        y2.append(X[i][1])
    elif y_pred[i]==2:
        x3.append(X[i][0])
        y3.append(X[i][1])
    i = i + 1

#three colors: red, green, blue; markers: x = cross, o = circle, * = star
plot1, = plt.plot(x1, y1, 'or', marker="x")
plot2, = plt.plot(x2, y2, 'og', marker="o")
plot3, = plt.plot(x3, y3, 'ob', marker="*")
plt.xlabel('al')
plt.ylabel('ri')
plt.show()
--------------------------------------------------------------------------------
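One caveat in chapter05-cluster-07.py above: dict(zip(X1, X2)) silently drops rows whose al value repeats, because dictionary keys are unique. A short sketch of a duplicate-preserving alternative, assuming the same glass.csv columns.

import numpy as np
import pandas as pd

glass = pd.read_csv("glass.csv")
# column_stack keeps every row, including duplicate al values
X = np.column_stack((glass.al, glass.ri))
print(len(glass), len(X))   # equal; the dict-based version can be shorter
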
/chapter05-cluster/chapter05-cluster-03.py:
--------------------------------------------------------------------------------
# -*- coding: utf-8 -*-
# By:Eastmount CSDN 2021-07-03
from sklearn.cluster import KMeans

X = [[0.0888, 0.5885],
     [0.1399, 0.8291],
     [0.0747, 0.4974],
     [0.0983, 0.5772],
     [0.1276, 0.5703],
     [0.1671, 0.5835],
     [0.1906, 0.5276],
     [0.1061, 0.5523],
     [0.2446, 0.4007],
     [0.1670, 0.4770],
     [0.2485, 0.4313],
     [0.1227, 0.4909],
     [0.1240, 0.5668],
     [0.1461, 0.5113],
     [0.2315, 0.3788],
     [0.0494, 0.5590],
     [0.1107, 0.4799],
     [0.2521, 0.5735],
     [0.1007, 0.6318],
     [0.1067, 0.4326],
     [0.1956, 0.4280]
    ]
print(X)

#KMeans clustering
clf = KMeans(n_clusters=3)
y_pred = clf.fit_predict(X)
print(clf)
print(y_pred)

#visualization
import numpy as np
import matplotlib.pyplot as plt
x = [n[0] for n in X]
y = [n[1] for n in X]

plt.scatter(x, y, c=y_pred, marker='x')
plt.title("Kmeans-Basketball Data")
plt.xlabel("assists_per_minute")
plt.ylabel("points_per_minute")
plt.legend(["Rank"])
plt.show()
--------------------------------------------------------------------------------
/chapter04-regression/chapter04-regression-05.py:
--------------------------------------------------------------------------------
# -*- coding: utf-8 -*-
# By:Eastmount CSDN 2021-07-03
from sklearn import datasets
import numpy as np
from sklearn import linear_model
import matplotlib.pyplot as plt

#Step 1: dataset split
d = datasets.load_diabetes()   #data: 442 samples x 10 features
x = d.data
x_one = x[:,np.newaxis, 2]     #take one feature: the third column
y = d.target                   #ground-truth values
x_train = x_one[:-42]   #training X [ 0:400]
x_test = x_one[-42:]    #test X [401:442]
y_train = y[:-42]       #training Y [ 0:400]
y_test = y[-42:]        #test Y [401:442]

#Step 2: linear regression
clf = linear_model.LinearRegression()
print(clf)
clf.fit(x_train, y_train)
pre = clf.predict(x_test)
print('预测结果', pre)
print('真实结果', y_test)

#Step 3: evaluation
cost = np.mean((y_test - pre)**2)   #mean squared error
print('平方和计算:', cost)
print('系数', clf.coef_)
print('截距', clf.intercept_)
print('方差', clf.score(x_test, y_test))

#Step 4: plotting
plt.plot(x_test, y_test, 'k.')   #scatter plot
plt.plot(x_test, pre, 'g-')      #fitted regression line
#draw the distance from each point to the line
for idx, m in enumerate(x_test):
    plt.plot([m, m],[y_test[idx], pre[idx]], 'r-')

plt.savefig('blog12-01.png', dpi=300)   #save the figure
plt.show()
--------------------------------------------------------------------------------
/chapter06-classifier/chapter06-classifier-04-dtc.py:
--------------------------------------------------------------------------------
# -*- coding: utf-8 -*-
# By:Eastmount CSDN 2021-07-06
import matplotlib.pyplot as plt
import numpy as np
from sklearn.datasets import load_iris
from sklearn.tree import DecisionTreeClassifier

#load the iris dataset
iris = load_iris()
X = iris.data[:, :2]   #take the first two feature columns
Y = iris.target
lr = DecisionTreeClassifier()
lr.fit(X,Y)

#meshgrid builds two grid matrices
h = .02
x_min, x_max = X[:, 0].min() - .5, X[:, 0].max() + .5
y_min, y_max = X[:, 1].min() - .5, X[:, 1].max() + .5
xx, yy = np.meshgrid(np.arange(x_min, x_max, h), np.arange(y_min, y_max, h))

#pcolormesh draws the grids xx, yy and the predictions Z onto the figure
Z = lr.predict(np.c_[xx.ravel(), yy.ravel()])
Z = Z.reshape(xx.shape)
plt.figure(1, figsize=(8,6))
plt.pcolormesh(xx, yy, Z, cmap=plt.cm.Paired)

#scatter plots
plt.scatter(X[:50,0], X[:50,1], color='red',marker='o', label='setosa')
plt.scatter(X[50:100,0], X[50:100,1], color='blue', marker='x', label='versicolor')
plt.scatter(X[100:,0], X[100:,1], color='green', marker='s', label='Virginica')
plt.xlabel('Sepal length')
plt.ylabel('Sepal width')
plt.xlim(xx.min(), xx.max())
plt.ylim(yy.min(), yy.max())
plt.xticks(())
plt.yticks(())
plt.legend(loc=2)
plt.show()
--------------------------------------------------------------------------------
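Besides drawing decision regions as chapter06-classifier-04-dtc.py does, the fitted tree itself can be printed; sklearn.tree.export_text (available since scikit-learn 0.21) renders the learned rules as readable if/else text. A short added sketch:

from sklearn.datasets import load_iris
from sklearn.tree import DecisionTreeClassifier, export_text

iris = load_iris()
X = iris.data[:, :2]                       # same two features as above
clf = DecisionTreeClassifier(max_depth=3)  # shallow tree so the printout stays readable
clf.fit(X, iris.target)

# human-readable rules of the fitted tree
print(export_text(clf, feature_names=['sepal length', 'sepal width']))
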
/chapter09-TopicAnalysis/chapter09_TopicAnalysis_08.py:
--------------------------------------------------------------------------------
#coding=utf-8
#By:Eastmount CSDN
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.feature_extraction.text import TfidfTransformer
import lda
import numpy as np

#build the term-frequency matrix
corpus = []
for line in open('test.txt', 'r').readlines():
    corpus.append(line.strip())
vectorizer = CountVectorizer()
X = vectorizer.fit_transform(corpus)

#fit the LDA model
model = lda.LDA(n_topics=3, n_iter=500, random_state=1)
model.fit_transform(X)

#document-topic distribution
doc_topic = model.doc_topic_
print("shape: {}".format(doc_topic.shape))
for n in range(9):
    topic_most_pr = doc_topic[n].argmax()
    print("文档: {} 主题: {}".format(n+1,topic_most_pr))

#visualization
import matplotlib.pyplot as plt
f, ax= plt.subplots(9, 1, figsize=(10, 10), sharex=True)
for i, k in enumerate([0,1,2,3,4,5,6,7,8]):
    ax[i].stem(doc_topic[k,:], linefmt='r-',
               markerfmt='ro', basefmt='w-')
    ax[i].set_xlim(-1, 3)    #three topics
    ax[i].set_ylim(0, 1.0)   #weights between 0 and 1
    ax[i].set_ylabel("y")
    ax[i].set_title("Document {}".format(k+1))
ax[4].set_xlabel("Topic")
plt.tight_layout()
plt.savefig("result.png")
plt.show()
--------------------------------------------------------------------------------
/chapter09-TopicAnalysis/chapter09_TopicAnalysis_09.py:
--------------------------------------------------------------------------------
#coding=utf-8
#By:Eastmount CSDN
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.feature_extraction.text import TfidfTransformer
import lda
import numpy as np

#build the term-frequency matrix
corpus = []
for line in open('test.txt', 'r').readlines():
    corpus.append(line.strip())
vectorizer = CountVectorizer()
X = vectorizer.fit_transform(corpus)

#fit the LDA model
model = lda.LDA(n_topics=3, n_iter=500, random_state=1)
model.fit_transform(X)

#document-topic distribution
doc_topic = model.doc_topic_
print("shape: {}".format(doc_topic.shape))
for n in range(9):
    topic_most_pr = doc_topic[n].argmax()
    print(u"文档: {} 主题: {}".format(n+1,topic_most_pr))
topic_word = model.topic_word_

#visualization
import matplotlib.pyplot as plt
f, ax= plt.subplots(3, 1, figsize=(8,6), sharex=True)   #three topics
for i, k in enumerate([0, 1, 2]):
    ax[i].stem(topic_word[k,:], linefmt='b-',
               markerfmt='bo', basefmt='w-')
    ax[i].set_xlim(-1, 43)   #43 words
    ax[i].set_ylim(0, 0.5)   #word frequencies
    ax[i].set_ylabel("y")
    ax[i].set_title("Topic {}".format(k))
ax[1].set_xlabel("word")
plt.tight_layout()
plt.savefig("result2.png")
plt.show()
--------------------------------------------------------------------------------
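The lda library used above also exposes a loglikelihood() method on the fitted model. A hedged sketch of comparing topic counts by that score (higher is better), assuming the same test.txt corpus; treat the method name as the lda package's API, not scikit-learn's.

from sklearn.feature_extraction.text import CountVectorizer
import lda

corpus = [line.strip() for line in open('test.txt', 'r')]
X = CountVectorizer().fit_transform(corpus)

# try a few topic counts and report the final log likelihood of each model
for k in [2, 3, 4]:
    model = lda.LDA(n_topics=k, n_iter=500, random_state=1)
    model.fit(X)
    print('n_topics={} loglikelihood={:.1f}'.format(k, model.loglikelihood()))
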
/chapter04-regression/chapter04-regression-13.py:
--------------------------------------------------------------------------------
# -*- coding: utf-8 -*-
# By:Eastmount CSDN 2021-07-03
import matplotlib.pyplot as plt
import numpy as np
from sklearn.datasets import load_iris
from sklearn.linear_model import LogisticRegression

#load the dataset
iris = load_iris()
X = iris.data[:, :2]   #take the first two feature columns
Y = iris.target

#logistic regression model
lr = LogisticRegression(C=1e5)
lr.fit(X,Y)

#meshgrid builds two grid matrices
h = .02
x_min, x_max = X[:, 0].min() - .5, X[:, 0].max() + .5
y_min, y_max = X[:, 1].min() - .5, X[:, 1].max() + .5
xx, yy = np.meshgrid(np.arange(x_min, x_max, h), np.arange(y_min, y_max, h))

#pcolormesh draws the grids xx, yy and the predictions Z onto the figure
Z = lr.predict(np.c_[xx.ravel(), yy.ravel()])
Z = Z.reshape(xx.shape)
plt.figure(1, figsize=(8,6))
plt.pcolormesh(xx, yy, Z, cmap=plt.cm.Paired)

#scatter plots
plt.scatter(X[:50,0], X[:50,1], color='red',marker='o', label='setosa')
plt.scatter(X[50:100,0], X[50:100,1], color='blue', marker='x', label='versicolor')
plt.scatter(X[100:,0], X[100:,1], color='green', marker='s', label='Virginica')

plt.xlabel('Sepal length')
plt.ylabel('Sepal width')
plt.xlim(xx.min(), xx.max())
plt.ylim(yy.min(), yy.max())
plt.xticks(())
plt.yticks(())
plt.legend(loc=2)
plt.show()
--------------------------------------------------------------------------------
/chapter06-classifier/wine/wine Data Set Information.txt:
--------------------------------------------------------------------------------
Data Set Information

These data are the results of a chemical analysis of wines grown in the same region in Italy but derived from three different cultivars. The analysis determined the quantities of 13 constituents found in each of the three types of wines.

I think that the initial data set had around 30 variables, but for some reason I only have the 13 dimensional version. I had a list of what the 30 or so variables were, but a) I lost it, and b) I would not know which 13 variables are included in the set.

The attributes are (donated by Riccardo Leardi, riclea '@' anchem.unige.it)
1) Alcohol
2) Malic acid
3) Ash
4) Alcalinity of ash
5) Magnesium
6) Total phenols
7) Flavanoids
8) Nonflavanoid phenols
9) Proanthocyanins
10) Color intensity
11) Hue
12) OD280/OD315 of diluted wines
13) Proline

In a classification context, this is a well-posed problem with well-behaved class structures. A good data set for first testing of a new classifier, but not very challenging.


Attribute Information

All attributes are continuous.

No statistics available, but it is suggested to standardise the variables for certain uses (e.g. for use with classifiers which are NOT scale invariant).

NOTE: the 1st attribute is the class identifier (1-3).
--------------------------------------------------------------------------------
/chapter09-TopicAnalysis/chapter09_TopicAnalysis_07.py:
--------------------------------------------------------------------------------
#coding=utf-8
#By:Eastmount CSDN
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.feature_extraction.text import TfidfTransformer
import lda
import numpy as np

#build the term-frequency matrix
corpus = []
for line in open('test.txt', 'r').readlines():
    corpus.append(line.strip())
vectorizer = CountVectorizer()
X = vectorizer.fit_transform(corpus)
word = vectorizer.get_feature_names()

#fit the LDA model
model = lda.LDA(n_topics=3, n_iter=500, random_state=1)
model.fit(X)

#document-topic distribution
doc_topic = model.doc_topic_
print("shape: {}".format(doc_topic.shape))
for n in range(9):
    topic_most_pr = doc_topic[n].argmax()
    print(u"文档: {} 主题: {}".format(n,topic_most_pr))

#topic-word distribution
word = vectorizer.get_feature_names()
topic_word = model.topic_word_
for w in word:
    print(w,end=" ")
print('')

n = 5
for i, topic_dist in enumerate(topic_word):
    topic_words = np.array(word)[np.argsort(topic_dist)][:-(n+1):-1]
    print(u'*Topic {}\n- {}'.format(i, ' '.join(topic_words)))

#topic-word distribution
print("shape: {}".format(topic_word.shape))
print(topic_word[:, :3])
for n in range(3):
    sum_pr = sum(topic_word[n,:])
    print("topic: {} sum: {}".format(n, sum_pr))
--------------------------------------------------------------------------------
/chapter05-cluster/chapter05-cluster-11.py:
--------------------------------------------------------------------------------
# -*- coding: utf-8 -*-
# By:Eastmount CSDN 2021-07-03
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.decomposition import PCA
from sklearn.cluster import Birch

#load the dataset and reduce it to two dimensions
glass = pd.read_csv("glass.csv")
pca = PCA(n_components=2)
newData = pca.fit_transform(glass)
print(newData[:4])
L1 = [n[0] for n in newData]
L2 = [n[1] for n in newData]
plt.rc('font', family='SimHei', size=8)      #font for the Chinese titles
plt.rcParams['axes.unicode_minus'] = False   #render the minus sign correctly

#clustering with n_clusters=2
clf = Birch(n_clusters=2)
clf.fit(glass)
pre = clf.predict(glass)
p1 = plt.subplot(221)
plt.title(u"Birch聚类 n=2")
plt.scatter(L1,L2,c=pre,marker="s")
plt.sca(p1)

#clustering with n_clusters=3
clf = Birch(n_clusters=3)
clf.fit(glass)
pre = clf.predict(glass)
p2 = plt.subplot(222)
plt.title(u"Birch聚类 n=3")
plt.scatter(L1,L2,c=pre,marker="o")
plt.sca(p2)

#clustering with n_clusters=4
clf = Birch(n_clusters=4)
clf.fit(glass)
pre = clf.predict(glass)
p3 = plt.subplot(223)
plt.title(u"Birch聚类 n=4")
plt.scatter(L1,L2,c=pre,marker="o")
plt.sca(p3)

#clustering with n_clusters=5
clf = Birch(n_clusters=5)
clf.fit(glass)
pre = clf.predict(glass)
p4 = plt.subplot(224)
plt.title(u"Birch聚类 n=5")
plt.scatter(L1,L2,c=pre,marker="s")
plt.sca(p4)
plt.savefig('18.20.png', dpi=300)
plt.show()
--------------------------------------------------------------------------------
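chapter05-cluster-11.py compares n_clusters = 2..5 visually. A numeric alternative (an addition, not from the book) is the silhouette coefficient from sklearn.metrics, where higher values indicate better-separated clusters.

import pandas as pd
from sklearn.cluster import Birch
from sklearn.metrics import silhouette_score

glass = pd.read_csv("glass.csv")
for n in [2, 3, 4, 5]:
    pre = Birch(n_clusters=n).fit_predict(glass)
    # silhouette ranges from -1 to 1; larger is better
    print('n_clusters={} silhouette={:.3f}'.format(n, silhouette_score(glass, pre)))
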
-------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # By:Eastmount CSDN 2021-07-03 3 | from sklearn.linear_model import LinearRegression 4 | from sklearn.preprocessing import PolynomialFeatures 5 | import matplotlib.pyplot as plt 6 | import numpy as np 7 | 8 | #X表示企业成本 Y表示企业利润 9 | X = [[400], [450], [486], [500], [510], [525], [540], [549], [558], [590], [610], [640], [680], [750], [900]] 10 | Y = [[80], [89], [92], [102], [121], [160], [180], [189], [199], [203], [247], [250], [259], [289], [356]] 11 | print('数据集X: ', X) 12 | print('数据集Y: ', Y) 13 | 14 | #第一步 线性回归分析 15 | clf = LinearRegression() 16 | clf.fit(X, Y) 17 | X2 = [[400], [750], [950]] 18 | Y2 = clf.predict(X2) 19 | print(Y2) 20 | res = clf.predict(np.array([1200]).reshape(-1, 1))[0] 21 | print('预测成本1200元的利润:$%.1f' % res) 22 | plt.plot(X, Y, 'ks') #绘制训练数据集散点图 23 | plt.plot(X2, Y2, 'g-') #绘制预测数据集直线 24 | 25 | #第二步 多项式回归分析 26 | xx = np.linspace(350,950,100) #350到950等差数列 27 | quadratic_featurizer = PolynomialFeatures(degree = 2) #实例化一个二次多项式 28 | x_train_quadratic = quadratic_featurizer.fit_transform(X) #用二次多项式x做变换 29 | X_test_quadratic = quadratic_featurizer.transform(X2) 30 | regressor_quadratic = LinearRegression() 31 | regressor_quadratic.fit(x_train_quadratic, Y) 32 | 33 | #把训练好X值的多项式特征实例应用到一系列点上,形成矩阵 34 | xx_quadratic = quadratic_featurizer.transform(xx.reshape(xx.shape[0], 1)) 35 | plt.plot(xx, regressor_quadratic.predict(xx_quadratic), "r--", 36 | label="$y = ax^2 + bx + c$",linewidth=2) 37 | plt.legend() 38 | plt.show() 39 | -------------------------------------------------------------------------------- /chapter08-textcluster/chapter08_textcluster_07.py: -------------------------------------------------------------------------------- 1 | # coding:utf-8 2 | #By:Eastmount CSDN 3 | from sklearn.feature_extraction.text import CountVectorizer 4 | from sklearn.feature_extraction.text import TfidfTransformer 5 | 6 | #第一步 生成词频矩阵 7 | corpus = [] 8 | for line in open('result.txt', 'r', encoding="utf-8").readlines(): 9 | corpus.append(line.strip()) 10 | vectorizer = CountVectorizer() 11 | X = vectorizer.fit_transform(corpus) 12 | word = vectorizer.get_feature_names() 13 | for n in range(len(word)): 14 | print(word[n],end=" ") 15 | print('') 16 | print(X.toarray()) 17 | 18 | #第二步 计算TF-IDF值 19 | transformer = TfidfTransformer() 20 | print(transformer) 21 | tfidf = transformer.fit_transform(X) 22 | print(tfidf.toarray()) 23 | weight = tfidf.toarray() 24 | 25 | #第三步 KMeans聚类 26 | from sklearn.cluster import KMeans 27 | clf = KMeans(n_clusters=3) 28 | s = clf.fit(weight) 29 | y_pred = clf.fit_predict(weight) 30 | print(clf) 31 | print(clf.cluster_centers_) #类簇中心 32 | print(clf.inertia_) #距离:用来评估簇的个数是否合适 越小说明簇分的越好 33 | print(y_pred) #预测类标 34 | 35 | #第四步 降维处理 36 | from sklearn.decomposition import PCA 37 | pca = PCA(n_components=2) #降低成两维绘图 38 | newData = pca.fit_transform(weight) 39 | print(newData) 40 | x = [n[0] for n in newData] 41 | y = [n[1] for n in newData] 42 | 43 | #第五步 可视化 44 | import numpy as np 45 | import matplotlib.pyplot as plt 46 | plt.scatter(x, y, c=y_pred, s=100, marker='s') 47 | plt.title("Kmeans") 48 | plt.xlabel("x") 49 | plt.ylabel("y") 50 | plt.show() 51 | 52 | -------------------------------------------------------------------------------- /chapter06-classifier/chapter06-classifier-10-svm.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # By:Eastmount CSDN 2021-07-06 3 | import os 4 | 
import numpy as np 5 | from sklearn.svm import SVC 6 | from sklearn import metrics 7 | import matplotlib.pyplot as plt 8 | from matplotlib.colors import ListedColormap 9 | from sklearn.model_selection import train_test_split 10 | from sklearn.decomposition import PCA 11 | 12 | #第一步 加载数据集 13 | path = "wine/wine.txt" 14 | data = np.loadtxt(path,dtype=float,delimiter=",") 15 | print(data) 16 | 17 | #第二步 划分数据集 18 | yy, x = np.split(data, (1,), axis=1) #第一列类标yy,后面13列特征为x 19 | print(yy.shape, x.shape) 20 | y = [] 21 | for n in yy: 22 | y.append(int(n)) 23 | y = np.array(y, dtype = int) #list转换数组 24 | #划分数据集 测试集40% 25 | train_data, test_data, train_target, test_target = train_test_split(x, y, test_size=0.4, random_state=42) 26 | print(train_data.shape, train_target.shape) 27 | print(test_data.shape, test_target.shape) 28 | 29 | #第三步 SVC训练 30 | clf = SVC() 31 | clf.fit(train_data, train_target) 32 | result = clf.predict(test_data) 33 | print(result) 34 | print(test_target) 35 | 36 | #第四步 评价算法 37 | print(sum(result==test_target)) #预测结果与真实结果比对 38 | print(metrics.classification_report(test_target, result)) #准确率 召回率 F值 39 | 40 | #第五步 降维操作 41 | pca = PCA(n_components=2) 42 | newData = pca.fit_transform(test_data) 43 | 44 | #第六步 绘图可视化 45 | plt.figure() 46 | cmap_bold = ListedColormap(['#000000', '#00FF00', '#FFFFFF']) 47 | plt.scatter(newData[:,0], newData[:,1], c=test_target, cmap=cmap_bold, s=50) 48 | plt.show() 49 | -------------------------------------------------------------------------------- /chapter09-TopicAnalysis/chapter09_TopicAnalysis_03.py: -------------------------------------------------------------------------------- 1 | #coding=utf-8 2 | #By:Eastmount CSDN 3 | from os import path 4 | from imageio import imread #scipy.misc.imread was removed in SciPy 1.2+; imageio provides a compatible imread 5 | import jieba 6 | import sys 7 | import matplotlib.pyplot as plt 8 | from wordcloud import WordCloud, STOPWORDS, ImageColorGenerator 9 | 10 | # 打开本体TXT文件 11 | text = open('data-fenci.txt').read() 12 | 13 | # 结巴分词 cut_all=True 设置为全模式 14 | wordlist = jieba.cut(text) #cut_all = True 15 | 16 | # 使用空格连接 进行中文分词 17 | wl_space_split = " ".join(wordlist) 18 | print(wl_space_split) 19 | 20 | # 读取mask/color图片 21 | d = path.dirname(__file__) 22 | nana_coloring = imread(path.join(d, "pic.png")) 23 | 24 | # 对分词后的文本生成词云 25 | my_wordcloud = WordCloud( background_color = 'white', 26 | mask = nana_coloring, 27 | max_words = 2000, 28 | stopwords = STOPWORDS, 29 | max_font_size = 50, 30 | random_state = 30, 31 | ) 32 | 33 | # generate word cloud 34 | my_wordcloud.generate(wl_space_split) 35 | 36 | # create coloring from image 37 | image_colors = ImageColorGenerator(nana_coloring) 38 | 39 | # recolor wordcloud and show 40 | my_wordcloud.recolor(color_func=image_colors) 41 | 42 | plt.imshow(my_wordcloud) # 显示词云图 43 | plt.axis("off") # 是否显示x轴、y轴下标 44 | plt.show() 45 | 46 | # save img 47 | my_wordcloud.to_file(path.join(d, "cloudimg.png")) 48 | -------------------------------------------------------------------------------- /chapter06-classifier/chapter06-classifier-03-dtc.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # By:Eastmount CSDN 2021-07-06 3 | from sklearn.datasets import load_iris 4 | from sklearn.tree import DecisionTreeClassifier 5 | from sklearn import metrics 6 | import numpy as np 7 | import matplotlib.pyplot as plt 8 | 9 | #导入数据集iris 10 | ''' 11 | 重点:分割数据集 构造训练集/测试集,80/20 12 | 80%训练 0-40 50-90 100-140 13 | 20%预测 40-50 90-100 140-150 14 | ''' 15 | iris = load_iris() 16 | train_data =
np.concatenate((iris.data[0:40, :], iris.data[50:90, :], iris.data[100:140, :]), axis = 0) #训练集 17 | train_target = np.concatenate((iris.target[0:40], iris.target[50:90], iris.target[100:140]), axis = 0) #训练集样本类别 18 | test_data = np.concatenate((iris.data[40:50, :], iris.data[90:100, :], iris.data[140:150, :]), axis = 0) #测试集 19 | test_target = np.concatenate((iris.target[40:50], iris.target[90:100], iris.target[140:150]), axis = 0) #测试集样本类别 20 | 21 | #导入决策树DTC包 22 | clf = DecisionTreeClassifier() 23 | clf.fit(train_data, train_target) #注意均使用训练数据集和样本类标 24 | print(clf) 25 | predict_target = clf.predict(test_data) #测试集 26 | print(predict_target) 27 | 28 | #预测结果与真实结果比对 29 | print(sum(predict_target == test_target)) 30 | 31 | #输出准确率 召回率 F值 32 | print(metrics.classification_report(test_target, predict_target)) 33 | print(metrics.confusion_matrix(test_target, predict_target)) 34 | 35 | #获取花卉测试数据集两列数据 36 | X = test_data 37 | L1 = [n[0] for n in X] 38 | L2 = [n[1] for n in X] 39 | 40 | #绘图 41 | plt.scatter(L1, L2, c=predict_target, marker='x') #cmap=plt.cm.Paired 42 | plt.title("DecisionTreeClassifier") 43 | plt.show() 44 | -------------------------------------------------------------------------------- /chapter04-regression/chapter04-regression-07.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # By:Eastmount CSDN 2021-07-03 3 | from sklearn.linear_model import LinearRegression 4 | from sklearn.preprocessing import PolynomialFeatures 5 | import matplotlib.pyplot as plt 6 | import numpy as np 7 | 8 | #X表示企业成本 Y表示企业利润 9 | X = [[400], [450], [486], [500], [510], [525], [540], [549], [558], [590], [610], [640], [680], [750], [900]] 10 | Y = [[80], [89], [92], [102], [121], [160], [180], [189], [199], [203], [247], [250], [259], [289], [356]] 11 | print('数据集X: ', X) 12 | print('数据集Y: ', Y) 13 | 14 | #第一步 线性回归分析 15 | clf = LinearRegression() 16 | clf.fit(X, Y) 17 | X2 = [[400], [750], [950]] 18 | Y2 = clf.predict(X2) 19 | print(Y2) 20 | res = clf.predict(np.array([1200]).reshape(-1, 1))[0] 21 | print('预测成本1200元的利润:$%.1f' % res) 22 | plt.plot(X, Y, 'ks') #绘制训练数据集散点图 23 | plt.plot(X2, Y2, 'g-') #绘制预测数据集直线 24 | 25 | #第二步 多项式回归分析 26 | xx = np.linspace(350,950,100) #350到950等差数列 27 | quadratic_featurizer = PolynomialFeatures(degree = 2) #实例化一个二次多项式 28 | x_train_quadratic = quadratic_featurizer.fit_transform(X) #用二次多项式x做变换 29 | X_test_quadratic = quadratic_featurizer.transform(X2) 30 | regressor_quadratic = LinearRegression() 31 | regressor_quadratic.fit(x_train_quadratic, Y) 32 | 33 | #把训练好X值的多项式特征实例应用到一系列点上,形成矩阵 34 | xx_quadratic = quadratic_featurizer.transform(xx.reshape(xx.shape[0], 1)) 35 | plt.plot(xx, regressor_quadratic.predict(xx_quadratic), "r--", 36 | label="$y = ax^2 + bx + c$",linewidth=2) 37 | plt.legend() 38 | plt.show() 39 | 40 | #评价 41 | print('1 r-squared', clf.score(X, Y)) 42 | print('2 r-squared', regressor_quadratic.score(x_train_quadratic, Y)) 43 | -------------------------------------------------------------------------------- /chapter04-regression/chapter04-regression-08.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # By:Eastmount CSDN 2021-07-03 3 | from sklearn.linear_model import LinearRegression 4 | from sklearn.preprocessing import PolynomialFeatures 5 | import matplotlib.pyplot as plt 6 | import numpy as np 7 | 8 | #X表示企业成本 Y表示企业利润 9 | X = [[400], [450], [486], [500], [510], [525], [540], [549], [558], [590], [610], [640], [680], [750], 
[900]] 10 | Y = [[80], [89], [92], [102], [121], [160], [180], [189], [199], [203], [247], [250], [259], [289], [356]] 11 | print('数据集X: ', X) 12 | print('数据集Y: ', Y) 13 | 14 | #第一步 线性回归分析 15 | clf = LinearRegression() 16 | clf.fit(X, Y) 17 | X2 = [[400], [750], [950]] 18 | Y2 = clf.predict(X2) 19 | print(Y2) 20 | res = clf.predict(np.array([1200]).reshape(-1, 1))[0] 21 | print('预测成本1200元的利润:$%.1f' % res) 22 | plt.plot(X, Y, 'ks') #绘制训练数据集散点图 23 | plt.plot(X2, Y2, 'g-') #绘制预测数据集直线 24 | 25 | #第二步 多项式回归分析 26 | xx = np.linspace(350,950,100) 27 | quadratic_featurizer = PolynomialFeatures(degree = 5) 28 | x_train_quadratic = quadratic_featurizer.fit_transform(X) 29 | X_test_quadratic = quadratic_featurizer.transform(X2) 30 | regressor_quadratic = LinearRegression() 31 | regressor_quadratic.fit(x_train_quadratic, Y) 32 | #把训练好X值的多项式特征实例应用到一系列点上,形成矩阵 33 | xx_quadratic = quadratic_featurizer.transform(xx.reshape(xx.shape[0], 1)) 34 | plt.plot(xx, regressor_quadratic.predict(xx_quadratic), "r--", 35 | label="degree=5 polynomial",linewidth=2) 36 | plt.legend() 37 | plt.show() 38 | print('1 r-squared', clf.score(X, Y)) 39 | print('5 r-squared', regressor_quadratic.score(x_train_quadratic, Y)) 40 | 41 | # ('1 r-squared', 0.9118311887769025) 42 | # ('5 r-squared', 0.98087802460869788) 43 | 44 | 45 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Book2-Python-DataAnalysis 2 | 3 | This repository contains all of the source code for the book 《Python网络数据爬取及分析从入门到精通(分析篇)》 (Python Web Data Crawling and Analysis, Analysis Volume) by Yang Xiuzhang (Eastmount), covering visualization analysis, cluster analysis, regression analysis, classification analysis, word clouds, LDA topic analysis, and more. All of the code has been ported to Python 3. We hope it helps you. Keep at it! 4 | 5 | 6 |
7 | 8 |
9 | 10 | - https://item.jd.com/12363491.html 11 | - https://item.jd.com/12373850.html 12 | 13 | --- 14 | 15 | - **Chapter 1: Overview of Web Data Analysis**<br>
16 | 1.1 Data Analysis<br>
17 | 1.2 Related Technologies<br>
18 | 1.3 The Anaconda Development Environment<br>
19 | 1.4 Common Datasets<br>
20 | 21 | - **Chapter 2: Common Python Libraries for Data Analysis**<br>
22 | 2.1 Common Libraries<br>
23 | 2.2 NumPy
24 | 2.3 Pandas
25 | 2.4 Matplotlib
26 | 2.5 Sklearn
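
Nearly all of the examples in this repository follow the same scikit-learn pattern introduced in Chapter 2: construct an estimator, fit it, then predict. A minimal sketch, using only sklearn's built-in iris data so nothing from this repository is required:

```python
from sklearn.datasets import load_iris
from sklearn.cluster import KMeans

X = load_iris().data              #(150, 4) feature matrix
model = KMeans(n_clusters=3)      #1. construct the estimator with its hyper-parameters
model.fit(X)                      #2. fit it to the data
print(model.predict(X[:5]))       #3. apply it to (new) samples
```
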
27 | 28 | - **Chapter 3: Visual Analysis with Python**<br>
29 | 3.1 Visual Analysis with Matplotlib<br>
30 | 3.2 Reading and Visualizing Files with Pandas<br>
31 | 3.3 A First Look at ECharts Visualization<br>
32 | 33 | - **Chapter 4: Regression Analysis with Python**<br>
34 | 4.1 Regression<br>
35 | 4.2 Linear Regression Analysis<br>
36 | 4.3 Polynomial Regression Analysis (see the sketch below)<br>
37 | 4.4 Logistic Regression Analysis<br>
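
As a taste of Chapter 4, a minimal linear-versus-polynomial sketch; the numbers are toy values invented for illustration:

```python
import numpy as np
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import PolynomialFeatures

X = np.array([[1.0], [2.0], [3.0], [4.0], [5.0]])
y = np.array([2.1, 4.3, 8.2, 14.1, 22.3])           #roughly quadratic toy data

linear = LinearRegression().fit(X, y)
X2 = PolynomialFeatures(degree=2).fit_transform(X)  #columns [1, x, x^2]
quadratic = LinearRegression().fit(X2, y)
print(linear.score(X, y), quadratic.score(X2, y))   #R^2 scores; the quadratic fit should win
```
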
38 | 39 | - **Chapter 5: Cluster Analysis with Python**<br>
40 | 5.1 Clustering<br>
41 | 5.2 K-Means
42 | 5.3 BIRCH
43 | 5.4 Dendrogram-Based Clustering<br>
44 | 5.5 Dimensionality Reduction<br>
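
The K-Means examples of Section 5.2 fix the number of clusters by hand; one common way to sanity-check that choice is the silhouette score. A minimal sketch on synthetic data (two blobs, so k=2 should score best):

```python
import numpy as np
from sklearn.cluster import KMeans
from sklearn.metrics import silhouette_score

rng = np.random.RandomState(0)
X = np.vstack([rng.normal(0, 1, (50, 2)),   #blob around (0, 0)
               rng.normal(6, 1, (50, 2))])  #blob around (6, 6)
for k in (2, 3, 4):
    labels = KMeans(n_clusters=k, random_state=0).fit_predict(X)
    print(k, round(silhouette_score(X, labels), 3))  #higher is better
```
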
45 | 46 | - **Chapter 6: Classification Analysis with Python**<br>
47 | 6.1 Classification<br>
48 | 6.2 Decision Trees<br>
49 | 6.3 The KNN Classification Algorithm (see the sketch below)<br>
50 | 6.4 The SVM Classification Algorithm<br>
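
A minimal KNN sketch in the spirit of Chapter 6, on sklearn's built-in iris data:

```python
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier

X, y = load_iris(return_X_y=True)
X_tr, X_te, y_tr, y_te = train_test_split(X, y, test_size=0.3,
                                          random_state=1, stratify=y)
knn = KNeighborsClassifier(n_neighbors=3).fit(X_tr, y_tr)
print(knn.score(X_te, y_te))  #accuracy on the held-out 30%
```
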
51 | 52 | 53 | - **Chapter 7: Association Rule Mining with Python**<br>
54 | 7.1 Basic Concepts<br>
55 | 7.2 The Apriori Algorithm<br>
56 | 7.3 Implementing the Apriori Algorithm (see the sketch below)<br>
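
As a small illustration of Chapter 7's Apriori idea, a self-contained sketch of the frequent-itemset stage; the transactions are invented toy data, and rule generation and the subset-pruning of candidates are omitted for brevity:

```python
from itertools import combinations

transactions = [{'milk', 'bread'}, {'milk', 'diaper', 'beer'},
                {'milk', 'bread', 'diaper'}, {'bread', 'diaper'}]
MIN_SUPPORT = 2  #absolute transaction count

def frequent_itemsets(transactions, min_support):
    items = {i for t in transactions for i in t}
    candidates = [frozenset([i]) for i in items]
    result, k = {}, 1
    while candidates:
        #count each candidate's support (number of transactions containing it)
        counts = {c: sum(c <= t for t in transactions) for c in candidates}
        frequent = {c: n for c, n in counts.items() if n >= min_support}
        result.update(frequent)
        #join step: build (k+1)-item candidates from pairs of frequent k-itemsets
        candidates = list({a | b for a, b in combinations(frequent, 2)
                           if len(a | b) == k + 1})
        k += 1
    return result

print(frequent_itemsets(transactions, MIN_SUPPORT))
```
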
57 | 58 | - **Chapter 8: Data Preprocessing and Text Clustering with Python**<br>
59 | 8.1 Overview of Data Preprocessing<br>
60 | 8.2 Chinese Word Segmentation<br>
61 | 8.3 Data Cleaning<br>
62 | 8.4 Feature Extraction and the Vector Space Model<br>
63 | 8.5 Weight Calculation<br>
64 | 8.6 Text Clustering (see the sketch below)<br>
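
Chapter 8's preprocessing pipeline boils down to: segment Chinese text with jieba, join the tokens with spaces, then vectorize. A minimal sketch (assumes jieba and scikit-learn are installed; the two sentences are invented examples):

```python
import jieba
from sklearn.feature_extraction.text import TfidfVectorizer

docs = ['我喜欢数据分析', '文本聚类是无监督学习']
corpus = [' '.join(jieba.cut(d)) for d in docs]   #space-separated tokens
tfidf = TfidfVectorizer().fit_transform(corpus)   #rows: documents, columns: terms
print(tfidf.toarray())
```
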
65 | 66 | - **Chapter 9: Word-Cloud Hotspots and Topic Distribution Analysis with Python**<br>
67 | 9.1 Word Clouds<br>
68 | 9.2 Installing and Using WordCloud<br>
69 | 9.3 LDA
70 | 71 | - **Chapter 10: Complex Networks and Database-Based Analysis**<br>
72 | 10.1 Complex Networks<br>
73 | 10.2 Database-Based Data Analysis<br>
74 | 10.3 Database-Based Analysis of Blogging Behavior<br>
75 | 76 | - **Afterword** 77 | -------------------------------------------------------------------------------- /chapter05-cluster/chapter05-cluster-16py.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # By:Eastmount CSDN 2021-07-03 3 | import os 4 | import codecs 5 | from sklearn.feature_extraction.text import CountVectorizer, TfidfTransformer 6 | from sklearn.manifold import TSNE 7 | from sklearn.cluster import KMeans 8 | import matplotlib.pyplot as plt 9 | import numpy as np 10 | import pandas as pd 11 | import jieba 12 | from sklearn import metrics 13 | from sklearn.metrics import silhouette_score 14 | from array import array 15 | #from numpy import * #unnecessary wildcard import; the np alias above suffices 16 | #from pylab import mpl #shadowed by 'import matplotlib as mpl' below 17 | from sklearn.metrics.pairwise import cosine_similarity 18 | #import matplotlib.pyplot as plt #duplicate of the import above 19 | import matplotlib as mpl 20 | from scipy.cluster.hierarchy import ward, dendrogram 21 | 22 | #---------------------------------------加载语料------------------------------------- 23 | text = open('data-fenci.txt').read() 24 | print(text) 25 | list1=text.split("\n") 26 | print(list1) 27 | print(list1[0]) 28 | print(list1[1]) 29 | mytext_list=list1 30 | 31 | #控制显示数量 32 | count_vec = CountVectorizer(min_df=20, max_df=1000) #最大值忽略 33 | xx1 = count_vec.fit_transform(list1).toarray() 34 | word=count_vec.get_feature_names() 35 | print("word feature length: {}".format(len(word))) 36 | print(word) 37 | print(xx1) 38 | print(type(xx1)) 39 | print(xx1.shape) 40 | print(xx1[0]) 41 | 42 | #---------------------------------------层次聚类------------------------------------- 43 | titles = word 44 | #dist = cosine_similarity(xx1) 45 | 46 | mpl.rcParams['font.sans-serif'] = ['SimHei'] 47 | 48 | df = pd.DataFrame(xx1) 49 | print(df.corr()) 50 | print(df.corr('spearman')) 51 | print(df.corr('kendall')) 52 | dist = df.corr() 53 | print(dist) 54 | print(type(dist)) 55 | print(dist.shape) 56 | 57 | #define the linkage_matrix using ward clustering pre-computed distances 58 | linkage_matrix = ward(dist) 59 | fig, ax = plt.subplots(figsize=(8, 12)) # set size 60 | ax = dendrogram(linkage_matrix, orientation="right", 61 | p=20, labels=titles, leaf_font_size=12 62 | ) #leaf_rotation=90., leaf_font_size=12.
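#------------------------------------------------------------------------
#Editor's sketch (not part of the original script): the dendrogram above
#only draws the hierarchy. To cut the same ward linkage into flat cluster
#labels, scipy's fcluster can be used; the 5 below is an arbitrary
#illustrative choice, not a value taken from this script.
from scipy.cluster.hierarchy import fcluster
flat_labels = fcluster(linkage_matrix, t=5, criterion='maxclust')
print(dict(zip(titles, flat_labels)))  #word -> flat cluster id
#------------------------------------------------------------------------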
63 | #show plot with tight layout 64 | plt.tight_layout() 65 | #save figure as ward_clusters 66 | plt.savefig('KH.png', dpi=200) 67 | plt.show() 68 | -------------------------------------------------------------------------------- /chapter05-cluster/chapter05-cluster-15.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # By:Eastmount CSDN 2021-07-03 3 | import cv2 4 | import numpy as np 5 | import matplotlib.pyplot as plt 6 | 7 | #读取原始图像 8 | img = cv2.imread('scenery.png') 9 | print(img.shape) 10 | 11 | #图像二维像素转换为一维 12 | data = img.reshape((-1,3)) 13 | data = np.float32(data) 14 | 15 | #定义中心 (type,max_iter,epsilon) 16 | criteria = (cv2.TERM_CRITERIA_EPS + 17 | cv2.TERM_CRITERIA_MAX_ITER, 10, 1.0) 18 | 19 | #设置标签 20 | flags = cv2.KMEANS_RANDOM_CENTERS 21 | 22 | #K-Means聚类 聚集成2类 23 | compactness, labels2, centers2 = cv2.kmeans(data, 2, None, criteria, 10, flags) 24 | 25 | #K-Means聚类 聚集成4类 26 | compactness, labels4, centers4 = cv2.kmeans(data, 4, None, criteria, 10, flags) 27 | 28 | #K-Means聚类 聚集成8类 29 | compactness, labels8, centers8 = cv2.kmeans(data, 8, None, criteria, 10, flags) 30 | 31 | #K-Means聚类 聚集成16类 32 | compactness, labels16, centers16 = cv2.kmeans(data, 16, None, criteria, 10, flags) 33 | 34 | #K-Means聚类 聚集成64类 35 | compactness, labels64, centers64 = cv2.kmeans(data, 64, None, criteria, 10, flags) 36 | 37 | #图像转换回uint8二维类型 38 | centers2 = np.uint8(centers2) 39 | res = centers2[labels2.flatten()] 40 | dst2 = res.reshape((img.shape)) 41 | 42 | centers4 = np.uint8(centers4) 43 | res = centers4[labels4.flatten()] 44 | dst4 = res.reshape((img.shape)) 45 | 46 | centers8 = np.uint8(centers8) 47 | res = centers8[labels8.flatten()] 48 | dst8 = res.reshape((img.shape)) 49 | 50 | centers16 = np.uint8(centers16) 51 | res = centers16[labels16.flatten()] 52 | dst16 = res.reshape((img.shape)) 53 | 54 | centers64 = np.uint8(centers64) 55 | res = centers64[labels64.flatten()] 56 | dst64 = res.reshape((img.shape)) 57 | 58 | #图像转换为RGB显示 59 | img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB) 60 | dst2 = cv2.cvtColor(dst2, cv2.COLOR_BGR2RGB) 61 | dst4 = cv2.cvtColor(dst4, cv2.COLOR_BGR2RGB) 62 | dst8 = cv2.cvtColor(dst8, cv2.COLOR_BGR2RGB) 63 | dst16 = cv2.cvtColor(dst16, cv2.COLOR_BGR2RGB) 64 | dst64 = cv2.cvtColor(dst64, cv2.COLOR_BGR2RGB) 65 | 66 | #用来正常显示中文标签 67 | plt.rcParams['font.sans-serif']=['SimHei'] 68 | 69 | #显示图像 70 | titles = [u'原始图像', u'聚类图像 K=2', u'聚类图像 K=4', 71 | u'聚类图像 K=8', u'聚类图像 K=16', u'聚类图像 K=64'] 72 | images = [img, dst2, dst4, dst8, dst16, dst64] 73 | for i in range(6): 74 | plt.subplot(2,3,i+1), plt.imshow(images[i], 'gray'), 75 | plt.title(titles[i]) 76 | plt.xticks([]),plt.yticks([]) 77 | plt.show() 78 | -------------------------------------------------------------------------------- /chapter05-cluster/chapter05-cluster-04.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # By:Eastmount CSDN 2021-07-03 3 | 4 | #------------------------------------------------------------------------ 5 | #第一步 读取数据 6 | import os 7 | 8 | data = [] 9 | for line in open("data.txt", "r").readlines(): 10 | line = line.rstrip() 11 | result = ' '.join(line.split()) 12 | #将字符串转换为小数 13 | s = [float(x) for x in result.strip().split(' ')] 14 | print(s) 15 | data.append(s) 16 | print(data) 17 | print(type(data)) 18 | 19 | #------------------------------------------------------------------------ 20 | #第二步 获取两列数据 21 | print('第一列 第五列数据') 22 | L2 = [n[0] for n in data] 
#第一列表示球员每分钟助攻数:assists_per_minute 23 | L5 = [n[4] for n in data] #第五列表示球员每分钟得分数:points_per_minute 24 | T = dict(zip(L2,L5)) #两列数据生成二维数据 25 | type(T) 26 | print(L2) 27 | 28 | #下述代码将dict类型转换为list 29 | X = list(map(lambda x,y: (x,y), T.keys(),T.values())) 30 | print(type(X)) 31 | print(X) 32 | 33 | #------------------------------------------------------------------------ 34 | #第三步 聚类分析 35 | from sklearn.cluster import KMeans 36 | clf = KMeans(n_clusters=3) 37 | y_pred = clf.fit_predict(X) 38 | print(clf) 39 | print(y_pred) 40 | 41 | #------------------------------------------------------------------------ 42 | #第四步 绘制图形 43 | import numpy as np 44 | import matplotlib.pyplot as plt 45 | 46 | #获取第一列和第二列数据,使用for循环获取,n[0]表示X第一列 47 | x = [n[0] for n in X] 48 | y = [n[1] for n in X] 49 | 50 | #坐标 51 | x1, y1 = [], [] 52 | x2, y2 = [], [] 53 | x3, y3 = [], [] 54 | 55 | #分布获取类标为0、1、2的数据并赋值给(x1,y1) (x2,y2) (x3,y3) 56 | i = 0 57 | while i < len(X): 58 | if y_pred[i]==0: 59 | x1.append(X[i][0]) 60 | y1.append(X[i][1]) 61 | elif y_pred[i]==1: 62 | x2.append(X[i][0]) 63 | y2.append(X[i][1]) 64 | elif y_pred[i]==2: 65 | x3.append(X[i][0]) 66 | y3.append(X[i][1]) 67 | i = i + 1 68 | 69 | #三种颜色 红 绿 蓝,marker='x'表示类型,o表示圆点、*表示星型、x表示点 70 | plot1, = plt.plot(x1, y1, 'or', marker="x") 71 | plot2, = plt.plot(x2, y2, 'og', marker="o") 72 | plot3, = plt.plot(x3, y3, 'ob', marker="*") 73 | 74 | plt.title("Kmeans-Basketball Data") #绘制标题 75 | plt.xlabel("assists_per_minute") #绘制x轴 76 | plt.ylabel("points_per_minute") #绘制y轴 77 | plt.legend((plot1, plot2, plot3), ('A', 'B', 'C'), fontsize=10) #设置右上角图例 78 | plt.show() 79 | -------------------------------------------------------------------------------- /chapter05-cluster/chapter05-cluster-05.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # By:Eastmount CSDN 2021-07-03 3 | 4 | #------------------------------------------------------------------------ 5 | #第一步 读取数据 6 | import os 7 | 8 | data = [] 9 | for line in open("data.txt", "r").readlines(): 10 | line = line.rstrip() 11 | result = ' '.join(line.split()) 12 | #将字符串转换为小数 13 | s = [float(x) for x in result.strip().split(' ')] 14 | print(s) 15 | data.append(s) 16 | print(data) 17 | print(type(data)) 18 | 19 | #------------------------------------------------------------------------ 20 | #第二步 获取两列数据 21 | print('第一列 第五列数据') 22 | L2 = [n[0] for n in data] #第一列表示球员每分钟助攻数:assists_per_minute 23 | L5 = [n[4] for n in data] #第五列表示球员每分钟得分数:points_per_minute 24 | T = dict(zip(L2,L5)) #两列数据生成二维数据 25 | type(T) 26 | print(L2) 27 | 28 | #下述代码将dict类型转换为list 29 | X = list(map(lambda x,y: (x,y), T.keys(),T.values())) 30 | print(type(X)) 31 | print(X) 32 | 33 | #------------------------------------------------------------------------ 34 | #第三步 聚类分析 35 | from sklearn.cluster import KMeans 36 | clf = KMeans(n_clusters=3) 37 | y_pred = clf.fit_predict(X) 38 | print(clf) 39 | print(y_pred) 40 | 41 | #------------------------------------------------------------------------ 42 | #第四步 绘制图形 43 | import numpy as np 44 | import matplotlib.pyplot as plt 45 | 46 | #获取第一列和第二列数据,使用for循环获取,n[0]表示X第一列 47 | x = [n[0] for n in X] 48 | y = [n[1] for n in X] 49 | 50 | #坐标 51 | x1, y1 = [], [] 52 | x2, y2 = [], [] 53 | x3, y3 = [], [] 54 | 55 | #分布获取类标为0、1、2的数据并赋值给(x1,y1) (x2,y2) (x3,y3) 56 | i = 0 57 | while i < len(X): 58 | if y_pred[i]==0: 59 | x1.append(X[i][0]) 60 | y1.append(X[i][1]) 61 | elif y_pred[i]==1: 62 | x2.append(X[i][0]) 63 | y2.append(X[i][1]) 64 | elif y_pred[i]==2: 65 | 
x3.append(X[i][0]) 66 | y3.append(X[i][1]) 67 | i = i + 1 68 | 69 | #三种颜色 红 绿 蓝,marker='x'表示类型,o表示圆点、*表示星型、x表示点 70 | plot1, = plt.plot(x1, y1, 'or', marker="x") 71 | plot2, = plt.plot(x2, y2, 'og', marker="o") 72 | plot3, = plt.plot(x3, y3, 'ob', marker="*") 73 | 74 | plt.title("Kmeans-Basketball Data") #绘制标题 75 | plt.xlabel("assists_per_minute") #绘制x轴 76 | plt.ylabel("points_per_minute") #绘制y轴 77 | plt.legend((plot1, plot2, plot3), ('A', 'B', 'C'), fontsize=10) #设置右上角图例 78 | 79 | #------------------------------------------------------------------------ 80 | #第五步 设置类簇中心 81 | centers = clf.cluster_centers_ 82 | print(centers) 83 | plt.plot(centers[:,0],centers[:,1],'r*',markersize=20) #显示三个中心点 84 | plt.show() 85 | -------------------------------------------------------------------------------- /chapter06-classifier/chapter06-classifier-09-svm.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # By:Eastmount CSDN 2021-07-06 3 | import os 4 | import numpy as np 5 | from sklearn.svm import SVC 6 | from sklearn import metrics 7 | import matplotlib.pyplot as plt 8 | from matplotlib.colors import ListedColormap 9 | 10 | #---------------------------------------------------------------------------- 11 | #第一步 加载数据集 12 | path = "wine/wine.txt" 13 | data = np.loadtxt(path,dtype=float,delimiter=",") 14 | print(data) 15 | 16 | #---------------------------------------------------------------------------- 17 | #第二步 划分数据集 18 | yy, x = np.split(data, (1,), axis=1) #第一列为类标yy,后面13列特征为x 19 | print(yy.shape, x.shape) 20 | y = [] 21 | for n in yy: #将类标浮点型转化为整数 22 | y.append(int(n)) 23 | x = x[:, :2] #获取x前两列数据,方便绘图 对应x、y轴 24 | train_data = np.concatenate((x[0:40,:], x[60:100,:], x[140:160,:]), axis = 0) #训练集 25 | train_target = np.concatenate((y[0:40], y[60:100], y[140:160]), axis = 0) #样本类别 26 | test_data = np.concatenate((x[40:60, :], x[100:140, :], x[160:,:]), axis = 0) #测试集 27 | test_target = np.concatenate((y[40:60], y[100:140], y[160:]), axis = 0) #样本类别 28 | print(train_data.shape, train_target.shape) 29 | print(test_data.shape, test_target.shape) 30 | 31 | #---------------------------------------------------------------------------- 32 | #第三步 SVC训练 33 | clf = SVC() 34 | clf.fit(train_data,train_target) 35 | result = clf.predict(test_data) 36 | print(result) 37 | 38 | #---------------------------------------------------------------------------- 39 | #第四步 评价算法 40 | print(sum(result==test_target)) #预测结果与真实结果比对 41 | print(metrics.classification_report(test_target, result)) #准确率 召回率 F值 42 | 43 | #---------------------------------------------------------------------------- 44 | #第五步 创建网格 45 | x1_min, x1_max = test_data[:,0].min()-0.1, test_data[:,0].max()+0.1 #第一列 46 | x2_min, x2_max = test_data[:,1].min()-0.1, test_data[:,1].max()+0.1 #第二列 47 | xx, yy = np.meshgrid(np.arange(x1_min, x1_max, 0.1), 48 | np.arange(x2_min, x2_max, 0.1)) #生成网格型数据 49 | z = clf.predict(np.c_[xx.ravel(), yy.ravel()]) 50 | 51 | #---------------------------------------------------------------------------- 52 | #第六步 绘图可视化 53 | cmap_light = ListedColormap(['#FFAAAA', '#AAFFAA', '#AAAAFF']) #颜色Map 54 | cmap_bold = ListedColormap(['#000000', '#00FF00', '#FFFFFF']) 55 | plt.figure() 56 | z = z.reshape(xx.shape) 57 | print(xx.shape, yy.shape, z.shape, test_target.shape) 58 | plt.pcolormesh(xx, yy, z, cmap=cmap_light) 59 | plt.scatter(test_data[:,0], test_data[:,1], c=test_target, 60 | cmap=cmap_bold, s=50) 61 | plt.show() 62 | 
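#------------------------------------------------------------------------
#Editor's sketch (not part of the original script): the decision surface
#above hinges on one idiom: np.c_[xx.ravel(), yy.ravel()] flattens both
#coordinate grids and pairs them column-wise, yielding one sample per grid
#cell. A self-contained micro-example with a toy 3x2 grid:
import numpy as np
gx, gy = np.meshgrid(np.arange(3), np.arange(2))  #gx, gy both shape (2, 3)
grid = np.c_[gx.ravel(), gy.ravel()]              #shape (6, 2): every (x, y) pair
print(grid.tolist())  #[[0, 0], [1, 0], [2, 0], [0, 1], [1, 1], [2, 1]]
#------------------------------------------------------------------------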
-------------------------------------------------------------------------------- /chapter06-classifier/chapter06-classifier-07-knn.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # By:Eastmount CSDN 2021-07-06 3 | import os 4 | import numpy as np 5 | from sklearn.neighbors import KNeighborsClassifier 6 | from sklearn import metrics 7 | from sklearn.decomposition import PCA 8 | import matplotlib.pyplot as plt 9 | from matplotlib.colors import ListedColormap 10 | 11 | #---------------------------------------------------------------------------- 12 | #第一步 加载数据集 13 | path = "wine/wine.txt" 14 | data = np.loadtxt(path,dtype=float,delimiter=",") 15 | print(data) 16 | 17 | #---------------------------------------------------------------------------- 18 | #第二步 划分数据集 19 | yy, x = np.split(data, (1,), axis=1) #第一列为类标yy,后面13列特征为x 20 | print(yy.shape, x.shape) 21 | y = [] 22 | for n in yy: #将类标浮点型转化为整数 23 | y.append(int(n)) 24 | x = x[:, :2] #获取x前两列数据,方便绘图 对应x、y轴 25 | train_data = np.concatenate((x[0:40,:], x[60:100,:], x[140:160,:]), axis = 0) #训练集 26 | train_target = np.concatenate((y[0:40], y[60:100], y[140:160]), axis = 0) #样本类别 27 | test_data = np.concatenate((x[40:60, :], x[100:140, :], x[160:,:]), axis = 0) #测试集 28 | test_target = np.concatenate((y[40:60], y[100:140], y[160:]), axis = 0) #样本类别 29 | print(train_data.shape, train_target.shape) 30 | print(test_data.shape, test_target.shape) 31 | 32 | #---------------------------------------------------------------------------- 33 | #第三步 KNN训练 34 | clf = KNeighborsClassifier(n_neighbors=3,algorithm='kd_tree') #K=3 35 | clf.fit(train_data,train_target) 36 | result = clf.predict(test_data) 37 | print(result) 38 | 39 | #---------------------------------------------------------------------------- 40 | #第四步 评价算法 41 | print(sum(result==test_target)) #预测结果与真实结果比对 42 | print(metrics.classification_report(test_target, result)) #准确率 召回率 F值 43 | 44 | #---------------------------------------------------------------------------- 45 | #第五步 创建网格 46 | x1_min, x1_max = test_data[:,0].min()-0.1, test_data[:,0].max()+0.1 #第一列 47 | x2_min, x2_max = test_data[:,1].min()-0.1, test_data[:,1].max()+0.1 #第二列 48 | xx, yy = np.meshgrid(np.arange(x1_min, x1_max, 0.1), 49 | np.arange(x2_min, x2_max, 0.1)) #生成网格型数据 50 | print(xx.shape, yy.shape) #(53L, 36L) (53L, 36L) 51 | 52 | z = clf.predict(np.c_[xx.ravel(), yy.ravel()]) #ravel()拉直函数 53 | print(xx.ravel().shape, yy.ravel().shape) #(1908L,) (1908L,) 54 | print(np.c_[xx.ravel(), yy.ravel()].shape) #合并 (1908L,2) 55 | 56 | #---------------------------------------------------------------------------- 57 | #第六步 绘图可视化 58 | cmap_light = ListedColormap(['#FFAAAA', '#AAFFAA', '#AAAAFF']) #颜色Map 59 | cmap_bold = ListedColormap(['#FF0000', '#00FF00', '#0000FF']) 60 | plt.figure() 61 | z = z.reshape(xx.shape) 62 | print(xx.shape, yy.shape, z.shape, test_target.shape) 63 | #(53L, 36L) (53L, 36L) (53L, 36L) (78L,) 64 | plt.pcolormesh(xx, yy, z, cmap=cmap_light) 65 | plt.scatter(test_data[:,0], test_data[:,1], c=test_target, 66 | cmap=cmap_bold, s=50) 67 | plt.show() 68 | -------------------------------------------------------------------------------- /chapter05-cluster/data.txt: -------------------------------------------------------------------------------- 1 | 0.0888 201 36.02 28 0.5885 2 | 0.1399 198 39.32 30 0.8291 3 | 0.0747 198 38.8 26 0.4974 4 | 0.0983 191 40.71 30 0.5772 5 | 0.1276 196 38.4 28 0.5703 6 | 0.1671 201 34.1 31 0.5835 7 | 0.1906 193 36.2 30 0.5276 8 | 
0.1061 191 36.75 27 0.5523 9 | 0.2446 185 38.43 29 0.4007 10 | 0.167 203 33.54 24 0.477 11 | 0.2485 188 35.01 27 0.4313 12 | 0.1227 198 36.67 29 0.4909 13 | 0.124 185 33.88 24 0.5668 14 | 0.1461 191 35.59 30 0.5113 15 | 0.2315 191 38.01 28 0.3788 16 | 0.0494 193 32.38 32 0.559 17 | 0.1107 196 35.22 25 0.4799 18 | 0.2521 183 31.73 29 0.5735 19 | 0.1007 193 28.81 34 0.6318 20 | 0.1067 196 35.6 23 0.4326 21 | 0.1956 188 35.28 32 0.428 22 | 0.1828 191 29.54 28 0.4401 23 | 0.1627 196 31.35 28 0.5581 24 | 0.1403 198 33.5 23 0.4866 25 | 0.1563 193 34.56 32 0.5267 26 | 0.2681 183 39.53 27 0.5439 27 | 0.1236 196 26.7 34 0.4419 28 | 0.13 188 30.77 26 0.3998 29 | 0.0896 198 25.67 30 0.4325 30 | 0.2071 178 36.22 30 0.4086 31 | 0.2244 185 36.55 23 0.4624 32 | 0.3437 185 34.91 31 0.4325 33 | 0.1058 191 28.35 28 0.4903 34 | 0.2326 185 33.53 27 0.4802 35 | 0.1577 193 31.07 25 0.4345 36 | 0.2327 185 36.52 32 0.4819 37 | 0.1256 196 27.87 29 0.6244 38 | 0.107 198 24.31 34 0.3991 39 | 0.1343 193 31.26 28 0.4414 40 | 0.0586 196 22.18 23 0.4013 41 | 0.2383 185 35.25 26 0.3801 42 | 0.1006 198 22.87 30 0.3498 43 | 0.2164 193 24.49 32 0.3185 44 | 0.1485 198 23.57 27 0.3097 45 | 0.227 191 31.72 27 0.4319 46 | 0.1649 188 27.9 25 0.3799 47 | 0.1188 191 22.74 24 0.4091 48 | 0.194 193 20.62 27 0.3588 49 | 0.2495 185 30.46 25 0.4727 50 | 0.2378 185 32.38 27 0.3212 51 | 0.1592 191 25.75 31 0.3418 52 | 0.2069 170 33.84 30 0.4285 53 | 0.2084 185 27.83 25 0.3917 54 | 0.0877 193 21.67 26 0.5769 55 | 0.101 193 21.79 24 0.4773 56 | 0.0942 201 20.17 26 0.4512 57 | 0.055 193 29.07 31 0.3096 58 | 0.1071 196 24.28 24 0.3089 59 | 0.0728 193 19.24 27 0.4573 60 | 0.2771 180 27.07 28 0.3214 61 | 0.0528 196 18.95 22 0.5437 62 | 0.213 188 21.59 30 0.4121 63 | 0.1356 193 13.27 31 0.2185 64 | 0.1043 196 16.3 23 0.3313 65 | 0.113 191 23.01 25 0.3302 66 | 0.1477 196 20.31 31 0.4677 67 | 0.1317 188 17.46 33 0.2406 68 | 0.2187 191 21.95 28 0.3007 69 | 0.2127 188 14.57 37 0.2471 70 | 0.2547 160 34.55 28 0.2894 71 | 0.1591 191 22.0 24 0.3682 72 | 0.0898 196 13.37 34 0.389 73 | 0.2146 188 20.51 24 0.512 74 | 0.1871 183 19.78 28 0.4449 75 | 0.1528 191 16.36 33 0.4035 76 | 0.156 191 16.03 23 0.2683 77 | 0.2348 188 24.27 26 0.2719 78 | 0.1623 180 18.49 28 0.3408 79 | 0.1239 180 17.76 26 0.4393 80 | 0.2178 185 13.31 25 0.3004 81 | 0.1608 185 17.41 26 0.3503 82 | 0.0805 193 13.67 25 0.4388 83 | 0.1776 193 17.46 27 0.2578 84 | 0.1668 185 14.38 35 0.2989 85 | 0.1072 188 12.12 31 0.4455 86 | 0.1821 185 12.63 25 0.3087 87 | 0.188 180 12.24 30 0.3678 88 | 0.1167 196 12.0 24 0.3667 89 | 0.2617 185 24.46 27 0.3189 90 | 0.1994 188 20.06 27 0.4187 91 | 0.1706 170 17.0 25 0.5059 92 | 0.1554 183 11.58 24 0.3195 93 | 0.2282 185 10.08 24 0.2381 94 | 0.1778 185 18.56 23 0.2802 95 | 0.1863 185 11.81 23 0.381 96 | 0.1014 193 13.81 32 0.1593 -------------------------------------------------------------------------------- /chapter06-classifier/chapter06-classifier-11-all.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # By:Eastmount CSDN 2021-07-06 3 | # 该部分参考知乎萌弟老师:https://zhuanlan.zhihu.com/p/173945775 4 | import numpy as np 5 | from sklearn import metrics 6 | from sklearn import datasets 7 | import matplotlib.pyplot as plt 8 | from matplotlib.colors import ListedColormap 9 | from sklearn.model_selection import train_test_split 10 | from sklearn.decomposition import PCA 11 | from sklearn.preprocessing import StandardScaler 12 | 13 | 
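#Editor's sketch (not part of the original script): two details in the steps
#below deserve emphasis. First, train_test_split(..., stratify=y) keeps the
#class proportions identical in the training and test splits. Second,
#StandardScaler is fit on the training split only and then reused to
#transform the test split; fitting it on all of the data would leak test-set
#statistics into training. Self-contained micro-example with toy values:
import numpy as np
from sklearn.preprocessing import StandardScaler
demo_train = np.array([[1.0], [2.0], [3.0]])
demo_test = np.array([[4.0]])
demo_sc = StandardScaler().fit(demo_train)  #mean/std come from training data only
print(demo_sc.transform(demo_test))         #test scaled with the training statistics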
#------------------------------------------------------------------------ 14 | #第一步 导入数据 15 | iris = datasets.load_iris() 16 | X = iris.data[:,[2,3]] 17 | y = iris.target 18 | print("Class labels:",np.unique(y)) #打印分类类别的种类 [0 1 2] 19 | 20 | #30%测试数据 70%训练数据 stratify=y表示训练数据和测试数据具有相同的类别比例 21 | X_train,X_test,y_train,y_test = train_test_split(X,y,test_size=0.3,random_state=1,stratify=y) 22 | 23 | #------------------------------------------------------------------------ 24 | #第二步 数据标准化 25 | sc = StandardScaler() #估算训练数据中的mu和sigma 26 | sc.fit(X_train) #使用训练数据中的mu和sigma对数据进行标准化 27 | X_train_std = sc.transform(X_train) 28 | X_test_std = sc.transform(X_test) 29 | print(X_train_std) 30 | print(X_test_std) 31 | 32 | #------------------------------------------------------------------------ 33 | #第三步 可视化函数 画出决策边界 34 | def plot_decision_region(X,y,classifier,resolution=0.02): 35 | markers = ('s','x','o','^','v') 36 | colors = ('red','blue','lightgreen','gray','cyan') 37 | cmap = ListedColormap(colors[:len(np.unique(y))]) 38 | 39 | # plot the decision surface 40 | x1_min,x1_max = X[:,0].min()-1,X[:,0].max()+1 41 | x2_min,x2_max = X[:,1].min()-1,X[:,1].max()+1 42 | xx1,xx2 = np.meshgrid(np.arange(x1_min,x1_max,resolution), 43 | np.arange(x2_min,x2_max,resolution)) 44 | Z = classifier.predict(np.array([xx1.ravel(),xx2.ravel()]).T) 45 | Z = Z.reshape(xx1.shape) 46 | plt.contourf(xx1,xx2,Z,alpha=0.3,cmap=cmap) 47 | plt.xlim(xx1.min(),xx1.max()) 48 | plt.ylim(xx2.min(),xx2.max()) 49 | 50 | # plot class samples 51 | for idx,cl in enumerate(np.unique(y)): 52 | plt.scatter(x=X[y==cl,0], 53 | y = X[y==cl,1], 54 | alpha=0.8, 55 | c=colors[idx], 56 | marker = markers[idx], 57 | label=cl, 58 | edgecolors='black') 59 | 60 | #------------------------------------------------------------------------ 61 | #第四步 决策树分类 62 | from sklearn.tree import DecisionTreeClassifier 63 | tree = DecisionTreeClassifier(criterion='gini',max_depth=4,random_state=1) 64 | tree.fit(X_train_std,y_train) 65 | print(X_train_std.shape, X_test_std.shape, len(y_train), len(y_test)) #(105, 2) (45, 2) 105 45 66 | res1 = tree.predict(X_test_std) 67 | print(res1) 68 | print(metrics.classification_report(y_test, res1, digits=4)) #四位小数 69 | 70 | plot_decision_region(X_train_std,y_train,classifier=tree,resolution=0.02) 71 | plt.xlabel('petal length [standardized]') 72 | plt.ylabel('petal width [standardized]') 73 | plt.title('DecisionTreeClassifier') 74 | plt.legend(loc='upper left') 75 | plt.show() 76 | 77 | #------------------------------------------------------------------------ 78 | #第五步 KNN分类 79 | from sklearn.neighbors import KNeighborsClassifier 80 | knn = KNeighborsClassifier(n_neighbors=2,p=2,metric="minkowski") 81 | knn.fit(X_train_std,y_train) 82 | res2 = knn.predict(X_test_std) 83 | print(res2) 84 | print(metrics.classification_report(y_test, res2, digits=4)) #四位小数 85 | 86 | plot_decision_region(X_train_std,y_train,classifier=knn,resolution=0.02) 87 | plt.xlabel('petal length [standardized]') 88 | plt.ylabel('petal width [standardized]') 89 | plt.title('KNeighborsClassifier') 90 | plt.legend(loc='upper left') 91 | plt.show() 92 | 93 | #------------------------------------------------------------------------ 94 | #第六步 SVM分类 核函数对非线性分类问题建模(gamma=0.20) 95 | from sklearn.svm import SVC 96 | svm = SVC(kernel='rbf',random_state=1,gamma=0.20,C=1.0) #较小的gamma有较松的决策边界 97 | #svm = SVC(kernel='rbf',random_state=1,gamma=100.0,C=1.0,verbose=1) #a much larger gamma gives a tighter, more overfit boundary; swap with the line above to compare 98 | svm.fit(X_train_std,y_train) 99 | res3 = svm.predict(X_test_std) 100 | print(res3) 101 |
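#Editor's sketch (not part of the original script): gamma controls how far a
#single training sample's influence reaches in the RBF kernel. Small gamma
#gives a smooth, loose boundary; large gamma gives tight islands around the
#training points, an overfitting risk. A quick comparison using the variables
#already defined above; more support vectors usually means a more complex
#boundary:
for demo_gamma in (0.2, 1.0, 100.0):
    demo_svm = SVC(kernel='rbf', random_state=1, gamma=demo_gamma, C=1.0)
    demo_svm.fit(X_train_std, y_train)
    print(demo_gamma, demo_svm.n_support_)  #support-vector count per class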
print(metrics.classification_report(y_test, res3, digits=4)) 102 | 103 | plot_decision_region(X_train_std,y_train,classifier=svm,resolution=0.02) 104 | plt.xlabel('petal length [standardized]') 105 | plt.ylabel('petal width [standardized]') 106 | plt.title('SVM') 107 | plt.legend(loc='upper left') 108 | plt.show() 109 | 110 | 111 | #------------------------------------------------------------------------ 112 | #第七步 逻辑回归分类 113 | from sklearn.linear_model import LogisticRegression 114 | lr = LogisticRegression(C=100.0,random_state=1) 115 | lr.fit(X_train_std,y_train) 116 | res4 = lr.predict(X_test_std) 117 | print(res4) 118 | print(metrics.classification_report(y_test, res4, digits=4)) 119 | 120 | plot_decision_region(X_train_std,y_train,classifier=lr,resolution=0.02) 121 | plt.xlabel('petal length [standardized]') 122 | plt.ylabel('petal width [standardized]') 123 | plt.title('LogisticRegression') 124 | plt.legend(loc='upper left') 125 | plt.show() 126 | 127 | 128 | #------------------------------------------------------------------------ 129 | #第八步 朴素贝叶斯分类 130 | from sklearn.naive_bayes import GaussianNB 131 | gnb = GaussianNB() 132 | gnb.fit(X_train_std,y_train) 133 | res5 = gnb.predict(X_test_std) 134 | print(res5) 135 | print(metrics.classification_report(y_test, res5, digits=4)) 136 | 137 | plot_decision_region(X_train_std,y_train,classifier=gnb,resolution=0.02) 138 | plt.xlabel('petal length [standardized]') 139 | plt.ylabel('petal width [standardized]') 140 | plt.title('GaussianNB') 141 | plt.legend(loc='upper left') 142 | plt.show() 143 | 144 | #------------------------------------------------------------------------ 145 | #第九步 随机森林分类 146 | from sklearn.ensemble import RandomForestClassifier 147 | forest = RandomForestClassifier(criterion='gini', 148 | n_estimators=25, 149 | random_state=1, 150 | n_jobs=2, 151 | verbose=1) 152 | forest.fit(X_train_std,y_train) 153 | res6 = forest.predict(X_test_std) 154 | print(res6) 155 | print(metrics.classification_report(y_test, res6, digits=4)) 156 | 157 | plot_decision_region(X_train_std,y_train,classifier=forest,resolution=0.02) 158 | plt.xlabel('petal length [standardized]') 159 | plt.ylabel('petal width [standardized]') 160 | plt.title('RandomForestClassifier') 161 | plt.legend(loc='upper left') 162 | plt.show() 163 | 164 | #------------------------------------------------------------------------ 165 | #第十步 集成学习分类 166 | from sklearn.ensemble import AdaBoostClassifier 167 | ada = AdaBoostClassifier() 168 | ada.fit(X_train_std,y_train) 169 | res7 = ada.predict(X_test_std) 170 | print(res7) 171 | print(metrics.classification_report(y_test, res7, digits=4)) 172 | 173 | plot_decision_region(X_train_std,y_train,classifier=ada,resolution=0.02) 174 | plt.xlabel('petal length [standardized]') 175 | plt.ylabel('petal width [standardized]') 176 | plt.title('AdaBoostClassifier') 177 | plt.legend(loc='upper left') 178 | plt.show() 179 | 180 | #------------------------------------------------------------------------ 181 | #第11步 GradientBoosting分类 182 | from sklearn.ensemble import GradientBoostingClassifier 183 | gb = GradientBoostingClassifier() 184 | gb.fit(X_train_std,y_train) 185 | res8 = gb.predict(X_test_std) 186 | print(res8) 187 | print(metrics.classification_report(y_test, res8, digits=4)) 188 | 189 | plot_decision_region(X_train_std,y_train,classifier=gb,resolution=0.02) 190 | plt.xlabel('petal length [standardized]') 191 | plt.ylabel('petal width [standardized]') 192 | plt.title('GradientBoostingClassifier') 193 | plt.legend(loc='upper
left') 194 | plt.show() 195 | -------------------------------------------------------------------------------- /chapter06-classifier/wine/wine.txt: -------------------------------------------------------------------------------- 1 | 1,14.23,1.71,2.43,15.6,127,2.8,3.06,.28,2.29,5.64,1.04,3.92,1065 2 | 1,13.2,1.78,2.14,11.2,100,2.65,2.76,.26,1.28,4.38,1.05,3.4,1050 3 | 1,13.16,2.36,2.67,18.6,101,2.8,3.24,.3,2.81,5.68,1.03,3.17,1185 4 | 1,14.37,1.95,2.5,16.8,113,3.85,3.49,.24,2.18,7.8,.86,3.45,1480 5 | 1,13.24,2.59,2.87,21,118,2.8,2.69,.39,1.82,4.32,1.04,2.93,735 6 | 1,14.2,1.76,2.45,15.2,112,3.27,3.39,.34,1.97,6.75,1.05,2.85,1450 7 | 1,14.39,1.87,2.45,14.6,96,2.5,2.52,.3,1.98,5.25,1.02,3.58,1290 8 | 1,14.06,2.15,2.61,17.6,121,2.6,2.51,.31,1.25,5.05,1.06,3.58,1295 9 | 1,14.83,1.64,2.17,14,97,2.8,2.98,.29,1.98,5.2,1.08,2.85,1045 10 | 1,13.86,1.35,2.27,16,98,2.98,3.15,.22,1.85,7.22,1.01,3.55,1045 11 | 1,14.1,2.16,2.3,18,105,2.95,3.32,.22,2.38,5.75,1.25,3.17,1510 12 | 1,14.12,1.48,2.32,16.8,95,2.2,2.43,.26,1.57,5,1.17,2.82,1280 13 | 1,13.75,1.73,2.41,16,89,2.6,2.76,.29,1.81,5.6,1.15,2.9,1320 14 | 1,14.75,1.73,2.39,11.4,91,3.1,3.69,.43,2.81,5.4,1.25,2.73,1150 15 | 1,14.38,1.87,2.38,12,102,3.3,3.64,.29,2.96,7.5,1.2,3,1547 16 | 1,13.63,1.81,2.7,17.2,112,2.85,2.91,.3,1.46,7.3,1.28,2.88,1310 17 | 1,14.3,1.92,2.72,20,120,2.8,3.14,.33,1.97,6.2,1.07,2.65,1280 18 | 1,13.83,1.57,2.62,20,115,2.95,3.4,.4,1.72,6.6,1.13,2.57,1130 19 | 1,14.19,1.59,2.48,16.5,108,3.3,3.93,.32,1.86,8.7,1.23,2.82,1680 20 | 1,13.64,3.1,2.56,15.2,116,2.7,3.03,.17,1.66,5.1,.96,3.36,845 21 | 1,14.06,1.63,2.28,16,126,3,3.17,.24,2.1,5.65,1.09,3.71,780 22 | 1,12.93,3.8,2.65,18.6,102,2.41,2.41,.25,1.98,4.5,1.03,3.52,770 23 | 1,13.71,1.86,2.36,16.6,101,2.61,2.88,.27,1.69,3.8,1.11,4,1035 24 | 1,12.85,1.6,2.52,17.8,95,2.48,2.37,.26,1.46,3.93,1.09,3.63,1015 25 | 1,13.5,1.81,2.61,20,96,2.53,2.61,.28,1.66,3.52,1.12,3.82,845 26 | 1,13.05,2.05,3.22,25,124,2.63,2.68,.47,1.92,3.58,1.13,3.2,830 27 | 1,13.39,1.77,2.62,16.1,93,2.85,2.94,.34,1.45,4.8,.92,3.22,1195 28 | 1,13.3,1.72,2.14,17,94,2.4,2.19,.27,1.35,3.95,1.02,2.77,1285 29 | 1,13.87,1.9,2.8,19.4,107,2.95,2.97,.37,1.76,4.5,1.25,3.4,915 30 | 1,14.02,1.68,2.21,16,96,2.65,2.33,.26,1.98,4.7,1.04,3.59,1035 31 | 1,13.73,1.5,2.7,22.5,101,3,3.25,.29,2.38,5.7,1.19,2.71,1285 32 | 1,13.58,1.66,2.36,19.1,106,2.86,3.19,.22,1.95,6.9,1.09,2.88,1515 33 | 1,13.68,1.83,2.36,17.2,104,2.42,2.69,.42,1.97,3.84,1.23,2.87,990 34 | 1,13.76,1.53,2.7,19.5,132,2.95,2.74,.5,1.35,5.4,1.25,3,1235 35 | 1,13.51,1.8,2.65,19,110,2.35,2.53,.29,1.54,4.2,1.1,2.87,1095 36 | 1,13.48,1.81,2.41,20.5,100,2.7,2.98,.26,1.86,5.1,1.04,3.47,920 37 | 1,13.28,1.64,2.84,15.5,110,2.6,2.68,.34,1.36,4.6,1.09,2.78,880 38 | 1,13.05,1.65,2.55,18,98,2.45,2.43,.29,1.44,4.25,1.12,2.51,1105 39 | 1,13.07,1.5,2.1,15.5,98,2.4,2.64,.28,1.37,3.7,1.18,2.69,1020 40 | 1,14.22,3.99,2.51,13.2,128,3,3.04,.2,2.08,5.1,.89,3.53,760 41 | 1,13.56,1.71,2.31,16.2,117,3.15,3.29,.34,2.34,6.13,.95,3.38,795 42 | 1,13.41,3.84,2.12,18.8,90,2.45,2.68,.27,1.48,4.28,.91,3,1035 43 | 1,13.88,1.89,2.59,15,101,3.25,3.56,.17,1.7,5.43,.88,3.56,1095 44 | 1,13.24,3.98,2.29,17.5,103,2.64,2.63,.32,1.66,4.36,.82,3,680 45 | 1,13.05,1.77,2.1,17,107,3,3,.28,2.03,5.04,.88,3.35,885 46 | 1,14.21,4.04,2.44,18.9,111,2.85,2.65,.3,1.25,5.24,.87,3.33,1080 47 | 1,14.38,3.59,2.28,16,102,3.25,3.17,.27,2.19,4.9,1.04,3.44,1065 48 | 1,13.9,1.68,2.12,16,101,3.1,3.39,.21,2.14,6.1,.91,3.33,985 49 | 1,14.1,2.02,2.4,18.8,103,2.75,2.92,.32,2.38,6.2,1.07,2.75,1060 50 | 
1,13.94,1.73,2.27,17.4,108,2.88,3.54,.32,2.08,8.90,1.12,3.1,1260 51 | 1,13.05,1.73,2.04,12.4,92,2.72,3.27,.17,2.91,7.2,1.12,2.91,1150 52 | 1,13.83,1.65,2.6,17.2,94,2.45,2.99,.22,2.29,5.6,1.24,3.37,1265 53 | 1,13.82,1.75,2.42,14,111,3.88,3.74,.32,1.87,7.05,1.01,3.26,1190 54 | 1,13.77,1.9,2.68,17.1,115,3,2.79,.39,1.68,6.3,1.13,2.93,1375 55 | 1,13.74,1.67,2.25,16.4,118,2.6,2.9,.21,1.62,5.85,.92,3.2,1060 56 | 1,13.56,1.73,2.46,20.5,116,2.96,2.78,.2,2.45,6.25,.98,3.03,1120 57 | 1,14.22,1.7,2.3,16.3,118,3.2,3,.26,2.03,6.38,.94,3.31,970 58 | 1,13.29,1.97,2.68,16.8,102,3,3.23,.31,1.66,6,1.07,2.84,1270 59 | 1,13.72,1.43,2.5,16.7,108,3.4,3.67,.19,2.04,6.8,.89,2.87,1285 60 | 2,12.37,.94,1.36,10.6,88,1.98,.57,.28,.42,1.95,1.05,1.82,520 61 | 2,12.33,1.1,2.28,16,101,2.05,1.09,.63,.41,3.27,1.25,1.67,680 62 | 2,12.64,1.36,2.02,16.8,100,2.02,1.41,.53,.62,5.75,.98,1.59,450 63 | 2,13.67,1.25,1.92,18,94,2.1,1.79,.32,.73,3.8,1.23,2.46,630 64 | 2,12.37,1.13,2.16,19,87,3.5,3.1,.19,1.87,4.45,1.22,2.87,420 65 | 2,12.17,1.45,2.53,19,104,1.89,1.75,.45,1.03,2.95,1.45,2.23,355 66 | 2,12.37,1.21,2.56,18.1,98,2.42,2.65,.37,2.08,4.6,1.19,2.3,678 67 | 2,13.11,1.01,1.7,15,78,2.98,3.18,.26,2.28,5.3,1.12,3.18,502 68 | 2,12.37,1.17,1.92,19.6,78,2.11,2,.27,1.04,4.68,1.12,3.48,510 69 | 2,13.34,.94,2.36,17,110,2.53,1.3,.55,.42,3.17,1.02,1.93,750 70 | 2,12.21,1.19,1.75,16.8,151,1.85,1.28,.14,2.5,2.85,1.28,3.07,718 71 | 2,12.29,1.61,2.21,20.4,103,1.1,1.02,.37,1.46,3.05,.906,1.82,870 72 | 2,13.86,1.51,2.67,25,86,2.95,2.86,.21,1.87,3.38,1.36,3.16,410 73 | 2,13.49,1.66,2.24,24,87,1.88,1.84,.27,1.03,3.74,.98,2.78,472 74 | 2,12.99,1.67,2.6,30,139,3.3,2.89,.21,1.96,3.35,1.31,3.5,985 75 | 2,11.96,1.09,2.3,21,101,3.38,2.14,.13,1.65,3.21,.99,3.13,886 76 | 2,11.66,1.88,1.92,16,97,1.61,1.57,.34,1.15,3.8,1.23,2.14,428 77 | 2,13.03,.9,1.71,16,86,1.95,2.03,.24,1.46,4.6,1.19,2.48,392 78 | 2,11.84,2.89,2.23,18,112,1.72,1.32,.43,.95,2.65,.96,2.52,500 79 | 2,12.33,.99,1.95,14.8,136,1.9,1.85,.35,2.76,3.4,1.06,2.31,750 80 | 2,12.7,3.87,2.4,23,101,2.83,2.55,.43,1.95,2.57,1.19,3.13,463 81 | 2,12,.92,2,19,86,2.42,2.26,.3,1.43,2.5,1.38,3.12,278 82 | 2,12.72,1.81,2.2,18.8,86,2.2,2.53,.26,1.77,3.9,1.16,3.14,714 83 | 2,12.08,1.13,2.51,24,78,2,1.58,.4,1.4,2.2,1.31,2.72,630 84 | 2,13.05,3.86,2.32,22.5,85,1.65,1.59,.61,1.62,4.8,.84,2.01,515 85 | 2,11.84,.89,2.58,18,94,2.2,2.21,.22,2.35,3.05,.79,3.08,520 86 | 2,12.67,.98,2.24,18,99,2.2,1.94,.3,1.46,2.62,1.23,3.16,450 87 | 2,12.16,1.61,2.31,22.8,90,1.78,1.69,.43,1.56,2.45,1.33,2.26,495 88 | 2,11.65,1.67,2.62,26,88,1.92,1.61,.4,1.34,2.6,1.36,3.21,562 89 | 2,11.64,2.06,2.46,21.6,84,1.95,1.69,.48,1.35,2.8,1,2.75,680 90 | 2,12.08,1.33,2.3,23.6,70,2.2,1.59,.42,1.38,1.74,1.07,3.21,625 91 | 2,12.08,1.83,2.32,18.5,81,1.6,1.5,.52,1.64,2.4,1.08,2.27,480 92 | 2,12,1.51,2.42,22,86,1.45,1.25,.5,1.63,3.6,1.05,2.65,450 93 | 2,12.69,1.53,2.26,20.7,80,1.38,1.46,.58,1.62,3.05,.96,2.06,495 94 | 2,12.29,2.83,2.22,18,88,2.45,2.25,.25,1.99,2.15,1.15,3.3,290 95 | 2,11.62,1.99,2.28,18,98,3.02,2.26,.17,1.35,3.25,1.16,2.96,345 96 | 2,12.47,1.52,2.2,19,162,2.5,2.27,.32,3.28,2.6,1.16,2.63,937 97 | 2,11.81,2.12,2.74,21.5,134,1.6,.99,.14,1.56,2.5,.95,2.26,625 98 | 2,12.29,1.41,1.98,16,85,2.55,2.5,.29,1.77,2.9,1.23,2.74,428 99 | 2,12.37,1.07,2.1,18.5,88,3.52,3.75,.24,1.95,4.5,1.04,2.77,660 100 | 2,12.29,3.17,2.21,18,88,2.85,2.99,.45,2.81,2.3,1.42,2.83,406 101 | 2,12.08,2.08,1.7,17.5,97,2.23,2.17,.26,1.4,3.3,1.27,2.96,710 102 | 2,12.6,1.34,1.9,18.5,88,1.45,1.36,.29,1.35,2.45,1.04,2.77,562 103 | 
2,12.34,2.45,2.46,21,98,2.56,2.11,.34,1.31,2.8,.8,3.38,438 104 | 2,11.82,1.72,1.88,19.5,86,2.5,1.64,.37,1.42,2.06,.94,2.44,415 105 | 2,12.51,1.73,1.98,20.5,85,2.2,1.92,.32,1.48,2.94,1.04,3.57,672 106 | 2,12.42,2.55,2.27,22,90,1.68,1.84,.66,1.42,2.7,.86,3.3,315 107 | 2,12.25,1.73,2.12,19,80,1.65,2.03,.37,1.63,3.4,1,3.17,510 108 | 2,12.72,1.75,2.28,22.5,84,1.38,1.76,.48,1.63,3.3,.88,2.42,488 109 | 2,12.22,1.29,1.94,19,92,2.36,2.04,.39,2.08,2.7,.86,3.02,312 110 | 2,11.61,1.35,2.7,20,94,2.74,2.92,.29,2.49,2.65,.96,3.26,680 111 | 2,11.46,3.74,1.82,19.5,107,3.18,2.58,.24,3.58,2.9,.75,2.81,562 112 | 2,12.52,2.43,2.17,21,88,2.55,2.27,.26,1.22,2,.9,2.78,325 113 | 2,11.76,2.68,2.92,20,103,1.75,2.03,.6,1.05,3.8,1.23,2.5,607 114 | 2,11.41,.74,2.5,21,88,2.48,2.01,.42,1.44,3.08,1.1,2.31,434 115 | 2,12.08,1.39,2.5,22.5,84,2.56,2.29,.43,1.04,2.9,.93,3.19,385 116 | 2,11.03,1.51,2.2,21.5,85,2.46,2.17,.52,2.01,1.9,1.71,2.87,407 117 | 2,11.82,1.47,1.99,20.8,86,1.98,1.6,.3,1.53,1.95,.95,3.33,495 118 | 2,12.42,1.61,2.19,22.5,108,2,2.09,.34,1.61,2.06,1.06,2.96,345 119 | 2,12.77,3.43,1.98,16,80,1.63,1.25,.43,.83,3.4,.7,2.12,372 120 | 2,12,3.43,2,19,87,2,1.64,.37,1.87,1.28,.93,3.05,564 121 | 2,11.45,2.4,2.42,20,96,2.9,2.79,.32,1.83,3.25,.8,3.39,625 122 | 2,11.56,2.05,3.23,28.5,119,3.18,5.08,.47,1.87,6,.93,3.69,465 123 | 2,12.42,4.43,2.73,26.5,102,2.2,2.13,.43,1.71,2.08,.92,3.12,365 124 | 2,13.05,5.8,2.13,21.5,86,2.62,2.65,.3,2.01,2.6,.73,3.1,380 125 | 2,11.87,4.31,2.39,21,82,2.86,3.03,.21,2.91,2.8,.75,3.64,380 126 | 2,12.07,2.16,2.17,21,85,2.6,2.65,.37,1.35,2.76,.86,3.28,378 127 | 2,12.43,1.53,2.29,21.5,86,2.74,3.15,.39,1.77,3.94,.69,2.84,352 128 | 2,11.79,2.13,2.78,28.5,92,2.13,2.24,.58,1.76,3,.97,2.44,466 129 | 2,12.37,1.63,2.3,24.5,88,2.22,2.45,.4,1.9,2.12,.89,2.78,342 130 | 2,12.04,4.3,2.38,22,80,2.1,1.75,.42,1.35,2.6,.79,2.57,580 131 | 3,12.86,1.35,2.32,18,122,1.51,1.25,.21,.94,4.1,.76,1.29,630 132 | 3,12.88,2.99,2.4,20,104,1.3,1.22,.24,.83,5.4,.74,1.42,530 133 | 3,12.81,2.31,2.4,24,98,1.15,1.09,.27,.83,5.7,.66,1.36,560 134 | 3,12.7,3.55,2.36,21.5,106,1.7,1.2,.17,.84,5,.78,1.29,600 135 | 3,12.51,1.24,2.25,17.5,85,2,.58,.6,1.25,5.45,.75,1.51,650 136 | 3,12.6,2.46,2.2,18.5,94,1.62,.66,.63,.94,7.1,.73,1.58,695 137 | 3,12.25,4.72,2.54,21,89,1.38,.47,.53,.8,3.85,.75,1.27,720 138 | 3,12.53,5.51,2.64,25,96,1.79,.6,.63,1.1,5,.82,1.69,515 139 | 3,13.49,3.59,2.19,19.5,88,1.62,.48,.58,.88,5.7,.81,1.82,580 140 | 3,12.84,2.96,2.61,24,101,2.32,.6,.53,.81,4.92,.89,2.15,590 141 | 3,12.93,2.81,2.7,21,96,1.54,.5,.53,.75,4.6,.77,2.31,600 142 | 3,13.36,2.56,2.35,20,89,1.4,.5,.37,.64,5.6,.7,2.47,780 143 | 3,13.52,3.17,2.72,23.5,97,1.55,.52,.5,.55,4.35,.89,2.06,520 144 | 3,13.62,4.95,2.35,20,92,2,.8,.47,1.02,4.4,.91,2.05,550 145 | 3,12.25,3.88,2.2,18.5,112,1.38,.78,.29,1.14,8.21,.65,2,855 146 | 3,13.16,3.57,2.15,21,102,1.5,.55,.43,1.3,4,.6,1.68,830 147 | 3,13.88,5.04,2.23,20,80,.98,.34,.4,.68,4.9,.58,1.33,415 148 | 3,12.87,4.61,2.48,21.5,86,1.7,.65,.47,.86,7.65,.54,1.86,625 149 | 3,13.32,3.24,2.38,21.5,92,1.93,.76,.45,1.25,8.42,.55,1.62,650 150 | 3,13.08,3.9,2.36,21.5,113,1.41,1.39,.34,1.14,9.40,.57,1.33,550 151 | 3,13.5,3.12,2.62,24,123,1.4,1.57,.22,1.25,8.60,.59,1.3,500 152 | 3,12.79,2.67,2.48,22,112,1.48,1.36,.24,1.26,10.8,.48,1.47,480 153 | 3,13.11,1.9,2.75,25.5,116,2.2,1.28,.26,1.56,7.1,.61,1.33,425 154 | 3,13.23,3.3,2.28,18.5,98,1.8,.83,.61,1.87,10.52,.56,1.51,675 155 | 3,12.58,1.29,2.1,20,103,1.48,.58,.53,1.4,7.6,.58,1.55,640 156 | 3,13.17,5.19,2.32,22,93,1.74,.63,.61,1.55,7.9,.6,1.48,725 157 | 
3,13.84,4.12,2.38,19.5,89,1.8,.83,.48,1.56,9.01,.57,1.64,480
158 | 3,12.45,3.03,2.64,27,97,1.9,.58,.63,1.14,7.5,.67,1.73,880
159 | 3,14.34,1.68,2.7,25,98,2.8,1.31,.53,2.7,13,.57,1.96,660
160 | 3,13.48,1.67,2.64,22.5,89,2.6,1.1,.52,2.29,11.75,.57,1.78,620
161 | 3,12.36,3.83,2.38,21,88,2.3,.92,.5,1.04,7.65,.56,1.58,520
162 | 3,13.69,3.26,2.54,20,107,1.83,.56,.5,.8,5.88,.96,1.82,680
163 | 3,12.85,3.27,2.58,22,106,1.65,.6,.6,.96,5.58,.87,2.11,570
164 | 3,12.96,3.45,2.35,18.5,106,1.39,.7,.4,.94,5.28,.68,1.75,675
165 | 3,13.78,2.76,2.3,22,90,1.35,.68,.41,1.03,9.58,.7,1.68,615
166 | 3,13.73,4.36,2.26,22.5,88,1.28,.47,.52,1.15,6.62,.78,1.75,520
167 | 3,13.45,3.7,2.6,23,111,1.7,.92,.43,1.46,10.68,.85,1.56,695
168 | 3,12.82,3.37,2.3,19.5,88,1.48,.66,.4,.97,10.26,.72,1.75,685
169 | 3,13.58,2.58,2.69,24.5,105,1.55,.84,.39,1.54,8.66,.74,1.8,750
170 | 3,13.4,4.6,2.86,25,112,1.98,.96,.27,1.11,8.5,.67,1.92,630
171 | 3,12.2,3.03,2.32,19,96,1.25,.49,.4,.73,5.5,.66,1.83,510
172 | 3,12.77,2.39,2.28,19.5,86,1.39,.51,.48,.64,9.899999,.57,1.63,470
173 | 3,14.16,2.51,2.48,20,91,1.68,.7,.44,1.24,9.7,.62,1.71,660
174 | 3,13.71,5.65,2.45,20.5,95,1.68,.61,.52,1.06,7.7,.64,1.74,740
175 | 3,13.4,3.91,2.48,23,102,1.8,.75,.43,1.41,7.3,.7,1.56,750
176 | 3,13.27,4.28,2.26,20,120,1.59,.69,.43,1.35,10.2,.59,1.56,835
177 | 3,13.17,2.59,2.37,20,120,1.65,.68,.53,1.46,9.3,.6,1.62,840
178 | 3,14.13,4.1,2.74,24.5,96,2.05,.76,.56,1.35,9.2,.61,1.6,560
--------------------------------------------------------------------------------
/chapter05-cluster/glass.csv:
--------------------------------------------------------------------------------
1 | id,ri,na,mg,al,si,k,ca,ba,fe,glass_type
2 | 1,1.52101,13.64,4.49,1.1,71.78,0.06,8.75,0.0,0.0,1
3 | 2,1.5176100000000001,13.89,3.6,1.36,72.73,0.48,7.83,0.0,0.0,1
4 | 3,1.5161799999999999,13.53,3.55,1.54,72.99,0.39,7.78,0.0,0.0,1
5 | 4,1.51766,13.21,3.69,1.29,72.61,0.57,8.22,0.0,0.0,1
6 | 5,1.51742,13.27,3.62,1.24,73.08,0.55,8.07,0.0,0.0,1
7 | 6,1.51596,12.79,3.61,1.62,72.97,0.64,8.07,0.0,0.26,1
8 | 7,1.5174299999999998,13.3,3.6,1.14,73.09,0.58,8.17,0.0,0.0,1
9 | 8,1.51756,13.15,3.61,1.05,73.24,0.57,8.24,0.0,0.0,1
10 | 9,1.51918,14.04,3.58,1.37,72.08,0.56,8.3,0.0,0.0,1
11 | 10,1.51755,13.0,3.6,1.36,72.99,0.57,8.4,0.0,0.11,1
12 | 11,1.5157100000000001,12.72,3.46,1.56,73.2,0.67,8.09,0.0,0.24,1
13 | 12,1.51763,12.8,3.66,1.27,73.01,0.6,8.56,0.0,0.0,1
14 | 13,1.51589,12.88,3.43,1.4,73.28,0.69,8.05,0.0,0.24,1
15 | 14,1.51748,12.86,3.56,1.27,73.21,0.54,8.38,0.0,0.17,1
16 | 15,1.51763,12.61,3.59,1.31,73.29,0.58,8.5,0.0,0.0,1
17 | 16,1.5176100000000001,12.81,3.54,1.23,73.24,0.58,8.39,0.0,0.0,1
18 | 17,1.5178399999999999,12.68,3.67,1.16,73.11,0.61,8.7,0.0,0.0,1
19 | 18,1.52196,14.36,3.85,0.89,71.36,0.15,9.15,0.0,0.0,1
20 | 19,1.51911,13.9,3.73,1.18,72.12,0.06,8.89,0.0,0.0,1
21 | 20,1.51735,13.02,3.54,1.69,72.73,0.54,8.44,0.0,0.07,1
22 | 21,1.5175,12.82,3.55,1.49,72.75,0.54,8.52,0.0,0.19,1
23 | 22,1.51966,14.77,3.75,0.29,72.02,0.03,9.0,0.0,0.0,1
24 | 23,1.51736,12.78,3.62,1.29,72.79,0.59,8.7,0.0,0.0,1
25 | 24,1.5175100000000001,12.81,3.57,1.35,73.02,0.62,8.59,0.0,0.0,1
26 | 25,1.5172,13.38,3.5,1.15,72.85,0.5,8.43,0.0,0.0,1
27 | 26,1.5176399999999999,12.98,3.54,1.21,73.0,0.65,8.53,0.0,0.0,1
28 | 27,1.51793,13.21,3.48,1.41,72.64,0.59,8.43,0.0,0.0,1
29 | 28,1.51721,12.87,3.48,1.33,73.04,0.56,8.43,0.0,0.0,1
30 | 29,1.51768,12.56,3.52,1.43,73.15,0.57,8.54,0.0,0.0,1
31 | 30,1.5178399999999999,13.08,3.49,1.28,72.86,0.6,8.49,0.0,0.0,1
32 | 31,1.51768,12.65,3.56,1.3,73.08,0.61,8.69,0.0,0.14,1
33 | 32,1.51747,12.84,3.5,1.14,73.27,0.56,8.55,0.0,0.0,1
34 | 33,1.51775,12.85,3.48,1.23,72.97,0.61,8.56,0.09,0.22,1
35 | 34,1.5175299999999998,12.57,3.47,1.38,73.39,0.6,8.55,0.0,0.06,1
36 | 35,1.51783,12.69,3.54,1.34,72.95,0.57,8.75,0.0,0.0,1
37 | 36,1.51567,13.29,3.45,1.21,72.74,0.56,8.57,0.0,0.0,1
38 | 37,1.51909,13.89,3.53,1.32,71.81,0.51,8.78,0.11,0.0,1
39 | 38,1.51797,12.74,3.48,1.35,72.96,0.64,8.68,0.0,0.0,1
40 | 39,1.52213,14.21,3.82,0.47,71.77,0.11,9.57,0.0,0.0,1
41 | 40,1.52213,14.21,3.82,0.47,71.77,0.11,9.57,0.0,0.0,1
42 | 41,1.51793,12.79,3.5,1.12,73.03,0.64,8.77,0.0,0.0,1
43 | 42,1.51755,12.71,3.42,1.2,73.2,0.59,8.64,0.0,0.0,1
44 | 43,1.51779,13.21,3.39,1.33,72.76,0.59,8.59,0.0,0.0,1
45 | 44,1.5221,13.73,3.84,0.72,71.76,0.17,9.74,0.0,0.0,1
46 | 45,1.51786,12.73,3.43,1.19,72.95,0.62,8.76,0.0,0.3,1
47 | 46,1.519,13.49,3.48,1.35,71.95,0.55,9.0,0.0,0.0,1
48 | 47,1.5186899999999999,13.19,3.37,1.18,72.72,0.57,8.83,0.0,0.16,1
49 | 48,1.52667,13.99,3.7,0.71,71.57,0.02,9.82,0.0,0.1,1
50 | 49,1.52223,13.21,3.77,0.79,71.99,0.13,10.02,0.0,0.0,1
51 | 50,1.51898,13.58,3.35,1.23,72.08,0.59,8.91,0.0,0.0,1
52 | 51,1.5232,13.72,3.72,0.51,71.75,0.09,10.06,0.0,0.16,1
53 | 52,1.51926,13.2,3.33,1.28,72.36,0.6,9.14,0.0,0.11,1
54 | 53,1.5180799999999999,13.43,2.87,1.19,72.84,0.55,9.03,0.0,0.0,1
55 | 54,1.51837,13.14,2.84,1.28,72.85,0.55,9.07,0.0,0.0,1
56 | 55,1.51778,13.21,2.81,1.29,72.98,0.51,9.02,0.0,0.09,1
57 | 56,1.51769,12.45,2.71,1.29,73.7,0.56,9.06,0.0,0.24,1
58 | 57,1.51215,12.99,3.47,1.12,72.98,0.62,8.35,0.0,0.31,1
59 | 58,1.51824,12.87,3.48,1.29,72.95,0.6,8.43,0.0,0.0,1
60 | 59,1.51754,13.48,3.74,1.17,72.99,0.59,8.03,0.0,0.0,1
61 | 60,1.51754,13.39,3.66,1.19,72.79,0.57,8.27,0.0,0.11,1
62 | 61,1.51905,13.6,3.62,1.11,72.64,0.14,8.76,0.0,0.0,1
63 | 62,1.51977,13.81,3.58,1.32,71.72,0.12,8.67,0.69,0.0,1
64 | 63,1.5217200000000002,13.51,3.86,0.88,71.79,0.23,9.54,0.0,0.11,1
65 | 64,1.52227,14.17,3.81,0.78,71.35,0.0,9.69,0.0,0.0,1
66 | 65,1.5217200000000002,13.48,3.74,0.9,72.01,0.18,9.61,0.0,0.07,1
67 | 66,1.52099,13.69,3.59,1.12,71.96,0.09,9.4,0.0,0.0,1
68 | 67,1.52152,13.05,3.65,0.87,72.22,0.19,9.85,0.0,0.17,1
69 | 68,1.52152,13.05,3.65,0.87,72.32,0.19,9.85,0.0,0.17,1
70 | 69,1.52152,13.12,3.58,0.9,72.2,0.23,9.82,0.0,0.16,1
71 | 70,1.523,13.31,3.58,0.82,71.99,0.12,10.17,0.0,0.03,1
72 | 71,1.5157399999999999,14.86,3.67,1.74,71.87,0.16,7.36,0.0,0.12,2
73 | 72,1.5184799999999998,13.64,3.87,1.27,71.96,0.54,8.32,0.0,0.32,2
74 | 73,1.51593,13.09,3.59,1.52,73.1,0.67,7.83,0.0,0.0,2
75 | 74,1.51631,13.34,3.57,1.57,72.87,0.61,7.89,0.0,0.0,2
76 | 75,1.51596,13.02,3.56,1.54,73.11,0.72,7.9,0.0,0.0,2
77 | 76,1.5159,13.02,3.58,1.51,73.12,0.69,7.96,0.0,0.0,2
78 | 77,1.51645,13.44,3.61,1.54,72.39,0.66,8.03,0.0,0.0,2
79 | 78,1.51627,13.0,3.58,1.54,72.83,0.61,8.04,0.0,0.0,2
80 | 79,1.51613,13.92,3.52,1.25,72.88,0.37,7.94,0.0,0.14,2
81 | 80,1.5159,12.82,3.52,1.9,72.86,0.69,7.97,0.0,0.0,2
82 | 81,1.5159200000000002,12.86,3.52,2.12,72.66,0.69,7.97,0.0,0.0,2
83 | 82,1.51593,13.25,3.45,1.43,73.17,0.61,7.86,0.0,0.0,2
84 | 83,1.5164600000000001,13.41,3.55,1.25,72.81,0.68,8.1,0.0,0.0,2
85 | 84,1.51594,13.09,3.52,1.55,72.87,0.68,8.05,0.0,0.09,2
86 | 85,1.51409,14.25,3.09,2.08,72.28,1.1,7.08,0.0,0.0,2
87 | 86,1.51625,13.36,3.58,1.49,72.72,0.45,8.21,0.0,0.0,2
88 | 87,1.51569,13.24,3.49,1.47,73.25,0.38,8.03,0.0,0.0,2
89 | 88,1.51645,13.4,3.49,1.52,72.65,0.67,8.08,0.0,0.1,2
90 | 89,1.5161799999999999,13.01,3.5,1.48,72.89,0.6,8.12,0.0,0.0,2
91 | 90,1.5164,12.55,3.48,1.87,73.23,0.63,8.08,0.0,0.09,2
92 | 91,1.51841,12.93,3.74,1.11,72.28,0.64,8.96,0.0,0.22,2
93 | 92,1.51605,12.9,3.44,1.45,73.06,0.44,8.27,0.0,0.0,2
94 | 93,1.51588,13.12,3.41,1.58,73.26,0.07,8.39,0.0,0.19,2
95 | 94,1.5159,13.24,3.34,1.47,73.1,0.39,8.22,0.0,0.0,2
96 | 95,1.51629,12.71,3.33,1.49,73.28,0.67,8.24,0.0,0.0,2
97 | 96,1.5186,13.36,3.43,1.43,72.26,0.51,8.6,0.0,0.0,2
98 | 97,1.51841,13.02,3.62,1.06,72.34,0.64,9.13,0.0,0.15,2
99 | 98,1.5174299999999998,12.2,3.25,1.16,73.55,0.62,8.9,0.0,0.24,2
100 | 99,1.51689,12.67,2.88,1.71,73.21,0.73,8.54,0.0,0.0,2
101 | 100,1.51811,12.96,2.96,1.43,72.92,0.6,8.79,0.14,0.0,2
102 | 101,1.51655,12.75,2.85,1.44,73.27,0.57,8.79,0.11,0.22,2
103 | 102,1.5173,12.35,2.72,1.63,72.87,0.7,9.23,0.0,0.0,2
104 | 103,1.5182,12.62,2.76,0.83,73.81,0.35,9.42,0.0,0.2,2
105 | 104,1.52725,13.8,3.15,0.66,70.57,0.08,11.64,0.0,0.0,2
106 | 105,1.5241,13.83,2.9,1.17,71.15,0.08,10.79,0.0,0.0,2
107 | 106,1.52475,11.45,0.0,1.88,72.19,0.81,13.24,0.0,0.34,2
108 | 107,1.53125,10.73,0.0,2.1,69.81,0.58,13.3,3.15,0.28,2
109 | 108,1.53393,12.3,0.0,1.0,70.16,0.12,16.19,0.0,0.24,2
110 | 109,1.5222200000000001,14.43,0.0,1.0,72.67,0.1,11.52,0.0,0.08,2
111 | 110,1.5181799999999999,13.72,0.0,0.56,74.45,0.0,10.99,0.0,0.0,2
112 | 111,1.52664,11.23,0.0,0.77,73.21,0.0,14.68,0.0,0.0,2
113 | 112,1.52739,11.02,0.0,0.75,73.08,0.0,14.96,0.0,0.0,2
114 | 113,1.52777,12.64,0.0,0.67,72.02,0.06,14.4,0.0,0.0,2
115 | 114,1.51892,13.46,3.83,1.26,72.55,0.57,8.21,0.0,0.14,2
116 | 115,1.51847,13.1,3.97,1.19,72.44,0.6,8.43,0.0,0.0,2
117 | 116,1.5184600000000001,13.41,3.89,1.33,72.38,0.51,8.28,0.0,0.0,2
118 | 117,1.51829,13.24,3.9,1.41,72.33,0.55,8.31,0.0,0.1,2
119 | 118,1.51708,13.72,3.68,1.81,72.06,0.64,7.88,0.0,0.0,2
120 | 119,1.51673,13.3,3.64,1.53,72.53,0.65,8.03,0.0,0.29,2
121 | 120,1.51652,13.56,3.57,1.47,72.45,0.64,7.96,0.0,0.0,2
122 | 121,1.51844,13.25,3.76,1.32,72.4,0.58,8.42,0.0,0.0,2
123 | 122,1.51663,12.93,3.54,1.62,72.96,0.64,8.03,0.0,0.21,2
124 | 123,1.5168700000000002,13.23,3.54,1.48,72.84,0.56,8.1,0.0,0.0,2
125 | 124,1.5170700000000001,13.48,3.48,1.71,72.52,0.62,7.99,0.0,0.0,2
126 | 125,1.52177,13.2,3.68,1.15,72.75,0.54,8.52,0.0,0.0,2
127 | 126,1.51872,12.93,3.66,1.56,72.51,0.58,8.55,0.0,0.12,2
128 | 127,1.51667,12.94,3.61,1.26,72.75,0.56,8.6,0.0,0.0,2
129 | 128,1.52081,13.78,2.28,1.43,71.99,0.49,9.85,0.0,0.17,2
130 | 129,1.52068,13.55,2.09,1.67,72.18,0.53,9.57,0.27,0.17,2
131 | 130,1.5202,13.98,1.35,1.63,71.76,0.39,10.56,0.0,0.18,2
132 | 131,1.52177,13.75,1.01,1.36,72.19,0.33,11.14,0.0,0.0,2
133 | 132,1.52614,13.7,0.0,1.36,71.24,0.19,13.44,0.0,0.1,2
134 | 133,1.51813,13.43,3.98,1.18,72.49,0.58,8.15,0.0,0.0,2
135 | 134,1.518,13.71,3.93,1.54,71.81,0.54,8.21,0.0,0.15,2
136 | 135,1.51811,13.33,3.85,1.25,72.78,0.52,8.12,0.0,0.0,2
137 | 136,1.51789,13.19,3.9,1.3,72.33,0.55,8.44,0.0,0.28,2
138 | 137,1.51806,13.0,3.8,1.08,73.07,0.56,8.38,0.0,0.12,2
139 | 138,1.51711,12.89,3.62,1.57,72.96,0.61,8.11,0.0,0.0,2
140 | 139,1.51674,12.79,3.52,1.54,73.36,0.66,7.9,0.0,0.0,2
141 | 140,1.51674,12.87,3.56,1.64,73.14,0.65,7.99,0.0,0.0,2
142 | 141,1.5169,13.33,3.54,1.61,72.54,0.68,8.11,0.0,0.0,2
143 | 142,1.51851,13.2,3.63,1.07,72.83,0.57,8.41,0.09,0.17,2
144 | 143,1.51662,12.85,3.51,1.44,73.01,0.68,8.23,0.06,0.25,2
145 | 144,1.51709,13.0,3.47,1.79,72.72,0.66,8.18,0.0,0.0,2
146 | 145,1.5166,12.99,3.18,1.23,72.97,0.58,8.81,0.0,0.24,2
147 | 146,1.51839,12.85,3.67,1.24,72.57,0.62,8.68,0.0,0.35,2
148 | 147,1.51769,13.65,3.66,1.11,72.77,0.11,8.6,0.0,0.0,3
149 | 148,1.5161,13.33,3.53,1.34,72.67,0.56,8.33,0.0,0.0,3
150 | 149,1.5167,13.24,3.57,1.38,72.7,0.56,8.44,0.0,0.1,3
151 | 150,1.51643,12.16,3.52,1.35,72.89,0.57,8.53,0.0,0.0,3
152 | 151,1.51665,13.14,3.45,1.76,72.48,0.6,8.38,0.0,0.17,3
153 | 152,1.5212700000000001,14.32,3.9,0.83,71.5,0.0,9.49,0.0,0.0,3
154 | 153,1.51779,13.64,3.65,0.65,73.0,0.06,8.93,0.0,0.0,3
155 | 154,1.5161,13.42,3.4,1.22,72.69,0.59,8.32,0.0,0.0,3
156 | 155,1.51694,12.86,3.58,1.31,72.61,0.61,8.79,0.0,0.0,3
157 | 156,1.5164600000000001,13.04,3.4,1.26,73.01,0.52,8.58,0.0,0.0,3
158 | 157,1.51655,13.41,3.39,1.28,72.64,0.52,8.65,0.0,0.0,3
159 | 158,1.52121,14.03,3.76,0.58,71.79,0.11,9.65,0.0,0.0,3
160 | 159,1.51776,13.53,3.41,1.52,72.04,0.58,8.79,0.0,0.0,3
161 | 160,1.51796,13.5,3.36,1.63,71.94,0.57,8.81,0.0,0.09,3
162 | 161,1.5183200000000001,13.33,3.34,1.54,72.14,0.56,8.99,0.0,0.0,3
163 | 162,1.51934,13.64,3.54,0.75,72.65,0.16,8.89,0.15,0.24,3
164 | 163,1.52211,14.19,3.78,0.91,71.36,0.23,9.14,0.0,0.37,3
165 | 164,1.51514,14.01,2.68,3.5,69.89,1.68,5.87,2.2,0.0,5
166 | 165,1.51915,12.73,1.85,1.86,72.69,0.6,10.09,0.0,0.0,5
167 | 166,1.5217100000000001,11.56,1.88,1.56,72.86,0.47,11.41,0.0,0.0,5
168 | 167,1.5215100000000001,11.03,1.71,1.56,73.44,0.58,11.62,0.0,0.0,5
169 | 168,1.51969,12.64,0.0,1.65,73.75,0.38,11.53,0.0,0.0,5
170 | 169,1.5166600000000001,12.86,0.0,1.83,73.88,0.97,10.17,0.0,0.0,5
171 | 170,1.51994,13.27,0.0,1.76,73.03,0.47,11.32,0.0,0.0,5
172 | 171,1.52369,13.44,0.0,1.58,72.22,0.32,12.24,0.0,0.0,5
173 | 172,1.51316,13.02,0.0,3.04,70.48,6.21,6.96,0.0,0.0,5
174 | 173,1.51321,13.0,0.0,3.02,70.7,6.21,6.93,0.0,0.0,5
175 | 174,1.52043,13.38,0.0,1.4,72.25,0.33,12.5,0.0,0.0,5
176 | 175,1.5205799999999998,12.85,1.61,2.17,72.18,0.76,9.7,0.24,0.51,5
177 | 176,1.52119,12.97,0.33,1.51,73.39,0.13,11.27,0.0,0.28,5
178 | 177,1.51905,14.0,2.39,1.56,72.37,0.0,9.57,0.0,0.0,6
179 | 178,1.51937,13.79,2.41,1.19,72.76,0.0,9.77,0.0,0.0,6
180 | 179,1.51829,14.46,2.24,1.62,72.38,0.0,9.26,0.0,0.0,6
181 | 180,1.51852,14.09,2.19,1.66,72.67,0.0,9.32,0.0,0.0,6
182 | 181,1.51299,14.4,1.74,1.54,74.55,0.0,7.59,0.0,0.0,6
183 | 182,1.51888,14.99,0.78,1.74,72.5,0.0,9.95,0.0,0.0,6
184 | 183,1.51916,14.15,0.0,2.09,72.74,0.0,10.88,0.0,0.0,6
185 | 184,1.51969,14.56,0.0,0.56,73.48,0.0,11.22,0.0,0.0,6
186 | 185,1.51115,17.38,0.0,0.34,75.41,0.0,6.65,0.0,0.0,6
187 | 186,1.51131,13.69,3.2,1.81,72.81,1.76,5.43,1.19,0.0,7
188 | 187,1.5183799999999998,14.32,3.26,2.22,71.25,1.46,5.79,1.63,0.0,7
189 | 188,1.52315,13.44,3.34,1.23,72.38,0.6,8.83,0.0,0.0,7
190 | 189,1.52247,14.86,2.2,2.06,70.26,0.76,9.76,0.0,0.0,7
191 | 190,1.52365,15.79,1.83,1.31,70.43,0.31,8.61,1.68,0.0,7
192 | 191,1.51613,13.88,1.78,1.79,73.1,0.0,8.67,0.76,0.0,7
193 | 192,1.5160200000000001,14.85,0.0,2.38,73.28,0.0,8.76,0.64,0.09,7
194 | 193,1.51623,14.2,0.0,2.79,73.46,0.04,9.04,0.4,0.09,7
195 | 194,1.51719,14.75,0.0,2.0,73.02,0.0,8.53,1.59,0.08,7
196 | 195,1.51683,14.56,0.0,1.98,73.29,0.0,8.52,1.57,0.07,7
197 | 196,1.51545,14.14,0.0,2.68,73.39,0.08,9.07,0.61,0.05,7
198 | 197,1.51556,13.87,0.0,2.54,73.23,0.14,9.41,0.81,0.01,7
199 | 198,1.5172700000000001,14.7,0.0,2.34,73.28,0.0,8.95,0.66,0.0,7
200 | 199,1.51531,14.38,0.0,2.66,73.1,0.04,9.08,0.64,0.0,7
201 | 200,1.51609,15.01,0.0,2.51,73.05,0.05,8.83,0.53,0.0,7
202 | 201,1.51508,15.15,0.0,2.25,73.5,0.0,8.34,0.63,0.0,7
203 | 202,1.51653,11.95,0.0,1.19,75.18,2.7,8.93,0.0,0.0,7
204 | 203,1.51514,14.85,0.0,2.42,73.72,0.0,8.39,0.56,0.0,7
205 | 204,1.5165799999999998,14.8,0.0,1.99,73.11,0.0,8.28,1.71,0.0,7
206 | 205,1.51617,14.95,0.0,2.27,73.3,0.0,8.71,0.67,0.0,7
207 | 206,1.51732,14.95,0.0,1.8,72.99,0.0,8.61,1.55,0.0,7
208 | 207,1.51645,14.94,0.0,1.87,73.11,0.0,8.67,1.38,0.0,7
209 | 208,1.51831,14.39,0.0,1.82,72.86,1.41,6.47,2.88,0.0,7
210 | 209,1.5164,14.37,0.0,2.74,72.85,0.0,9.45,0.54,0.0,7
211 | 210,1.51623,14.14,0.0,2.88,72.61,0.08,9.18,1.06,0.0,7
212 | 211,1.51685,14.92,0.0,1.99,73.06,0.0,8.4,1.59,0.0,7
213 | 212,1.52065,14.36,0.0,2.02,73.42,0.0,8.44,1.64,0.0,7
214 | 213,1.51651,14.38,0.0,1.94,73.61,0.0,8.48,1.57,0.0,7
215 | 214,1.51711,14.23,0.0,2.08,73.36,0.0,8.62,1.67,0.0,7
216 |
--------------------------------------------------------------------------------
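The glass.csv file above ships with the chapter05-cluster scripts: each row has an id, nine numeric features (the refractive index ri plus the oxide contents na through fe), and a glass_type label covering six classes (1, 2, 3, 5, 6, 7; no type-4 samples appear). For readers who want to try the data outside the chapter scripts, the following minimal Python sketch loads the CSV with pandas and clusters it with scikit-learn's KMeans. The file path, the scaling step, and the choice of k=6 are assumptions made for illustration, not code taken from the repository.

import pandas as pd
from sklearn.cluster import KMeans
from sklearn.preprocessing import StandardScaler

# Load the dataset; the path assumes the repository layout shown above.
df = pd.read_csv("chapter05-cluster/glass.csv")

# Drop the row id and the ground-truth label, keeping the nine features.
X = df.drop(columns=["id", "glass_type"])

# The features sit on very different scales (si is around 70-75 while
# fe stays below 0.6), so standardize before using Euclidean distances.
X_scaled = StandardScaler().fit_transform(X)

# k = 6 mirrors the six glass types present in the file (an assumption,
# not necessarily the k used by the chapter scripts).
kmeans = KMeans(n_clusters=6, n_init=10, random_state=42)
labels = kmeans.fit_predict(X_scaled)

# Cross-tabulate cluster assignments against the true glass_type labels.
print(pd.crosstab(labels, df["glass_type"]))

Because k-means cluster indices are arbitrary, a cross-tabulation against glass_type (rather than a direct accuracy score) is the natural way to judge how well the clusters line up with the known types.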