├── chapter05-cluster
│   ├── KH.png
│   ├── 18.20.png
│   ├── scenery.png
│   ├── Basketball.zip
│   ├── data-fenci.txt
│   ├── chapter05-cluster-06.py
│   ├── chapter05-cluster-08.py
│   ├── chapter05-cluster-02.py
│   ├── chapter05-cluster-09.py
│   ├── chapter05-cluster-01.py
│   ├── chapter05-cluster-12.py
│   ├── chapter05-cluster-10.py
│   ├── chapter05-cluster-13.py
│   ├── chapter05-cluster-14.py
│   ├── chapter05-cluster-07.py
│   ├── chapter05-cluster-03.py
│   ├── chapter05-cluster-11.py
│   ├── chapter05-cluster-16py.py
│   ├── chapter05-cluster-15.py
│   ├── chapter05-cluster-04.py
│   ├── chapter05-cluster-05.py
│   ├── data.txt
│   └── glass.csv
├── chapter06-classifier
│   ├── wine.rar
│   ├── 实验结果.xls
│   ├── chapter06-classifier-01-dtc.py
│   ├── chapter06-classifier-08-svm.py
│   ├── chapter06-classifier-05-knn.py
│   ├── chapter06-classifier-06-knn.py
│   ├── chapter06-classifier-02-dtc.py
│   ├── chapter06-classifier-04-dtc.py
│   ├── wine
│   │   ├── wine Data Set Information.txt
│   │   └── wine.txt
│   ├── chapter06-classifier-10-svm.py
│   ├── chapter06-classifier-03-dtc.py
│   ├── chapter06-classifier-09-svm.py
│   ├── chapter06-classifier-07-knn.py
│   └── chapter06-classifier-11-all.py
├── chapter08-textcluster
│   ├── 08.10.png
│   ├── 08.11.png
│   ├── 08.3.png
│   ├── 08.5.png
│   ├── 08.9.png
│   ├── test.txt
│   ├── test3.txt
│   ├── test2.txt
│   ├── result.txt
│   ├── chapter08_textcluster_02.py
│   ├── chapter08_textcluster_05.py
│   ├── chapter08_textcluster_01.py
│   ├── chapter08_textcluster_03.py
│   ├── chapter08_textcluster_06.py
│   ├── chapter08_textcluster_04.py
│   └── chapter08_textcluster_07.py
├── chapter09-TopicAnalysis
│   ├── pic.png
│   ├── test.txt
│   ├── result.png
│   ├── cloudimg.png
│   ├── result2.png
│   ├── data-fenci.txt
│   ├── lda-1.1.0-cp37-cp37m-win32.whl
│   ├── lda-1.1.0-cp37-cp37m-win_amd64.whl
│   ├── chapter09_TopicAnalysis_01.py
│   ├── chapter09_TopicAnalysis_04.py
│   ├── chapter09_TopicAnalysis_02.py
│   ├── chapter09_TopicAnalysis_06.py
│   ├── chapter09_TopicAnalysis_05.py
│   ├── chapter09_TopicAnalysis_08.py
│   ├── chapter09_TopicAnalysis_09.py
│   ├── chapter09_TopicAnalysis_07.py
│   └── chapter09_TopicAnalysis_03.py
├── chapter02-dataming-based
│   ├── data.xls
│   ├── chapter02-09-sklearn.py
│   ├── data.csv
│   ├── chapter02-01-numpy.py
│   ├── chapter02-03-numpy.py
│   ├── chapter02-05-pandas.py
│   ├── chapter02-04-numpy.py
│   ├── chapter02-06-pandas.py
│   ├── chapter02-02-numpy.py
│   ├── chapter02-07-pandas.py
│   └── chapter02-08-matplotlib.py
├── chapter04-regression
│   ├── chapter04-01.png
│   ├── chapter04-regression-01.py
│   ├── chapter04-regression-10.py
│   ├── chapter04-regression-09.py
│   ├── chapter04-regression-03.py
│   ├── chapter04-regression-12.py
│   ├── chapter04-regression-11.py
│   ├── chapter04-regression-02.py
│   ├── chapter04-regression-04.py
│   ├── chapter04-regression-05.py
│   ├── chapter04-regression-13.py
│   ├── chapter04-regression-06.py
│   ├── chapter04-regression-07.py
│   └── chapter04-regression-08.py
└── README.md

/chapter05-cluster/KH.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/eastmountyxz/Book2-Python-DataAnalysis/HEAD/chapter05-cluster/KH.png
--------------------------------------------------------------------------------
/chapter05-cluster/18.20.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/eastmountyxz/Book2-Python-DataAnalysis/HEAD/chapter05-cluster/18.20.png
--------------------------------------------------------------------------------
/chapter05-cluster/scenery.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/eastmountyxz/Book2-Python-DataAnalysis/HEAD/chapter05-cluster/scenery.png
--------------------------------------------------------------------------------
/chapter06-classifier/wine.rar:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/eastmountyxz/Book2-Python-DataAnalysis/HEAD/chapter06-classifier/wine.rar
--------------------------------------------------------------------------------
/chapter06-classifier/实验结果.xls:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/eastmountyxz/Book2-Python-DataAnalysis/HEAD/chapter06-classifier/实验结果.xls
--------------------------------------------------------------------------------
/chapter05-cluster/Basketball.zip:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/eastmountyxz/Book2-Python-DataAnalysis/HEAD/chapter05-cluster/Basketball.zip
--------------------------------------------------------------------------------
/chapter05-cluster/data-fenci.txt:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/eastmountyxz/Book2-Python-DataAnalysis/HEAD/chapter05-cluster/data-fenci.txt
--------------------------------------------------------------------------------
/chapter08-textcluster/08.10.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/eastmountyxz/Book2-Python-DataAnalysis/HEAD/chapter08-textcluster/08.10.png
--------------------------------------------------------------------------------
/chapter08-textcluster/08.11.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/eastmountyxz/Book2-Python-DataAnalysis/HEAD/chapter08-textcluster/08.11.png
--------------------------------------------------------------------------------
/chapter08-textcluster/08.3.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/eastmountyxz/Book2-Python-DataAnalysis/HEAD/chapter08-textcluster/08.3.png
--------------------------------------------------------------------------------
/chapter08-textcluster/08.5.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/eastmountyxz/Book2-Python-DataAnalysis/HEAD/chapter08-textcluster/08.5.png
--------------------------------------------------------------------------------
/chapter08-textcluster/08.9.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/eastmountyxz/Book2-Python-DataAnalysis/HEAD/chapter08-textcluster/08.9.png
--------------------------------------------------------------------------------
/chapter08-textcluster/test.txt:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/eastmountyxz/Book2-Python-DataAnalysis/HEAD/chapter08-textcluster/test.txt
--------------------------------------------------------------------------------
/chapter08-textcluster/test3.txt:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/eastmountyxz/Book2-Python-DataAnalysis/HEAD/chapter08-textcluster/test3.txt
--------------------------------------------------------------------------------
/chapter09-TopicAnalysis/pic.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/eastmountyxz/Book2-Python-DataAnalysis/HEAD/chapter09-TopicAnalysis/pic.png
--------------------------------------------------------------------------------
/chapter09-TopicAnalysis/test.txt:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/eastmountyxz/Book2-Python-DataAnalysis/HEAD/chapter09-TopicAnalysis/test.txt
--------------------------------------------------------------------------------
/chapter02-dataming-based/data.xls:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/eastmountyxz/Book2-Python-DataAnalysis/HEAD/chapter02-dataming-based/data.xls
--------------------------------------------------------------------------------
/chapter09-TopicAnalysis/result.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/eastmountyxz/Book2-Python-DataAnalysis/HEAD/chapter09-TopicAnalysis/result.png
--------------------------------------------------------------------------------
/chapter04-regression/chapter04-01.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/eastmountyxz/Book2-Python-DataAnalysis/HEAD/chapter04-regression/chapter04-01.png
--------------------------------------------------------------------------------
/chapter09-TopicAnalysis/cloudimg.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/eastmountyxz/Book2-Python-DataAnalysis/HEAD/chapter09-TopicAnalysis/cloudimg.png
--------------------------------------------------------------------------------
/chapter09-TopicAnalysis/result2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/eastmountyxz/Book2-Python-DataAnalysis/HEAD/chapter09-TopicAnalysis/result2.png
--------------------------------------------------------------------------------
/chapter09-TopicAnalysis/data-fenci.txt:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/eastmountyxz/Book2-Python-DataAnalysis/HEAD/chapter09-TopicAnalysis/data-fenci.txt
--------------------------------------------------------------------------------
/chapter06-classifier/chapter06-classifier-01-dtc.py:
--------------------------------------------------------------------------------
from sklearn.datasets import load_iris
iris = load_iris()
print(iris.data)
print(iris.target)
--------------------------------------------------------------------------------
/chapter04-regression/chapter04-regression-01.py:
--------------------------------------------------------------------------------
from sklearn import linear_model          #import the linear model module
regr = linear_model.LinearRegression()    #use linear regression
print(regr)
--------------------------------------------------------------------------------
/chapter09-TopicAnalysis/lda-1.1.0-cp37-cp37m-win32.whl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/eastmountyxz/Book2-Python-DataAnalysis/HEAD/chapter09-TopicAnalysis/lda-1.1.0-cp37-cp37m-win32.whl
--------------------------------------------------------------------------------
/chapter09-TopicAnalysis/lda-1.1.0-cp37-cp37m-win_amd64.whl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/eastmountyxz/Book2-Python-DataAnalysis/HEAD/chapter09-TopicAnalysis/lda-1.1.0-cp37-cp37m-win_amd64.whl
--------------------------------------------------------------------------------
/chapter08-textcluster/test2.txt:
--------------------------------------------------------------------------------
贵州省 位于 中国 西南地区 简称 黔 贵
走遍 神州大地 醉美 多彩 贵州
贵阳市 是 贵州省 省会 有 林城 美誉
数据分析 是 数学 计算机科学 相结合 产物
回归 聚类 分类 算法 广泛应用 数据分析
数据 爬取 数据 存储 数据分析 紧密 相关 过程
最 甜美 爱情 最 苦涩 爱情
一只 鸡蛋 可以 画 无数次 一场 爱情
真爱 往往 珍藏 最 平凡 普通 生活
--------------------------------------------------------------------------------
/chapter08-textcluster/result.txt:
--------------------------------------------------------------------------------
贵州省 位于 中国 西南地区 简称 黔 贵
走遍 神州大地 醉美 多彩 贵州
贵阳市 贵州省 省会 林城 美誉
数据分析 数学 计算机科学 相结合 产物
回归 聚类 分类 算法 广泛应用 数据分析
数据 爬取 数据 存储 数据分析 紧密 相关 过程
甜美 爱情 苦涩 爱情
一只 鸡蛋 可以 画 无数次 一场 爱情 能
真 爱 往往 珍藏 平凡 普通 生活
--------------------------------------------------------------------------------
/chapter05-cluster/chapter05-cluster-06.py:
--------------------------------------------------------------------------------
# -*- coding: utf-8 -*-
# By:Eastmount CSDN 2021-07-03
from sklearn.cluster import Birch
X = [[1,1],[2,1],[1,3],[6,6],[8,5],[7,8]]
y = [0,0,0,1,1,1]
clf = Birch(n_clusters=2)
clf.fit(X,y)
print(clf.labels_)
--------------------------------------------------------------------------------
/chapter02-dataming-based/chapter02-09-sklearn.py:
--------------------------------------------------------------------------------
#coding=utf-8
#By:Eastmount CSDN 2021-06-28
from sklearn.cluster import KMeans

X = [[1],[2],[3],[4],[5]]
y = [4,2,6,1,3]
clf = KMeans(n_clusters=2)
clf.fit(X,y)
print(clf)
print(clf.labels_)
--------------------------------------------------------------------------------
/chapter02-dataming-based/data.csv:
--------------------------------------------------------------------------------
1,235.83,324.03,478.32
2,236.27,325.63,515.45
3,238.05,328.08,517.09
4,235.9,,514.89
5,236.76,268.82,
6,,404.04,486.09
7,237.41,391.26,516.23
8,238.65,380.81,
9,237.61,388.02,435.35
10,238.03,206.43,487.675
--------------------------------------------------------------------------------
/chapter04-regression/chapter04-regression-10.py:
--------------------------------------------------------------------------------
# -*- coding: utf-8 -*-
# By:Eastmount CSDN 2021-07-03
from sklearn.linear_model import LogisticRegression   #import the logistic regression model
import numpy as np

#train_feature/label/predict_feature were undefined in the original; toy values added so the snippet runs
train_feature = np.array([[1, 2], [2, 3], [3, 1], [6, 5], [7, 8], [8, 6]])
label = np.array([0, 0, 0, 1, 1, 1])
predict_feature = np.array([[2, 2], [7, 7]])

clf = LogisticRegression()
print(clf)
clf.fit(train_feature, label)
print(clf.predict(predict_feature))
--------------------------------------------------------------------------------
/chapter02-dataming-based/chapter02-01-numpy.py:
--------------------------------------------------------------------------------
#coding=utf-8
#By:Eastmount CSDN 2021-06-28

#import the package and alias it as np
import numpy as np

#define a one-dimensional array
a = np.array([2, 0, 1, 5, 8, 3])
print('原始数据:', a)

#print the minimum, maximum and shape
print('最小值:', a.min())
print('最大值:', a.max())
print('形状', a.shape)
--------------------------------------------------------------------------------
/chapter06-classifier/chapter06-classifier-08-svm.py:
--------------------------------------------------------------------------------
import numpy as np
from sklearn.svm import SVC

X = np.array([[-1, -1], [-2, -2], [1, 3], [4, 6]])
y = np.array([1, 1, 2, 2])
clf = SVC()
clf.fit(X, y)
print(clf)
print(clf.predict([[-0.8,-1], [2,1]]))

#Output: [1, 2]
--------------------------------------------------------------------------------
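A small companion sketch (an editor addition, not part of the book code) showing how the SVC fitted in chapter06-classifier-08-svm.py can be inspected; `support_vectors_`, `decision_function` and `score` are standard scikit-learn SVC members, and the toy data repeats the example above.

import numpy as np
from sklearn.svm import SVC

# same toy data as chapter06-classifier-08-svm.py
X = np.array([[-1, -1], [-2, -2], [1, 3], [4, 6]])
y = np.array([1, 1, 2, 2])
clf = SVC(kernel='linear')        # linear kernel so the margin is easy to read
clf.fit(X, y)

print(clf.support_vectors_)       # training points that define the margin
print(clf.decision_function(X))   # signed distance of each sample from the separating plane
print(clf.score(X, y))            # mean accuracy on the training data
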
/chapter05-cluster/chapter05-cluster-08.py:
--------------------------------------------------------------------------------
# -*- coding: utf-8 -*-
# By:Eastmount CSDN 2021-07-03
import numpy as np
from sklearn.decomposition import PCA
X = np.array([[-1, -1], [-2, -1], [-3, -2], [1, 1], [2, 1], [3, 2]])
pca = PCA(n_components=2)
print(pca)
pca.fit(X)
print(pca.explained_variance_ratio_)
--------------------------------------------------------------------------------
/chapter02-dataming-based/chapter02-03-numpy.py:
--------------------------------------------------------------------------------
#coding=utf-8
#By:Eastmount CSDN 2021-06-28

#define a two-dimensional array
import numpy as np
c = np.array([[1, 2, 3, 4],[4, 5, 6, 7], [7, 8, 9, 10]])

print('形状:', c.shape)
print('获取值:', c[1][0])
print('获取某行:')
print(c[1][:])
print('获取某行并切片:')
print(c[0][:-1])
print(c[0][-1:])
--------------------------------------------------------------------------------
/chapter05-cluster/chapter05-cluster-02.py:
--------------------------------------------------------------------------------
# -*- coding: utf-8 -*-
# By:Eastmount CSDN 2021-07-03
from sklearn.cluster import KMeans
X = [[1,1],[2,1],[1,3],[6,6],[8,5],[7,8]]
y = [0,0,0,1,1,1]
clf = KMeans(n_clusters=2)
clf.fit(X,y)
print(clf)
print(clf.labels_)

import matplotlib.pyplot as plt
a = [n[0] for n in X]
b = [n[1] for n in X]
plt.scatter(a, b, c=clf.labels_)
plt.show()
--------------------------------------------------------------------------------
/chapter02-dataming-based/chapter02-05-pandas.py:
--------------------------------------------------------------------------------
#coding=utf-8
#By:Eastmount CSDN 2021-06-28
import pandas as pd

#read the data; header=None because the Excel sheet has no header row
data = pd.read_excel("data.xls", header=None)
print(data)

#number of rows
print('行数', len(data))

#total consumption of users A/B/C
print(data.sum())

#arithmetic mean of consumption of users A/B/C
mm = data.mean()   #mean(), matching the comment; the original repeated data.sum() here
print(mm)

#preview the first 5 rows
print('预览前5行数据')
print(data.head())
--------------------------------------------------------------------------------
/chapter02-dataming-based/chapter02-04-numpy.py:
--------------------------------------------------------------------------------
#coding=utf-8
#By:Eastmount CSDN 2021-06-28
import numpy as np

#call the sin function and compute 2 to the power 3
print(np.sin(np.pi/6))
print(type(np.sin(0.5)))
f = np.power(2, 3)
print(f)

#define a range
print(np.arange(0,4))
print(type(np.arange(0,4)))

#sum, mean and standard deviation
print(np.sum([1, 2, 3, 4]))
print(np.mean([4, 5, 6, 7]))
print(np.std([1, 2, 3, 2, 1, 3, 2, 0]))
--------------------------------------------------------------------------------
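data.csv above (chapter02) contains empty cells, which pandas parses as NaN. A minimal sketch, added here and not part of the original scripts, of two common ways to handle them; both `dropna` and `fillna` are standard pandas methods.

import pandas as pd

# data.csv has no header row; empty fields become NaN after parsing
data = pd.read_csv("data.csv", header=None)

print(data.dropna())             # option 1: drop rows containing any missing value
print(data.fillna(data.mean()))  # option 2: replace NaN with the column mean
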
/chapter05-cluster/chapter05-cluster-09.py:
--------------------------------------------------------------------------------
# -*- coding: utf-8 -*-
# By:Eastmount CSDN 2021-07-03
#load the dataset
#note: load_boston was removed in scikit-learn 1.2, so this script needs an older version
from sklearn.datasets import load_boston
d = load_boston()
x = d.data
y = d.target
print(x[:2])
print('形状:', x.shape)

#dimensionality reduction
import numpy as np
from sklearn.decomposition import PCA
pca = PCA(n_components=2)
newData = pca.fit_transform(x)
print('降维后数据:')
print(newData[:4])
print('形状:', newData.shape)
--------------------------------------------------------------------------------
/chapter09-TopicAnalysis/chapter09_TopicAnalysis_01.py:
--------------------------------------------------------------------------------
#coding=utf-8
#By:Eastmount CSDN
import jieba
import sys
import matplotlib.pyplot as plt
from wordcloud import WordCloud

text = open('test.txt').read()
print(type(text))
wordlist = jieba.cut(text, cut_all = True)
wl_space_split = " ".join(wordlist)
print(wl_space_split)
my_wordcloud = WordCloud().generate(wl_space_split)
plt.imshow(my_wordcloud)
plt.axis("off")
plt.show()
--------------------------------------------------------------------------------
/chapter05-cluster/chapter05-cluster-01.py:
--------------------------------------------------------------------------------
# -*- coding: utf-8 -*-
# By:Eastmount CSDN 2021-07-03
from sklearn.cluster import MiniBatchKMeans
X= [[1],[2],[3],[4],[3],[2]]
mbk = MiniBatchKMeans(init='k-means++', n_clusters=3, n_init=10)
clf = mbk.fit(X)
print(clf.labels_)
#Output: [0 2 1 1 1 2]

from sklearn.cluster import Birch
X = [[1],[2],[3],[4],[3],[2]]
clf = Birch(n_clusters=2)
clf.fit(X)
y_pred = clf.fit_predict(X)
print(clf)
print(y_pred)
#Output: [1 1 0 0 0 1]
--------------------------------------------------------------------------------
/chapter02-dataming-based/chapter02-06-pandas.py:
--------------------------------------------------------------------------------
#coding=utf-8
#By:Eastmount CSDN 2021-06-28
import pandas as pd

#read the data; header=None because the Excel sheet has no header row
data = pd.read_excel("data.xls", header=None)
print(data)

#number of rows
print('行数', len(data))

#total consumption of users A/B/C
print(data.sum())

#arithmetic mean of consumption of users A/B/C
mm = data.mean()   #mean(), matching the comment; the original repeated data.sum() here
print(mm)

#preview the first 5 rows
print('预览前5行数据')
print(data.head())

#basic descriptive statistics
print('输出数据基本统计量')
print(data.describe())
--------------------------------------------------------------------------------
/chapter06-classifier/chapter06-classifier-05-knn.py:
--------------------------------------------------------------------------------
# -*- coding: utf-8 -*-
# By:Eastmount CSDN 2021-07-06
import numpy as np
from sklearn.neighbors import KNeighborsClassifier

X = np.array([[-1,-1],[-2,-2],[1,2], [1,1],[-3,-4],[3,2]])
Y = [0,0,1,1,0,1]
x = [[4,5],[-4,-3],[2,6]]
knn = KNeighborsClassifier(n_neighbors=3, algorithm="ball_tree")
knn.fit(X,Y)
pre = knn.predict(x)
print(pre)

distances, indices = knn.kneighbors(X)
print(indices)
print(distances)
--------------------------------------------------------------------------------
/chapter08-textcluster/chapter08_textcluster_02.py:
--------------------------------------------------------------------------------
#coding=utf-8
#By:Eastmount CSDN
import os
import codecs
import jieba
import jieba.analyse

source = open("test.txt", 'r')
line = source.readline().rstrip('\n')
content = []
while line!="":
    seglist = jieba.cut(line,cut_all=False)   #accurate mode
    output = ' '.join(list(seglist))          #join the tokens with spaces
    print(output)
    content.append(output)
    line = source.readline().rstrip('\n')
else:
    source.close()
--------------------------------------------------------------------------------
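For the PCA step in chapter05-cluster-09.py above, a hedged sketch of choosing the number of components from the cumulative explained variance; `explained_variance_ratio_` is the same attribute already printed in chapter05-cluster-08.py, and the 95% threshold is an illustrative assumption.

import numpy as np
from sklearn.decomposition import PCA

# toy data reused from chapter05-cluster-08.py
X = np.array([[-1, -1], [-2, -1], [-3, -2], [1, 1], [2, 1], [3, 2]])

pca = PCA().fit(X)                      # keep all components
ratios = pca.explained_variance_ratio_
print(np.cumsum(ratios))                # cumulative variance explained

# smallest k whose cumulative ratio exceeds, say, 95%
k = int(np.searchsorted(np.cumsum(ratios), 0.95)) + 1
print('suggested n_components:', k)
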
/chapter02-dataming-based/chapter02-02-numpy.py:
--------------------------------------------------------------------------------
#coding=utf-8
#By:Eastmount CSDN 2021-06-28

#import the package and alias it as np
import numpy as np

#define a one-dimensional array
a = np.array([2, 0, 1, 5, 8, 3])
print('原始数据:', a)

#print the minimum, maximum and shape
print('最小值:', a.min())
print('最大值:', a.max())
print('形状', a.shape)

#slicing
print('切片操作:')
print(a[:-2])
print(a[-2:])
print(a[:1])

#sorting
print(type(a))
a.sort()
print('排序后:', a)
#
# 排序后: [0 1 2 3 5 8]
--------------------------------------------------------------------------------
/chapter05-cluster/chapter05-cluster-12.py:
--------------------------------------------------------------------------------
# -*- coding: utf-8 -*-
# By:Eastmount CSDN 2021-07-03
import cv2
import numpy as np
import matplotlib.pyplot as plt

#read the original image
img = cv2.imread('scenery.png')

spatialRad = 100   #spatial window radius
colorRad = 100     #color window radius
maxPyrLevel = 2    #number of pyramid levels

#mean-shift segmentation of the image
dst = cv2.pyrMeanShiftFiltering( img, spatialRad, colorRad, maxPyrLevel)

#show the images
cv2.imshow('src', img)
cv2.imshow('dst', dst)
cv2.waitKey()
cv2.destroyAllWindows()
--------------------------------------------------------------------------------
/chapter04-regression/chapter04-regression-09.py:
--------------------------------------------------------------------------------
# -*- coding: utf-8 -*-
# By:Eastmount CSDN 2021-07-03
import matplotlib.pyplot as plt
import numpy as np

def Sigmoid(x):
    return 1.0 / (1.0 + np.exp(-x))

x= np.arange(-10, 10, 0.1)
h = Sigmoid(x)   #the sigmoid function
plt.plot(x, h)
plt.axvline(0.0, color='k')   #vertical line at x=0
plt.axhspan(0.0, 1.0, facecolor='1.0', alpha=1.0, ls='dotted')
plt.axhline(y=0.5, ls='dotted', color='k')
plt.yticks([0.0, 0.5, 1.0])   #y-axis ticks
plt.ylim(-0.1, 1.1)           #y-axis range
plt.show()
--------------------------------------------------------------------------------
/chapter04-regression/chapter04-regression-03.py:
--------------------------------------------------------------------------------
# -*- coding: utf-8 -*-
# By:Eastmount CSDN 2021-07-03
from sklearn import datasets
diabetes = datasets.load_diabetes()   #load the dataset
print(diabetes.data)                  #features
print(diabetes.target)                #labels
print('总行数: ', len(diabetes.data), len(diabetes.target))
print('特征数: ', len(diabetes.data[0]))   #dimensionality of each row
print('数据类型: ', diabetes.data.shape)
print(type(diabetes.data), type(diabetes.target))
--------------------------------------------------------------------------------
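The sigmoid plotted in chapter04-regression-09.py is exactly what LogisticRegression applies to its linear score. A short added sketch (not from the book) verifying that, for a binary problem, `predict_proba` equals the sigmoid of `decision_function`.

import numpy as np
from sklearn.linear_model import LogisticRegression

X = np.array([[1.0], [2.0], [3.0], [6.0], [7.0], [8.0]])
y = np.array([0, 0, 0, 1, 1, 1])
clf = LogisticRegression().fit(X, y)

z = clf.decision_function(X)    # linear score w*x + b
p = 1.0 / (1.0 + np.exp(-z))    # sigmoid, as in chapter04-regression-09.py
print(np.allclose(p, clf.predict_proba(X)[:, 1]))   # True
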
/chapter02-dataming-based/chapter02-07-pandas.py:
--------------------------------------------------------------------------------
#coding=utf-8
#By:Eastmount CSDN 2021-06-28
from pandas import Series, DataFrame

a = Series([4, 7, -5, 3])
print('创建Series:')
print(a)

b = Series([4, 7, -5, 3], index=['d', 'b', 'a', 'c'])
print('创建带有索引的Series:')
print(b)

sdata = {'Ohio': 35000, 'Texas': 71000, 'Oregon': 16000, 'Utah': 5000}
c = Series(sdata)
print('通过传递字典创建Series:')
print(c)

states = ['California', 'Ohio', 'Oregon', 'Texas']
d = Series(sdata, index=states)
print('California没有字典为空:')
print(d)
--------------------------------------------------------------------------------
/chapter08-textcluster/chapter08_textcluster_05.py:
--------------------------------------------------------------------------------
#coding:utf-8
#By:Eastmount CSDN
from sklearn.feature_extraction.text import CountVectorizer

#read the corpus: one line is one document
corpus = []
for line in open('result.txt', 'r', encoding="utf-8").readlines():
    corpus.append(line.strip())

#convert the words in the text into a term-frequency matrix
vectorizer = CountVectorizer()

#count the occurrences of each word
X = vectorizer.fit_transform(corpus)

#get all keywords in the bag of words
word = vectorizer.get_feature_names()   #renamed to get_feature_names_out() in scikit-learn >= 1.2
for n in range(len(word)):
    print(word[n],end=" ")
print('')

#inspect the term-frequency result
print(X.toarray())
--------------------------------------------------------------------------------
/chapter08-textcluster/chapter08_textcluster_01.py:
--------------------------------------------------------------------------------
#coding=utf-8
#By:Eastmount CSDN
import jieba

text = "小杨毕业于北京理工大学,从事Python人工智能相关工作。"

#full mode
data = jieba.cut(text,cut_all=True)
print(type(data))
print(u"[全模式]: ", "/".join(data))

#accurate mode
data = jieba.cut(text,cut_all=False)
print(u"[精确模式]: ", "/".join(data))

#the default is accurate mode
data = jieba.cut(text)
print(u"[默认模式]: ", "/".join(data))

#search engine mode
data = jieba.cut_for_search(text)
print(u"[搜索引擎模式]: ", "/".join(data))

#return a list
seg_list = jieba.lcut(text, cut_all=False)
print("[返回列表]: {0}".format(seg_list))
--------------------------------------------------------------------------------
/chapter09-TopicAnalysis/chapter09_TopicAnalysis_04.py:
--------------------------------------------------------------------------------
#coding=utf-8
#By:Eastmount CSDN
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.feature_extraction.text import TfidfTransformer

#read the corpus
corpus = []
for line in open('test.txt', 'r').readlines():
    corpus.append(line.strip())

#convert the words in the text into a term-frequency matrix
vectorizer = CountVectorizer()

#count the occurrences of each word
X = vectorizer.fit_transform(corpus)

#get all keywords in the bag of words
word = vectorizer.get_feature_names()

print('特征个数:', len(word))
for n in range(len(word)):
    print(word[n],end=" ")
print('')

#inspect the term-frequency result
print(X.toarray())
--------------------------------------------------------------------------------
/chapter09-TopicAnalysis/chapter09_TopicAnalysis_02.py:
--------------------------------------------------------------------------------
#coding=utf-8
#By:Eastmount CSDN
import jieba

#full mode
text = "我来到北京清华大学"
seg_list = jieba.cut(text, cut_all=True)
print("[全模式]: ", "/ ".join(seg_list))
#[全模式]: 我 / 来到 / 北京 / 清华 / 清华大学 / 华大 /大学

#accurate mode
seg_list = jieba.cut(text, cut_all=False)
print("[精确模式]: ", "/ ".join(seg_list))
#[精确模式]: 我 / 来到 / 北京 / 清华大学

#the default is accurate mode
seg_list = jieba.cut(text)
print("[默认模式]: ", "/ ".join(seg_list))
#[默认模式]: 我 / 来到 / 北京 / 清华大学

#search engine mode
seg_list = jieba.cut_for_search(text)
print("[搜索引擎模式]: ", "/ ".join(seg_list))
#[搜索引擎模式]: 我 / 来到 / 北京 / 清华 / 华大 / 大学 / 清华大学
--------------------------------------------------------------------------------
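The CountVectorizer + TfidfTransformer pipeline used throughout chapters 08 and 09 can be collapsed into a single step with scikit-learn's TfidfVectorizer. A brief equivalent sketch, added here, assuming the same result.txt produced by chapter08_textcluster_04.py.

from sklearn.feature_extraction.text import TfidfVectorizer

corpus = []
for line in open('result.txt', 'r', encoding='utf-8').readlines():
    corpus.append(line.strip())

vectorizer = TfidfVectorizer()            # = CountVectorizer + TfidfTransformer in one step
tfidf = vectorizer.fit_transform(corpus)
print(tfidf.shape)                        # documents x vocabulary
print(tfidf.toarray())                    # the same kind of weight matrix as before
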
/chapter02-dataming-based/chapter02-08-matplotlib.py:
--------------------------------------------------------------------------------
#coding=utf-8
#By:Eastmount CSDN 2021-06-28
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

data = pd.read_csv("data.csv", header=None)
print(data)
mm = data.sum()   #column sums
print(mm[1:])     #the first column is the row index; keep the three value columns

ind = np.arange(3)   #3 users: 0 1 2
width = 0.35         #bar width
x = [u'用户A', u'用户B', u'用户C']
plt.rc('font', family='SimHei', size=13)   #font that can display the Chinese labels

#plot
plt.bar(ind, mm[1:], width, color='r', label='sum num')
plt.xlabel(u"用户")
plt.ylabel(u"消费数据")
plt.title(u"用户消费数据对比柱状图")
plt.legend()
#x tick labels, rotated 40 degrees
plt.xticks(ind+width/2, x, rotation=40)
plt.show()
--------------------------------------------------------------------------------
/chapter05-cluster/chapter05-cluster-10.py:
--------------------------------------------------------------------------------
# -*- coding: utf-8 -*-
# By:Eastmount CSDN 2021-07-03
#Step 1: load the data
import pandas as pd
glass = pd.read_csv("glass.csv")
print(glass[:4])

#Step 2: clustering
from sklearn.cluster import Birch
clf = Birch(n_clusters=3)
clf.fit(glass)
pre = clf.predict(glass)
print(pre)

#Step 3: dimensionality reduction
from sklearn.decomposition import PCA
pca = PCA(n_components=2)
newData = pca.fit_transform(glass)
print(newData[:4])
x1 = [n[0] for n in newData]
x2 = [n[1] for n in newData]

#Step 4: plotting
import matplotlib.pyplot as plt
plt.xlabel("x feature")
plt.ylabel("y feature")
plt.scatter(x1, x2, c=pre, marker='x')
plt.show()
--------------------------------------------------------------------------------
/chapter04-regression/chapter04-regression-12.py:
--------------------------------------------------------------------------------
# -*- coding: utf-8 -*-
# By:Eastmount CSDN 2021-07-03

#Step 1: import the dataset
from sklearn.datasets import load_iris
hua = load_iris()

#take the first two feature columns (sepal length and width)
x = [n[0] for n in hua.data]
y = [n[1] for n in hua.data]
import numpy as np   #convert to arrays
x = np.array(x).reshape(len(x),1)
y = np.array(y).reshape(len(y),1)

#Step 2: linear regression analysis
from sklearn.linear_model import LinearRegression
clf = LinearRegression()
clf.fit(x,y)
pre = clf.predict(x)
print(pre)

#Step 3: plotting
import matplotlib.pyplot as plt
plt.scatter(x,y,s=100)
plt.plot(x,pre,"r-",linewidth=4)
for idx, m in enumerate(x):
    plt.plot([m,m],[y[idx],pre[idx]], 'g-')
plt.show()
--------------------------------------------------------------------------------
/chapter06-classifier/chapter06-classifier-06-knn.py:
--------------------------------------------------------------------------------
# -*- coding: utf-8 -*-
import os
import numpy as np
path = "wine/wine.txt"
data = np.loadtxt(path,dtype=float,delimiter=",")
print(data)

yy, x = np.split(data, (1,), axis=1)
print(yy.shape, x.shape)
y = []
for n in yy:
    y.append(int(n))

train_data = np.concatenate((x[0:40,:], x[60:100,:], x[140:160,:]), axis = 0)   #training set
train_target = np.concatenate((y[0:40], y[60:100], y[140:160]), axis = 0)       #training labels
test_data = np.concatenate((x[40:60, :], x[100:140, :], x[160:,:]), axis = 0)   #test set
test_target = np.concatenate((y[40:60], y[100:140], y[160:]), axis = 0)         #test labels

print(train_data.shape, train_target.shape)
print(test_data.shape, test_target.shape)
--------------------------------------------------------------------------------
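chapter06-classifier-06-knn.py stops after splitting the wine data. A hedged continuation sketch, added here, that actually fits a KNN model on that split and scores it, using only calls already seen in this repository (KNeighborsClassifier, sklearn.metrics).

import numpy as np
from sklearn.neighbors import KNeighborsClassifier
from sklearn import metrics

# rebuild the same split as chapter06-classifier-06-knn.py
data = np.loadtxt("wine/wine.txt", dtype=float, delimiter=",")
yy, x = np.split(data, (1,), axis=1)
y = yy.ravel().astype(int)
train_data = np.concatenate((x[0:40], x[60:100], x[140:160]), axis=0)
train_target = np.concatenate((y[0:40], y[60:100], y[140:160]), axis=0)
test_data = np.concatenate((x[40:60], x[100:140], x[160:]), axis=0)
test_target = np.concatenate((y[40:60], y[100:140], y[160:]), axis=0)

knn = KNeighborsClassifier(n_neighbors=3)
knn.fit(train_data, train_target)
pre = knn.predict(test_data)
print(metrics.accuracy_score(test_target, pre))          # overall accuracy
print(metrics.classification_report(test_target, pre))   # precision/recall/F1 per class
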
/chapter04-regression/chapter04-regression-11.py:
--------------------------------------------------------------------------------
# -*- coding: utf-8 -*-
# By:Eastmount CSDN 2021-07-03
import matplotlib.pyplot as plt
import numpy as np
from sklearn.datasets import load_iris   #import the iris dataset

#load the dataset
iris = load_iris()
print(iris.data)     #features
print(iris.target)   #true labels

#take two feature columns
DD = iris.data
X = [x[0] for x in DD]
print(X)
Y = [x[1] for x in DD]
print(Y)

#plt.scatter(X, Y, c=iris.target, marker='x')
plt.scatter(X[:50], Y[:50], color='red', marker='o', label='setosa')              #first 50 samples
plt.scatter(X[50:100], Y[50:100], color='blue', marker='x', label='versicolor')   #middle 50
plt.scatter(X[100:], Y[100:],color='green', marker='+', label='Virginica')        #last 50 samples
plt.legend(loc=2)   #upper left corner
plt.show()
--------------------------------------------------------------------------------
/chapter06-classifier/chapter06-classifier-02-dtc.py:
--------------------------------------------------------------------------------
# -*- coding: utf-8 -*-
# By:Eastmount CSDN 2021-07-06

#import the iris dataset
from sklearn.datasets import load_iris
iris = load_iris()
print(iris.data)     #features
print(iris.target)   #true labels
print(len(iris.target))
print(iris.data.shape)   #150 samples with 4 features each

#import the decision tree classifier
from sklearn.tree import DecisionTreeClassifier
clf = DecisionTreeClassifier()
clf.fit(iris.data, iris.target)   #training
print(clf)
predicted = clf.predict(iris.data)   #prediction

#take two feature columns
X = iris.data
L1 = [x[0] for x in X]
L2 = [x[1] for x in X]

#plot
import numpy as np
import matplotlib.pyplot as plt
plt.scatter(L1, L2, c=predicted, marker='x')   #cmap=plt.cm.Paired
plt.title("DTC")
plt.show()
--------------------------------------------------------------------------------
/chapter05-cluster/chapter05-cluster-13.py:
--------------------------------------------------------------------------------
# -*- coding: utf-8 -*-
# By:Eastmount CSDN 2021-07-03
import cv2
import numpy as np
import matplotlib.pyplot as plt

#read the original image
img = cv2.imread('scenery.png')

#get the number of rows and columns
rows, cols = img.shape[:2]

#the mask must be 2 larger in each dimension and a single-channel uint8 array
mask = np.zeros([rows+2, cols+2], np.uint8)

spatialRad = 100   #spatial window radius
colorRad = 100     #color window radius
maxPyrLevel = 2    #number of pyramid levels

#mean-shift segmentation of the image
dst = cv2.pyrMeanShiftFiltering( img, spatialRad, colorRad, maxPyrLevel)

#flood-fill the segmented image
cv2.floodFill(dst, mask, (30, 30), (0, 255, 255),
              (100, 100, 100), (50, 50, 50),
              cv2.FLOODFILL_FIXED_RANGE)

#show the images
cv2.imshow('src', img)
cv2.imshow('dst', dst)
cv2.waitKey()
cv2.destroyAllWindows()
--------------------------------------------------------------------------------
/chapter09-TopicAnalysis/chapter09_TopicAnalysis_06.py:
--------------------------------------------------------------------------------
#coding=utf-8
#By:Eastmount CSDN
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.feature_extraction.text import TfidfTransformer
import lda
import numpy as np

#build the term-frequency matrix
corpus = []
for line in open('test.txt', 'r').readlines():
    corpus.append(line.strip())
vectorizer = CountVectorizer()
X = vectorizer.fit_transform(corpus)
word = vectorizer.get_feature_names()

#fit the LDA model
model = lda.LDA(n_topics=3, n_iter=500, random_state=1)
model.fit(X)

#document-topic distribution
doc_topic = model.doc_topic_
print("shape: {}".format(doc_topic.shape))
for n in range(9):
    topic_most_pr = doc_topic[n].argmax()
    print(u"文档: {} 主题: {}".format(n,topic_most_pr))
--------------------------------------------------------------------------------
/chapter08-textcluster/chapter08_textcluster_03.py:
--------------------------------------------------------------------------------
#coding=utf-8
#By:Eastmount CSDN
import os
import codecs
import jieba
import jieba.analyse

#stop-word table
stopwords = {}.fromkeys(['的', '或', '等', '是', '有', '之', '与',
                         '和', '也', '被', '吗', '于', '中', '最'])

source = open("test.txt", 'r')
line = source.readline().rstrip('\n')
content = []   #full text

while line!="":
    seglist = jieba.cut(line,cut_all=False)   #accurate mode
    final = []   #holds the tokens left after stop-word removal
    for seg in seglist:
        if seg not in stopwords:
            final.append(seg)
    output = ' '.join(list(final))   #join with spaces
    print(output)
    content.append(output)
    line = source.readline().rstrip('\n')
else:
    source.close()
--------------------------------------------------------------------------------
/chapter08-textcluster/chapter08_textcluster_06.py:
--------------------------------------------------------------------------------
#coding:utf-8
#By:Eastmount CSDN
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.feature_extraction.text import TfidfTransformer

#read the corpus
corpus = []
for line in open('result.txt', 'r', encoding="utf-8").readlines():
    corpus.append(line.strip())
vectorizer = CountVectorizer()          #convert the words into a term-frequency matrix
X = vectorizer.fit_transform(corpus)    #count the occurrences of each word
word = vectorizer.get_feature_names()   #get all keywords in the bag of words
for n in range(len(word)):
    print(word[n],end=" ")
print('')
print(X.toarray())   #inspect the term-frequency result

#compute TF-IDF values
transformer = TfidfTransformer()
print(transformer)
tfidf = transformer.fit_transform(X)   #turn the term-frequency matrix X into TF-IDF values
#inspect the data structure
print(tfidf.toarray())   #tfidf[i][j] is the tf-idf weight of word j in document i
--------------------------------------------------------------------------------
/chapter09-TopicAnalysis/chapter09_TopicAnalysis_05.py:
--------------------------------------------------------------------------------
#coding=utf-8
#By:Eastmount CSDN
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.feature_extraction.text import TfidfTransformer

#read the corpus
corpus = []
for line in open('test.txt', 'r').readlines():
    corpus.append(line.strip())

#convert the words in the text into a term-frequency matrix
vectorizer = CountVectorizer()
X = vectorizer.fit_transform(corpus)    #count the occurrences of each word
word = vectorizer.get_feature_names()   #get all keywords in the bag of words
print('特征个数:', len(word))
for n in range(len(word)):
    print(word[n],end=" ")
print('')
print(X.toarray())   #inspect the term-frequency result

#compute TF-IDF values
transformer = TfidfTransformer()
print(transformer)
tfidf = transformer.fit_transform(X)   #turn the term-frequency matrix X into TF-IDF values

#inspect the data structure and output the tf-idf weights
print(tfidf.toarray())
weight = tfidf.toarray()
--------------------------------------------------------------------------------
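Once the TF-IDF matrix exists (chapter08_textcluster_06.py above), document similarity is one matrix product away. A sketch, added here, using scikit-learn's cosine_similarity on the same result.txt corpus.

from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

corpus = [line.strip() for line in open('result.txt', 'r', encoding='utf-8')]
tfidf = TfidfVectorizer().fit_transform(corpus)

sim = cosine_similarity(tfidf)   # documents x documents similarity matrix
print(sim.round(2))              # ~1.0 on the diagonal; high off-diagonal values = similar topics
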
/chapter08-textcluster/chapter08_textcluster_04.py:
--------------------------------------------------------------------------------
# coding=utf-8
#By:Eastmount CSDN
import os
import codecs
import jieba
import jieba.analyse

#stop-word table
stopwords = {}.fromkeys(['的', '或', '等', '是', '有', '之', '与',
                         '和', '也', '被', '吗', '于', '中', '最',
                         '“', '”', '。', ',', '?', '、', ';'])

source = open("test.txt", 'r')
result = codecs.open("result.txt", 'w', 'utf-8')
line = source.readline().rstrip('\n')
content = []   #full text

while line!="":
    seglist = jieba.cut(line,cut_all=False)   #accurate mode
    final = []   #holds the tokens left after stop-word removal
    for seg in seglist:
        if seg not in stopwords:
            final.append(seg)
    output = ' '.join(list(final))   #join with spaces
    print(output)
    content.append(output)
    result.write(output + '\r\n')
    line = source.readline().rstrip('\n')
else:
    source.close()
    result.close()
--------------------------------------------------------------------------------
/chapter05-cluster/chapter05-cluster-14.py:
--------------------------------------------------------------------------------
# -*- coding: utf-8 -*-
# By:Eastmount CSDN 2021-07-03
import cv2
import numpy as np
import matplotlib.pyplot as plt

#read the original image as grayscale
img = cv2.imread('scenery.png', 0)
print(img.shape)

#get the image height and width
rows, cols = img.shape[:]

#flatten the 2D pixel grid into a 1D array
data = img.reshape((rows * cols, 1))
data = np.float32(data)

#termination criteria (type, max_iter, epsilon)
criteria = (cv2.TERM_CRITERIA_EPS +
            cv2.TERM_CRITERIA_MAX_ITER, 10, 1.0)

#initial-center flags
flags = cv2.KMEANS_RANDOM_CENTERS

#K-Means clustering into 4 clusters
compactness, labels, centers = cv2.kmeans(data, 4, None, criteria, 10, flags)

#build the final image
dst = labels.reshape((img.shape[0], img.shape[1]))

#font that can display the Chinese titles
plt.rcParams['font.sans-serif']=['SimHei']

#show the images
titles = [u'原始图像', u'聚类图像']
images = [img, dst]
for i in range(2):
    plt.subplot(1,2,i+1), plt.imshow(images[i], 'gray'),
    plt.title(titles[i])
    plt.xticks([]),plt.yticks([])
plt.show()
--------------------------------------------------------------------------------
/chapter04-regression/chapter04-regression-02.py:
--------------------------------------------------------------------------------
# -*- coding: utf-8 -*-
# By:Eastmount CSDN 2021-07-03
from sklearn import linear_model   #import the linear model module
import matplotlib.pyplot as plt
import numpy as np

#X is company cost, Y is company profit
X = [[400], [450], [486], [500], [510], [525], [540], [549], [558], [590], [610], [640], [680], [750], [900]]
Y = [[80], [89], [92], [102], [121], [160], [180], [189], [199], [203], [247], [250], [259], [289], [356]]
print('数据集X: ', X)
print('数据集Y: ', Y)

#regression training
clf = linear_model.LinearRegression()
clf.fit(X, Y)

#prediction
X2 = [[400], [750], [950]]
Y2 = clf.predict(X2)
print(Y2)
res = clf.predict(np.array([1200]).reshape(-1, 1))[0]
print('预测成本1200元的利润:$%.1f' % res)

#plot the linear regression
plt.plot(X, Y, 'ks')     #scatter plot of the training data
plt.plot(X2, Y2, 'g-')   #regression line over the prediction points
plt.show()

print('系数', clf.coef_)
print('截距', clf.intercept_)
print('评分函数', clf.score(X, Y))

'''
系数 [[ 0.62402912]]
截距 [-173.70433885]
评分函数 0.911831188777
'''
--------------------------------------------------------------------------------
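A quick arithmetic check of the fit reported in chapter04-regression-02.py: with coefficient ≈ 0.62402912 and intercept ≈ -173.70433885, the prediction for a cost of 1200 is 0.62402912 * 1200 - 173.70433885 ≈ 575.1, which is what clf.predict returns. The added sketch below verifies the equivalence programmatically.

import numpy as np
from sklearn import linear_model

X = [[400], [450], [486], [500], [510], [525], [540], [549], [558], [590], [610], [640], [680], [750], [900]]
Y = [[80], [89], [92], [102], [121], [160], [180], [189], [199], [203], [247], [250], [259], [289], [356]]

clf = linear_model.LinearRegression().fit(X, Y)
manual = clf.coef_[0][0] * 1200 + clf.intercept_[0]   # y = a*x + b by hand
model = clf.predict(np.array([[1200]]))[0][0]
print(manual, model, np.isclose(manual, model))       # identical values
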
/chapter04-regression/chapter04-regression-04.py:
--------------------------------------------------------------------------------
# -*- coding: utf-8 -*-
# By:Eastmount CSDN 2021-07-03
from sklearn import datasets
import matplotlib.pyplot as plt
from sklearn import linear_model
import numpy as np

#dataset split
diabetes = datasets.load_diabetes()                 #load the data
diabetes_x_temp = diabetes.data[:, np.newaxis, 2]   #take a single feature
diabetes_x_train = diabetes_x_temp[:-20]   #training samples
diabetes_x_test = diabetes_x_temp[-20:]    #test samples: last 20 rows
diabetes_y_train = diabetes.target[:-20]   #training labels
diabetes_y_test = diabetes.target[-20:]    #labels for comparing predictions

#regression training and prediction
clf = linear_model.LinearRegression()
clf.fit(diabetes_x_train, diabetes_y_train)   #fit on the training set
pre = clf.predict(diabetes_x_test)

#plot
plt.title(u'LinearRegression Diabetes')   #title
plt.xlabel(u'Attributes')                 #x-axis label
plt.ylabel(u'Measure of disease')         #y-axis label
plt.scatter(diabetes_x_test, diabetes_y_test, color = 'black')   #scatter plot
plt.plot(diabetes_x_test, pre, color='blue', linewidth = 2)      #fitted line
plt.show()
--------------------------------------------------------------------------------
/chapter05-cluster/chapter05-cluster-07.py:
--------------------------------------------------------------------------------
# -*- coding: utf-8 -*-
# By:Eastmount CSDN 2021-07-03
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.cluster import Birch

#load the data
glass=pd.read_csv("glass.csv")
X1 = glass.al
X2 = glass.ri
T = dict(zip(X1,X2))   #build a two-dimensional structure (note: duplicate al values collapse into one key)
X = list(map(lambda x,y: (x,y), T.keys(),T.values()))   #convert the dict into a list
y = glass.glass_type

#clustering
clf = Birch(n_clusters=3)
clf.fit(X, y)
y_pred = clf.predict(X)
print(y_pred)

#collect the points of each cluster separately
x1, y1 = [], []
x2, y2 = [], []
x3, y3 = [], []
i = 0
while i < len(X):
    if y_pred[i]==0:
        x1.append(X[i][0])
        y1.append(X[i][1])
    elif y_pred[i]==1:
        x2.append(X[i][0])
        y2.append(X[i][1])
    elif y_pred[i]==2:
        x3.append(X[i][0])
        y3.append(X[i][1])
    i = i + 1

#three colors: red, green, blue; markers: x = cross, o = circle, * = star
plot1, = plt.plot(x1, y1, 'or', marker="x")
plot2, = plt.plot(x2, y2, 'og', marker="o")
plot3, = plt.plot(x3, y3, 'ob', marker="*")
plt.xlabel('al')
plt.ylabel('ri')
plt.show()
--------------------------------------------------------------------------------
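One caveat in chapter05-cluster-07.py above: dict(zip(X1, X2)) silently drops rows whose al value repeats, because dictionary keys are unique. A short sketch of a duplicate-preserving alternative, assuming the same glass.csv columns.

import numpy as np
import pandas as pd

glass = pd.read_csv("glass.csv")
# column_stack keeps every row, including duplicate al values
X = np.column_stack((glass.al, glass.ri))
print(len(glass), len(X))   # equal; the dict-based version can be shorter
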
/chapter05-cluster/chapter05-cluster-03.py:
--------------------------------------------------------------------------------
# -*- coding: utf-8 -*-
# By:Eastmount CSDN 2021-07-03
from sklearn.cluster import KMeans

X = [[0.0888, 0.5885],
     [0.1399, 0.8291],
     [0.0747, 0.4974],
     [0.0983, 0.5772],
     [0.1276, 0.5703],
     [0.1671, 0.5835],
     [0.1906, 0.5276],
     [0.1061, 0.5523],
     [0.2446, 0.4007],
     [0.1670, 0.4770],
     [0.2485, 0.4313],
     [0.1227, 0.4909],
     [0.1240, 0.5668],
     [0.1461, 0.5113],
     [0.2315, 0.3788],
     [0.0494, 0.5590],
     [0.1107, 0.4799],
     [0.2521, 0.5735],
     [0.1007, 0.6318],
     [0.1067, 0.4326],
     [0.1956, 0.4280]
    ]
print(X)

#KMeans clustering
clf = KMeans(n_clusters=3)
y_pred = clf.fit_predict(X)
print(clf)
print(y_pred)

#visualization
import numpy as np
import matplotlib.pyplot as plt
x = [n[0] for n in X]
y = [n[1] for n in X]

plt.scatter(x, y, c=y_pred, marker='x')
plt.title("Kmeans-Basketball Data")
plt.xlabel("assists_per_minute")
plt.ylabel("points_per_minute")
plt.legend(["Rank"])
plt.show()
--------------------------------------------------------------------------------
/chapter04-regression/chapter04-regression-05.py:
--------------------------------------------------------------------------------
# -*- coding: utf-8 -*-
# By:Eastmount CSDN 2021-07-03
from sklearn import datasets
import numpy as np
from sklearn import linear_model
import matplotlib.pyplot as plt

#Step 1: dataset split
d = datasets.load_diabetes()   #data: 442 samples x 10 features
x = d.data
x_one = x[:,np.newaxis, 2]     #take one feature: the third column
y = d.target                   #ground-truth values
x_train = x_one[:-42]   #training X [ 0:400]
x_test = x_one[-42:]    #test X [401:442]
y_train = y[:-42]       #training Y [ 0:400]
y_test = y[-42:]        #test Y [401:442]

#Step 2: linear regression
clf = linear_model.LinearRegression()
print(clf)
clf.fit(x_train, y_train)
pre = clf.predict(x_test)
print('预测结果', pre)
print('真实结果', y_test)

#Step 3: evaluation
cost = np.mean((y_test - pre)**2)   #mean squared error
print('平方和计算:', cost)
print('系数', clf.coef_)
print('截距', clf.intercept_)
print('方差', clf.score(x_test, y_test))

#Step 4: plotting
plt.plot(x_test, y_test, 'k.')   #scatter plot
plt.plot(x_test, pre, 'g-')      #fitted regression line
#draw the distance from each point to the line
for idx, m in enumerate(x_test):
    plt.plot([m, m],[y_test[idx], pre[idx]], 'r-')

plt.savefig('blog12-01.png', dpi=300)   #save the figure
plt.show()
--------------------------------------------------------------------------------
/chapter06-classifier/chapter06-classifier-04-dtc.py:
--------------------------------------------------------------------------------
# -*- coding: utf-8 -*-
# By:Eastmount CSDN 2021-07-06
import matplotlib.pyplot as plt
import numpy as np
from sklearn.datasets import load_iris
from sklearn.tree import DecisionTreeClassifier

#load the iris dataset
iris = load_iris()
X = iris.data[:, :2]   #take the first two feature columns
Y = iris.target
lr = DecisionTreeClassifier()
lr.fit(X,Y)

#meshgrid builds two grid matrices
h = .02
x_min, x_max = X[:, 0].min() - .5, X[:, 0].max() + .5
y_min, y_max = X[:, 1].min() - .5, X[:, 1].max() + .5
xx, yy = np.meshgrid(np.arange(x_min, x_max, h), np.arange(y_min, y_max, h))

#pcolormesh draws the grids xx, yy and the predictions Z onto the figure
Z = lr.predict(np.c_[xx.ravel(), yy.ravel()])
Z = Z.reshape(xx.shape)
plt.figure(1, figsize=(8,6))
plt.pcolormesh(xx, yy, Z, cmap=plt.cm.Paired)

#scatter plots
plt.scatter(X[:50,0], X[:50,1], color='red',marker='o', label='setosa')
plt.scatter(X[50:100,0], X[50:100,1], color='blue', marker='x', label='versicolor')
plt.scatter(X[100:,0], X[100:,1], color='green', marker='s', label='Virginica')
plt.xlabel('Sepal length')
plt.ylabel('Sepal width')
plt.xlim(xx.min(), xx.max())
plt.ylim(yy.min(), yy.max())
plt.xticks(())
plt.yticks(())
plt.legend(loc=2)
plt.show()
--------------------------------------------------------------------------------
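Besides drawing decision regions as chapter06-classifier-04-dtc.py does, the fitted tree itself can be printed; sklearn.tree.export_text (available since scikit-learn 0.21) renders the learned rules as readable if/else text. A short added sketch:

from sklearn.datasets import load_iris
from sklearn.tree import DecisionTreeClassifier, export_text

iris = load_iris()
X = iris.data[:, :2]                       # same two features as above
clf = DecisionTreeClassifier(max_depth=3)  # shallow tree so the printout stays readable
clf.fit(X, iris.target)

# human-readable rules of the fitted tree
print(export_text(clf, feature_names=['sepal length', 'sepal width']))
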
/chapter09-TopicAnalysis/chapter09_TopicAnalysis_08.py:
--------------------------------------------------------------------------------
#coding=utf-8
#By:Eastmount CSDN
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.feature_extraction.text import TfidfTransformer
import lda
import numpy as np

#build the term-frequency matrix
corpus = []
for line in open('test.txt', 'r').readlines():
    corpus.append(line.strip())
vectorizer = CountVectorizer()
X = vectorizer.fit_transform(corpus)

#fit the LDA model
model = lda.LDA(n_topics=3, n_iter=500, random_state=1)
model.fit_transform(X)

#document-topic distribution
doc_topic = model.doc_topic_
print("shape: {}".format(doc_topic.shape))
for n in range(9):
    topic_most_pr = doc_topic[n].argmax()
    print("文档: {} 主题: {}".format(n+1,topic_most_pr))

#visualization
import matplotlib.pyplot as plt
f, ax= plt.subplots(9, 1, figsize=(10, 10), sharex=True)
for i, k in enumerate([0,1,2,3,4,5,6,7,8]):
    ax[i].stem(doc_topic[k,:], linefmt='r-',
               markerfmt='ro', basefmt='w-')
    ax[i].set_xlim(-1, 3)    #three topics
    ax[i].set_ylim(0, 1.0)   #weights between 0 and 1
    ax[i].set_ylabel("y")
    ax[i].set_title("Document {}".format(k+1))
ax[4].set_xlabel("Topic")
plt.tight_layout()
plt.savefig("result.png")
plt.show()
--------------------------------------------------------------------------------
/chapter09-TopicAnalysis/chapter09_TopicAnalysis_09.py:
--------------------------------------------------------------------------------
#coding=utf-8
#By:Eastmount CSDN
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.feature_extraction.text import TfidfTransformer
import lda
import numpy as np

#build the term-frequency matrix
corpus = []
for line in open('test.txt', 'r').readlines():
    corpus.append(line.strip())
vectorizer = CountVectorizer()
X = vectorizer.fit_transform(corpus)

#fit the LDA model
model = lda.LDA(n_topics=3, n_iter=500, random_state=1)
model.fit_transform(X)

#document-topic distribution
doc_topic = model.doc_topic_
print("shape: {}".format(doc_topic.shape))
for n in range(9):
    topic_most_pr = doc_topic[n].argmax()
    print(u"文档: {} 主题: {}".format(n+1,topic_most_pr))
topic_word = model.topic_word_

#visualization
import matplotlib.pyplot as plt
f, ax= plt.subplots(3, 1, figsize=(8,6), sharex=True)   #three topics
for i, k in enumerate([0, 1, 2]):
    ax[i].stem(topic_word[k,:], linefmt='b-',
               markerfmt='bo', basefmt='w-')
    ax[i].set_xlim(-1, 43)   #43 words
    ax[i].set_ylim(0, 0.5)   #word frequencies
    ax[i].set_ylabel("y")
    ax[i].set_title("Topic {}".format(k))
ax[1].set_xlabel("word")
plt.tight_layout()
plt.savefig("result2.png")
plt.show()
--------------------------------------------------------------------------------
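The lda library used above also exposes a loglikelihood() method on the fitted model. A hedged sketch of comparing topic counts by that score (higher is better), assuming the same test.txt corpus; treat the method name as the lda package's API, not scikit-learn's.

from sklearn.feature_extraction.text import CountVectorizer
import lda

corpus = [line.strip() for line in open('test.txt', 'r')]
X = CountVectorizer().fit_transform(corpus)

# try a few topic counts and report the final log likelihood of each model
for k in [2, 3, 4]:
    model = lda.LDA(n_topics=k, n_iter=500, random_state=1)
    model.fit(X)
    print('n_topics={} loglikelihood={:.1f}'.format(k, model.loglikelihood()))
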
/chapter04-regression/chapter04-regression-13.py:
--------------------------------------------------------------------------------
# -*- coding: utf-8 -*-
# By:Eastmount CSDN 2021-07-03
import matplotlib.pyplot as plt
import numpy as np
from sklearn.datasets import load_iris
from sklearn.linear_model import LogisticRegression

#load the dataset
iris = load_iris()
X = iris.data[:, :2]   #take the first two feature columns
Y = iris.target

#logistic regression model
lr = LogisticRegression(C=1e5)
lr.fit(X,Y)

#meshgrid builds two grid matrices
h = .02
x_min, x_max = X[:, 0].min() - .5, X[:, 0].max() + .5
y_min, y_max = X[:, 1].min() - .5, X[:, 1].max() + .5
xx, yy = np.meshgrid(np.arange(x_min, x_max, h), np.arange(y_min, y_max, h))

#pcolormesh draws the grids xx, yy and the predictions Z onto the figure
Z = lr.predict(np.c_[xx.ravel(), yy.ravel()])
Z = Z.reshape(xx.shape)
plt.figure(1, figsize=(8,6))
plt.pcolormesh(xx, yy, Z, cmap=plt.cm.Paired)

#scatter plots
plt.scatter(X[:50,0], X[:50,1], color='red',marker='o', label='setosa')
plt.scatter(X[50:100,0], X[50:100,1], color='blue', marker='x', label='versicolor')
plt.scatter(X[100:,0], X[100:,1], color='green', marker='s', label='Virginica')

plt.xlabel('Sepal length')
plt.ylabel('Sepal width')
plt.xlim(xx.min(), xx.max())
plt.ylim(yy.min(), yy.max())
plt.xticks(())
plt.yticks(())
plt.legend(loc=2)
plt.show()
--------------------------------------------------------------------------------
/chapter06-classifier/wine/wine Data Set Information.txt:
--------------------------------------------------------------------------------
Data Set Information

These data are the results of a chemical analysis of wines grown in the same region in Italy but derived from three different cultivars. The analysis determined the quantities of 13 constituents found in each of the three types of wines.

I think that the initial data set had around 30 variables, but for some reason I only have the 13 dimensional version. I had a list of what the 30 or so variables were, but a) I lost it, and b) I would not know which 13 variables are included in the set.

The attributes are (donated by Riccardo Leardi, riclea '@' anchem.unige.it)
1) Alcohol
2) Malic acid
3) Ash
4) Alcalinity of ash
5) Magnesium
6) Total phenols
7) Flavanoids
8) Nonflavanoid phenols
9) Proanthocyanins
10) Color intensity
11) Hue
12) OD280/OD315 of diluted wines
13) Proline

In a classification context, this is a well-posed problem with well-behaved class structures. A good data set for first testing of a new classifier, but not very challenging.


Attribute Information

All attributes are continuous.

No statistics available, but it is suggested to standardise the variables for certain uses (e.g. for use with classifiers which are NOT scale invariant).

NOTE: the 1st attribute is the class identifier (1-3).
--------------------------------------------------------------------------------
/chapter09-TopicAnalysis/chapter09_TopicAnalysis_07.py:
--------------------------------------------------------------------------------
#coding=utf-8
#By:Eastmount CSDN
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.feature_extraction.text import TfidfTransformer
import lda
import numpy as np

#build the term-frequency matrix
corpus = []
for line in open('test.txt', 'r').readlines():
    corpus.append(line.strip())
vectorizer = CountVectorizer()
X = vectorizer.fit_transform(corpus)
word = vectorizer.get_feature_names()

#fit the LDA model
model = lda.LDA(n_topics=3, n_iter=500, random_state=1)
model.fit(X)

#document-topic distribution
doc_topic = model.doc_topic_
print("shape: {}".format(doc_topic.shape))
for n in range(9):
    topic_most_pr = doc_topic[n].argmax()
    print(u"文档: {} 主题: {}".format(n,topic_most_pr))

#topic-word distribution
word = vectorizer.get_feature_names()
topic_word = model.topic_word_
for w in word:
    print(w,end=" ")
print('')

n = 5
for i, topic_dist in enumerate(topic_word):
    topic_words = np.array(word)[np.argsort(topic_dist)][:-(n+1):-1]
    print(u'*Topic {}\n- {}'.format(i, ' '.join(topic_words)))

#topic-word distribution
print("shape: {}".format(topic_word.shape))
print(topic_word[:, :3])
for n in range(3):
    sum_pr = sum(topic_word[n,:])
    print("topic: {} sum: {}".format(n, sum_pr))
--------------------------------------------------------------------------------
/chapter05-cluster/chapter05-cluster-11.py:
--------------------------------------------------------------------------------
# -*- coding: utf-8 -*-
# By:Eastmount CSDN 2021-07-03
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.decomposition import PCA
from sklearn.cluster import Birch

#load the dataset and reduce it to two dimensions
glass = pd.read_csv("glass.csv")
pca = PCA(n_components=2)
newData = pca.fit_transform(glass)
print(newData[:4])
L1 = [n[0] for n in newData]
L2 = [n[1] for n in newData]
plt.rc('font', family='SimHei', size=8)      #font for the Chinese titles
plt.rcParams['axes.unicode_minus'] = False   #render the minus sign correctly

#clustering with n_clusters=2
clf = Birch(n_clusters=2)
clf.fit(glass)
pre = clf.predict(glass)
p1 = plt.subplot(221)
plt.title(u"Birch聚类 n=2")
plt.scatter(L1,L2,c=pre,marker="s")
plt.sca(p1)

#clustering with n_clusters=3
clf = Birch(n_clusters=3)
clf.fit(glass)
pre = clf.predict(glass)
p2 = plt.subplot(222)
plt.title(u"Birch聚类 n=3")
plt.scatter(L1,L2,c=pre,marker="o")
plt.sca(p2)

#clustering with n_clusters=4
clf = Birch(n_clusters=4)
clf.fit(glass)
pre = clf.predict(glass)
p3 = plt.subplot(223)
plt.title(u"Birch聚类 n=4")
plt.scatter(L1,L2,c=pre,marker="o")
plt.sca(p3)

#clustering with n_clusters=5
clf = Birch(n_clusters=5)
clf.fit(glass)
pre = clf.predict(glass)
p4 = plt.subplot(224)
plt.title(u"Birch聚类 n=5")
plt.scatter(L1,L2,c=pre,marker="s")
plt.sca(p4)
plt.savefig('18.20.png', dpi=300)
plt.show()
--------------------------------------------------------------------------------
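chapter05-cluster-11.py compares n_clusters = 2..5 visually. A numeric alternative (an addition, not from the book) is the silhouette coefficient from sklearn.metrics, where higher values indicate better-separated clusters.

import pandas as pd
from sklearn.cluster import Birch
from sklearn.metrics import silhouette_score

glass = pd.read_csv("glass.csv")
for n in [2, 3, 4, 5]:
    pre = Birch(n_clusters=n).fit_predict(glass)
    # silhouette ranges from -1 to 1; larger is better
    print('n_clusters={} silhouette={:.3f}'.format(n, silhouette_score(glass, pre)))
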
-------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # By:Eastmount CSDN 2021-07-03 3 | from sklearn.linear_model import LinearRegression 4 | from sklearn.preprocessing import PolynomialFeatures 5 | import matplotlib.pyplot as plt 6 | import numpy as np 7 | 8 | #X表示企业成本 Y表示企业利润 9 | X = [[400], [450], [486], [500], [510], [525], [540], [549], [558], [590], [610], [640], [680], [750], [900]] 10 | Y = [[80], [89], [92], [102], [121], [160], [180], [189], [199], [203], [247], [250], [259], [289], [356]] 11 | print('数据集X: ', X) 12 | print('数据集Y: ', Y) 13 | 14 | #第一步 线性回归分析 15 | clf = LinearRegression() 16 | clf.fit(X, Y) 17 | X2 = [[400], [750], [950]] 18 | Y2 = clf.predict(X2) 19 | print(Y2) 20 | res = clf.predict(np.array([1200]).reshape(-1, 1))[0] 21 | print('预测成本1200元的利润:$%.1f' % res) 22 | plt.plot(X, Y, 'ks') #绘制训练数据集散点图 23 | plt.plot(X2, Y2, 'g-') #绘制预测数据集直线 24 | 25 | #第二步 多项式回归分析 26 | xx = np.linspace(350,950,100) #350到950等差数列 27 | quadratic_featurizer = PolynomialFeatures(degree = 2) #实例化一个二次多项式 28 | x_train_quadratic = quadratic_featurizer.fit_transform(X) #用二次多项式x做变换 29 | X_test_quadratic = quadratic_featurizer.transform(X2) 30 | regressor_quadratic = LinearRegression() 31 | regressor_quadratic.fit(x_train_quadratic, Y) 32 | 33 | #把训练好X值的多项式特征实例应用到一系列点上,形成矩阵 34 | xx_quadratic = quadratic_featurizer.transform(xx.reshape(xx.shape[0], 1)) 35 | plt.plot(xx, regressor_quadratic.predict(xx_quadratic), "r--", 36 | label="$y = ax^2 + bx + c$",linewidth=2) 37 | plt.legend() 38 | plt.show() 39 | -------------------------------------------------------------------------------- /chapter08-textcluster/chapter08_textcluster_07.py: -------------------------------------------------------------------------------- 1 | # coding:utf-8 2 | #By:Eastmount CSDN 3 | from sklearn.feature_extraction.text import CountVectorizer 4 | from sklearn.feature_extraction.text import TfidfTransformer 5 | 6 | #第一步 生成词频矩阵 7 | corpus = [] 8 | for line in open('result.txt', 'r', encoding="utf-8").readlines(): 9 | corpus.append(line.strip()) 10 | vectorizer = CountVectorizer() 11 | X = vectorizer.fit_transform(corpus) 12 | word = vectorizer.get_feature_names() 13 | for n in range(len(word)): 14 | print(word[n],end=" ") 15 | print('') 16 | print(X.toarray()) 17 | 18 | #第二步 计算TF-IDF值 19 | transformer = TfidfTransformer() 20 | print(transformer) 21 | tfidf = transformer.fit_transform(X) 22 | print(tfidf.toarray()) 23 | weight = tfidf.toarray() 24 | 25 | #第三步 KMeans聚类 26 | from sklearn.cluster import KMeans 27 | clf = KMeans(n_clusters=3) 28 | s = clf.fit(weight) 29 | y_pred = clf.fit_predict(weight) 30 | print(clf) 31 | print(clf.cluster_centers_) #类簇中心 32 | print(clf.inertia_) #距离:用来评估簇的个数是否合适 越小说明簇分的越好 33 | print(y_pred) #预测类标 34 | 35 | #第四步 降维处理 36 | from sklearn.decomposition import PCA 37 | pca = PCA(n_components=2) #降低成两维绘图 38 | newData = pca.fit_transform(weight) 39 | print(newData) 40 | x = [n[0] for n in newData] 41 | y = [n[1] for n in newData] 42 | 43 | #第五步 可视化 44 | import numpy as np 45 | import matplotlib.pyplot as plt 46 | plt.scatter(x, y, c=y_pred, s=100, marker='s') 47 | plt.title("Kmeans") 48 | plt.xlabel("x") 49 | plt.ylabel("y") 50 | plt.show() 51 | 52 | -------------------------------------------------------------------------------- /chapter06-classifier/chapter06-classifier-10-svm.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # By:Eastmount CSDN 2021-07-06 3 | import os 4 | 
import numpy as np 5 | from sklearn.svm import SVC 6 | from sklearn import metrics 7 | import matplotlib.pyplot as plt 8 | from matplotlib.colors import ListedColormap 9 | from sklearn.model_selection import train_test_split 10 | from sklearn.decomposition import PCA 11 | 12 | #第一步 加载数据集 13 | path = "wine/wine.txt" 14 | data = np.loadtxt(path,dtype=float,delimiter=",") 15 | print(data) 16 | 17 | #第二步 划分数据集 18 | yy, x = np.split(data, (1,), axis=1) #第一列类标yy,后面13列特征为x 19 | print(yy.shape, x.shape) 20 | y = [] 21 | for n in yy: 22 | y.append(int(n)) 23 | y = np.array(y, dtype = int) #list转换数组 24 | #划分数据集 测试集40% 25 | train_data, test_data, train_target, test_target = train_test_split(x, y, test_size=0.4, random_state=42) 26 | print(train_data.shape, train_target.shape) 27 | print(test_data.shape, test_target.shape) 28 | 29 | #第三步 SVC训练 30 | clf = SVC() 31 | clf.fit(train_data, train_target) 32 | result = clf.predict(test_data) 33 | print(result) 34 | print(test_target) 35 | 36 | #第四步 评价算法 37 | print(sum(result==test_target)) #预测结果与真实结果比对 38 | print(metrics.classification_report(test_target, result)) #准确率 召回率 F值 39 | 40 | #第五步 降维操作 41 | pca = PCA(n_components=2) 42 | newData = pca.fit_transform(test_data) 43 | 44 | #第六步 绘图可视化 45 | plt.figure() 46 | cmap_bold = ListedColormap(['#000000', '#00FF00', '#FFFFFF']) 47 | plt.scatter(newData[:,0], newData[:,1], c=test_target, cmap=cmap_bold, s=50) 48 | plt.show() 49 | -------------------------------------------------------------------------------- /chapter09-TopicAnalysis/chapter09_TopicAnalysis_03.py: -------------------------------------------------------------------------------- 1 | #coding=utf-8 2 | #By:Eastmount CSDN 3 | from os import path 4 | from imageio import imread #scipy.misc.imread was removed in SciPy 1.2+; imageio provides a compatible imread 5 | import jieba 6 | import sys 7 | import matplotlib.pyplot as plt 8 | from wordcloud import WordCloud, STOPWORDS, ImageColorGenerator 9 | 10 | # 打开本体TXT文件 11 | text = open('data-fenci.txt').read() 12 | 13 | # 结巴分词 cut_all=True 设置为全模式 14 | wordlist = jieba.cut(text) #cut_all = True 15 | 16 | # 使用空格连接 进行中文分词 17 | wl_space_split = " ".join(wordlist) 18 | print(wl_space_split) 19 | 20 | # 读取mask/color图片 21 | d = path.dirname(__file__) 22 | nana_coloring = imread(path.join(d, "pic.png")) 23 | 24 | # 对分词后的文本生成词云 25 | my_wordcloud = WordCloud( background_color = 'white', 26 | mask = nana_coloring, 27 | max_words = 2000, 28 | stopwords = STOPWORDS, 29 | max_font_size = 50, 30 | random_state = 30, 31 | ) 32 | 33 | # generate word cloud 34 | my_wordcloud.generate(wl_space_split) 35 | 36 | # create coloring from image 37 | image_colors = ImageColorGenerator(nana_coloring) 38 | 39 | # recolor wordcloud and show 40 | my_wordcloud.recolor(color_func=image_colors) 41 | 42 | plt.imshow(my_wordcloud) # 显示词云图 43 | plt.axis("off") # 是否显示x轴、y轴下标 44 | plt.show() 45 | 46 | # save img 47 | my_wordcloud.to_file(path.join(d, "cloudimg.png")) 48 | -------------------------------------------------------------------------------- /chapter06-classifier/chapter06-classifier-03-dtc.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # By:Eastmount CSDN 2021-07-06 3 | from sklearn.datasets import load_iris 4 | from sklearn.tree import DecisionTreeClassifier 5 | from sklearn import metrics 6 | import numpy as np 7 | import matplotlib.pyplot as plt 8 | 9 | #导入数据集iris 10 | ''' 11 | 重点:分割数据集 构造训练集/测试集,80/20 12 | 80%训练 0-40 50-90 100-140 13 | 20%预测 40-50 90-100 140-150 14 | ''' 15 | iris = load_iris() 16 | train_data =
np.concatenate((iris.data[0:40, :], iris.data[50:90, :], iris.data[100:140, :]), axis = 0) #训练集 17 | train_target = np.concatenate((iris.target[0:40], iris.target[50:90], iris.target[100:140]), axis = 0) #训练集样本类别 18 | test_data = np.concatenate((iris.data[40:50, :], iris.data[90:100, :], iris.data[140:150, :]), axis = 0) #测试集 19 | test_target = np.concatenate((iris.target[40:50], iris.target[90:100], iris.target[140:150]), axis = 0) #测试集样本类别 20 | 21 | #导入决策树DTC包 22 | clf = DecisionTreeClassifier() 23 | clf.fit(train_data, train_target) #注意均使用训练数据集和样本类标 24 | print(clf) 25 | predict_target = clf.predict(test_data) #测试集 26 | print(predict_target) 27 | 28 | #预测结果与真实结果比对 29 | print(sum(predict_target == test_target)) 30 | 31 | #输出准确率 召回率 F值 32 | print(metrics.classification_report(test_target, predict_target)) 33 | print(metrics.confusion_matrix(test_target, predict_target)) 34 | 35 | #获取花卉测试数据集两列数据 36 | X = test_data 37 | L1 = [n[0] for n in X] 38 | L2 = [n[1] for n in X] 39 | 40 | #绘图 41 | plt.scatter(L1, L2, c=predict_target, marker='x') #cmap=plt.cm.Paired 42 | plt.title("DecisionTreeClassifier") 43 | plt.show() 44 | -------------------------------------------------------------------------------- /chapter04-regression/chapter04-regression-07.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # By:Eastmount CSDN 2021-07-03 3 | from sklearn.linear_model import LinearRegression 4 | from sklearn.preprocessing import PolynomialFeatures 5 | import matplotlib.pyplot as plt 6 | import numpy as np 7 | 8 | #X表示企业成本 Y表示企业利润 9 | X = [[400], [450], [486], [500], [510], [525], [540], [549], [558], [590], [610], [640], [680], [750], [900]] 10 | Y = [[80], [89], [92], [102], [121], [160], [180], [189], [199], [203], [247], [250], [259], [289], [356]] 11 | print('数据集X: ', X) 12 | print('数据集Y: ', Y) 13 | 14 | #第一步 线性回归分析 15 | clf = LinearRegression() 16 | clf.fit(X, Y) 17 | X2 = [[400], [750], [950]] 18 | Y2 = clf.predict(X2) 19 | print(Y2) 20 | res = clf.predict(np.array([1200]).reshape(-1, 1))[0] 21 | print('预测成本1200元的利润:$%.1f' % res) 22 | plt.plot(X, Y, 'ks') #绘制训练数据集散点图 23 | plt.plot(X2, Y2, 'g-') #绘制预测数据集直线 24 | 25 | #第二步 多项式回归分析 26 | xx = np.linspace(350,950,100) #350到950等差数列 27 | quadratic_featurizer = PolynomialFeatures(degree = 2) #实例化一个二次多项式 28 | x_train_quadratic = quadratic_featurizer.fit_transform(X) #用二次多项式x做变换 29 | X_test_quadratic = quadratic_featurizer.transform(X2) 30 | regressor_quadratic = LinearRegression() 31 | regressor_quadratic.fit(x_train_quadratic, Y) 32 | 33 | #把训练好X值的多项式特征实例应用到一系列点上,形成矩阵 34 | xx_quadratic = quadratic_featurizer.transform(xx.reshape(xx.shape[0], 1)) 35 | plt.plot(xx, regressor_quadratic.predict(xx_quadratic), "r--", 36 | label="$y = ax^2 + bx + c$",linewidth=2) 37 | plt.legend() 38 | plt.show() 39 | 40 | #评价 41 | print('1 r-squared', clf.score(X, Y)) 42 | print('2 r-squared', regressor_quadratic.score(x_train_quadratic, Y)) 43 | -------------------------------------------------------------------------------- /chapter04-regression/chapter04-regression-08.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # By:Eastmount CSDN 2021-07-03 3 | from sklearn.linear_model import LinearRegression 4 | from sklearn.preprocessing import PolynomialFeatures 5 | import matplotlib.pyplot as plt 6 | import numpy as np 7 | 8 | #X表示企业成本 Y表示企业利润 9 | X = [[400], [450], [486], [500], [510], [525], [540], [549], [558], [590], [610], [640], [680], [750], 
[900]] 10 | Y = [[80], [89], [92], [102], [121], [160], [180], [189], [199], [203], [247], [250], [259], [289], [356]] 11 | print('数据集X: ', X) 12 | print('数据集Y: ', Y) 13 | 14 | #第一步 线性回归分析 15 | clf = LinearRegression() 16 | clf.fit(X, Y) 17 | X2 = [[400], [750], [950]] 18 | Y2 = clf.predict(X2) 19 | print(Y2) 20 | res = clf.predict(np.array([1200]).reshape(-1, 1))[0] 21 | print('预测成本1200元的利润:$%.1f' % res) 22 | plt.plot(X, Y, 'ks') #绘制训练数据集散点图 23 | plt.plot(X2, Y2, 'g-') #绘制预测数据集直线 24 | 25 | #第二步 多项式回归分析 26 | xx = np.linspace(350,950,100) 27 | quadratic_featurizer = PolynomialFeatures(degree = 5) 28 | x_train_quadratic = quadratic_featurizer.fit_transform(X) 29 | X_test_quadratic = quadratic_featurizer.transform(X2) 30 | regressor_quadratic = LinearRegression() 31 | regressor_quadratic.fit(x_train_quadratic, Y) 32 | #把训练好X值的多项式特征实例应用到一系列点上,形成矩阵 33 | xx_quadratic = quadratic_featurizer.transform(xx.reshape(xx.shape[0], 1)) 34 | plt.plot(xx, regressor_quadratic.predict(xx_quadratic), "r--", 35 | label="degree=5 polynomial",linewidth=2) 36 | plt.legend() 37 | plt.show() 38 | print('1 r-squared', clf.score(X, Y)) 39 | print('5 r-squared', regressor_quadratic.score(x_train_quadratic, Y)) 40 | 41 | # ('1 r-squared', 0.9118311887769025) 42 | # ('5 r-squared', 0.98087802460869788) 43 | 44 | 45 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Book2-Python-DataAnalysis 2 | 3 | This repository contains all of the source code for the book 《Python网络数据爬取及分析从入门到精通(分析篇)》 (Python Web Data Crawling and Analysis, Analysis Volume) by Yang Xiuzhang (Eastmount), covering visualization analysis, cluster analysis, regression analysis, classification analysis, word clouds, LDA topic analysis, and more. All of the code has been ported to Python 3. We hope it helps you. Keep at it! 4 | 5 | 6 |
7 | 8 |
9 | 10 | - https://item.jd.com/12363491.html 11 | - https://item.jd.com/12373850.html 12 | 13 | --- 14 | 15 | - **Chapter 1: Overview of Web Data Analysis**<br>
16 | 1.1 Data Analysis<br>
17 | 1.2 Related Technologies<br>
18 | 1.3 The Anaconda Development Environment<br>
19 | 1.4 Common Datasets<br>
20 | 21 | - **Chapter 2: Common Python Libraries for Data Analysis**<br>
22 | 2.1 Common Libraries<br>
23 | 2.2 NumPy
24 | 2.3 Pandas
25 | 2.4 Matplotlib
26 | 2.5 Sklearn
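
Nearly all of the examples in this repository follow the same scikit-learn pattern introduced in Chapter 2: construct an estimator, fit it, then predict. A minimal sketch, using only sklearn's built-in iris data so nothing from this repository is required:

```python
from sklearn.datasets import load_iris
from sklearn.cluster import KMeans

X = load_iris().data              #(150, 4) feature matrix
model = KMeans(n_clusters=3)      #1. construct the estimator with its hyper-parameters
model.fit(X)                      #2. fit it to the data
print(model.predict(X[:5]))       #3. apply it to (new) samples
```
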
27 | 28 | - **Chapter 3: Visual Analysis with Python**<br>
29 | 3.1 Visual Analysis with Matplotlib<br>
30 | 3.2 Reading and Visualizing Files with Pandas<br>
31 | 3.3 A First Look at ECharts Visualization<br>
32 | 33 | - **Chapter 4: Regression Analysis with Python**<br>
34 | 4.1 Regression<br>
35 | 4.2 Linear Regression Analysis<br>
36 | 4.3 Polynomial Regression Analysis (see the sketch below)<br>
37 | 4.4 Logistic Regression Analysis<br>
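
As a taste of Chapter 4, a minimal linear-versus-polynomial sketch; the numbers are toy values invented for illustration:

```python
import numpy as np
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import PolynomialFeatures

X = np.array([[1.0], [2.0], [3.0], [4.0], [5.0]])
y = np.array([2.1, 4.3, 8.2, 14.1, 22.3])           #roughly quadratic toy data

linear = LinearRegression().fit(X, y)
X2 = PolynomialFeatures(degree=2).fit_transform(X)  #columns [1, x, x^2]
quadratic = LinearRegression().fit(X2, y)
print(linear.score(X, y), quadratic.score(X2, y))   #R^2 scores; the quadratic fit should win
```
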
38 | 39 | - **Chapter 5: Cluster Analysis with Python**<br>
40 | 5.1 Clustering<br>
41 | 5.2 K-Means
42 | 5.3 BIRCH
43 | 5.4 Dendrogram-Based Clustering<br>
44 | 5.5 Dimensionality Reduction<br>
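
The K-Means examples of Section 5.2 fix the number of clusters by hand; one common way to sanity-check that choice is the silhouette score. A minimal sketch on synthetic data (two blobs, so k=2 should score best):

```python
import numpy as np
from sklearn.cluster import KMeans
from sklearn.metrics import silhouette_score

rng = np.random.RandomState(0)
X = np.vstack([rng.normal(0, 1, (50, 2)),   #blob around (0, 0)
               rng.normal(6, 1, (50, 2))])  #blob around (6, 6)
for k in (2, 3, 4):
    labels = KMeans(n_clusters=k, random_state=0).fit_predict(X)
    print(k, round(silhouette_score(X, labels), 3))  #higher is better
```
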
45 | 46 | - **Chapter 6: Classification Analysis with Python**<br>
47 | 6.1 Classification<br>
48 | 6.2 Decision Trees<br>
49 | 6.3 The KNN Classification Algorithm (see the sketch below)<br>
50 | 6.4 The SVM Classification Algorithm<br>
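
A minimal KNN sketch in the spirit of Chapter 6, on sklearn's built-in iris data:

```python
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier

X, y = load_iris(return_X_y=True)
X_tr, X_te, y_tr, y_te = train_test_split(X, y, test_size=0.3,
                                          random_state=1, stratify=y)
knn = KNeighborsClassifier(n_neighbors=3).fit(X_tr, y_tr)
print(knn.score(X_te, y_te))  #accuracy on the held-out 30%
```
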
51 | 52 | 53 | - **Chapter 7: Association Rule Mining with Python**<br>
54 | 7.1 Basic Concepts<br>
55 | 7.2 The Apriori Algorithm<br>
56 | 7.3 Implementing the Apriori Algorithm (see the sketch below)<br>
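
As a small illustration of Chapter 7's Apriori idea, a self-contained sketch of the frequent-itemset stage; the transactions are invented toy data, and rule generation and the subset-pruning of candidates are omitted for brevity:

```python
from itertools import combinations

transactions = [{'milk', 'bread'}, {'milk', 'diaper', 'beer'},
                {'milk', 'bread', 'diaper'}, {'bread', 'diaper'}]
MIN_SUPPORT = 2  #absolute transaction count

def frequent_itemsets(transactions, min_support):
    items = {i for t in transactions for i in t}
    candidates = [frozenset([i]) for i in items]
    result, k = {}, 1
    while candidates:
        #count each candidate's support (number of transactions containing it)
        counts = {c: sum(c <= t for t in transactions) for c in candidates}
        frequent = {c: n for c, n in counts.items() if n >= min_support}
        result.update(frequent)
        #join step: build (k+1)-item candidates from pairs of frequent k-itemsets
        candidates = list({a | b for a, b in combinations(frequent, 2)
                           if len(a | b) == k + 1})
        k += 1
    return result

print(frequent_itemsets(transactions, MIN_SUPPORT))
```
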
57 | 58 | - **Chapter 8: Data Preprocessing and Text Clustering with Python**<br>
59 | 8.1 Overview of Data Preprocessing<br>
60 | 8.2 Chinese Word Segmentation<br>
61 | 8.3 Data Cleaning<br>
62 | 8.4 Feature Extraction and the Vector Space Model<br>
63 | 8.5 Weight Calculation<br>
64 | 8.6 Text Clustering (see the sketch below)<br>
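
Chapter 8's preprocessing pipeline boils down to: segment Chinese text with jieba, join the tokens with spaces, then vectorize. A minimal sketch (assumes jieba and scikit-learn are installed; the two sentences are invented examples):

```python
import jieba
from sklearn.feature_extraction.text import TfidfVectorizer

docs = ['我喜欢数据分析', '文本聚类是无监督学习']
corpus = [' '.join(jieba.cut(d)) for d in docs]   #space-separated tokens
tfidf = TfidfVectorizer().fit_transform(corpus)   #rows: documents, columns: terms
print(tfidf.toarray())
```
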
65 | 66 | - **Chapter 9: Word-Cloud Hotspots and Topic Distribution Analysis with Python**<br>
67 | 9.1 Word Clouds<br>
68 | 9.2 Installing and Using WordCloud<br>
69 | 9.3 LDA
70 | 71 | - **Chapter 10: Complex Networks and Database-Based Analysis**<br>
72 | 10.1 Complex Networks<br>
73 | 10.2 Database-Based Data Analysis<br>
74 | 10.3 Database-Based Analysis of Blogging Behavior<br>
75 | 76 | - **Afterword** 77 | -------------------------------------------------------------------------------- /chapter05-cluster/chapter05-cluster-16py.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # By:Eastmount CSDN 2021-07-03 3 | import os 4 | import codecs 5 | from sklearn.feature_extraction.text import CountVectorizer, TfidfTransformer 6 | from sklearn.manifold import TSNE 7 | from sklearn.cluster import KMeans 8 | import matplotlib.pyplot as plt 9 | import numpy as np 10 | import pandas as pd 11 | import jieba 12 | from sklearn import metrics 13 | from sklearn.metrics import silhouette_score 14 | from array import array 15 | #from numpy import * #unnecessary wildcard import; the np alias above suffices 16 | #from pylab import mpl #shadowed by 'import matplotlib as mpl' below 17 | from sklearn.metrics.pairwise import cosine_similarity 18 | #import matplotlib.pyplot as plt #duplicate of the import above 19 | import matplotlib as mpl 20 | from scipy.cluster.hierarchy import ward, dendrogram 21 | 22 | #---------------------------------------加载语料------------------------------------- 23 | text = open('data-fenci.txt').read() 24 | print(text) 25 | list1=text.split("\n") 26 | print(list1) 27 | print(list1[0]) 28 | print(list1[1]) 29 | mytext_list=list1 30 | 31 | #控制显示数量 32 | count_vec = CountVectorizer(min_df=20, max_df=1000) #最大值忽略 33 | xx1 = count_vec.fit_transform(list1).toarray() 34 | word=count_vec.get_feature_names() 35 | print("word feature length: {}".format(len(word))) 36 | print(word) 37 | print(xx1) 38 | print(type(xx1)) 39 | print(xx1.shape) 40 | print(xx1[0]) 41 | 42 | #---------------------------------------层次聚类------------------------------------- 43 | titles = word 44 | #dist = cosine_similarity(xx1) 45 | 46 | mpl.rcParams['font.sans-serif'] = ['SimHei'] 47 | 48 | df = pd.DataFrame(xx1) 49 | print(df.corr()) 50 | print(df.corr('spearman')) 51 | print(df.corr('kendall')) 52 | dist = df.corr() 53 | print(dist) 54 | print(type(dist)) 55 | print(dist.shape) 56 | 57 | #define the linkage_matrix using ward clustering pre-computed distances 58 | linkage_matrix = ward(dist) 59 | fig, ax = plt.subplots(figsize=(8, 12)) # set size 60 | ax = dendrogram(linkage_matrix, orientation="right", 61 | p=20, labels=titles, leaf_font_size=12 62 | ) #leaf_rotation=90., leaf_font_size=12.
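#------------------------------------------------------------------------
#Editor's sketch (not part of the original script): the dendrogram above
#only draws the hierarchy. To cut the same ward linkage into flat cluster
#labels, scipy's fcluster can be used; the 5 below is an arbitrary
#illustrative choice, not a value taken from this script.
from scipy.cluster.hierarchy import fcluster
flat_labels = fcluster(linkage_matrix, t=5, criterion='maxclust')
print(dict(zip(titles, flat_labels)))  #word -> flat cluster id
#------------------------------------------------------------------------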
63 | #show plot with tight layout 64 | plt.tight_layout() 65 | #save figure as ward_clusters 66 | plt.savefig('KH.png', dpi=200) 67 | plt.show() 68 | -------------------------------------------------------------------------------- /chapter05-cluster/chapter05-cluster-15.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # By:Eastmount CSDN 2021-07-03 3 | import cv2 4 | import numpy as np 5 | import matplotlib.pyplot as plt 6 | 7 | #读取原始图像 8 | img = cv2.imread('scenery.png') 9 | print(img.shape) 10 | 11 | #图像二维像素转换为一维 12 | data = img.reshape((-1,3)) 13 | data = np.float32(data) 14 | 15 | #定义中心 (type,max_iter,epsilon) 16 | criteria = (cv2.TERM_CRITERIA_EPS + 17 | cv2.TERM_CRITERIA_MAX_ITER, 10, 1.0) 18 | 19 | #设置标签 20 | flags = cv2.KMEANS_RANDOM_CENTERS 21 | 22 | #K-Means聚类 聚集成2类 23 | compactness, labels2, centers2 = cv2.kmeans(data, 2, None, criteria, 10, flags) 24 | 25 | #K-Means聚类 聚集成4类 26 | compactness, labels4, centers4 = cv2.kmeans(data, 4, None, criteria, 10, flags) 27 | 28 | #K-Means聚类 聚集成8类 29 | compactness, labels8, centers8 = cv2.kmeans(data, 8, None, criteria, 10, flags) 30 | 31 | #K-Means聚类 聚集成16类 32 | compactness, labels16, centers16 = cv2.kmeans(data, 16, None, criteria, 10, flags) 33 | 34 | #K-Means聚类 聚集成64类 35 | compactness, labels64, centers64 = cv2.kmeans(data, 64, None, criteria, 10, flags) 36 | 37 | #图像转换回uint8二维类型 38 | centers2 = np.uint8(centers2) 39 | res = centers2[labels2.flatten()] 40 | dst2 = res.reshape((img.shape)) 41 | 42 | centers4 = np.uint8(centers4) 43 | res = centers4[labels4.flatten()] 44 | dst4 = res.reshape((img.shape)) 45 | 46 | centers8 = np.uint8(centers8) 47 | res = centers8[labels8.flatten()] 48 | dst8 = res.reshape((img.shape)) 49 | 50 | centers16 = np.uint8(centers16) 51 | res = centers16[labels16.flatten()] 52 | dst16 = res.reshape((img.shape)) 53 | 54 | centers64 = np.uint8(centers64) 55 | res = centers64[labels64.flatten()] 56 | dst64 = res.reshape((img.shape)) 57 | 58 | #图像转换为RGB显示 59 | img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB) 60 | dst2 = cv2.cvtColor(dst2, cv2.COLOR_BGR2RGB) 61 | dst4 = cv2.cvtColor(dst4, cv2.COLOR_BGR2RGB) 62 | dst8 = cv2.cvtColor(dst8, cv2.COLOR_BGR2RGB) 63 | dst16 = cv2.cvtColor(dst16, cv2.COLOR_BGR2RGB) 64 | dst64 = cv2.cvtColor(dst64, cv2.COLOR_BGR2RGB) 65 | 66 | #用来正常显示中文标签 67 | plt.rcParams['font.sans-serif']=['SimHei'] 68 | 69 | #显示图像 70 | titles = [u'原始图像', u'聚类图像 K=2', u'聚类图像 K=4', 71 | u'聚类图像 K=8', u'聚类图像 K=16', u'聚类图像 K=64'] 72 | images = [img, dst2, dst4, dst8, dst16, dst64] 73 | for i in range(6): 74 | plt.subplot(2,3,i+1), plt.imshow(images[i], 'gray'), 75 | plt.title(titles[i]) 76 | plt.xticks([]),plt.yticks([]) 77 | plt.show() 78 | -------------------------------------------------------------------------------- /chapter05-cluster/chapter05-cluster-04.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # By:Eastmount CSDN 2021-07-03 3 | 4 | #------------------------------------------------------------------------ 5 | #第一步 读取数据 6 | import os 7 | 8 | data = [] 9 | for line in open("data.txt", "r").readlines(): 10 | line = line.rstrip() 11 | result = ' '.join(line.split()) 12 | #将字符串转换为小数 13 | s = [float(x) for x in result.strip().split(' ')] 14 | print(s) 15 | data.append(s) 16 | print(data) 17 | print(type(data)) 18 | 19 | #------------------------------------------------------------------------ 20 | #第二步 获取两列数据 21 | print('第一列 第五列数据') 22 | L2 = [n[0] for n in data] 
#第一列表示球员每分钟助攻数:assists_per_minute 23 | L5 = [n[4] for n in data] #第五列表示球员每分钟得分数:points_per_minute 24 | T = dict(zip(L2,L5)) #两列数据生成二维数据 25 | type(T) 26 | print(L2) 27 | 28 | #下述代码将dict类型转换为list 29 | X = list(map(lambda x,y: (x,y), T.keys(),T.values())) 30 | print(type(X)) 31 | print(X) 32 | 33 | #------------------------------------------------------------------------ 34 | #第三步 聚类分析 35 | from sklearn.cluster import KMeans 36 | clf = KMeans(n_clusters=3) 37 | y_pred = clf.fit_predict(X) 38 | print(clf) 39 | print(y_pred) 40 | 41 | #------------------------------------------------------------------------ 42 | #第四步 绘制图形 43 | import numpy as np 44 | import matplotlib.pyplot as plt 45 | 46 | #获取第一列和第二列数据,使用for循环获取,n[0]表示X第一列 47 | x = [n[0] for n in X] 48 | y = [n[1] for n in X] 49 | 50 | #坐标 51 | x1, y1 = [], [] 52 | x2, y2 = [], [] 53 | x3, y3 = [], [] 54 | 55 | #分布获取类标为0、1、2的数据并赋值给(x1,y1) (x2,y2) (x3,y3) 56 | i = 0 57 | while i < len(X): 58 | if y_pred[i]==0: 59 | x1.append(X[i][0]) 60 | y1.append(X[i][1]) 61 | elif y_pred[i]==1: 62 | x2.append(X[i][0]) 63 | y2.append(X[i][1]) 64 | elif y_pred[i]==2: 65 | x3.append(X[i][0]) 66 | y3.append(X[i][1]) 67 | i = i + 1 68 | 69 | #三种颜色 红 绿 蓝,marker='x'表示类型,o表示圆点、*表示星型、x表示点 70 | plot1, = plt.plot(x1, y1, 'or', marker="x") 71 | plot2, = plt.plot(x2, y2, 'og', marker="o") 72 | plot3, = plt.plot(x3, y3, 'ob', marker="*") 73 | 74 | plt.title("Kmeans-Basketball Data") #绘制标题 75 | plt.xlabel("assists_per_minute") #绘制x轴 76 | plt.ylabel("points_per_minute") #绘制y轴 77 | plt.legend((plot1, plot2, plot3), ('A', 'B', 'C'), fontsize=10) #设置右上角图例 78 | plt.show() 79 | -------------------------------------------------------------------------------- /chapter05-cluster/chapter05-cluster-05.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # By:Eastmount CSDN 2021-07-03 3 | 4 | #------------------------------------------------------------------------ 5 | #第一步 读取数据 6 | import os 7 | 8 | data = [] 9 | for line in open("data.txt", "r").readlines(): 10 | line = line.rstrip() 11 | result = ' '.join(line.split()) 12 | #将字符串转换为小数 13 | s = [float(x) for x in result.strip().split(' ')] 14 | print(s) 15 | data.append(s) 16 | print(data) 17 | print(type(data)) 18 | 19 | #------------------------------------------------------------------------ 20 | #第二步 获取两列数据 21 | print('第一列 第五列数据') 22 | L2 = [n[0] for n in data] #第一列表示球员每分钟助攻数:assists_per_minute 23 | L5 = [n[4] for n in data] #第五列表示球员每分钟得分数:points_per_minute 24 | T = dict(zip(L2,L5)) #两列数据生成二维数据 25 | type(T) 26 | print(L2) 27 | 28 | #下述代码将dict类型转换为list 29 | X = list(map(lambda x,y: (x,y), T.keys(),T.values())) 30 | print(type(X)) 31 | print(X) 32 | 33 | #------------------------------------------------------------------------ 34 | #第三步 聚类分析 35 | from sklearn.cluster import KMeans 36 | clf = KMeans(n_clusters=3) 37 | y_pred = clf.fit_predict(X) 38 | print(clf) 39 | print(y_pred) 40 | 41 | #------------------------------------------------------------------------ 42 | #第四步 绘制图形 43 | import numpy as np 44 | import matplotlib.pyplot as plt 45 | 46 | #获取第一列和第二列数据,使用for循环获取,n[0]表示X第一列 47 | x = [n[0] for n in X] 48 | y = [n[1] for n in X] 49 | 50 | #坐标 51 | x1, y1 = [], [] 52 | x2, y2 = [], [] 53 | x3, y3 = [], [] 54 | 55 | #分布获取类标为0、1、2的数据并赋值给(x1,y1) (x2,y2) (x3,y3) 56 | i = 0 57 | while i < len(X): 58 | if y_pred[i]==0: 59 | x1.append(X[i][0]) 60 | y1.append(X[i][1]) 61 | elif y_pred[i]==1: 62 | x2.append(X[i][0]) 63 | y2.append(X[i][1]) 64 | elif y_pred[i]==2: 65 | 
x3.append(X[i][0]) 66 | y3.append(X[i][1]) 67 | i = i + 1 68 | 69 | #三种颜色 红 绿 蓝,marker='x'表示类型,o表示圆点、*表示星型、x表示点 70 | plot1, = plt.plot(x1, y1, 'or', marker="x") 71 | plot2, = plt.plot(x2, y2, 'og', marker="o") 72 | plot3, = plt.plot(x3, y3, 'ob', marker="*") 73 | 74 | plt.title("Kmeans-Basketball Data") #绘制标题 75 | plt.xlabel("assists_per_minute") #绘制x轴 76 | plt.ylabel("points_per_minute") #绘制y轴 77 | plt.legend((plot1, plot2, plot3), ('A', 'B', 'C'), fontsize=10) #设置右上角图例 78 | 79 | #------------------------------------------------------------------------ 80 | #第五步 设置类簇中心 81 | centers = clf.cluster_centers_ 82 | print(centers) 83 | plt.plot(centers[:,0],centers[:,1],'r*',markersize=20) #显示三个中心点 84 | plt.show() 85 | -------------------------------------------------------------------------------- /chapter06-classifier/chapter06-classifier-09-svm.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # By:Eastmount CSDN 2021-07-06 3 | import os 4 | import numpy as np 5 | from sklearn.svm import SVC 6 | from sklearn import metrics 7 | import matplotlib.pyplot as plt 8 | from matplotlib.colors import ListedColormap 9 | 10 | #---------------------------------------------------------------------------- 11 | #第一步 加载数据集 12 | path = "wine/wine.txt" 13 | data = np.loadtxt(path,dtype=float,delimiter=",") 14 | print(data) 15 | 16 | #---------------------------------------------------------------------------- 17 | #第二步 划分数据集 18 | yy, x = np.split(data, (1,), axis=1) #第一列为类标yy,后面13列特征为x 19 | print(yy.shape, x.shape) 20 | y = [] 21 | for n in yy: #将类标浮点型转化为整数 22 | y.append(int(n)) 23 | x = x[:, :2] #获取x前两列数据,方便绘图 对应x、y轴 24 | train_data = np.concatenate((x[0:40,:], x[60:100,:], x[140:160,:]), axis = 0) #训练集 25 | train_target = np.concatenate((y[0:40], y[60:100], y[140:160]), axis = 0) #样本类别 26 | test_data = np.concatenate((x[40:60, :], x[100:140, :], x[160:,:]), axis = 0) #测试集 27 | test_target = np.concatenate((y[40:60], y[100:140], y[160:]), axis = 0) #样本类别 28 | print(train_data.shape, train_target.shape) 29 | print(test_data.shape, test_target.shape) 30 | 31 | #---------------------------------------------------------------------------- 32 | #第三步 SVC训练 33 | clf = SVC() 34 | clf.fit(train_data,train_target) 35 | result = clf.predict(test_data) 36 | print(result) 37 | 38 | #---------------------------------------------------------------------------- 39 | #第四步 评价算法 40 | print(sum(result==test_target)) #预测结果与真实结果比对 41 | print(metrics.classification_report(test_target, result)) #准确率 召回率 F值 42 | 43 | #---------------------------------------------------------------------------- 44 | #第五步 创建网格 45 | x1_min, x1_max = test_data[:,0].min()-0.1, test_data[:,0].max()+0.1 #第一列 46 | x2_min, x2_max = test_data[:,1].min()-0.1, test_data[:,1].max()+0.1 #第二列 47 | xx, yy = np.meshgrid(np.arange(x1_min, x1_max, 0.1), 48 | np.arange(x2_min, x2_max, 0.1)) #生成网格型数据 49 | z = clf.predict(np.c_[xx.ravel(), yy.ravel()]) 50 | 51 | #---------------------------------------------------------------------------- 52 | #第六步 绘图可视化 53 | cmap_light = ListedColormap(['#FFAAAA', '#AAFFAA', '#AAAAFF']) #颜色Map 54 | cmap_bold = ListedColormap(['#000000', '#00FF00', '#FFFFFF']) 55 | plt.figure() 56 | z = z.reshape(xx.shape) 57 | print(xx.shape, yy.shape, z.shape, test_target.shape) 58 | plt.pcolormesh(xx, yy, z, cmap=cmap_light) 59 | plt.scatter(test_data[:,0], test_data[:,1], c=test_target, 60 | cmap=cmap_bold, s=50) 61 | plt.show() 62 | 
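#------------------------------------------------------------------------
#Editor's sketch (not part of the original script): the decision surface
#above hinges on one idiom: np.c_[xx.ravel(), yy.ravel()] flattens both
#coordinate grids and pairs them column-wise, yielding one sample per grid
#cell. A self-contained micro-example with a toy 3x2 grid:
import numpy as np
gx, gy = np.meshgrid(np.arange(3), np.arange(2))  #gx, gy both shape (2, 3)
grid = np.c_[gx.ravel(), gy.ravel()]              #shape (6, 2): every (x, y) pair
print(grid.tolist())  #[[0, 0], [1, 0], [2, 0], [0, 1], [1, 1], [2, 1]]
#------------------------------------------------------------------------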
-------------------------------------------------------------------------------- /chapter06-classifier/chapter06-classifier-07-knn.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # By:Eastmount CSDN 2021-07-06 3 | import os 4 | import numpy as np 5 | from sklearn.neighbors import KNeighborsClassifier 6 | from sklearn import metrics 7 | from sklearn.decomposition import PCA 8 | import matplotlib.pyplot as plt 9 | from matplotlib.colors import ListedColormap 10 | 11 | #---------------------------------------------------------------------------- 12 | #第一步 加载数据集 13 | path = "wine/wine.txt" 14 | data = np.loadtxt(path,dtype=float,delimiter=",") 15 | print(data) 16 | 17 | #---------------------------------------------------------------------------- 18 | #第二步 划分数据集 19 | yy, x = np.split(data, (1,), axis=1) #第一列为类标yy,后面13列特征为x 20 | print(yy.shape, x.shape) 21 | y = [] 22 | for n in yy: #将类标浮点型转化为整数 23 | y.append(int(n)) 24 | x = x[:, :2] #获取x前两列数据,方便绘图 对应x、y轴 25 | train_data = np.concatenate((x[0:40,:], x[60:100,:], x[140:160,:]), axis = 0) #训练集 26 | train_target = np.concatenate((y[0:40], y[60:100], y[140:160]), axis = 0) #样本类别 27 | test_data = np.concatenate((x[40:60, :], x[100:140, :], x[160:,:]), axis = 0) #测试集 28 | test_target = np.concatenate((y[40:60], y[100:140], y[160:]), axis = 0) #样本类别 29 | print(train_data.shape, train_target.shape) 30 | print(test_data.shape, test_target.shape) 31 | 32 | #---------------------------------------------------------------------------- 33 | #第三步 KNN训练 34 | clf = KNeighborsClassifier(n_neighbors=3,algorithm='kd_tree') #K=3 35 | clf.fit(train_data,train_target) 36 | result = clf.predict(test_data) 37 | print(result) 38 | 39 | #---------------------------------------------------------------------------- 40 | #第四步 评价算法 41 | print(sum(result==test_target)) #预测结果与真实结果比对 42 | print(metrics.classification_report(test_target, result)) #准确率 召回率 F值 43 | 44 | #---------------------------------------------------------------------------- 45 | #第五步 创建网格 46 | x1_min, x1_max = test_data[:,0].min()-0.1, test_data[:,0].max()+0.1 #第一列 47 | x2_min, x2_max = test_data[:,1].min()-0.1, test_data[:,1].max()+0.1 #第二列 48 | xx, yy = np.meshgrid(np.arange(x1_min, x1_max, 0.1), 49 | np.arange(x2_min, x2_max, 0.1)) #生成网格型数据 50 | print(xx.shape, yy.shape) #(53L, 36L) (53L, 36L) 51 | 52 | z = clf.predict(np.c_[xx.ravel(), yy.ravel()]) #ravel()拉直函数 53 | print(xx.ravel().shape, yy.ravel().shape) #(1908L,) (1908L,) 54 | print(np.c_[xx.ravel(), yy.ravel()].shape) #合并 (1908L,2) 55 | 56 | #---------------------------------------------------------------------------- 57 | #第六步 绘图可视化 58 | cmap_light = ListedColormap(['#FFAAAA', '#AAFFAA', '#AAAAFF']) #颜色Map 59 | cmap_bold = ListedColormap(['#FF0000', '#00FF00', '#0000FF']) 60 | plt.figure() 61 | z = z.reshape(xx.shape) 62 | print(xx.shape, yy.shape, z.shape, test_target.shape) 63 | #(53L, 36L) (53L, 36L) (53L, 36L) (78L,) 64 | plt.pcolormesh(xx, yy, z, cmap=cmap_light) 65 | plt.scatter(test_data[:,0], test_data[:,1], c=test_target, 66 | cmap=cmap_bold, s=50) 67 | plt.show() 68 | -------------------------------------------------------------------------------- /chapter05-cluster/data.txt: -------------------------------------------------------------------------------- 1 | 0.0888 201 36.02 28 0.5885 2 | 0.1399 198 39.32 30 0.8291 3 | 0.0747 198 38.8 26 0.4974 4 | 0.0983 191 40.71 30 0.5772 5 | 0.1276 196 38.4 28 0.5703 6 | 0.1671 201 34.1 31 0.5835 7 | 0.1906 193 36.2 30 0.5276 8 | 
0.1061 191 36.75 27 0.5523 9 | 0.2446 185 38.43 29 0.4007 10 | 0.167 203 33.54 24 0.477 11 | 0.2485 188 35.01 27 0.4313 12 | 0.1227 198 36.67 29 0.4909 13 | 0.124 185 33.88 24 0.5668 14 | 0.1461 191 35.59 30 0.5113 15 | 0.2315 191 38.01 28 0.3788 16 | 0.0494 193 32.38 32 0.559 17 | 0.1107 196 35.22 25 0.4799 18 | 0.2521 183 31.73 29 0.5735 19 | 0.1007 193 28.81 34 0.6318 20 | 0.1067 196 35.6 23 0.4326 21 | 0.1956 188 35.28 32 0.428 22 | 0.1828 191 29.54 28 0.4401 23 | 0.1627 196 31.35 28 0.5581 24 | 0.1403 198 33.5 23 0.4866 25 | 0.1563 193 34.56 32 0.5267 26 | 0.2681 183 39.53 27 0.5439 27 | 0.1236 196 26.7 34 0.4419 28 | 0.13 188 30.77 26 0.3998 29 | 0.0896 198 25.67 30 0.4325 30 | 0.2071 178 36.22 30 0.4086 31 | 0.2244 185 36.55 23 0.4624 32 | 0.3437 185 34.91 31 0.4325 33 | 0.1058 191 28.35 28 0.4903 34 | 0.2326 185 33.53 27 0.4802 35 | 0.1577 193 31.07 25 0.4345 36 | 0.2327 185 36.52 32 0.4819 37 | 0.1256 196 27.87 29 0.6244 38 | 0.107 198 24.31 34 0.3991 39 | 0.1343 193 31.26 28 0.4414 40 | 0.0586 196 22.18 23 0.4013 41 | 0.2383 185 35.25 26 0.3801 42 | 0.1006 198 22.87 30 0.3498 43 | 0.2164 193 24.49 32 0.3185 44 | 0.1485 198 23.57 27 0.3097 45 | 0.227 191 31.72 27 0.4319 46 | 0.1649 188 27.9 25 0.3799 47 | 0.1188 191 22.74 24 0.4091 48 | 0.194 193 20.62 27 0.3588 49 | 0.2495 185 30.46 25 0.4727 50 | 0.2378 185 32.38 27 0.3212 51 | 0.1592 191 25.75 31 0.3418 52 | 0.2069 170 33.84 30 0.4285 53 | 0.2084 185 27.83 25 0.3917 54 | 0.0877 193 21.67 26 0.5769 55 | 0.101 193 21.79 24 0.4773 56 | 0.0942 201 20.17 26 0.4512 57 | 0.055 193 29.07 31 0.3096 58 | 0.1071 196 24.28 24 0.3089 59 | 0.0728 193 19.24 27 0.4573 60 | 0.2771 180 27.07 28 0.3214 61 | 0.0528 196 18.95 22 0.5437 62 | 0.213 188 21.59 30 0.4121 63 | 0.1356 193 13.27 31 0.2185 64 | 0.1043 196 16.3 23 0.3313 65 | 0.113 191 23.01 25 0.3302 66 | 0.1477 196 20.31 31 0.4677 67 | 0.1317 188 17.46 33 0.2406 68 | 0.2187 191 21.95 28 0.3007 69 | 0.2127 188 14.57 37 0.2471 70 | 0.2547 160 34.55 28 0.2894 71 | 0.1591 191 22.0 24 0.3682 72 | 0.0898 196 13.37 34 0.389 73 | 0.2146 188 20.51 24 0.512 74 | 0.1871 183 19.78 28 0.4449 75 | 0.1528 191 16.36 33 0.4035 76 | 0.156 191 16.03 23 0.2683 77 | 0.2348 188 24.27 26 0.2719 78 | 0.1623 180 18.49 28 0.3408 79 | 0.1239 180 17.76 26 0.4393 80 | 0.2178 185 13.31 25 0.3004 81 | 0.1608 185 17.41 26 0.3503 82 | 0.0805 193 13.67 25 0.4388 83 | 0.1776 193 17.46 27 0.2578 84 | 0.1668 185 14.38 35 0.2989 85 | 0.1072 188 12.12 31 0.4455 86 | 0.1821 185 12.63 25 0.3087 87 | 0.188 180 12.24 30 0.3678 88 | 0.1167 196 12.0 24 0.3667 89 | 0.2617 185 24.46 27 0.3189 90 | 0.1994 188 20.06 27 0.4187 91 | 0.1706 170 17.0 25 0.5059 92 | 0.1554 183 11.58 24 0.3195 93 | 0.2282 185 10.08 24 0.2381 94 | 0.1778 185 18.56 23 0.2802 95 | 0.1863 185 11.81 23 0.381 96 | 0.1014 193 13.81 32 0.1593 -------------------------------------------------------------------------------- /chapter06-classifier/chapter06-classifier-11-all.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # By:Eastmount CSDN 2021-07-06 3 | # 该部分参考知乎萌弟老师:https://zhuanlan.zhihu.com/p/173945775 4 | import numpy as np 5 | from sklearn import metrics 6 | from sklearn import datasets 7 | import matplotlib.pyplot as plt 8 | from matplotlib.colors import ListedColormap 9 | from sklearn.model_selection import train_test_split 10 | from sklearn.decomposition import PCA 11 | from sklearn.preprocessing import StandardScaler 12 | 13 | 
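#Editor's sketch (not part of the original script): two details in the steps
#below deserve emphasis. First, train_test_split(..., stratify=y) keeps the
#class proportions identical in the training and test splits. Second,
#StandardScaler is fit on the training split only and then reused to
#transform the test split; fitting it on all of the data would leak test-set
#statistics into training. Self-contained micro-example with toy values:
import numpy as np
from sklearn.preprocessing import StandardScaler
demo_train = np.array([[1.0], [2.0], [3.0]])
demo_test = np.array([[4.0]])
demo_sc = StandardScaler().fit(demo_train)  #mean/std come from training data only
print(demo_sc.transform(demo_test))         #test scaled with the training statistics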
#------------------------------------------------------------------------ 14 | #第一步 导入数据 15 | iris = datasets.load_iris() 16 | X = iris.data[:,[2,3]] 17 | y = iris.target 18 | print("Class labels:",np.unique(y)) #打印分类类别的种类 [0 1 2] 19 | 20 | #30%测试数据 70%训练数据 stratify=y表示训练数据和测试数据具有相同的类别比例 21 | X_train,X_test,y_train,y_test = train_test_split(X,y,test_size=0.3,random_state=1,stratify=y) 22 | 23 | #------------------------------------------------------------------------ 24 | #第二步 数据标准化 25 | sc = StandardScaler() #估算训练数据中的mu和sigma 26 | sc.fit(X_train) #使用训练数据中的mu和sigma对数据进行标准化 27 | X_train_std = sc.transform(X_train) 28 | X_test_std = sc.transform(X_test) 29 | print(X_train_std) 30 | print(X_test_std) 31 | 32 | #------------------------------------------------------------------------ 33 | #第三步 可视化函数 画出决策边界 34 | def plot_decision_region(X,y,classifier,resolution=0.02): 35 | markers = ('s','x','o','^','v') 36 | colors = ('red','blue','lightgreen','gray','cyan') 37 | cmap = ListedColormap(colors[:len(np.unique(y))]) 38 | 39 | # plot the decision surface 40 | x1_min,x1_max = X[:,0].min()-1,X[:,0].max()+1 41 | x2_min,x2_max = X[:,1].min()-1,X[:,1].max()+1 42 | xx1,xx2 = np.meshgrid(np.arange(x1_min,x1_max,resolution), 43 | np.arange(x2_min,x2_max,resolution)) 44 | Z = classifier.predict(np.array([xx1.ravel(),xx2.ravel()]).T) 45 | Z = Z.reshape(xx1.shape) 46 | plt.contourf(xx1,xx2,Z,alpha=0.3,cmap=cmap) 47 | plt.xlim(xx1.min(),xx1.max()) 48 | plt.ylim(xx2.min(),xx2.max()) 49 | 50 | # plot class samples 51 | for idx,cl in enumerate(np.unique(y)): 52 | plt.scatter(x=X[y==cl,0], 53 | y = X[y==cl,1], 54 | alpha=0.8, 55 | c=colors[idx], 56 | marker = markers[idx], 57 | label=cl, 58 | edgecolors='black') 59 | 60 | #------------------------------------------------------------------------ 61 | #第四步 决策树分类 62 | from sklearn.tree import DecisionTreeClassifier 63 | tree = DecisionTreeClassifier(criterion='gini',max_depth=4,random_state=1) 64 | tree.fit(X_train_std,y_train) 65 | print(X_train_std.shape, X_test_std.shape, len(y_train), len(y_test)) #(105, 2) (45, 2) 105 45 66 | res1 = tree.predict(X_test_std) 67 | print(res1) 68 | print(metrics.classification_report(y_test, res1, digits=4)) #四位小数 69 | 70 | plot_decision_region(X_train_std,y_train,classifier=tree,resolution=0.02) 71 | plt.xlabel('petal length [standardized]') 72 | plt.ylabel('petal width [standardized]') 73 | plt.title('DecisionTreeClassifier') 74 | plt.legend(loc='upper left') 75 | plt.show() 76 | 77 | #------------------------------------------------------------------------ 78 | #第五步 KNN分类 79 | from sklearn.neighbors import KNeighborsClassifier 80 | knn = KNeighborsClassifier(n_neighbors=2,p=2,metric="minkowski") 81 | knn.fit(X_train_std,y_train) 82 | res2 = knn.predict(X_test_std) 83 | print(res2) 84 | print(metrics.classification_report(y_test, res2, digits=4)) #四位小数 85 | 86 | plot_decision_region(X_train_std,y_train,classifier=knn,resolution=0.02) 87 | plt.xlabel('petal length [standardized]') 88 | plt.ylabel('petal width [standardized]') 89 | plt.title('KNeighborsClassifier') 90 | plt.legend(loc='upper left') 91 | plt.show() 92 | 93 | #------------------------------------------------------------------------ 94 | #第六步 SVM分类 核函数对非线性分类问题建模(gamma=0.20) 95 | from sklearn.svm import SVC 96 | svm = SVC(kernel='rbf',random_state=1,gamma=0.20,C=1.0) #较小的gamma有较松的决策边界 97 | #svm = SVC(kernel='rbf',random_state=1,gamma=100.0,C=1.0,verbose=1) #a much larger gamma gives a tighter, more overfit boundary; swap with the line above to compare 98 | svm.fit(X_train_std,y_train) 99 | res3 = svm.predict(X_test_std) 100 | print(res3) 101 |
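#Editor's sketch (not part of the original script): gamma controls how far a
#single training sample's influence reaches in the RBF kernel. Small gamma
#gives a smooth, loose boundary; large gamma gives tight islands around the
#training points, an overfitting risk. A quick comparison using the variables
#already defined above; more support vectors usually means a more complex
#boundary:
for demo_gamma in (0.2, 1.0, 100.0):
    demo_svm = SVC(kernel='rbf', random_state=1, gamma=demo_gamma, C=1.0)
    demo_svm.fit(X_train_std, y_train)
    print(demo_gamma, demo_svm.n_support_)  #support-vector count per class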
print(metrics.classification_report(y_test, res3, digits=4)) 102 | 103 | plot_decision_region(X_train_std,y_train,classifier=svm,resolution=0.02) 104 | plt.xlabel('petal length [standardized]') 105 | plt.ylabel('petal width [standardized]') 106 | plt.title('SVM') 107 | plt.legend(loc='upper left') 108 | plt.show() 109 | 110 | 111 | #------------------------------------------------------------------------ 112 | #第七步 逻辑回归分类 113 | from sklearn.linear_model import LogisticRegression 114 | lr = LogisticRegression(C=100.0,random_state=1) 115 | lr.fit(X_train_std,y_train) 116 | res4 = lr.predict(X_test_std) 117 | print(res4) 118 | print(metrics.classification_report(y_test, res4, digits=4)) 119 | 120 | plot_decision_region(X_train_std,y_train,classifier=lr,resolution=0.02) 121 | plt.xlabel('petal length [standardized]') 122 | plt.ylabel('petal width [standardized]') 123 | plt.title('LogisticRegression') 124 | plt.legend(loc='upper left') 125 | plt.show() 126 | 127 | 128 | #------------------------------------------------------------------------ 129 | #第八步 朴素贝叶斯分类 130 | from sklearn.naive_bayes import GaussianNB 131 | gnb = GaussianNB() 132 | gnb.fit(X_train_std,y_train) 133 | res5 = gnb.predict(X_test_std) 134 | print(res5) 135 | print(metrics.classification_report(y_test, res5, digits=4)) 136 | 137 | plot_decision_region(X_train_std,y_train,classifier=gnb,resolution=0.02) 138 | plt.xlabel('petal length [standardized]') 139 | plt.ylabel('petal width [standardized]') 140 | plt.title('GaussianNB') 141 | plt.legend(loc='upper left') 142 | plt.show() 143 | 144 | #------------------------------------------------------------------------ 145 | #第九步 随机森林分类 146 | from sklearn.ensemble import RandomForestClassifier 147 | forest = RandomForestClassifier(criterion='gini', 148 | n_estimators=25, 149 | random_state=1, 150 | n_jobs=2, 151 | verbose=1) 152 | forest.fit(X_train_std,y_train) 153 | res6 = forest.predict(X_test_std) 154 | print(res6) 155 | print(metrics.classification_report(y_test, res6, digits=4)) 156 | 157 | plot_decision_region(X_train_std,y_train,classifier=forest,resolution=0.02) 158 | plt.xlabel('petal length [standardized]') 159 | plt.ylabel('petal width [standardized]') 160 | plt.title('RandomForestClassifier') 161 | plt.legend(loc='upper left') 162 | plt.show() 163 | 164 | #------------------------------------------------------------------------ 165 | #第十步 集成学习分类 166 | from sklearn.ensemble import AdaBoostClassifier 167 | ada = AdaBoostClassifier() 168 | ada.fit(X_train_std,y_train) 169 | res7 = ada.predict(X_test_std) 170 | print(res7) 171 | print(metrics.classification_report(y_test, res7, digits=4)) 172 | 173 | plot_decision_region(X_train_std,y_train,classifier=ada,resolution=0.02) 174 | plt.xlabel('petal length [standardized]') 175 | plt.ylabel('petal width [standardized]') 176 | plt.title('AdaBoostClassifier') 177 | plt.legend(loc='upper left') 178 | plt.show() 179 | 180 | #------------------------------------------------------------------------ 181 | #第11步 GradientBoosting分类 182 | from sklearn.ensemble import GradientBoostingClassifier 183 | gb = GradientBoostingClassifier() 184 | gb.fit(X_train_std,y_train) 185 | res8 = gb.predict(X_test_std) 186 | print(res8) 187 | print(metrics.classification_report(y_test, res8, digits=4)) 188 | 189 | plot_decision_region(X_train_std,y_train,classifier=gb,resolution=0.02) 190 | plt.xlabel('petal length [standardized]') 191 | plt.ylabel('petal width [standardized]') 192 | plt.title('GradientBoostingClassifier') 193 | plt.legend(loc='upper
left') 194 | plt.show() 195 | -------------------------------------------------------------------------------- /chapter06-classifier/wine/wine.txt: -------------------------------------------------------------------------------- 1 | 1,14.23,1.71,2.43,15.6,127,2.8,3.06,.28,2.29,5.64,1.04,3.92,1065 2 | 1,13.2,1.78,2.14,11.2,100,2.65,2.76,.26,1.28,4.38,1.05,3.4,1050 3 | 1,13.16,2.36,2.67,18.6,101,2.8,3.24,.3,2.81,5.68,1.03,3.17,1185 4 | 1,14.37,1.95,2.5,16.8,113,3.85,3.49,.24,2.18,7.8,.86,3.45,1480 5 | 1,13.24,2.59,2.87,21,118,2.8,2.69,.39,1.82,4.32,1.04,2.93,735 6 | 1,14.2,1.76,2.45,15.2,112,3.27,3.39,.34,1.97,6.75,1.05,2.85,1450 7 | 1,14.39,1.87,2.45,14.6,96,2.5,2.52,.3,1.98,5.25,1.02,3.58,1290 8 | 1,14.06,2.15,2.61,17.6,121,2.6,2.51,.31,1.25,5.05,1.06,3.58,1295 9 | 1,14.83,1.64,2.17,14,97,2.8,2.98,.29,1.98,5.2,1.08,2.85,1045 10 | 1,13.86,1.35,2.27,16,98,2.98,3.15,.22,1.85,7.22,1.01,3.55,1045 11 | 1,14.1,2.16,2.3,18,105,2.95,3.32,.22,2.38,5.75,1.25,3.17,1510 12 | 1,14.12,1.48,2.32,16.8,95,2.2,2.43,.26,1.57,5,1.17,2.82,1280 13 | 1,13.75,1.73,2.41,16,89,2.6,2.76,.29,1.81,5.6,1.15,2.9,1320 14 | 1,14.75,1.73,2.39,11.4,91,3.1,3.69,.43,2.81,5.4,1.25,2.73,1150 15 | 1,14.38,1.87,2.38,12,102,3.3,3.64,.29,2.96,7.5,1.2,3,1547 16 | 1,13.63,1.81,2.7,17.2,112,2.85,2.91,.3,1.46,7.3,1.28,2.88,1310 17 | 1,14.3,1.92,2.72,20,120,2.8,3.14,.33,1.97,6.2,1.07,2.65,1280 18 | 1,13.83,1.57,2.62,20,115,2.95,3.4,.4,1.72,6.6,1.13,2.57,1130 19 | 1,14.19,1.59,2.48,16.5,108,3.3,3.93,.32,1.86,8.7,1.23,2.82,1680 20 | 1,13.64,3.1,2.56,15.2,116,2.7,3.03,.17,1.66,5.1,.96,3.36,845 21 | 1,14.06,1.63,2.28,16,126,3,3.17,.24,2.1,5.65,1.09,3.71,780 22 | 1,12.93,3.8,2.65,18.6,102,2.41,2.41,.25,1.98,4.5,1.03,3.52,770 23 | 1,13.71,1.86,2.36,16.6,101,2.61,2.88,.27,1.69,3.8,1.11,4,1035 24 | 1,12.85,1.6,2.52,17.8,95,2.48,2.37,.26,1.46,3.93,1.09,3.63,1015 25 | 1,13.5,1.81,2.61,20,96,2.53,2.61,.28,1.66,3.52,1.12,3.82,845 26 | 1,13.05,2.05,3.22,25,124,2.63,2.68,.47,1.92,3.58,1.13,3.2,830 27 | 1,13.39,1.77,2.62,16.1,93,2.85,2.94,.34,1.45,4.8,.92,3.22,1195 28 | 1,13.3,1.72,2.14,17,94,2.4,2.19,.27,1.35,3.95,1.02,2.77,1285 29 | 1,13.87,1.9,2.8,19.4,107,2.95,2.97,.37,1.76,4.5,1.25,3.4,915 30 | 1,14.02,1.68,2.21,16,96,2.65,2.33,.26,1.98,4.7,1.04,3.59,1035 31 | 1,13.73,1.5,2.7,22.5,101,3,3.25,.29,2.38,5.7,1.19,2.71,1285 32 | 1,13.58,1.66,2.36,19.1,106,2.86,3.19,.22,1.95,6.9,1.09,2.88,1515 33 | 1,13.68,1.83,2.36,17.2,104,2.42,2.69,.42,1.97,3.84,1.23,2.87,990 34 | 1,13.76,1.53,2.7,19.5,132,2.95,2.74,.5,1.35,5.4,1.25,3,1235 35 | 1,13.51,1.8,2.65,19,110,2.35,2.53,.29,1.54,4.2,1.1,2.87,1095 36 | 1,13.48,1.81,2.41,20.5,100,2.7,2.98,.26,1.86,5.1,1.04,3.47,920 37 | 1,13.28,1.64,2.84,15.5,110,2.6,2.68,.34,1.36,4.6,1.09,2.78,880 38 | 1,13.05,1.65,2.55,18,98,2.45,2.43,.29,1.44,4.25,1.12,2.51,1105 39 | 1,13.07,1.5,2.1,15.5,98,2.4,2.64,.28,1.37,3.7,1.18,2.69,1020 40 | 1,14.22,3.99,2.51,13.2,128,3,3.04,.2,2.08,5.1,.89,3.53,760 41 | 1,13.56,1.71,2.31,16.2,117,3.15,3.29,.34,2.34,6.13,.95,3.38,795 42 | 1,13.41,3.84,2.12,18.8,90,2.45,2.68,.27,1.48,4.28,.91,3,1035 43 | 1,13.88,1.89,2.59,15,101,3.25,3.56,.17,1.7,5.43,.88,3.56,1095 44 | 1,13.24,3.98,2.29,17.5,103,2.64,2.63,.32,1.66,4.36,.82,3,680 45 | 1,13.05,1.77,2.1,17,107,3,3,.28,2.03,5.04,.88,3.35,885 46 | 1,14.21,4.04,2.44,18.9,111,2.85,2.65,.3,1.25,5.24,.87,3.33,1080 47 | 1,14.38,3.59,2.28,16,102,3.25,3.17,.27,2.19,4.9,1.04,3.44,1065 48 | 1,13.9,1.68,2.12,16,101,3.1,3.39,.21,2.14,6.1,.91,3.33,985 49 | 1,14.1,2.02,2.4,18.8,103,2.75,2.92,.32,2.38,6.2,1.07,2.75,1060 50 | 
1,13.94,1.73,2.27,17.4,108,2.88,3.54,.32,2.08,8.90,1.12,3.1,1260 51 | 1,13.05,1.73,2.04,12.4,92,2.72,3.27,.17,2.91,7.2,1.12,2.91,1150 52 | 1,13.83,1.65,2.6,17.2,94,2.45,2.99,.22,2.29,5.6,1.24,3.37,1265 53 | 1,13.82,1.75,2.42,14,111,3.88,3.74,.32,1.87,7.05,1.01,3.26,1190 54 | 1,13.77,1.9,2.68,17.1,115,3,2.79,.39,1.68,6.3,1.13,2.93,1375 55 | 1,13.74,1.67,2.25,16.4,118,2.6,2.9,.21,1.62,5.85,.92,3.2,1060 56 | 1,13.56,1.73,2.46,20.5,116,2.96,2.78,.2,2.45,6.25,.98,3.03,1120 57 | 1,14.22,1.7,2.3,16.3,118,3.2,3,.26,2.03,6.38,.94,3.31,970 58 | 1,13.29,1.97,2.68,16.8,102,3,3.23,.31,1.66,6,1.07,2.84,1270 59 | 1,13.72,1.43,2.5,16.7,108,3.4,3.67,.19,2.04,6.8,.89,2.87,1285 60 | 2,12.37,.94,1.36,10.6,88,1.98,.57,.28,.42,1.95,1.05,1.82,520 61 | 2,12.33,1.1,2.28,16,101,2.05,1.09,.63,.41,3.27,1.25,1.67,680 62 | 2,12.64,1.36,2.02,16.8,100,2.02,1.41,.53,.62,5.75,.98,1.59,450 63 | 2,13.67,1.25,1.92,18,94,2.1,1.79,.32,.73,3.8,1.23,2.46,630 64 | 2,12.37,1.13,2.16,19,87,3.5,3.1,.19,1.87,4.45,1.22,2.87,420 65 | 2,12.17,1.45,2.53,19,104,1.89,1.75,.45,1.03,2.95,1.45,2.23,355 66 | 2,12.37,1.21,2.56,18.1,98,2.42,2.65,.37,2.08,4.6,1.19,2.3,678 67 | 2,13.11,1.01,1.7,15,78,2.98,3.18,.26,2.28,5.3,1.12,3.18,502 68 | 2,12.37,1.17,1.92,19.6,78,2.11,2,.27,1.04,4.68,1.12,3.48,510 69 | 2,13.34,.94,2.36,17,110,2.53,1.3,.55,.42,3.17,1.02,1.93,750 70 | 2,12.21,1.19,1.75,16.8,151,1.85,1.28,.14,2.5,2.85,1.28,3.07,718 71 | 2,12.29,1.61,2.21,20.4,103,1.1,1.02,.37,1.46,3.05,.906,1.82,870 72 | 2,13.86,1.51,2.67,25,86,2.95,2.86,.21,1.87,3.38,1.36,3.16,410 73 | 2,13.49,1.66,2.24,24,87,1.88,1.84,.27,1.03,3.74,.98,2.78,472 74 | 2,12.99,1.67,2.6,30,139,3.3,2.89,.21,1.96,3.35,1.31,3.5,985 75 | 2,11.96,1.09,2.3,21,101,3.38,2.14,.13,1.65,3.21,.99,3.13,886 76 | 2,11.66,1.88,1.92,16,97,1.61,1.57,.34,1.15,3.8,1.23,2.14,428 77 | 2,13.03,.9,1.71,16,86,1.95,2.03,.24,1.46,4.6,1.19,2.48,392 78 | 2,11.84,2.89,2.23,18,112,1.72,1.32,.43,.95,2.65,.96,2.52,500 79 | 2,12.33,.99,1.95,14.8,136,1.9,1.85,.35,2.76,3.4,1.06,2.31,750 80 | 2,12.7,3.87,2.4,23,101,2.83,2.55,.43,1.95,2.57,1.19,3.13,463 81 | 2,12,.92,2,19,86,2.42,2.26,.3,1.43,2.5,1.38,3.12,278 82 | 2,12.72,1.81,2.2,18.8,86,2.2,2.53,.26,1.77,3.9,1.16,3.14,714 83 | 2,12.08,1.13,2.51,24,78,2,1.58,.4,1.4,2.2,1.31,2.72,630 84 | 2,13.05,3.86,2.32,22.5,85,1.65,1.59,.61,1.62,4.8,.84,2.01,515 85 | 2,11.84,.89,2.58,18,94,2.2,2.21,.22,2.35,3.05,.79,3.08,520 86 | 2,12.67,.98,2.24,18,99,2.2,1.94,.3,1.46,2.62,1.23,3.16,450 87 | 2,12.16,1.61,2.31,22.8,90,1.78,1.69,.43,1.56,2.45,1.33,2.26,495 88 | 2,11.65,1.67,2.62,26,88,1.92,1.61,.4,1.34,2.6,1.36,3.21,562 89 | 2,11.64,2.06,2.46,21.6,84,1.95,1.69,.48,1.35,2.8,1,2.75,680 90 | 2,12.08,1.33,2.3,23.6,70,2.2,1.59,.42,1.38,1.74,1.07,3.21,625 91 | 2,12.08,1.83,2.32,18.5,81,1.6,1.5,.52,1.64,2.4,1.08,2.27,480 92 | 2,12,1.51,2.42,22,86,1.45,1.25,.5,1.63,3.6,1.05,2.65,450 93 | 2,12.69,1.53,2.26,20.7,80,1.38,1.46,.58,1.62,3.05,.96,2.06,495 94 | 2,12.29,2.83,2.22,18,88,2.45,2.25,.25,1.99,2.15,1.15,3.3,290 95 | 2,11.62,1.99,2.28,18,98,3.02,2.26,.17,1.35,3.25,1.16,2.96,345 96 | 2,12.47,1.52,2.2,19,162,2.5,2.27,.32,3.28,2.6,1.16,2.63,937 97 | 2,11.81,2.12,2.74,21.5,134,1.6,.99,.14,1.56,2.5,.95,2.26,625 98 | 2,12.29,1.41,1.98,16,85,2.55,2.5,.29,1.77,2.9,1.23,2.74,428 99 | 2,12.37,1.07,2.1,18.5,88,3.52,3.75,.24,1.95,4.5,1.04,2.77,660 100 | 2,12.29,3.17,2.21,18,88,2.85,2.99,.45,2.81,2.3,1.42,2.83,406 101 | 2,12.08,2.08,1.7,17.5,97,2.23,2.17,.26,1.4,3.3,1.27,2.96,710 102 | 2,12.6,1.34,1.9,18.5,88,1.45,1.36,.29,1.35,2.45,1.04,2.77,562 103 | 
2,12.34,2.45,2.46,21,98,2.56,2.11,.34,1.31,2.8,.8,3.38,438 104 | 2,11.82,1.72,1.88,19.5,86,2.5,1.64,.37,1.42,2.06,.94,2.44,415 105 | 2,12.51,1.73,1.98,20.5,85,2.2,1.92,.32,1.48,2.94,1.04,3.57,672 106 | 2,12.42,2.55,2.27,22,90,1.68,1.84,.66,1.42,2.7,.86,3.3,315 107 | 2,12.25,1.73,2.12,19,80,1.65,2.03,.37,1.63,3.4,1,3.17,510 108 | 2,12.72,1.75,2.28,22.5,84,1.38,1.76,.48,1.63,3.3,.88,2.42,488 109 | 2,12.22,1.29,1.94,19,92,2.36,2.04,.39,2.08,2.7,.86,3.02,312 110 | 2,11.61,1.35,2.7,20,94,2.74,2.92,.29,2.49,2.65,.96,3.26,680 111 | 2,11.46,3.74,1.82,19.5,107,3.18,2.58,.24,3.58,2.9,.75,2.81,562 112 | 2,12.52,2.43,2.17,21,88,2.55,2.27,.26,1.22,2,.9,2.78,325 113 | 2,11.76,2.68,2.92,20,103,1.75,2.03,.6,1.05,3.8,1.23,2.5,607 114 | 2,11.41,.74,2.5,21,88,2.48,2.01,.42,1.44,3.08,1.1,2.31,434 115 | 2,12.08,1.39,2.5,22.5,84,2.56,2.29,.43,1.04,2.9,.93,3.19,385 116 | 2,11.03,1.51,2.2,21.5,85,2.46,2.17,.52,2.01,1.9,1.71,2.87,407 117 | 2,11.82,1.47,1.99,20.8,86,1.98,1.6,.3,1.53,1.95,.95,3.33,495 118 | 2,12.42,1.61,2.19,22.5,108,2,2.09,.34,1.61,2.06,1.06,2.96,345 119 | 2,12.77,3.43,1.98,16,80,1.63,1.25,.43,.83,3.4,.7,2.12,372 120 | 2,12,3.43,2,19,87,2,1.64,.37,1.87,1.28,.93,3.05,564 121 | 2,11.45,2.4,2.42,20,96,2.9,2.79,.32,1.83,3.25,.8,3.39,625 122 | 2,11.56,2.05,3.23,28.5,119,3.18,5.08,.47,1.87,6,.93,3.69,465 123 | 2,12.42,4.43,2.73,26.5,102,2.2,2.13,.43,1.71,2.08,.92,3.12,365 124 | 2,13.05,5.8,2.13,21.5,86,2.62,2.65,.3,2.01,2.6,.73,3.1,380 125 | 2,11.87,4.31,2.39,21,82,2.86,3.03,.21,2.91,2.8,.75,3.64,380 126 | 2,12.07,2.16,2.17,21,85,2.6,2.65,.37,1.35,2.76,.86,3.28,378 127 | 2,12.43,1.53,2.29,21.5,86,2.74,3.15,.39,1.77,3.94,.69,2.84,352 128 | 2,11.79,2.13,2.78,28.5,92,2.13,2.24,.58,1.76,3,.97,2.44,466 129 | 2,12.37,1.63,2.3,24.5,88,2.22,2.45,.4,1.9,2.12,.89,2.78,342 130 | 2,12.04,4.3,2.38,22,80,2.1,1.75,.42,1.35,2.6,.79,2.57,580 131 | 3,12.86,1.35,2.32,18,122,1.51,1.25,.21,.94,4.1,.76,1.29,630 132 | 3,12.88,2.99,2.4,20,104,1.3,1.22,.24,.83,5.4,.74,1.42,530 133 | 3,12.81,2.31,2.4,24,98,1.15,1.09,.27,.83,5.7,.66,1.36,560 134 | 3,12.7,3.55,2.36,21.5,106,1.7,1.2,.17,.84,5,.78,1.29,600 135 | 3,12.51,1.24,2.25,17.5,85,2,.58,.6,1.25,5.45,.75,1.51,650 136 | 3,12.6,2.46,2.2,18.5,94,1.62,.66,.63,.94,7.1,.73,1.58,695 137 | 3,12.25,4.72,2.54,21,89,1.38,.47,.53,.8,3.85,.75,1.27,720 138 | 3,12.53,5.51,2.64,25,96,1.79,.6,.63,1.1,5,.82,1.69,515 139 | 3,13.49,3.59,2.19,19.5,88,1.62,.48,.58,.88,5.7,.81,1.82,580 140 | 3,12.84,2.96,2.61,24,101,2.32,.6,.53,.81,4.92,.89,2.15,590 141 | 3,12.93,2.81,2.7,21,96,1.54,.5,.53,.75,4.6,.77,2.31,600 142 | 3,13.36,2.56,2.35,20,89,1.4,.5,.37,.64,5.6,.7,2.47,780 143 | 3,13.52,3.17,2.72,23.5,97,1.55,.52,.5,.55,4.35,.89,2.06,520 144 | 3,13.62,4.95,2.35,20,92,2,.8,.47,1.02,4.4,.91,2.05,550 145 | 3,12.25,3.88,2.2,18.5,112,1.38,.78,.29,1.14,8.21,.65,2,855 146 | 3,13.16,3.57,2.15,21,102,1.5,.55,.43,1.3,4,.6,1.68,830 147 | 3,13.88,5.04,2.23,20,80,.98,.34,.4,.68,4.9,.58,1.33,415 148 | 3,12.87,4.61,2.48,21.5,86,1.7,.65,.47,.86,7.65,.54,1.86,625 149 | 3,13.32,3.24,2.38,21.5,92,1.93,.76,.45,1.25,8.42,.55,1.62,650 150 | 3,13.08,3.9,2.36,21.5,113,1.41,1.39,.34,1.14,9.40,.57,1.33,550 151 | 3,13.5,3.12,2.62,24,123,1.4,1.57,.22,1.25,8.60,.59,1.3,500 152 | 3,12.79,2.67,2.48,22,112,1.48,1.36,.24,1.26,10.8,.48,1.47,480 153 | 3,13.11,1.9,2.75,25.5,116,2.2,1.28,.26,1.56,7.1,.61,1.33,425 154 | 3,13.23,3.3,2.28,18.5,98,1.8,.83,.61,1.87,10.52,.56,1.51,675 155 | 3,12.58,1.29,2.1,20,103,1.48,.58,.53,1.4,7.6,.58,1.55,640 156 | 3,13.17,5.19,2.32,22,93,1.74,.63,.61,1.55,7.9,.6,1.48,725 157 | 
3,13.84,4.12,2.38,19.5,89,1.8,.83,.48,1.56,9.01,.57,1.64,480
158 | 3,12.45,3.03,2.64,27,97,1.9,.58,.63,1.14,7.5,.67,1.73,880
159 | 3,14.34,1.68,2.7,25,98,2.8,1.31,.53,2.7,13,.57,1.96,660
160 | 3,13.48,1.67,2.64,22.5,89,2.6,1.1,.52,2.29,11.75,.57,1.78,620
161 | 3,12.36,3.83,2.38,21,88,2.3,.92,.5,1.04,7.65,.56,1.58,520
162 | 3,13.69,3.26,2.54,20,107,1.83,.56,.5,.8,5.88,.96,1.82,680
163 | 3,12.85,3.27,2.58,22,106,1.65,.6,.6,.96,5.58,.87,2.11,570
164 | 3,12.96,3.45,2.35,18.5,106,1.39,.7,.4,.94,5.28,.68,1.75,675
165 | 3,13.78,2.76,2.3,22,90,1.35,.68,.41,1.03,9.58,.7,1.68,615
166 | 3,13.73,4.36,2.26,22.5,88,1.28,.47,.52,1.15,6.62,.78,1.75,520
167 | 3,13.45,3.7,2.6,23,111,1.7,.92,.43,1.46,10.68,.85,1.56,695
168 | 3,12.82,3.37,2.3,19.5,88,1.48,.66,.4,.97,10.26,.72,1.75,685
169 | 3,13.58,2.58,2.69,24.5,105,1.55,.84,.39,1.54,8.66,.74,1.8,750
170 | 3,13.4,4.6,2.86,25,112,1.98,.96,.27,1.11,8.5,.67,1.92,630
171 | 3,12.2,3.03,2.32,19,96,1.25,.49,.4,.73,5.5,.66,1.83,510
172 | 3,12.77,2.39,2.28,19.5,86,1.39,.51,.48,.64,9.899999,.57,1.63,470
173 | 3,14.16,2.51,2.48,20,91,1.68,.7,.44,1.24,9.7,.62,1.71,660
174 | 3,13.71,5.65,2.45,20.5,95,1.68,.61,.52,1.06,7.7,.64,1.74,740
175 | 3,13.4,3.91,2.48,23,102,1.8,.75,.43,1.41,7.3,.7,1.56,750
176 | 3,13.27,4.28,2.26,20,120,1.59,.69,.43,1.35,10.2,.59,1.56,835
177 | 3,13.17,2.59,2.37,20,120,1.65,.68,.53,1.46,9.3,.6,1.62,840
178 | 3,14.13,4.1,2.74,24.5,96,2.05,.76,.56,1.35,9.2,.61,1.6,560
--------------------------------------------------------------------------------
/chapter05-cluster/glass.csv:
--------------------------------------------------------------------------------
1 | id,ri,na,mg,al,si,k,ca,ba,fe,glass_type
2 | 1,1.52101,13.64,4.49,1.1,71.78,0.06,8.75,0.0,0.0,1
3 | 2,1.5176100000000001,13.89,3.6,1.36,72.73,0.48,7.83,0.0,0.0,1
4 | 3,1.5161799999999999,13.53,3.55,1.54,72.99,0.39,7.78,0.0,0.0,1
5 | 4,1.51766,13.21,3.69,1.29,72.61,0.57,8.22,0.0,0.0,1
6 | 5,1.51742,13.27,3.62,1.24,73.08,0.55,8.07,0.0,0.0,1
7 | 6,1.51596,12.79,3.61,1.62,72.97,0.64,8.07,0.0,0.26,1
8 | 7,1.5174299999999998,13.3,3.6,1.14,73.09,0.58,8.17,0.0,0.0,1
9 | 8,1.51756,13.15,3.61,1.05,73.24,0.57,8.24,0.0,0.0,1
10 | 9,1.51918,14.04,3.58,1.37,72.08,0.56,8.3,0.0,0.0,1
11 | 10,1.51755,13.0,3.6,1.36,72.99,0.57,8.4,0.0,0.11,1
12 | 11,1.5157100000000001,12.72,3.46,1.56,73.2,0.67,8.09,0.0,0.24,1
13 | 12,1.51763,12.8,3.66,1.27,73.01,0.6,8.56,0.0,0.0,1
14 | 13,1.51589,12.88,3.43,1.4,73.28,0.69,8.05,0.0,0.24,1
15 | 14,1.51748,12.86,3.56,1.27,73.21,0.54,8.38,0.0,0.17,1
16 | 15,1.51763,12.61,3.59,1.31,73.29,0.58,8.5,0.0,0.0,1
17 | 16,1.5176100000000001,12.81,3.54,1.23,73.24,0.58,8.39,0.0,0.0,1
18 | 17,1.5178399999999999,12.68,3.67,1.16,73.11,0.61,8.7,0.0,0.0,1
19 | 18,1.52196,14.36,3.85,0.89,71.36,0.15,9.15,0.0,0.0,1
20 | 19,1.51911,13.9,3.73,1.18,72.12,0.06,8.89,0.0,0.0,1
21 | 20,1.51735,13.02,3.54,1.69,72.73,0.54,8.44,0.0,0.07,1
22 | 21,1.5175,12.82,3.55,1.49,72.75,0.54,8.52,0.0,0.19,1
23 | 22,1.51966,14.77,3.75,0.29,72.02,0.03,9.0,0.0,0.0,1
24 | 23,1.51736,12.78,3.62,1.29,72.79,0.59,8.7,0.0,0.0,1
25 | 24,1.5175100000000001,12.81,3.57,1.35,73.02,0.62,8.59,0.0,0.0,1
26 | 25,1.5172,13.38,3.5,1.15,72.85,0.5,8.43,0.0,0.0,1
27 | 26,1.5176399999999999,12.98,3.54,1.21,73.0,0.65,8.53,0.0,0.0,1
28 | 27,1.51793,13.21,3.48,1.41,72.64,0.59,8.43,0.0,0.0,1
29 | 28,1.51721,12.87,3.48,1.33,73.04,0.56,8.43,0.0,0.0,1
30 | 29,1.51768,12.56,3.52,1.43,73.15,0.57,8.54,0.0,0.0,1
31 | 30,1.5178399999999999,13.08,3.49,1.28,72.86,0.6,8.49,0.0,0.0,1
32 | 31,1.51768,12.65,3.56,1.3,73.08,0.61,8.69,0.0,0.14,1
33 | 32,1.51747,12.84,3.5,1.14,73.27,0.56,8.55,0.0,0.0,1
34 | 33,1.51775,12.85,3.48,1.23,72.97,0.61,8.56,0.09,0.22,1
35 | 34,1.5175299999999998,12.57,3.47,1.38,73.39,0.6,8.55,0.0,0.06,1
36 | 35,1.51783,12.69,3.54,1.34,72.95,0.57,8.75,0.0,0.0,1
37 | 36,1.51567,13.29,3.45,1.21,72.74,0.56,8.57,0.0,0.0,1
38 | 37,1.51909,13.89,3.53,1.32,71.81,0.51,8.78,0.11,0.0,1
39 | 38,1.51797,12.74,3.48,1.35,72.96,0.64,8.68,0.0,0.0,1
40 | 39,1.52213,14.21,3.82,0.47,71.77,0.11,9.57,0.0,0.0,1
41 | 40,1.52213,14.21,3.82,0.47,71.77,0.11,9.57,0.0,0.0,1
42 | 41,1.51793,12.79,3.5,1.12,73.03,0.64,8.77,0.0,0.0,1
43 | 42,1.51755,12.71,3.42,1.2,73.2,0.59,8.64,0.0,0.0,1
44 | 43,1.51779,13.21,3.39,1.33,72.76,0.59,8.59,0.0,0.0,1
45 | 44,1.5221,13.73,3.84,0.72,71.76,0.17,9.74,0.0,0.0,1
46 | 45,1.51786,12.73,3.43,1.19,72.95,0.62,8.76,0.0,0.3,1
47 | 46,1.519,13.49,3.48,1.35,71.95,0.55,9.0,0.0,0.0,1
48 | 47,1.5186899999999999,13.19,3.37,1.18,72.72,0.57,8.83,0.0,0.16,1
49 | 48,1.52667,13.99,3.7,0.71,71.57,0.02,9.82,0.0,0.1,1
50 | 49,1.52223,13.21,3.77,0.79,71.99,0.13,10.02,0.0,0.0,1
51 | 50,1.51898,13.58,3.35,1.23,72.08,0.59,8.91,0.0,0.0,1
52 | 51,1.5232,13.72,3.72,0.51,71.75,0.09,10.06,0.0,0.16,1
53 | 52,1.51926,13.2,3.33,1.28,72.36,0.6,9.14,0.0,0.11,1
54 | 53,1.5180799999999999,13.43,2.87,1.19,72.84,0.55,9.03,0.0,0.0,1
55 | 54,1.51837,13.14,2.84,1.28,72.85,0.55,9.07,0.0,0.0,1
56 | 55,1.51778,13.21,2.81,1.29,72.98,0.51,9.02,0.0,0.09,1
57 | 56,1.51769,12.45,2.71,1.29,73.7,0.56,9.06,0.0,0.24,1
58 | 57,1.51215,12.99,3.47,1.12,72.98,0.62,8.35,0.0,0.31,1
59 | 58,1.51824,12.87,3.48,1.29,72.95,0.6,8.43,0.0,0.0,1
60 | 59,1.51754,13.48,3.74,1.17,72.99,0.59,8.03,0.0,0.0,1
61 | 60,1.51754,13.39,3.66,1.19,72.79,0.57,8.27,0.0,0.11,1
62 | 61,1.51905,13.6,3.62,1.11,72.64,0.14,8.76,0.0,0.0,1
63 | 62,1.51977,13.81,3.58,1.32,71.72,0.12,8.67,0.69,0.0,1
64 | 63,1.5217200000000002,13.51,3.86,0.88,71.79,0.23,9.54,0.0,0.11,1
65 | 64,1.52227,14.17,3.81,0.78,71.35,0.0,9.69,0.0,0.0,1
66 | 65,1.5217200000000002,13.48,3.74,0.9,72.01,0.18,9.61,0.0,0.07,1
67 | 66,1.52099,13.69,3.59,1.12,71.96,0.09,9.4,0.0,0.0,1
68 | 67,1.52152,13.05,3.65,0.87,72.22,0.19,9.85,0.0,0.17,1
69 | 68,1.52152,13.05,3.65,0.87,72.32,0.19,9.85,0.0,0.17,1
70 | 69,1.52152,13.12,3.58,0.9,72.2,0.23,9.82,0.0,0.16,1
71 | 70,1.523,13.31,3.58,0.82,71.99,0.12,10.17,0.0,0.03,1
72 | 71,1.5157399999999999,14.86,3.67,1.74,71.87,0.16,7.36,0.0,0.12,2
73 | 72,1.5184799999999998,13.64,3.87,1.27,71.96,0.54,8.32,0.0,0.32,2
74 | 73,1.51593,13.09,3.59,1.52,73.1,0.67,7.83,0.0,0.0,2
75 | 74,1.51631,13.34,3.57,1.57,72.87,0.61,7.89,0.0,0.0,2
76 | 75,1.51596,13.02,3.56,1.54,73.11,0.72,7.9,0.0,0.0,2
77 | 76,1.5159,13.02,3.58,1.51,73.12,0.69,7.96,0.0,0.0,2
78 | 77,1.51645,13.44,3.61,1.54,72.39,0.66,8.03,0.0,0.0,2
79 | 78,1.51627,13.0,3.58,1.54,72.83,0.61,8.04,0.0,0.0,2
80 | 79,1.51613,13.92,3.52,1.25,72.88,0.37,7.94,0.0,0.14,2
81 | 80,1.5159,12.82,3.52,1.9,72.86,0.69,7.97,0.0,0.0,2
82 | 81,1.5159200000000002,12.86,3.52,2.12,72.66,0.69,7.97,0.0,0.0,2
83 | 82,1.51593,13.25,3.45,1.43,73.17,0.61,7.86,0.0,0.0,2
84 | 83,1.5164600000000001,13.41,3.55,1.25,72.81,0.68,8.1,0.0,0.0,2
85 | 84,1.51594,13.09,3.52,1.55,72.87,0.68,8.05,0.0,0.09,2
86 | 85,1.51409,14.25,3.09,2.08,72.28,1.1,7.08,0.0,0.0,2
87 | 86,1.51625,13.36,3.58,1.49,72.72,0.45,8.21,0.0,0.0,2
88 | 87,1.51569,13.24,3.49,1.47,73.25,0.38,8.03,0.0,0.0,2
89 | 88,1.51645,13.4,3.49,1.52,72.65,0.67,8.08,0.0,0.1,2
90 | 89,1.5161799999999999,13.01,3.5,1.48,72.89,0.6,8.12,0.0,0.0,2
91 | 90,1.5164,12.55,3.48,1.87,73.23,0.63,8.08,0.0,0.09,2
92 | 91,1.51841,12.93,3.74,1.11,72.28,0.64,8.96,0.0,0.22,2
93 | 92,1.51605,12.9,3.44,1.45,73.06,0.44,8.27,0.0,0.0,2
94 | 93,1.51588,13.12,3.41,1.58,73.26,0.07,8.39,0.0,0.19,2
95 | 94,1.5159,13.24,3.34,1.47,73.1,0.39,8.22,0.0,0.0,2
96 | 95,1.51629,12.71,3.33,1.49,73.28,0.67,8.24,0.0,0.0,2
97 | 96,1.5186,13.36,3.43,1.43,72.26,0.51,8.6,0.0,0.0,2
98 | 97,1.51841,13.02,3.62,1.06,72.34,0.64,9.13,0.0,0.15,2
99 | 98,1.5174299999999998,12.2,3.25,1.16,73.55,0.62,8.9,0.0,0.24,2
100 | 99,1.51689,12.67,2.88,1.71,73.21,0.73,8.54,0.0,0.0,2
101 | 100,1.51811,12.96,2.96,1.43,72.92,0.6,8.79,0.14,0.0,2
102 | 101,1.51655,12.75,2.85,1.44,73.27,0.57,8.79,0.11,0.22,2
103 | 102,1.5173,12.35,2.72,1.63,72.87,0.7,9.23,0.0,0.0,2
104 | 103,1.5182,12.62,2.76,0.83,73.81,0.35,9.42,0.0,0.2,2
105 | 104,1.52725,13.8,3.15,0.66,70.57,0.08,11.64,0.0,0.0,2
106 | 105,1.5241,13.83,2.9,1.17,71.15,0.08,10.79,0.0,0.0,2
107 | 106,1.52475,11.45,0.0,1.88,72.19,0.81,13.24,0.0,0.34,2
108 | 107,1.53125,10.73,0.0,2.1,69.81,0.58,13.3,3.15,0.28,2
109 | 108,1.53393,12.3,0.0,1.0,70.16,0.12,16.19,0.0,0.24,2
110 | 109,1.5222200000000001,14.43,0.0,1.0,72.67,0.1,11.52,0.0,0.08,2
111 | 110,1.5181799999999999,13.72,0.0,0.56,74.45,0.0,10.99,0.0,0.0,2
112 | 111,1.52664,11.23,0.0,0.77,73.21,0.0,14.68,0.0,0.0,2
113 | 112,1.52739,11.02,0.0,0.75,73.08,0.0,14.96,0.0,0.0,2
114 | 113,1.52777,12.64,0.0,0.67,72.02,0.06,14.4,0.0,0.0,2
115 | 114,1.51892,13.46,3.83,1.26,72.55,0.57,8.21,0.0,0.14,2
116 | 115,1.51847,13.1,3.97,1.19,72.44,0.6,8.43,0.0,0.0,2
117 | 116,1.5184600000000001,13.41,3.89,1.33,72.38,0.51,8.28,0.0,0.0,2
118 | 117,1.51829,13.24,3.9,1.41,72.33,0.55,8.31,0.0,0.1,2
119 | 118,1.51708,13.72,3.68,1.81,72.06,0.64,7.88,0.0,0.0,2
120 | 119,1.51673,13.3,3.64,1.53,72.53,0.65,8.03,0.0,0.29,2
121 | 120,1.51652,13.56,3.57,1.47,72.45,0.64,7.96,0.0,0.0,2
122 | 121,1.51844,13.25,3.76,1.32,72.4,0.58,8.42,0.0,0.0,2
123 | 122,1.51663,12.93,3.54,1.62,72.96,0.64,8.03,0.0,0.21,2
124 | 123,1.5168700000000002,13.23,3.54,1.48,72.84,0.56,8.1,0.0,0.0,2
125 | 124,1.5170700000000001,13.48,3.48,1.71,72.52,0.62,7.99,0.0,0.0,2
126 | 125,1.52177,13.2,3.68,1.15,72.75,0.54,8.52,0.0,0.0,2
127 | 126,1.51872,12.93,3.66,1.56,72.51,0.58,8.55,0.0,0.12,2
128 | 127,1.51667,12.94,3.61,1.26,72.75,0.56,8.6,0.0,0.0,2
129 | 128,1.52081,13.78,2.28,1.43,71.99,0.49,9.85,0.0,0.17,2
130 | 129,1.52068,13.55,2.09,1.67,72.18,0.53,9.57,0.27,0.17,2
131 | 130,1.5202,13.98,1.35,1.63,71.76,0.39,10.56,0.0,0.18,2
132 | 131,1.52177,13.75,1.01,1.36,72.19,0.33,11.14,0.0,0.0,2
133 | 132,1.52614,13.7,0.0,1.36,71.24,0.19,13.44,0.0,0.1,2
134 | 133,1.51813,13.43,3.98,1.18,72.49,0.58,8.15,0.0,0.0,2
135 | 134,1.518,13.71,3.93,1.54,71.81,0.54,8.21,0.0,0.15,2
136 | 135,1.51811,13.33,3.85,1.25,72.78,0.52,8.12,0.0,0.0,2
137 | 136,1.51789,13.19,3.9,1.3,72.33,0.55,8.44,0.0,0.28,2
138 | 137,1.51806,13.0,3.8,1.08,73.07,0.56,8.38,0.0,0.12,2
139 | 138,1.51711,12.89,3.62,1.57,72.96,0.61,8.11,0.0,0.0,2
140 | 139,1.51674,12.79,3.52,1.54,73.36,0.66,7.9,0.0,0.0,2
141 | 140,1.51674,12.87,3.56,1.64,73.14,0.65,7.99,0.0,0.0,2
142 | 141,1.5169,13.33,3.54,1.61,72.54,0.68,8.11,0.0,0.0,2
143 | 142,1.51851,13.2,3.63,1.07,72.83,0.57,8.41,0.09,0.17,2
144 | 143,1.51662,12.85,3.51,1.44,73.01,0.68,8.23,0.06,0.25,2
145 | 144,1.51709,13.0,3.47,1.79,72.72,0.66,8.18,0.0,0.0,2
146 | 145,1.5166,12.99,3.18,1.23,72.97,0.58,8.81,0.0,0.24,2
147 | 146,1.51839,12.85,3.67,1.24,72.57,0.62,8.68,0.0,0.35,2
148 | 147,1.51769,13.65,3.66,1.11,72.77,0.11,8.6,0.0,0.0,3
149 | 148,1.5161,13.33,3.53,1.34,72.67,0.56,8.33,0.0,0.0,3
150 | 149,1.5167,13.24,3.57,1.38,72.7,0.56,8.44,0.0,0.1,3
151 | 150,1.51643,12.16,3.52,1.35,72.89,0.57,8.53,0.0,0.0,3
152 | 151,1.51665,13.14,3.45,1.76,72.48,0.6,8.38,0.0,0.17,3
153 | 152,1.5212700000000001,14.32,3.9,0.83,71.5,0.0,9.49,0.0,0.0,3
154 | 153,1.51779,13.64,3.65,0.65,73.0,0.06,8.93,0.0,0.0,3
155 | 154,1.5161,13.42,3.4,1.22,72.69,0.59,8.32,0.0,0.0,3
156 | 155,1.51694,12.86,3.58,1.31,72.61,0.61,8.79,0.0,0.0,3
157 | 156,1.5164600000000001,13.04,3.4,1.26,73.01,0.52,8.58,0.0,0.0,3
158 | 157,1.51655,13.41,3.39,1.28,72.64,0.52,8.65,0.0,0.0,3
159 | 158,1.52121,14.03,3.76,0.58,71.79,0.11,9.65,0.0,0.0,3
160 | 159,1.51776,13.53,3.41,1.52,72.04,0.58,8.79,0.0,0.0,3
161 | 160,1.51796,13.5,3.36,1.63,71.94,0.57,8.81,0.0,0.09,3
162 | 161,1.5183200000000001,13.33,3.34,1.54,72.14,0.56,8.99,0.0,0.0,3
163 | 162,1.51934,13.64,3.54,0.75,72.65,0.16,8.89,0.15,0.24,3
164 | 163,1.52211,14.19,3.78,0.91,71.36,0.23,9.14,0.0,0.37,3
165 | 164,1.51514,14.01,2.68,3.5,69.89,1.68,5.87,2.2,0.0,5
166 | 165,1.51915,12.73,1.85,1.86,72.69,0.6,10.09,0.0,0.0,5
167 | 166,1.5217100000000001,11.56,1.88,1.56,72.86,0.47,11.41,0.0,0.0,5
168 | 167,1.5215100000000001,11.03,1.71,1.56,73.44,0.58,11.62,0.0,0.0,5
169 | 168,1.51969,12.64,0.0,1.65,73.75,0.38,11.53,0.0,0.0,5
170 | 169,1.5166600000000001,12.86,0.0,1.83,73.88,0.97,10.17,0.0,0.0,5
171 | 170,1.51994,13.27,0.0,1.76,73.03,0.47,11.32,0.0,0.0,5
172 | 171,1.52369,13.44,0.0,1.58,72.22,0.32,12.24,0.0,0.0,5
173 | 172,1.51316,13.02,0.0,3.04,70.48,6.21,6.96,0.0,0.0,5
174 | 173,1.51321,13.0,0.0,3.02,70.7,6.21,6.93,0.0,0.0,5
175 | 174,1.52043,13.38,0.0,1.4,72.25,0.33,12.5,0.0,0.0,5
176 | 175,1.5205799999999998,12.85,1.61,2.17,72.18,0.76,9.7,0.24,0.51,5
177 | 176,1.52119,12.97,0.33,1.51,73.39,0.13,11.27,0.0,0.28,5
178 | 177,1.51905,14.0,2.39,1.56,72.37,0.0,9.57,0.0,0.0,6
179 | 178,1.51937,13.79,2.41,1.19,72.76,0.0,9.77,0.0,0.0,6
180 | 179,1.51829,14.46,2.24,1.62,72.38,0.0,9.26,0.0,0.0,6
181 | 180,1.51852,14.09,2.19,1.66,72.67,0.0,9.32,0.0,0.0,6
182 | 181,1.51299,14.4,1.74,1.54,74.55,0.0,7.59,0.0,0.0,6
183 | 182,1.51888,14.99,0.78,1.74,72.5,0.0,9.95,0.0,0.0,6
184 | 183,1.51916,14.15,0.0,2.09,72.74,0.0,10.88,0.0,0.0,6
185 | 184,1.51969,14.56,0.0,0.56,73.48,0.0,11.22,0.0,0.0,6
186 | 185,1.51115,17.38,0.0,0.34,75.41,0.0,6.65,0.0,0.0,6
187 | 186,1.51131,13.69,3.2,1.81,72.81,1.76,5.43,1.19,0.0,7
188 | 187,1.5183799999999998,14.32,3.26,2.22,71.25,1.46,5.79,1.63,0.0,7
189 | 188,1.52315,13.44,3.34,1.23,72.38,0.6,8.83,0.0,0.0,7
190 | 189,1.52247,14.86,2.2,2.06,70.26,0.76,9.76,0.0,0.0,7
191 | 190,1.52365,15.79,1.83,1.31,70.43,0.31,8.61,1.68,0.0,7
192 | 191,1.51613,13.88,1.78,1.79,73.1,0.0,8.67,0.76,0.0,7
193 | 192,1.5160200000000001,14.85,0.0,2.38,73.28,0.0,8.76,0.64,0.09,7
194 | 193,1.51623,14.2,0.0,2.79,73.46,0.04,9.04,0.4,0.09,7
195 | 194,1.51719,14.75,0.0,2.0,73.02,0.0,8.53,1.59,0.08,7
196 | 195,1.51683,14.56,0.0,1.98,73.29,0.0,8.52,1.57,0.07,7
197 | 196,1.51545,14.14,0.0,2.68,73.39,0.08,9.07,0.61,0.05,7
198 | 197,1.51556,13.87,0.0,2.54,73.23,0.14,9.41,0.81,0.01,7
199 | 198,1.5172700000000001,14.7,0.0,2.34,73.28,0.0,8.95,0.66,0.0,7
200 | 199,1.51531,14.38,0.0,2.66,73.1,0.04,9.08,0.64,0.0,7
201 | 200,1.51609,15.01,0.0,2.51,73.05,0.05,8.83,0.53,0.0,7
202 | 201,1.51508,15.15,0.0,2.25,73.5,0.0,8.34,0.63,0.0,7
203 | 202,1.51653,11.95,0.0,1.19,75.18,2.7,8.93,0.0,0.0,7
204 | 203,1.51514,14.85,0.0,2.42,73.72,0.0,8.39,0.56,0.0,7
205 | 204,1.5165799999999998,14.8,0.0,1.99,73.11,0.0,8.28,1.71,0.0,7
206 | 205,1.51617,14.95,0.0,2.27,73.3,0.0,8.71,0.67,0.0,7
207 | 206,1.51732,14.95,0.0,1.8,72.99,0.0,8.61,1.55,0.0,7
208 | 207,1.51645,14.94,0.0,1.87,73.11,0.0,8.67,1.38,0.0,7
209 | 208,1.51831,14.39,0.0,1.82,72.86,1.41,6.47,2.88,0.0,7
210 | 209,1.5164,14.37,0.0,2.74,72.85,0.0,9.45,0.54,0.0,7
211 | 210,1.51623,14.14,0.0,2.88,72.61,0.08,9.18,1.06,0.0,7
212 | 211,1.51685,14.92,0.0,1.99,73.06,0.0,8.4,1.59,0.0,7
213 | 212,1.52065,14.36,0.0,2.02,73.42,0.0,8.44,1.64,0.0,7
214 | 213,1.51651,14.38,0.0,1.94,73.61,0.0,8.48,1.57,0.0,7
215 | 214,1.51711,14.23,0.0,2.08,73.36,0.0,8.62,1.67,0.0,7
216 |
--------------------------------------------------------------------------------
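The glass.csv file above ships with the chapter05-cluster scripts: each row has an id, nine numeric features (the refractive index ri plus the oxide contents na through fe), and a glass_type label covering six classes (1, 2, 3, 5, 6, 7; no type-4 samples appear). For readers who want to try the data outside the chapter scripts, the following minimal Python sketch loads the CSV with pandas and clusters it with scikit-learn's KMeans. The file path, the scaling step, and the choice of k=6 are assumptions made for illustration, not code taken from the repository.

import pandas as pd
from sklearn.cluster import KMeans
from sklearn.preprocessing import StandardScaler

# Load the dataset; the path assumes the repository layout shown above.
df = pd.read_csv("chapter05-cluster/glass.csv")

# Drop the row id and the ground-truth label, keeping the nine features.
X = df.drop(columns=["id", "glass_type"])

# The features sit on very different scales (si is around 70-75 while
# fe stays below 0.6), so standardize before using Euclidean distances.
X_scaled = StandardScaler().fit_transform(X)

# k = 6 mirrors the six glass types present in the file (an assumption,
# not necessarily the k used by the chapter scripts).
kmeans = KMeans(n_clusters=6, n_init=10, random_state=42)
labels = kmeans.fit_predict(X_scaled)

# Cross-tabulate cluster assignments against the true glass_type labels.
print(pd.crosstab(labels, df["glass_type"]))

Because k-means cluster indices are arbitrary, a cross-tabulation against glass_type (rather than a direct accuracy score) is the natural way to judge how well the clusters line up with the known types.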