├── LICENSE
├── README.md
├── chapters
    ├── Bayesian
    │   ├── __init__.py
    │   ├── __pycache__
    │   │   ├── __init__.cpython-35.pyc
    │   │   ├── bayesian.cpython-35.pyc
    │   │   ├── bernoulliNB.cpython-35.pyc
    │   │   ├── gaussianNB.cpython-35.pyc
    │   │   └── multinomialNB.cpython-35.pyc
    │   ├── bayesian.py
    │   ├── bernoulliNB.py
    │   ├── gaussianNB.py
    │   └── multinomialNB.py
    ├── Cluster_EM
    │   ├── __init__.py
    │   ├── __pycache__
    │   │   ├── __init__.cpython-35.pyc
    │   │   ├── agglomerative_clustering.cpython-35.pyc
    │   │   ├── cluster.cpython-35.pyc
    │   │   ├── dbscan.cpython-35.pyc
    │   │   ├── gmm.cpython-35.pyc
    │   │   └── kmeans.cpython-35.pyc
    │   ├── agglomerative_clustering.py
    │   ├── cluster.py
    │   ├── dbscan.py
    │   ├── gmm.py
    │   └── kmeans.py
    ├── Decision_Tree
    │   ├── __init__.py
    │   ├── __pycache__
    │   │   ├── __init__.cpython-35.pyc
    │   │   ├── decisiontree_classifier.cpython-35.pyc
    │   │   └── decisiontree_regressor.cpython-35.pyc
    │   ├── decisiontree_classifier.py
    │   └── decisiontree_regressor.py
    ├── Ensemble
    │   ├── __init__.py
    │   ├── __pycache__
    │   │   ├── __init__.cpython-35.pyc
    │   │   ├── adaboost_classifier.cpython-35.pyc
    │   │   ├── adaboost_regressor.cpython-35.pyc
    │   │   ├── gradientboosting_classifier.cpython-35.pyc
    │   │   ├── gradientboosting_regressor.cpython-35.pyc
    │   │   ├── randomforest_classifier.cpython-35.pyc
    │   │   └── randomforest_regressor.cpython-35.pyc
    │   ├── adaboost_classifier.py
    │   ├── adaboost_regressor.py
    │   ├── gradientboosting_classifier.py
    │   ├── gradientboosting_regressor.py
    │   ├── randomforest_classifier.py
    │   └── randomforest_regressor.py
    ├── KNN_Dimension_Reduction
    │   ├── __init__.py
    │   ├── __pycache__
    │   │   ├── __init__.cpython-35.pyc
    │   │   ├── isomap.cpython-35.pyc
    │   │   ├── kneighbors_classifier.cpython-35.pyc
    │   │   ├── kneighbors_regressor.cpython-35.pyc
    │   │   ├── kpca.cpython-35.pyc
    │   │   ├── lle.cpython-35.pyc
    │   │   ├── mds.cpython-35.pyc
    │   │   └── pca.cpython-35.pyc
    │   ├── isomap.py
    │   ├── kneighbors_classifier.py
    │   ├── kneighbors_regressor.py
    │   ├── kpca.py
    │   ├── lle.py
    │   ├── mds.py
    │   └── pca.py
    ├── Kaggle
    │   ├── __init__.py
    │   ├── __pycache__
    │   │   ├── __init__.cpython-35.pyc
    │   │   ├── data_clean.cpython-35.pyc
    │   │   ├── data_preprocess.cpython-35.pyc
    │   │   ├── grid_search.cpython-35.pyc
    │   │   └── learning_validation_curve.cpython-35.pyc
    │   ├── check_data.ipynb
    │   ├── data_clean.py
    │   ├── data_preprocess.py
    │   ├── grid_search.py
    │   └── learning_validation_curve.py
    ├── Linear
    │   ├── __init__.py
    │   ├── __pycache__
    │   │   ├── __init__.cpython-35.pyc
    │   │   ├── elasticnet.cpython-35.pyc
    │   │   ├── lasso.cpython-35.pyc
    │   │   ├── lda.cpython-35.pyc
    │   │   ├── linear_regression.cpython-35.pyc
    │   │   ├── logistic_regression.cpython-35.pyc
    │   │   └── ridge.cpython-35.pyc
    │   ├── elasticnet.py
    │   ├── lasso.py
    │   ├── lda.py
    │   ├── linear_regression.py
    │   ├── logistic_regression.py
    │   └── ridge.py
    ├── Model_Selection
    │   ├── __init__.py
    │   ├── __pycache__
    │   │   ├── __init__.cpython-35.pyc
    │   │   ├── classification_metrics.cpython-35.pyc
    │   │   ├── data_splittion.cpython-35.pyc
    │   │   ├── grid_search.cpython-35.pyc
    │   │   ├── learning_curve.cpython-35.pyc
    │   │   ├── loss_function.cpython-35.pyc
    │   │   ├── regression_metrics.cpython-35.pyc
    │   │   └── validation_curve.cpython-35.pyc
    │   ├── classification_metrics.py
    │   ├── data_splittion.py
    │   ├── grid_search.py
    │   ├── learning_curve.py
    │   ├── loss_function.py
    │   ├── regression_metrics.py
    │   └── validation_curve.py
    ├── Perceptron_Neural_Network
    │   ├── __init__.py
    │   ├── __pycache__
    │   │   ├── __init__.cpython-35.pyc
    │   │   ├── neural_network.cpython-35.pyc
    │   │   ├── neural_network_iris.cpython-35.pyc
    │   │   └── perceptron.cpython-35.pyc
    │   ├── neural_network.py
    │   ├── neural_network_iris.py
    │   └── perceptron.py
    ├── PreProcessing
    │   ├── __init__.py
    │   ├── __pycache__
    │   │   ├── __init__.cpython-35.pyc
    │   │   ├── binarize.cpython-35.pyc
    │   │   ├── dictionary_learning.cpython-35.pyc
    │   │   ├── feature_selection_bagging.cpython-35.pyc
    │   │   ├── feature_selection_embeded.cpython-35.pyc
    │   │   ├── feature_selection_filter.cpython-35.pyc
    │   │   ├── normalize.cpython-35.pyc
    │   │   ├── onehot_encode.cpython-35.pyc
    │   │   ├── pipeline.cpython-35.pyc
    │   │   └── standardize.cpython-35.pyc
    │   ├── binarize.py
    │   ├── dictionary_learning.py
    │   ├── feature_selection_bagging.py
    │   ├── feature_selection_embeded.py
    │   ├── feature_selection_filter.py
    │   ├── normalize.py
    │   ├── onehot_encode.py
    │   ├── pipeline.py
    │   └── standardize.py
    ├── SVM
    │   ├── SVC.py
    │   ├── SVR.py
    │   ├── __init__.py
    │   ├── __pycache__
    │   │   ├── SVC.cpython-35.pyc
    │   │   ├── SVR.cpython-35.pyc
    │   │   ├── __init__.cpython-35.pyc
    │   │   ├── linearSVC.cpython-35.pyc
    │   │   └── linearSVR.cpython-35.pyc
    │   ├── linearSVC.py
    │   └── linearSVR.py
    ├── Semi_Supervised_Learning
    │   ├── __init__.py
    │   ├── __pycache__
    │   │   ├── __init__.cpython-35.pyc
    │   │   ├── labelPropagation.cpython-35.pyc
    │   │   └── labelSpreading.cpython-35.pyc
    │   ├── labelPropagation.py
    │   └── labelSpreading.py
    ├── __init__.py
    └── __pycache__
    │   └── __init__.cpython-35.pyc
├── docs
    ├── Makefile
    ├── build
    │   ├── doctrees
    │   │   ├── chapters.Bayesian.doctree
    │   │   ├── chapters.Cluster_EM.doctree
    │   │   ├── chapters.Decision_Tree.doctree
    │   │   ├── chapters.Ensemble.doctree
    │   │   ├── chapters.KNN_Dimension_Reduction.doctree
    │   │   ├── chapters.Kaggle.doctree
    │   │   ├── chapters.Linear.doctree
    │   │   ├── chapters.Model_Selection.doctree
    │   │   ├── chapters.Perceptron_Neural_Network.doctree
    │   │   ├── chapters.PreProcessing.doctree
    │   │   ├── chapters.SVM.doctree
    │   │   ├── chapters.Semi_Supervised_Learning.doctree
    │   │   ├── chapters.doctree
    │   │   ├── environment.pickle
    │   │   ├── index.doctree
    │   │   └── modules.doctree
    │   └── html
    │   │   ├── .buildinfo
    │   │   ├── _sources
    │   │       ├── chapters.Bayesian.txt
    │   │       ├── chapters.Cluster_EM.txt
    │   │       ├── chapters.Decision_Tree.txt
    │   │       ├── chapters.Ensemble.txt
    │   │       ├── chapters.KNN_Dimension_Reduction.txt
    │   │       ├── chapters.Kaggle.txt
    │   │       ├── chapters.Linear.txt
    │   │       ├── chapters.Model_Selection.txt
    │   │       ├── chapters.Perceptron_Neural_Network.txt
    │   │       ├── chapters.PreProcessing.txt
    │   │       ├── chapters.SVM.txt
    │   │       ├── chapters.Semi_Supervised_Learning.txt
    │   │       ├── chapters.txt
    │   │       ├── index.txt
    │   │       └── modules.txt
    │   │   ├── _static
    │   │       ├── ajax-loader.gif
    │   │       ├── basic.css
    │   │       ├── classic.css
    │   │       ├── comment-bright.png
    │   │       ├── comment-close.png
    │   │       ├── comment.png
    │   │       ├── doctools.js
    │   │       ├── down-pressed.png
    │   │       ├── down.png
    │   │       ├── file.png
    │   │       ├── jquery-1.11.1.js
    │   │       ├── jquery.js
    │   │       ├── minus.png
    │   │       ├── plus.png
    │   │       ├── pygments.css
    │   │       ├── searchtools.js
    │   │       ├── sidebar.js
    │   │       ├── underscore-1.3.1.js
    │   │       ├── underscore.js
    │   │       ├── up-pressed.png
    │   │       ├── up.png
    │   │       └── websupport.js
    │   │   ├── chapters.Bayesian.html
    │   │   ├── chapters.Cluster_EM.html
    │   │   ├── chapters.Decision_Tree.html
    │   │   ├── chapters.Ensemble.html
    │   │   ├── chapters.KNN_Dimension_Reduction.html
    │   │   ├── chapters.Kaggle.html
    │   │   ├── chapters.Linear.html
    │   │   ├── chapters.Model_Selection.html
    │   │   ├── chapters.Perceptron_Neural_Network.html
    │   │   ├── chapters.PreProcessing.html
    │   │   ├── chapters.SVM.html
    │   │   ├── chapters.Semi_Supervised_Learning.html
    │   │   ├── chapters.html
    │   │   ├── genindex.html
    │   │   ├── index.html
    │   │   ├── modules.html
    │   │   ├── objects.inv
    │   │   ├── py-modindex.html
    │   │   ├── search.html
    │   │   └── searchindex.js
    ├── make.bat
    └── source
    │   ├── chapters.Bayesian.rst
    │   ├── chapters.Cluster_EM.rst
    │   ├── chapters.Decision_Tree.rst
    │   ├── chapters.Ensemble.rst
    │   ├── chapters.KNN_Dimension_Reduction.rst
    │   ├── chapters.Kaggle.rst
    │   ├── chapters.Linear.rst
    │   ├── chapters.Model_Selection.rst
    │   ├── chapters.Perceptron_Neural_Network.rst
    │   ├── chapters.PreProcessing.rst
    │   ├── chapters.SVM.rst
    │   ├── chapters.Semi_Supervised_Learning.rst
    │   ├── chapters.rst
    │   ├── conf.py
    │   ├── index.rst
    │   └── modules.rst
└── 勘误.md


/README.md:
--------------------------------------------------------------------------------
 1 | # ReadMe
 2 | 
 3 | 另：本人搜集了个人笔记并整理成册，命名为《AI算法工程师手册》，详见:www.huaxiaozhuan.com
 4 | 
 5 | ## 1. 源码结构
 6 | 
 7 | 这里给出主要的目录结构。其中 `sphinx` 自动生成的目录和文件未全部列出。
 8 | 
 9 | ```
10 | book/
11 | 		docs/ 	.......................> 说明文档
12 | 				make.bat ...............> sphinx 脚本
13 | 				build/...................> sphinx 生成的文档所在目录
14 | 						html/............> sphinx 生成的 HTML文档的目录
15 | 				source/..................> sphinx 的配置文件以及生成的 .rst 文件
16 | 						conf.py..........> sphinx 的配置文件
17 | 		chapters/ ........................> 源代码
18 | 				Bayesian/...................> 朴素贝叶斯和贝叶斯网络	
19 | 				Cluster_EM/.................> 聚类和 EM 算法
20 | 				Decision_Tree/..............> 决策树
21 | 			 	Ensemble/...................> 集成学习
22 | 				KNN_Dimension_Reduction/....> KNN和降维
23 | 				Linear/.....................> 线性模型
24 | 				Model_Selection/............> 模型选择
25 | 				Perceptron_Neural_Network/..> 感知机和神经网络
26 | 				PreProcessing/..............> 数据预处理
27 | 				Semi_Supervised_Learning/...> 半监督学习
28 | 				SVM/........................> 支持向量机
29 | 				Kaggle/.....................> Kaggle 实战
30 | ```
31 | 
32 | ## 2. 使用 sphinx 
33 | 
34 | 
35 | 使用 `sphinx`自动生成文档主要利用了 `sphinx`的 `autodoc` 功能。这里的 `conf.py` 已经配置好。生成文档需要三步：
36 | 
37 | 1. 进入命令行后，切换到 `book/`文件夹下
38 | 2. 在命令行中输入命令：
39 | 
40 | 	```
41 | 	sphinx-apidoc -o docs/source chapters
42 | 	```
43 | 	该命令将会从 `chapters`目录下的`.py`文件中抽取注释生成`.rst`文档（这些文档将被存放在 `docs/source/`目录下）
44 | 
45 | 3. 在命令行中输入命令：
46 | 
47 | 	```
48 | 	cd docs
49 | 	make html
50 | 	```
51 | 	其中第一行命令是进入`docs/`目录。第二行命令是根据`.rst`文档生成 `html`文档（这些`html`文档位于`docs/build/html/`目录下）。
52 | 
53 | ## 3. 修改主题
54 | 
55 | 你可以修改生成的`HTML`文件的样式，这是通过修改`sphinx`的主题来实现的。
56 | 
57 | 修改 `conf.py`的 `html_theme = 'classic'` 就能实现修改主题。这里我采用经典主题`'classic'`。内建的主题有：
58 | 
59 | ```
60 | 'alabaster'、'sphinx_rtd_theme'、'classic'、'sphinxdoc'、'scrolls'、'agogo'、
61 | 'traditional'、 'nature'、 'haiku'、'pyramid'、'bizstyle'
62 | 
63 | ```
64 | 
65 | ## 4. 源码注释
66 | 
67 | 源码注释的格式为：
68 | 
69 | ```
70 | def func(a,b):
71 |     '''
72 | 	函数的描述
73 |     
74 |     :param a:  参数 a 的描述
75 |     :param b: 参数 b 的描述 
76 |     :return:  返回值的描述
77 |     '''
78 |     pass
79 | ```
80 | 
81 | 这里要注意空行和空格的存在。如果没有这些空格和空行，则 `sphinx`可能会误判这些注释的意义。
82 | 


--------------------------------------------------------------------------------
/chapters/Bayesian/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/huaxz1986/git_book/cf539065568c2588df87729c1c76303016933449/chapters/Bayesian/__init__.py


--------------------------------------------------------------------------------
/chapters/Bayesian/__pycache__/__init__.cpython-35.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/huaxz1986/git_book/cf539065568c2588df87729c1c76303016933449/chapters/Bayesian/__pycache__/__init__.cpython-35.pyc


--------------------------------------------------------------------------------
/chapters/Bayesian/__pycache__/bayesian.cpython-35.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/huaxz1986/git_book/cf539065568c2588df87729c1c76303016933449/chapters/Bayesian/__pycache__/bayesian.cpython-35.pyc


--------------------------------------------------------------------------------
/chapters/Bayesian/__pycache__/bernoulliNB.cpython-35.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/huaxz1986/git_book/cf539065568c2588df87729c1c76303016933449/chapters/Bayesian/__pycache__/bernoulliNB.cpython-35.pyc


--------------------------------------------------------------------------------
/chapters/Bayesian/__pycache__/gaussianNB.cpython-35.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/huaxz1986/git_book/cf539065568c2588df87729c1c76303016933449/chapters/Bayesian/__pycache__/gaussianNB.cpython-35.pyc


--------------------------------------------------------------------------------
/chapters/Bayesian/__pycache__/multinomialNB.cpython-35.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/huaxz1986/git_book/cf539065568c2588df87729c1c76303016933449/chapters/Bayesian/__pycache__/multinomialNB.cpython-35.pyc


--------------------------------------------------------------------------------
/chapters/Bayesian/bayesian.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | """
 3 |     贝叶斯分类器和贝叶斯网络
 4 |     ~~~~~~~~~~~~~~~~~~~~~~~~~~
 5 | 
 6 |     贝叶斯分类器
 7 | 
 8 |     :copyright: (c) 2016 by the huaxz1986.
 9 |     :license: lgpl-3.0, see LICENSE for more details.
10 | """
11 | 
12 | from sklearn import datasets,cross_validation,naive_bayes
13 | import  matplotlib.pyplot as plt
14 | from .gaussianNB import test_GaussianNB
15 | from .multinomialNB import test_MultinomialNB,test_MultinomialNB_alpha
16 | from .bernoulliNB import test_BernoulliNB,test_BernoulliNB_alpha,test_BernoulliNB_binarize
def load_data():
    '''
    Load the data set used for the classification experiments: the digits data set shipped with scikit-learn.

    :return: the result of ``train_test_split`` for a classification task, in order: training samples, test samples, training labels, test labels
    '''
    dataset=datasets.load_digits() # digits data set bundled with scikit-learn
    samples,labels=dataset.data,dataset.target
    # NOTE(review): sklearn.cross_validation was removed in scikit-learn 0.20 in
    # favour of sklearn.model_selection -- confirm which release this targets.
    # Stratified split on the labels; a quarter of the data becomes the test set.
    return cross_validation.train_test_split(
        samples,labels,test_size=0.25,random_state=0,stratify=labels)
def show_digits():
    '''
    Plot the digits data set. Only the first 25 sample images are drawn, laid out on a 5x5 grid.

    :return: None
    '''
    dataset=datasets.load_digits()
    figure=plt.figure()
    print("vector from images 0:",dataset.data[0])
    # Subplot positions are 1-based, hence start=1.
    for slot,image in enumerate(dataset.images[:25],start=1):
        axes=figure.add_subplot(5,5,slot)
        axes.imshow(image,cmap=plt.cm.gray_r, interpolation='nearest')
    plt.show()
39 | 
if __name__=='__main__':
    X_train,X_test,y_train,y_test=load_data() # build the classification data set
    # Run every naive Bayes experiment on the same split, in the original order.
    for experiment in (test_GaussianNB,
                       test_MultinomialNB,
                       test_MultinomialNB_alpha,
                       test_BernoulliNB,
                       test_BernoulliNB_alpha,
                       test_BernoulliNB_binarize):
        experiment(X_train,X_test,y_train,y_test)
48 | 


--------------------------------------------------------------------------------
/chapters/Bayesian/bernoulliNB.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | """
 3 |     贝叶斯分类器和贝叶斯网络
 4 |     ~~~~~~~~~~~~~~~~~~~~~~~~~~
 5 | 
 6 |     BernoulliNB
 7 | 
 8 |     :copyright: (c) 2016 by the huaxz1986.
 9 |     :license: lgpl-3.0, see LICENSE for more details.
10 | """
11 | 
12 | from sklearn import naive_bayes
13 | import  numpy as np
14 | import  matplotlib.pyplot as plt
15 | 
def test_BernoulliNB(*data):
    '''
    Demonstrate basic usage of BernoulliNB: fit with default parameters and print the scores.

    :param data: variadic arguments; expected in order: training samples, test samples, training labels, test labels
    :return: None
    '''
    train_samples,test_samples,train_labels,test_labels=data
    model=naive_bayes.BernoulliNB()
    model.fit(train_samples,train_labels)
    train_score=model.score(train_samples,train_labels)
    print('Training Score: %.2f' % train_score)
    test_score=model.score(test_samples,test_labels)
    print('Testing Score: %.2f' % test_score)
def test_BernoulliNB_alpha(*data):
    '''
    Examine how the ``alpha`` parameter affects the predictive performance of BernoulliNB.

    :param data: variadic arguments; expected in order: training samples, test samples, training labels, test labels
    :return: None
    '''
    X_train,X_test,y_train,y_test=data
    alphas=np.logspace(-2,5,num=200)

    def score_pair(alpha):
        # Fit one classifier for this alpha; report (training score, testing score).
        model=naive_bayes.BernoulliNB(alpha=alpha)
        model.fit(X_train,y_train)
        return model.score(X_train,y_train),model.score(X_test, y_test)

    pairs=[score_pair(alpha) for alpha in alphas]
    train_scores=[pair[0] for pair in pairs]
    test_scores=[pair[1] for pair in pairs]

    ## plotting
    axes=plt.figure().add_subplot(1,1,1)
    axes.plot(alphas,train_scores,label="Training Score")
    axes.plot(alphas,test_scores,label="Testing Score")
    axes.set_xlabel(r"$\alpha$")
    axes.set_ylabel("score")
    axes.set_ylim(0,1.0)
    axes.set_title("BernoulliNB")
    axes.set_xscale("log") # alphas are log-spaced, so use a log x axis
    axes.legend(loc="best")
    plt.show()
def test_BernoulliNB_binarize(*data):
    '''
    Examine how the ``binarize`` parameter affects the predictive performance of BernoulliNB.

    :param data: variadic arguments; expected in order: training samples, test samples, training labels, test labels
    :return: None
    '''
    X_train,X_test,y_train,y_test=data
    # Sweep thresholds from just below the smallest feature value to just above the largest one.
    lowest=min(np.min(X_train.ravel()),np.min(X_test.ravel()))-0.1
    highest=max(np.max(X_train.ravel()),np.max(X_test.ravel()))+0.1
    thresholds=np.linspace(lowest,highest,endpoint=True,num=100)

    def score_pair(threshold):
        # Fit one classifier for this threshold; report (training score, testing score).
        model=naive_bayes.BernoulliNB(binarize=threshold)
        model.fit(X_train,y_train)
        return model.score(X_train,y_train),model.score(X_test, y_test)

    pairs=[score_pair(threshold) for threshold in thresholds]
    train_scores=[pair[0] for pair in pairs]
    test_scores=[pair[1] for pair in pairs]

    ## plotting
    axes=plt.figure().add_subplot(1,1,1)
    axes.plot(thresholds,train_scores,label="Training Score")
    axes.plot(thresholds,test_scores,label="Testing Score")
    axes.set_xlabel("binarize")
    axes.set_ylabel("score")
    axes.set_ylim(0,1.0)
    axes.set_xlim(lowest-1,highest+1)
    axes.set_title("BernoulliNB")
    axes.legend(loc="best")
    plt.show()
88 | 


--------------------------------------------------------------------------------
/chapters/Bayesian/gaussianNB.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | """
 3 |     贝叶斯分类器和贝叶斯网络
 4 |     ~~~~~~~~~~~~~~~~~~~~~~~~~~
 5 | 
 6 |     GaussianNB
 7 | 
 8 |     :copyright: (c) 2016 by the huaxz1986.
 9 |     :license: lgpl-3.0, see LICENSE for more details.
10 | """
11 | from sklearn import naive_bayes
12 | 
def test_GaussianNB(*data):
    '''
    Demonstrate basic usage of GaussianNB: fit with default parameters and print the scores.

    :param data: variadic arguments; expected in order: training samples, test samples, training labels, test labels
    :return: None
    '''
    train_samples,test_samples,train_labels,test_labels=data
    model=naive_bayes.GaussianNB()
    model.fit(train_samples,train_labels)
    train_score=model.score(train_samples,train_labels)
    print('Training Score: %.2f' % train_score)
    test_score=model.score(test_samples,test_labels)
    print('Testing Score: %.2f' % test_score)


--------------------------------------------------------------------------------
/chapters/Bayesian/multinomialNB.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | """
 3 |     贝叶斯分类器和贝叶斯网络
 4 |     ~~~~~~~~~~~~~~~~~~~~~~~~~~
 5 | 
 6 |     MultinomialNB
 7 | 
 8 |     :copyright: (c) 2016 by the huaxz1986.
 9 |     :license: lgpl-3.0, see LICENSE for more details.
10 | """
11 | 
12 | from sklearn import naive_bayes
13 | import  numpy as np
14 | import  matplotlib.pyplot as plt
15 | 
def test_MultinomialNB(*data):
    '''
    Demonstrate basic usage of MultinomialNB: fit with default parameters and print the scores.

    :param data: variadic arguments; expected in order: training samples, test samples, training labels, test labels
    :return: None
    '''
    train_samples,test_samples,train_labels,test_labels=data
    model=naive_bayes.MultinomialNB()
    model.fit(train_samples,train_labels)
    train_score=model.score(train_samples,train_labels)
    print('Training Score: %.2f' % train_score)
    test_score=model.score(test_samples,test_labels)
    print('Testing Score: %.2f' % test_score)
def test_MultinomialNB_alpha(*data):
    '''
    Examine how the ``alpha`` parameter affects the predictive performance of MultinomialNB.

    :param data: variadic arguments; expected in order: training samples, test samples, training labels, test labels
    :return: None
    '''
    X_train,X_test,y_train,y_test=data
    alphas=np.logspace(-2,5,num=200)
    train_scores=[]
    test_scores=[]
    for alpha in alphas:
        cls=naive_bayes.MultinomialNB(alpha=alpha)
        cls.fit(X_train,y_train)
        train_scores.append(cls.score(X_train,y_train))
        test_scores.append(cls.score(X_test, y_test))

    ## plotting
    fig=plt.figure()
    ax=fig.add_subplot(1,1,1)
    ax.plot(alphas,train_scores,label="Training Score")
    ax.plot(alphas,test_scores,label="Testing Score")
    ax.set_xlabel(r"$\alpha$")
    ax.set_ylabel("score")
    ax.set_ylim(0,1.0)
    ax.set_title("MultinomialNB")
    ax.set_xscale("log") # alphas are log-spaced, so use a log x axis
    # The curve labels are only rendered once a legend is drawn; without this
    # call the two curves cannot be told apart.
    ax.legend(loc="best")
    plt.show()
56 | 


--------------------------------------------------------------------------------
/chapters/Cluster_EM/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/huaxz1986/git_book/cf539065568c2588df87729c1c76303016933449/chapters/Cluster_EM/__init__.py


--------------------------------------------------------------------------------
/chapters/Cluster_EM/__pycache__/__init__.cpython-35.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/huaxz1986/git_book/cf539065568c2588df87729c1c76303016933449/chapters/Cluster_EM/__pycache__/__init__.cpython-35.pyc


--------------------------------------------------------------------------------
/chapters/Cluster_EM/__pycache__/agglomerative_clustering.cpython-35.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/huaxz1986/git_book/cf539065568c2588df87729c1c76303016933449/chapters/Cluster_EM/__pycache__/agglomerative_clustering.cpython-35.pyc


--------------------------------------------------------------------------------
/chapters/Cluster_EM/__pycache__/cluster.cpython-35.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/huaxz1986/git_book/cf539065568c2588df87729c1c76303016933449/chapters/Cluster_EM/__pycache__/cluster.cpython-35.pyc


--------------------------------------------------------------------------------
/chapters/Cluster_EM/__pycache__/dbscan.cpython-35.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/huaxz1986/git_book/cf539065568c2588df87729c1c76303016933449/chapters/Cluster_EM/__pycache__/dbscan.cpython-35.pyc


--------------------------------------------------------------------------------
/chapters/Cluster_EM/__pycache__/gmm.cpython-35.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/huaxz1986/git_book/cf539065568c2588df87729c1c76303016933449/chapters/Cluster_EM/__pycache__/gmm.cpython-35.pyc


--------------------------------------------------------------------------------
/chapters/Cluster_EM/__pycache__/kmeans.cpython-35.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/huaxz1986/git_book/cf539065568c2588df87729c1c76303016933449/chapters/Cluster_EM/__pycache__/kmeans.cpython-35.pyc


--------------------------------------------------------------------------------
/chapters/Cluster_EM/agglomerative_clustering.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | """
 3 |     聚类和EM算法
 4 |     ~~~~~~~~~~~~~~~~
 5 | 
 6 |     AgglomerativeClustering
 7 | 
 8 |     :copyright: (c) 2016 by the huaxz1986.
 9 |     :license: lgpl-3.0, see LICENSE for more details.
10 | """
11 | from sklearn import  cluster
12 | from sklearn.metrics import adjusted_rand_score
13 | import matplotlib.pyplot as plt
14 | 
def test_AgglomerativeClustering(*data):
    '''
    Demonstrate basic usage of AgglomerativeClustering: cluster with default parameters and print the ARI.

    :param data: variadic arguments; expected in order: the sample set, the true cluster label of each sample
    :return: None
    '''
    samples,true_labels=data
    clusterer=cluster.AgglomerativeClustering()
    assigned=clusterer.fit_predict(samples)
    ari=adjusted_rand_score(true_labels,assigned)
    print("ARI:%s"% ari)
def test_AgglomerativeClustering_nclusters(*data):
    '''
    Examine how the ``n_clusters`` parameter affects the AgglomerativeClustering result.

    :param data: variadic arguments; expected in order: the sample set, the true cluster label of each sample
    :return: None
    '''
    samples,true_labels=data
    cluster_counts=range(1,50)

    def ari_for(count):
        # ARI of the clustering obtained with this number of clusters.
        clusterer=cluster.AgglomerativeClustering(n_clusters=count)
        return adjusted_rand_score(true_labels,clusterer.fit_predict(samples))

    scores=[ari_for(count) for count in cluster_counts]

    ## plotting
    figure=plt.figure()
    axes=figure.add_subplot(1,1,1)
    axes.plot(cluster_counts,scores,marker="+")
    axes.set_xlabel("n_clusters")
    axes.set_ylabel("ARI")
    figure.suptitle("AgglomerativeClustering")
    plt.show()
def test_AgglomerativeClustering_linkage(*data):
    '''
    Examine how the linkage criterion affects the AgglomerativeClustering result.

    :param data: variadic arguments; expected in order: the sample set, the true cluster label of each sample
    :return: None
    '''
    samples,true_labels=data
    cluster_counts=range(1,50)
    figure=plt.figure()
    axes=figure.add_subplot(1,1,1)

    # One curve per linkage criterion, each drawn with its own marker.
    for linkage,marker in zip(['ward','complete','average'],"+o*"):
        scores=[]
        for count in cluster_counts:
            clusterer=cluster.AgglomerativeClustering(n_clusters=count,linkage=linkage)
            assigned=clusterer.fit_predict(samples)
            scores.append(adjusted_rand_score(true_labels,assigned))
        axes.plot(cluster_counts,scores,marker=marker,label="linkage:%s"%linkage)

    axes.set_xlabel("n_clusters")
    axes.set_ylabel("ARI")
    axes.legend(loc="best")
    figure.suptitle("AgglomerativeClustering")
    plt.show()


--------------------------------------------------------------------------------
/chapters/Cluster_EM/cluster.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | """
 3 |     聚类和EM算法
 4 |     ~~~~~~~~~~~~~~~~
 5 | 
 6 |     聚类
 7 | 
 8 |     :copyright: (c) 2016 by the huaxz1986.
 9 |     :license: lgpl-3.0, see LICENSE for more details.
10 | """
11 | 
12 | import numpy as np
13 | import matplotlib.pyplot as plt
14 | from sklearn.datasets.samples_generator import make_blobs
15 | # from  .agglomerative_clustering import test_AgglomerativeClustering,test_AgglomerativeClustering_nclusters,test_AgglomerativeClustering_linkage
16 | # from .dbscan import test_DBSCAN,test_DBSCAN_epsilon,test_DBSCAN_min_samples
17 | from chapters.Cluster_EM.gmm import test_GMM,test_GMM_cov_type,test_GMM_n_components
18 | # from .kmeans import test_Kmeans,test_Kmeans_n_init,test_Kmeans_nclusters
19 | 
def create_data(centers,num=100,std=0.7):
    '''
    Generate a data set for the clustering experiments via ``make_blobs``.

    :param centers: array of cluster centre points. If the centres are two-dimensional, every generated sample is two-dimensional.
    :param num: number of samples
    :param std: standard deviation of the samples within each cluster
    :return: the clustering data set as a tuple: the sample set first, the true cluster label of each sample second
    '''
    blobs=make_blobs(cluster_std=std, centers=centers, n_samples=num)
    samples,true_labels=blobs
    return samples,true_labels
def plot_data(*data):
    '''
    Scatter-plot the clustering data set, one colour per true cluster.

    Only the first two feature columns of the sample set are drawn.

    :param data: variadic arguments; expected in order: the sample set, the true cluster label of each sample
    :return: None
    '''
    X,labels_true=data
    labels=np.unique(labels_true)
    fig=plt.figure()
    ax=fig.add_subplot(1,1,1)
    colors='rgbyckm' # each cluster is drawn in a different colour
    for i,label in enumerate(labels):
        position=labels_true==label # boolean mask selecting this cluster's samples
        # Colours wrap around when there are more clusters than colours.
        ax.scatter(X[position,0],X[position,1],label="cluster %d"%label,
                   color=colors[i%len(colors)])

    ax.legend(loc="best",framealpha=0.5)
    ax.set_xlabel("X[0]")
    # The vertical axis shows the second feature column of X, so it is labelled
    # "X[1]" (previously mislabelled "Y[1]").
    ax.set_ylabel("X[1]")
    ax.set_title("data")
    plt.show()
53 | 
if __name__=='__main__':
    # Driver: build one synthetic data set, then run the selected experiment.
    # The commented-out calls are alternative experiments; they can only be
    # enabled together with the matching (also commented-out) imports at the top of the file.
    centers=[[1,1],[2,2],[1,2],[10,20]] # centre points used to generate the clusters
    X,labels_true=create_data(centers,1000,0.5) # generate the clustering data set
    # plot_data(X,labels_true) # plot the clustering data set
    # test_Kmeans(X,labels_true) #  call test_Kmeans
    # test_Kmeans_nclusters(X,labels_true) #  call test_Kmeans_nclusters
    # test_Kmeans_n_init(X,labels_true) #  call test_Kmeans_n_init
    # test_DBSCAN(X,labels_true) #  call test_DBSCAN
    # test_DBSCAN_epsilon(X,labels_true) #  call test_DBSCAN_epsilon
    # test_DBSCAN_min_samples(X,labels_true) #  call test_DBSCAN_min_samples
    # test_AgglomerativeClustering(X,labels_true) #  call test_AgglomerativeClustering
    # test_AgglomerativeClustering_nclusters(X,labels_true) #  call test_AgglomerativeClustering_nclusters
    # test_AgglomerativeClustering_linkage(X,labels_true) #  call test_AgglomerativeClustering_linkage
    # test_GMM(X,labels_true) #  call test_GMM
    # test_GMM_n_components(X,labels_true) #  call test_GMM_n_components
    test_GMM_cov_type(X,labels_true) #  call test_GMM_cov_type
70 | 
71 | 


--------------------------------------------------------------------------------
/chapters/Cluster_EM/dbscan.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | """
 3 |     聚类和EM算法
 4 |     ~~~~~~~~~~~~~~~~
 5 | 
 6 |     DBSCAN
 7 | 
 8 |     :copyright: (c) 2016 by the huaxz1986.
 9 |     :license: lgpl-3.0, see LICENSE for more details.
10 | """
11 | from sklearn import  cluster
12 | from sklearn.metrics import adjusted_rand_score
13 | import matplotlib.pyplot as plt
14 | 
def test_DBSCAN(*data):
    '''
    Demonstrate basic usage of DBSCAN: cluster with default parameters, then print the ARI and the number of core samples.

    :param data: variadic arguments; expected in order: the sample set, the true cluster label of each sample
    :return: None
    '''
    samples,true_labels=data
    clusterer=cluster.DBSCAN()
    assigned=clusterer.fit_predict(samples)
    ari=adjusted_rand_score(true_labels,assigned)
    print("ARI:%s"% ari)
    core_count=len(clusterer.core_sample_indices_)
    print("Core sample num:%d"%core_count)
def test_DBSCAN_epsilon(*data):
    '''
    Examine how the ``eps`` parameter affects the DBSCAN result.

    Draws two panels against ``eps``: the ARI on the left and the number of core samples on the right.

    :param data: variadic arguments; expected in order: the sample set, the true cluster label of each sample
    :return: None
    '''
    # Imported locally because this module does not import numpy at file level;
    # without it the np.logspace call below raises NameError.
    import numpy as np

    X,labels_true=data
    epsilons=np.logspace(-1,1.5)
    ARIs=[]
    Core_nums=[]
    for epsilon in epsilons:
        clst=cluster.DBSCAN(eps=epsilon)
        predicted_labels=clst.fit_predict(X)
        ARIs.append( adjusted_rand_score(labels_true,predicted_labels))
        Core_nums.append(len(clst.core_sample_indices_))

    ## plotting
    fig=plt.figure()
    ax=fig.add_subplot(1,2,1)
    ax.plot(epsilons,ARIs,marker='+')
    ax.set_xscale('log') # epsilons are log-spaced, so use a log x axis
    ax.set_xlabel(r"$\epsilon$")
    ax.set_ylim(0,1)
    ax.set_ylabel('ARI')

    ax=fig.add_subplot(1,2,2)
    ax.plot(epsilons,Core_nums,marker='o')
    ax.set_xscale('log')
    ax.set_xlabel(r"$\epsilon$")
    ax.set_ylabel('Core_Nums')

    fig.suptitle("DBSCAN")
    plt.show()
def test_DBSCAN_min_samples(*data):
    '''
    Examine how the DBSCAN clustering result changes with the min_samples parameter.

    :param data: variadic arguments forming a 2-tuple: the sample set, then the true cluster labels of the samples
    :return: None
    '''
    samples, truth = data
    thresholds = range(1, 100)
    ari_curve = []
    core_curve = []
    for threshold in thresholds:
        model = cluster.DBSCAN(min_samples=threshold)
        assigned = model.fit_predict(samples)
        ari_curve.append(adjusted_rand_score(truth, assigned))
        core_curve.append(len(model.core_sample_indices_))

    # Two side-by-side panels: ARI on the left, core-sample count on the right.
    figure = plt.figure()

    ari_axes = figure.add_subplot(1, 2, 1)
    ari_axes.plot(thresholds, ari_curve, marker='+')
    ari_axes.set_xlabel("min_samples")
    ari_axes.set_ylim(0, 1)
    ari_axes.set_ylabel('ARI')

    core_axes = figure.add_subplot(1, 2, 2)
    core_axes.plot(thresholds, core_curve, marker='o')
    core_axes.set_xlabel("min_samples")
    core_axes.set_ylabel('Core_Nums')

    figure.suptitle("DBSCAN")
    plt.show()
93 | 


--------------------------------------------------------------------------------
/chapters/Cluster_EM/gmm.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | """
 3 |     聚类和EM算法
 4 |     ~~~~~~~~~~~~~~~~
 5 | 
 6 |     GMM
 7 | 
 8 |     :copyright: (c) 2016 by the huaxz1986.
 9 |     :license: lgpl-3.0, see LICENSE for more details.
10 | """
11 | from sklearn import mixture
12 | from sklearn.metrics import adjusted_rand_score
13 | import matplotlib.pyplot as plt
14 | 
def test_GMM(*data):
    '''
    Demonstrate basic GaussianMixture usage with the estimator's default parameters.

    :param data: variadic arguments forming a 2-tuple: the sample set, then the true cluster labels of the samples
    :return: None
    '''
    samples, truth = data
    model = mixture.GaussianMixture()
    model.fit(samples)
    assigned = model.predict(samples)
    ari = adjusted_rand_score(truth, assigned)
    print("ARI:%s"% ari)
def test_GMM_n_components(*data):
    '''
    Examine how the GaussianMixture clustering result changes with n_components.

    :param data: variadic arguments forming a 2-tuple: the sample set, then the true cluster labels of the samples
    :return: None
    '''
    samples, truth = data
    component_counts = range(1, 50)

    def _ari_for(count):
        # Fit one mixture with `count` components and score it against the truth.
        model = mixture.GaussianMixture(n_components=count)
        model.fit(samples)
        return adjusted_rand_score(truth, model.predict(samples))

    ari_curve = [_ari_for(count) for count in component_counts]

    # Single panel: ARI versus number of components.
    figure = plt.figure()
    axes = figure.add_subplot(1, 1, 1)
    axes.plot(component_counts, ari_curve, marker="+")
    axes.set_xlabel("n_components")
    axes.set_ylabel("ARI")
    figure.suptitle("GMM")
    plt.show()
def test_GMM_cov_type(*data):
    '''
    Examine how the GaussianMixture clustering result changes with the covariance type.

    One ARI-versus-n_components curve is drawn per covariance type, all on the same axes.

    :param data: variadic arguments forming a 2-tuple: the sample set, then the true cluster labels of the samples
    :return: None
    '''
    samples, truth = data
    component_counts = range(1, 50)

    covariance_kinds = ['spherical', 'tied', 'diag', 'full']
    marker_chars = "+o*s"  # one marker character per covariance type, same order
    figure = plt.figure()
    axes = figure.add_subplot(1, 1, 1)

    for kind, marker_char in zip(covariance_kinds, marker_chars):
        ari_curve = []
        for count in component_counts:
            model = mixture.GaussianMixture(n_components=count, covariance_type=kind)
            model.fit(samples)
            assigned = model.predict(samples)
            ari_curve.append(adjusted_rand_score(truth, assigned))
        axes.plot(component_counts, ari_curve, marker=marker_char,
                  label="covariance_type:%s"% kind)

    axes.set_xlabel("n_components")
    axes.legend(loc="best")
    axes.set_ylabel("ARI")
    figure.suptitle("GMM")
    plt.show()
80 | 


--------------------------------------------------------------------------------
/chapters/Cluster_EM/kmeans.py:
--------------------------------------------------------------------------------
  1 | # -*- coding: utf-8 -*-
  2 | """
  3 |     聚类和EM算法
  4 |     ~~~~~~~~~~~~~~~~
  5 | 
  6 |     KMeans
  7 | 
  8 |     :copyright: (c) 2016 by the huaxz1986.
  9 |     :license: lgpl-3.0, see LICENSE for more details.
 10 | """
 11 | from sklearn import  cluster
 12 | from sklearn.metrics import adjusted_rand_score
 13 | import matplotlib.pyplot as plt
 14 | 
 15 | def test_Kmeans(*data):
 16 |     '''
 17 |     测试 KMeans 的用法
 18 | 
 19 |     :param data: 可变参数。它是一个元组。元组元素依次为：第一个元素为样本集，第二个元素为样本集的真实簇分类标记
 20 |     :return: None
 21 |     '''
 22 |     X,labels_true=data
 23 |     clst=cluster.KMeans()
 24 |     clst.fit(X)
 25 |     predicted_labels=clst.predict(X)
 26 |     print("ARI:%s"% adjusted_rand_score(labels_true,predicted_labels))
 27 |     print("Sum center distance %s"%clst.inertia_)
 28 | def test_Kmeans_nclusters(*data):
 29 |     '''
 30 |     测试 KMeans 的聚类结果随 n_clusters 参数的影响
 31 | 
 32 |     :param data: 可变参数。它是一个元组。元组元素依次为：第一个元素为样本集，第二个元素为样本集的真实簇分类标记
 33 |     :return: None
 34 |     '''
 35 |     X,labels_true=data
 36 |     nums=range(1,50)
 37 |     ARIs=[]
 38 |     Distances=[]
 39 |     for num in nums:
 40 |         clst=cluster.KMeans(n_clusters=num)
 41 |         clst.fit(X)
 42 |         predicted_labels=clst.predict(X)
 43 |         ARIs.append(adjusted_rand_score(labels_true,predicted_labels))
 44 |         Distances.append(clst.inertia_)
 45 | 
 46 |     ## 绘图
 47 |     fig=plt.figure()
 48 |     ax=fig.add_subplot(1,2,1)
 49 |     ax.plot(nums,ARIs,marker="+")
 50 |     ax.set_xlabel("n_clusters")
 51 |     ax.set_ylabel("ARI")
 52 |     ax=fig.add_subplot(1,2,2)
 53 |     ax.plot(nums,Distances,marker='o')
 54 |     ax.set_xlabel("n_clusters")
 55 |     ax.set_ylabel("inertia_")
 56 |     fig.suptitle("KMeans")
 57 |     plt.show()
 58 | def test_Kmeans_n_init(*data):
 59 |     '''
 60 |     测试 KMeans 的聚类结果随 n_init 和 init  参数的影响
 61 | 
 62 |     :param data: 可变参数。它是一个元组。元组元素依次为：第一个元素为样本集，第二个元素为样本集的真实簇分类标记
 63 |     :return: None
 64 |     '''
 65 |     X,labels_true=data
 66 |     nums=range(1,50)
 67 |     ## 绘图
 68 |     fig=plt.figure()
 69 | 
 70 |     ARIs_k=[]
 71 |     Distances_k=[]
 72 |     ARIs_r=[]
 73 |     Distances_r=[]
 74 |     for num in nums:
 75 |             clst=cluster.KMeans(n_init=num,init='k-means++')
 76 |             clst.fit(X)
 77 |             predicted_labels=clst.predict(X)
 78 |             ARIs_k.append(adjusted_rand_score(labels_true,predicted_labels))
 79 |             Distances_k.append(clst.inertia_)
 80 | 
 81 |             clst=cluster.KMeans(n_init=num,init='random')
 82 |             clst.fit(X)
 83 |             predicted_labels=clst.predict(X)
 84 |             ARIs_r.append(adjusted_rand_score(labels_true,predicted_labels))
 85 |             Distances_r.append(clst.inertia_)
 86 | 
 87 |     ax=fig.add_subplot(1,2,1)
 88 |     ax.plot(nums,ARIs_k,marker="+",label="k-means++")
 89 |     ax.plot(nums,ARIs_r,marker="+",label="random")
 90 |     ax.set_xlabel("n_init")
 91 |     ax.set_ylabel("ARI")
 92 |     ax.set_ylim(0,1)
 93 |     ax.legend(loc='best')
 94 |     ax=fig.add_subplot(1,2,2)
 95 |     ax.plot(nums,Distances_k,marker='o',label="k-means++")
 96 |     ax.plot(nums,Distances_r,marker='o',label="random")
 97 |     ax.set_xlabel("n_init")
 98 |     ax.set_ylabel("inertia_")
 99 |     ax.legend(loc='best')
100 | 
101 |     fig.suptitle("KMeans")
102 |     plt.show()
103 | 


--------------------------------------------------------------------------------
/chapters/Decision_Tree/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/huaxz1986/git_book/cf539065568c2588df87729c1c76303016933449/chapters/Decision_Tree/__init__.py


--------------------------------------------------------------------------------
/chapters/Decision_Tree/__pycache__/__init__.cpython-35.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/huaxz1986/git_book/cf539065568c2588df87729c1c76303016933449/chapters/Decision_Tree/__pycache__/__init__.cpython-35.pyc


--------------------------------------------------------------------------------
/chapters/Decision_Tree/__pycache__/decisiontree_classifier.cpython-35.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/huaxz1986/git_book/cf539065568c2588df87729c1c76303016933449/chapters/Decision_Tree/__pycache__/decisiontree_classifier.cpython-35.pyc


--------------------------------------------------------------------------------
/chapters/Decision_Tree/__pycache__/decisiontree_regressor.cpython-35.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/huaxz1986/git_book/cf539065568c2588df87729c1c76303016933449/chapters/Decision_Tree/__pycache__/decisiontree_regressor.cpython-35.pyc


--------------------------------------------------------------------------------
/chapters/Decision_Tree/decisiontree_classifier.py:
--------------------------------------------------------------------------------
  1 | # -*- coding: utf-8 -*-
  2 | """
  3 |     决策树
  4 |     ~~~~~~~~~~~~~~~~
  5 | 
  6 |     DecisionTreeClassifier
  7 | 
  8 |     :copyright: (c) 2016 by the huaxz1986.
  9 |     :license: lgpl-3.0, see LICENSE for more details.
 10 | """
 11 | import numpy as np
 12 | from sklearn.tree import DecisionTreeClassifier
 13 | from sklearn import  datasets
 14 | from sklearn import cross_validation
 15 | import matplotlib.pyplot as plt
 16 | def load_data():
 17 |     '''
 18 |     加载用于分类问题的数据集。数据集采用 scikit-learn 自带的 iris 数据集
 19 | 
 20 |     :return: 一个元组，用于分类问题。元组元素依次为：训练样本集、测试样本集、训练样本集对应的标记、测试样本集对应的标记
 21 |     '''
 22 |     iris=datasets.load_iris() # scikit-learn 自带的 iris 数据集
 23 |     X_train=iris.data
 24 |     y_train=iris.target
 25 |     return cross_validation.train_test_split(X_train, y_train,test_size=0.25,
 26 | 		random_state=0,stratify=y_train)# 分层采样拆分成训练集和测试集，测试集大小为原始数据集大小的 1/4
 27 | def test_DecisionTreeClassifier(*data):
 28 |     '''
 29 |     测试 DecisionTreeClassifier 的用法
 30 | 
 31 |     :param data: 可变参数。它是一个元组，这里要求其元素依次为：训练样本集、测试样本集、训练样本的标记、测试样本的标记
 32 |     :return:  None
 33 |     '''
 34 |     X_train,X_test,y_train,y_test=data
 35 |     clf = DecisionTreeClassifier()
 36 |     clf.fit(X_train, y_train)
 37 | 
 38 |     print("Training score:%f"%(clf.score(X_train,y_train)))
 39 |     print("Testing score:%f"%(clf.score(X_test,y_test)))
 40 | def test_DecisionTreeClassifier_criterion(*data):
 41 |     '''
 42 |     测试 DecisionTreeClassifier 的预测性能随 criterion 参数的影响
 43 | 
 44 |     :param data:  可变参数。它是一个元组，这里要求其元素依次为：训练样本集、测试样本集、训练样本的标记、测试样本的标记
 45 |     :return:  None
 46 |     '''
 47 |     X_train,X_test,y_train,y_test=data
 48 |     criterions=['gini','entropy']
 49 |     for criterion in criterions:
 50 |         clf = DecisionTreeClassifier(criterion=criterion)
 51 |         clf.fit(X_train, y_train)
 52 |         print("criterion:%s"%criterion)
 53 |         print("Training score:%f"%(clf.score(X_train,y_train)))
 54 |         print("Testing score:%f"%(clf.score(X_test,y_test)))
 55 | def test_DecisionTreeClassifier_splitter(*data):
 56 |     '''
 57 |     测试 DecisionTreeClassifier 的预测性能随划分类型的影响
 58 | 
 59 |     :param data: 可变参数。它是一个元组，这里要求其元素依次为：训练样本集、测试样本集、训练样本的标记、测试样本的标记
 60 |     :return:  None
 61 |     '''
 62 |     X_train,X_test,y_train,y_test=data
 63 |     splitters=['best','random']
 64 |     for splitter in splitters:
 65 |         clf = DecisionTreeClassifier(splitter=splitter)
 66 |         clf.fit(X_train, y_train)
 67 |         print("splitter:%s"%splitter)
 68 |         print("Training score:%f"%(clf.score(X_train,y_train)))
 69 |         print("Testing score:%f"%(clf.score(X_test,y_test)))
 70 | def test_DecisionTreeClassifier_depth(*data,maxdepth):
 71 |     '''
 72 |     测试 DecisionTreeClassifier 的预测性能随 max_depth 参数的影响
 73 | 
 74 |     :param data:  可变参数。它是一个元组，这里要求其元素依次为：训练样本集、测试样本集、训练样本的标记、测试样本的标记
 75 |     :param maxdepth: 一个整数，用于 DecisionTreeClassifier 的 max_depth 参数
 76 |     :return:  None
 77 |     '''
 78 |     X_train,X_test,y_train,y_test=data
 79 |     depths=np.arange(1,maxdepth)
 80 |     training_scores=[]
 81 |     testing_scores=[]
 82 |     for depth in depths:
 83 |         clf = DecisionTreeClassifier(max_depth=depth)
 84 |         clf.fit(X_train, y_train)
 85 |         training_scores.append(clf.score(X_train,y_train))
 86 |         testing_scores.append(clf.score(X_test,y_test))
 87 | 
 88 |     ## 绘图
 89 |     fig=plt.figure()
 90 |     ax=fig.add_subplot(1,1,1)
 91 |     ax.plot(depths,training_scores,label="traing score",marker='o')
 92 |     ax.plot(depths,testing_scores,label="testing score",marker='*')
 93 |     ax.set_xlabel("maxdepth")
 94 |     ax.set_ylabel("score")
 95 |     ax.set_title("Decision Tree Classification")
 96 |     ax.legend(framealpha=0.5,loc='best')
 97 |     plt.show()
# Script entry point: build the dataset once, then run the selected demo.
# The commented-out calls are alternative demos; uncomment the one to run.
if __name__=='__main__':
    X_train,X_test,y_train,y_test=load_data() # build the dataset for the classification problem
    test_DecisionTreeClassifier(X_train,X_test,y_train,y_test) # run test_DecisionTreeClassifier
    # test_DecisionTreeClassifier_criterion(X_train,X_test,y_train,y_test) # run test_DecisionTreeClassifier_criterion
    # test_DecisionTreeClassifier_splitter(X_train,X_test,y_train,y_test) # run test_DecisionTreeClassifier_splitter
    # test_DecisionTreeClassifier_depth(X_train,X_test,y_train,y_test,maxdepth=100) # run test_DecisionTreeClassifier_depth
104 | 


--------------------------------------------------------------------------------
/chapters/Decision_Tree/decisiontree_regressor.py:
--------------------------------------------------------------------------------
  1 | # -*- coding: utf-8 -*-
  2 | """
  3 |     决策树
  4 |     ~~~~~~~~~~~~~~~~
  5 | 
  6 |     DecisionTreeRegressor
  7 | 
  8 |     :copyright: (c) 2016 by the huaxz1986.
  9 |     :license: lgpl-3.0, see LICENSE for more details.
 10 | """
 11 | import numpy as np
 12 | from sklearn.tree import DecisionTreeRegressor
 13 | from sklearn import cross_validation
 14 | import matplotlib.pyplot as plt
 15 | def creat_data(n):
 16 |     '''
 17 |     产生用于回归问题的数据集
 18 | 
 19 |     :param n:  数据集容量
 20 |     :return: 返回一个元组，元素依次为：训练样本集、测试样本集、训练样本集对应的值、测试样本集对应的值
 21 |     '''
 22 |     np.random.seed(0)
 23 |     X = 5 * np.random.rand(n, 1)
 24 |     y = np.sin(X).ravel()
 25 |     noise_num=(int)(n/5)
 26 |     y[::5] += 3 * (0.5 - np.random.rand(noise_num)) # 每第5个样本，就在该样本的值上添加噪音
 27 |     return cross_validation.train_test_split(X, y,
 28 | 		test_size=0.25,random_state=1) # 拆分原始数据集为训练集和测试集，其中测试集大小为元素数据集大小的 1/4
 29 | def test_DecisionTreeRegressor(*data):
 30 |     '''
 31 |     测试 DecisionTreeRegressor 的用法
 32 | 
 33 |     :param data: 可变参数。它是一个元组，这里要求其元素依次为：训练样本集、测试样本集、训练样本的值、测试样本的值
 34 |     :return: None
 35 |     '''
 36 |     X_train,X_test,y_train,y_test=data
 37 |     regr = DecisionTreeRegressor()
 38 |     regr.fit(X_train, y_train)
 39 |     print("Training score:%f"%(regr.score(X_train,y_train)))
 40 |     print("Testing score:%f"%(regr.score(X_test,y_test)))
 41 |     ##绘图
 42 |     fig=plt.figure()
 43 |     ax=fig.add_subplot(1,1,1)
 44 |     X = np.arange(0.0, 5.0, 0.01)[:, np.newaxis]
 45 |     Y = regr.predict(X)
 46 |     ax.scatter(X_train, y_train, label="train sample",c='g')
 47 |     ax.scatter(X_test, y_test, label="test sample",c='r')
 48 |     ax.plot(X, Y, label="predict_value", linewidth=2,alpha=0.5)
 49 |     ax.set_xlabel("data")
 50 |     ax.set_ylabel("target")
 51 |     ax.set_title("Decision Tree Regression")
 52 |     ax.legend(framealpha=0.5)
 53 |     plt.show()
 54 | def test_DecisionTreeRegressor_splitter(*data):
 55 |     '''
 56 |     测试 DecisionTreeRegressor 预测性能随划分类型的影响
 57 | 
 58 |     :param data: 可变参数。它是一个元组，这里要求其元素依次为：训练样本集、测试样本集、训练样本的值、测试样本的值
 59 |     :return: None
 60 |     '''
 61 |     X_train,X_test,y_train,y_test=data
 62 |     splitters=['best','random']
 63 |     for splitter in splitters:
 64 |         regr = DecisionTreeRegressor(splitter=splitter)
 65 |         regr.fit(X_train, y_train)
 66 |         print("Splitter %s"%splitter)
 67 |         print("Training score:%f"%(regr.score(X_train,y_train)))
 68 |         print("Testing score:%f"%(regr.score(X_test,y_test)))
 69 | def test_DecisionTreeRegressor_depth(*data,maxdepth):
 70 |     '''
 71 |     测试 DecisionTreeRegressor 预测性能随  max_depth 的影响
 72 | 
 73 |     :param data:  可变参数。它是一个元组，这里要求其元素依次为：训练样本集、测试样本集、训练样本的值、测试样本的值
 74 |     :param maxdepth: 一个整数，它作为 DecisionTreeRegressor 的 max_depth 参数
 75 |     :return: None
 76 |     '''
 77 |     X_train,X_test,y_train,y_test=data
 78 |     depths=np.arange(1,maxdepth)
 79 |     training_scores=[]
 80 |     testing_scores=[]
 81 |     for depth in depths:
 82 |         regr = DecisionTreeRegressor(max_depth=depth)
 83 |         regr.fit(X_train, y_train)
 84 |         training_scores.append(regr.score(X_train,y_train))
 85 |         testing_scores.append(regr.score(X_test,y_test))
 86 | 
 87 |     ## 绘图
 88 |     fig=plt.figure()
 89 |     ax=fig.add_subplot(1,1,1)
 90 |     ax.plot(depths,training_scores,label="traing score")
 91 |     ax.plot(depths,testing_scores,label="testing score")
 92 |     ax.set_xlabel("maxdepth")
 93 |     ax.set_ylabel("score")
 94 |     ax.set_title("Decision Tree Regression")
 95 |     ax.legend(framealpha=0.5)
 96 |     plt.show()
# Script entry point: build the dataset once, then run the selected demo.
# The commented-out calls are alternative demos; uncomment the one to run.
if __name__=='__main__':
    X_train,X_test,y_train,y_test=creat_data(100) # build the dataset for the regression problem
    test_DecisionTreeRegressor(X_train,X_test,y_train,y_test) # run test_DecisionTreeRegressor
    # test_DecisionTreeRegressor_splitter(X_train,X_test,y_train,y_test) # run test_DecisionTreeRegressor_splitter
    # test_DecisionTreeRegressor_depth(X_train,X_test,y_train,y_test,maxdepth=20) # run test_DecisionTreeRegressor_depth


--------------------------------------------------------------------------------
/chapters/Ensemble/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/huaxz1986/git_book/cf539065568c2588df87729c1c76303016933449/chapters/Ensemble/__init__.py


--------------------------------------------------------------------------------
/chapters/Ensemble/__pycache__/__init__.cpython-35.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/huaxz1986/git_book/cf539065568c2588df87729c1c76303016933449/chapters/Ensemble/__pycache__/__init__.cpython-35.pyc


--------------------------------------------------------------------------------
/chapters/Ensemble/__pycache__/adaboost_classifier.cpython-35.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/huaxz1986/git_book/cf539065568c2588df87729c1c76303016933449/chapters/Ensemble/__pycache__/adaboost_classifier.cpython-35.pyc


--------------------------------------------------------------------------------
/chapters/Ensemble/__pycache__/adaboost_regressor.cpython-35.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/huaxz1986/git_book/cf539065568c2588df87729c1c76303016933449/chapters/Ensemble/__pycache__/adaboost_regressor.cpython-35.pyc


--------------------------------------------------------------------------------
/chapters/Ensemble/__pycache__/gradientboosting_classifier.cpython-35.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/huaxz1986/git_book/cf539065568c2588df87729c1c76303016933449/chapters/Ensemble/__pycache__/gradientboosting_classifier.cpython-35.pyc


--------------------------------------------------------------------------------
/chapters/Ensemble/__pycache__/gradientboosting_regressor.cpython-35.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/huaxz1986/git_book/cf539065568c2588df87729c1c76303016933449/chapters/Ensemble/__pycache__/gradientboosting_regressor.cpython-35.pyc


--------------------------------------------------------------------------------
/chapters/Ensemble/__pycache__/randomforest_classifier.cpython-35.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/huaxz1986/git_book/cf539065568c2588df87729c1c76303016933449/chapters/Ensemble/__pycache__/randomforest_classifier.cpython-35.pyc


--------------------------------------------------------------------------------
/chapters/Ensemble/__pycache__/randomforest_regressor.cpython-35.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/huaxz1986/git_book/cf539065568c2588df87729c1c76303016933449/chapters/Ensemble/__pycache__/randomforest_regressor.cpython-35.pyc


--------------------------------------------------------------------------------
/chapters/Ensemble/adaboost_regressor.py:
--------------------------------------------------------------------------------
  1 | # -*- coding: utf-8 -*-
  2 | """
  3 |     集成学习
  4 |     ~~~~~~~~~~~~~~~~
  5 | 
  6 |     AdaBoostRegressor
  7 | 
  8 |     :copyright: (c) 2016 by the huaxz1986.
  9 |     :license: lgpl-3.0, see LICENSE for more details.
 10 | """
 11 | 
 12 | import matplotlib.pyplot as plt
 13 | import numpy as np
 14 | from sklearn import datasets,cross_validation,ensemble
 15 | 
 16 | def load_data_regression():
 17 |     '''
 18 |     加载用于回归问题的数据集
 19 | 
 20 |     :return: 一个元组，用于回归问题。元组元素依次为：训练样本集、测试样本集、训练样本集对应的值、测试样本集对应的值
 21 |     '''
 22 |     diabetes = datasets.load_diabetes() #使用 scikit-learn 自带的一个糖尿病病人的数据集
 23 |     return cross_validation.train_test_split(diabetes.data,diabetes.target,
 24 |     test_size=0.25,random_state=0) # 拆分成训练集和测试集，测试集大小为原始数据集大小的 1/4
 25 | 
 26 | def test_AdaBoostRegressor(*data):
 27 |     '''
 28 |     测试 AdaBoostRegressor 的用法，绘制 AdaBoostRegressor 的预测性能随基础回归器数量的影响
 29 | 
 30 |     :param data: 可变参数。它是一个元组，这里要求其元素依次为：训练样本集、测试样本集、训练样本的值、测试样本的值
 31 |     :return:  None
 32 |     '''
 33 |     X_train,X_test,y_train,y_test=data
 34 |     regr=ensemble.AdaBoostRegressor()
 35 |     regr.fit(X_train,y_train)
 36 |     ## 绘图
 37 |     fig=plt.figure()
 38 |     ax=fig.add_subplot(1,1,1)
 39 |     estimators_num=len(regr.estimators_)
 40 |     X=range(1,estimators_num+1)
 41 |     ax.plot(list(X),list(regr.staged_score(X_train,y_train)),label="Traing score")
 42 |     ax.plot(list(X),list(regr.staged_score(X_test,y_test)),label="Testing score")
 43 |     ax.set_xlabel("estimator num")
 44 |     ax.set_ylabel("score")
 45 |     ax.legend(loc="best")
 46 |     ax.set_title("AdaBoostRegressor")
 47 |     plt.show()
 48 | def test_AdaBoostRegressor_base_regr(*data):
 49 |     '''
 50 |     测试 AdaBoostRegressor 的预测性能随基础回归器数量的和基础回归器类型的影响
 51 | 
 52 |     :param data:  可变参数。它是一个元组，这里要求其元素依次为：训练样本集、测试样本集、训练样本的值、测试样本的值
 53 |     :return: None
 54 |     '''
 55 |     from sklearn.svm import  LinearSVR
 56 |     X_train,X_test,y_train,y_test=data
 57 |     fig=plt.figure()
 58 |     regrs=[ensemble.AdaBoostRegressor(), # 基础回归器为默认类型
 59 | 		ensemble.AdaBoostRegressor(base_estimator=LinearSVR(epsilon=0.01,C=100))] # 基础回归器为 LinearSVR
 60 |     labels=["Decision Tree Regressor","Linear SVM Regressor"]
 61 |     for i ,regr in enumerate(regrs):
 62 |         ax=fig.add_subplot(2,1,i+1)
 63 |         regr.fit(X_train,y_train)
 64 |         ## 绘图
 65 |         estimators_num=len(regr.estimators_)
 66 |         X=range(1,estimators_num+1)
 67 |         ax.plot(list(X),list(regr.staged_score(X_train,y_train)),label="Traing score")
 68 |         ax.plot(list(X),list(regr.staged_score(X_test,y_test)),label="Testing score")
 69 |         ax.set_xlabel("estimator num")
 70 |         ax.set_ylabel("score")
 71 |         ax.legend(loc="lower right")
 72 |         ax.set_ylim(-1,1)
 73 |         ax.set_title("Base_Estimator:%s"%labels[i])
 74 |     plt.suptitle("AdaBoostRegressor")
 75 |     plt.show()
 76 | def test_AdaBoostRegressor_learning_rate(*data):
 77 |     '''
 78 |     测试 AdaBoostRegressor 的预测性能随学习率的影响
 79 | 
 80 |     :param data:   可变参数。它是一个元组，这里要求其元素依次为：训练样本集、测试样本集、训练样本的值、测试样本的值
 81 |     :return: None
 82 |     '''
 83 |     X_train,X_test,y_train,y_test=data
 84 |     learning_rates=np.linspace(0.01,1)
 85 |     fig=plt.figure()
 86 |     ax=fig.add_subplot(1,1,1)
 87 |     traing_scores=[]
 88 |     testing_scores=[]
 89 |     for learning_rate in learning_rates:
 90 |         regr=ensemble.AdaBoostRegressor(learning_rate=learning_rate,n_estimators=500)
 91 |         regr.fit(X_train,y_train)
 92 |         traing_scores.append(regr.score(X_train,y_train))
 93 |         testing_scores.append(regr.score(X_test,y_test))
 94 |     ax.plot(learning_rates,traing_scores,label="Traing score")
 95 |     ax.plot(learning_rates,testing_scores,label="Testing score")
 96 |     ax.set_xlabel("learning rate")
 97 |     ax.set_ylabel("score")
 98 |     ax.legend(loc="best")
 99 |     ax.set_title("AdaBoostRegressor")
100 |     plt.show()
101 | def test_AdaBoostRegressor_loss(*data):
102 |     '''
103 |     测试 AdaBoostRegressor 的预测性能随损失函数类型的影响
104 | 
105 |     :param data:    可变参数。它是一个元组，这里要求其元素依次为：训练样本集、测试样本集、训练样本的值、测试样本的值
106 |     :return: None
107 |     '''
108 |     X_train,X_test,y_train,y_test=data
109 |     losses=['linear','square','exponential']
110 |     fig=plt.figure()
111 |     ax=fig.add_subplot(1,1,1)
112 |     for i ,loss in enumerate(losses):
113 |         regr=ensemble.AdaBoostRegressor(loss=loss,n_estimators=30)
114 |         regr.fit(X_train,y_train)
115 |         ## 绘图
116 |         estimators_num=len(regr.estimators_)
117 |         X=range(1,estimators_num+1)
118 |         ax.plot(list(X),list(regr.staged_score(X_train,y_train)),
119 | 			label="Traing score:loss=%s"%loss)
120 |         ax.plot(list(X),list(regr.staged_score(X_test,y_test)),
121 | 			label="Testing score:loss=%s"%loss)
122 |         ax.set_xlabel("estimator num")
123 |         ax.set_ylabel("score")
124 |         ax.legend(loc="lower right")
125 |         ax.set_ylim(-1,1)
126 |     plt.suptitle("AdaBoostRegressor")
127 |     plt.show()
128 | 
# Script entry point: load the dataset once, then run the selected demo.
# The commented-out calls are alternative demos; uncomment the one to run.
if __name__=='__main__':
    X_train,X_test,y_train,y_test=load_data_regression()# load the regression data
    test_AdaBoostRegressor(X_train,X_test,y_train,y_test)# run test_AdaBoostRegressor
    # test_AdaBoostRegressor_base_regr(X_train,X_test,y_train,y_test)# run test_AdaBoostRegressor_base_regr
    # test_AdaBoostRegressor_learning_rate(X_train,X_test,y_train,y_test)# run test_AdaBoostRegressor_learning_rate
    # test_AdaBoostRegressor_loss(X_train,X_test,y_train,y_test)# run test_AdaBoostRegressor_loss


--------------------------------------------------------------------------------
/chapters/Ensemble/randomforest_classifier.py:
--------------------------------------------------------------------------------
  1 | """
  2 |     集成学习
  3 |     ~~~~~~~~~~~~~~~~
  4 | 
  5 |     RandomForestClassifier
  6 | 
  7 |     :copyright: (c) 2016 by the huaxz1986.
  8 |     :license: lgpl-3.0, see LICENSE for more details.
  9 | """
 10 | 
 11 | import matplotlib.pyplot as plt
 12 | import numpy as np
 13 | from sklearn import datasets,cross_validation,ensemble
 14 | def load_data_classification():
 15 |     '''
 16 |     加载用于分类问题的数据集
 17 | 
 18 |     :return: 一个元组，用于分类问题。元组元素依次为：训练样本集、测试样本集、训练样本集对应的标记、测试样本集对应的标记
 19 |     '''
 20 |     digits=datasets.load_digits() # 使用 scikit-learn 自带的 digits 数据集
 21 |     return cross_validation.train_test_split(digits.data,digits.target,
 22 |     test_size=0.25,random_state=0,stratify=digits.target) # 分层采样拆分成训练集和测试集，测试集大小为原始数据集大小的 1/4
 23 | def test_RandomForestClassifier(*data):
 24 |     '''
 25 |     测试 RandomForestClassifier 的用法
 26 | 
 27 |     :param data: 可变参数。它是一个元组，这里要求其元素依次为：训练样本集、测试样本集、训练样本的标记、测试样本的标记
 28 |     :return: None
 29 |     '''
 30 |     X_train,X_test,y_train,y_test=data
 31 |     clf=ensemble.RandomForestClassifier()
 32 |     clf.fit(X_train,y_train)
 33 |     print("Traing Score:%f"%clf.score(X_train,y_train))
 34 |     print("Testing Score:%f"%clf.score(X_test,y_test))
 35 | def test_RandomForestClassifier_num(*data):
 36 |     '''
 37 |     测试 RandomForestClassifier 的预测性能随 n_estimators 参数的影响
 38 | 
 39 |     :param data: 可变参数。它是一个元组，这里要求其元素依次为：训练样本集、测试样本集、训练样本的标记、测试样本的标记
 40 |     :return: None
 41 |     '''
 42 |     X_train,X_test,y_train,y_test=data
 43 |     nums=np.arange(1,100,step=2)
 44 |     fig=plt.figure()
 45 |     ax=fig.add_subplot(1,1,1)
 46 |     testing_scores=[]
 47 |     training_scores=[]
 48 |     for num in nums:
 49 |         clf=ensemble.RandomForestClassifier(n_estimators=num)
 50 |         clf.fit(X_train,y_train)
 51 |         training_scores.append(clf.score(X_train,y_train))
 52 |         testing_scores.append(clf.score(X_test,y_test))
 53 |     ax.plot(nums,training_scores,label="Training Score")
 54 |     ax.plot(nums,testing_scores,label="Testing Score")
 55 |     ax.set_xlabel("estimator num")
 56 |     ax.set_ylabel("score")
 57 |     ax.legend(loc="lower right")
 58 |     ax.set_ylim(0,1.05)
 59 |     plt.suptitle("RandomForestClassifier")
 60 |     plt.show()
 61 | def test_RandomForestClassifier_max_depth(*data):
 62 |     '''
 63 |     测试 RandomForestClassifier 的预测性能随 max_depth 参数的影响
 64 | 
 65 |     :param data:  可变参数。它是一个元组，这里要求其元素依次为：训练样本集、测试样本集、训练样本的标记、测试样本的标记
 66 |     :return:  None
 67 |     '''
 68 |     X_train,X_test,y_train,y_test=data
 69 |     maxdepths=range(1,20)
 70 |     fig=plt.figure()
 71 |     ax=fig.add_subplot(1,1,1)
 72 |     testing_scores=[]
 73 |     training_scores=[]
 74 |     for max_depth in maxdepths:
 75 |         clf=ensemble.RandomForestClassifier(max_depth=max_depth)
 76 |         clf.fit(X_train,y_train)
 77 |         training_scores.append(clf.score(X_train,y_train))
 78 |         testing_scores.append(clf.score(X_test,y_test))
 79 |     ax.plot(maxdepths,training_scores,label="Training Score")
 80 |     ax.plot(maxdepths,testing_scores,label="Testing Score")
 81 |     ax.set_xlabel("max_depth")
 82 |     ax.set_ylabel("score")
 83 |     ax.legend(loc="lower right")
 84 |     ax.set_ylim(0,1.05)
 85 |     plt.suptitle("RandomForestClassifier")
 86 |     plt.show()
 87 | def test_RandomForestClassifier_max_features(*data):
 88 |     '''
 89 |     测试 RandomForestClassifier 的预测性能随 max_features 参数的影响
 90 | 
 91 |     :param data: 可变参数。它是一个元组，这里要求其元素依次为：训练样本集、测试样本集、训练样本的标记、测试样本的标记
 92 |     :return:  None
 93 |     '''
 94 |     X_train,X_test,y_train,y_test=data
 95 |     max_features=np.linspace(0.01,1.0)
 96 |     fig=plt.figure()
 97 |     ax=fig.add_subplot(1,1,1)
 98 |     testing_scores=[]
 99 |     training_scores=[]
100 |     for max_feature in max_features:
101 |         clf=ensemble.RandomForestClassifier(max_features=max_feature)
102 |         clf.fit(X_train,y_train)
103 |         training_scores.append(clf.score(X_train,y_train))
104 |         testing_scores.append(clf.score(X_test,y_test))
105 |     ax.plot(max_features,training_scores,label="Training Score")
106 |     ax.plot(max_features,testing_scores,label="Testing Score")
107 |     ax.set_xlabel("max_feature")
108 |     ax.set_ylabel("score")
109 |     ax.legend(loc="lower right")
110 |     ax.set_ylim(0,1.05)
111 |     plt.suptitle("RandomForestClassifier")
112 |     plt.show()
113 | if __name__=='__main__':
114 |     X_train,X_test,y_train,y_test=load_data_classification()  # 获取分类数据
115 |     test_RandomForestClassifier(X_train,X_test,y_train,y_test)  # 调用 test_RandomForestClassifier
116 |     # test_RandomForestClassifier_num(X_train,X_test,y_train,y_test)  # 调用 test_RandomForestClassifier_num
117 |     # test_RandomForestClassifier_max_depth(X_train,X_test,y_train,y_test)  # 调用 test_RandomForestClassifier_max_depth
118 |     # test_RandomForestClassifier_max_features(X_train,X_test,y_train,y_test)  # 调用 test_RandomForestClassifier_max_features


--------------------------------------------------------------------------------
/chapters/Ensemble/randomforest_regressor.py:
--------------------------------------------------------------------------------
  1 | # -*- coding: utf-8 -*-
  2 | """
  3 |     集成学习
  4 |     ~~~~~~~~~~~~~~~~
  5 | 
  6 |     RandomForestRegressor
  7 | 
  8 |     :copyright: (c) 2016 by the huaxz1986.
  9 |     :license: lgpl-3.0, see LICENSE for more details.
 10 | """
 11 | import matplotlib.pyplot as plt
 12 | import numpy as np
 13 | from sklearn import datasets,cross_validation,ensemble
 14 | def load_data_regression():
 15 |     '''
 16 |     加载用于回归问题的数据集
 17 | 
 18 |     :return: 一个元组，用于回归问题。元组元素依次为：训练样本集、测试样本集、训练样本集对应的值、测试样本集对应的值
 19 |     '''
 20 |     diabetes = datasets.load_diabetes() #使用 scikit-learn 自带的一个糖尿病病人的数据集
 21 |     return cross_validation.train_test_split(diabetes.data,diabetes.target,
 22 |     test_size=0.25,random_state=0) # 拆分成训练集和测试集，测试集大小为原始数据集大小的 1/4
 23 | def test_RandomForestRegressor(*data):
 24 |     '''
 25 |     测试 RandomForestRegressor 的用法
 26 | 
 27 |     :param data:  可变参数。它是一个元组，这里要求其元素依次为：训练样本集、测试样本集、训练样本的值、测试样本的值
 28 |     :return: None
 29 |     '''
 30 |     X_train,X_test,y_train,y_test=data
 31 |     regr=ensemble.RandomForestRegressor()
 32 |     regr.fit(X_train,y_train)
 33 |     print("Traing Score:%f"%regr.score(X_train,y_train))
 34 |     print("Testing Score:%f"%regr.score(X_test,y_test))
 35 | def test_RandomForestRegressor_num(*data):
 36 |     '''
 37 |     测试 RandomForestRegressor 的预测性能随  n_estimators 参数的影响
 38 | 
 39 |     :param data: 可变参数。它是一个元组，这里要求其元素依次为：训练样本集、测试样本集、训练样本的值、测试样本的值
 40 |     :return: None
 41 |     '''
 42 |     X_train,X_test,y_train,y_test=data
 43 |     nums=np.arange(1,100,step=2)
 44 |     fig=plt.figure()
 45 |     ax=fig.add_subplot(1,1,1)
 46 |     testing_scores=[]
 47 |     training_scores=[]
 48 |     for num in nums:
 49 |         regr=ensemble.RandomForestRegressor(n_estimators=num)
 50 |         regr.fit(X_train,y_train)
 51 |         training_scores.append(regr.score(X_train,y_train))
 52 |         testing_scores.append(regr.score(X_test,y_test))
 53 |     ax.plot(nums,training_scores,label="Training Score")
 54 |     ax.plot(nums,testing_scores,label="Testing Score")
 55 |     ax.set_xlabel("estimator num")
 56 |     ax.set_ylabel("score")
 57 |     ax.legend(loc="lower right")
 58 |     ax.set_ylim(-1,1)
 59 |     plt.suptitle("RandomForestRegressor")
 60 |     plt.show()
 61 | def test_RandomForestRegressor_max_depth(*data):
 62 |     '''
 63 |     测试 RandomForestRegressor 的预测性能随  max_depth 参数的影响
 64 | 
 65 |     :param data:  可变参数。它是一个元组，这里要求其元素依次为：训练样本集、测试样本集、训练样本的值、测试样本的值
 66 |     :return:  None
 67 |     '''
 68 |     X_train,X_test,y_train,y_test=data
 69 |     maxdepths=range(1,20)
 70 |     fig=plt.figure()
 71 |     ax=fig.add_subplot(1,1,1)
 72 |     testing_scores=[]
 73 |     training_scores=[]
 74 |     for max_depth in maxdepths:
 75 |         regr=ensemble.RandomForestRegressor(max_depth=max_depth)
 76 |         regr.fit(X_train,y_train)
 77 |         training_scores.append(regr.score(X_train,y_train))
 78 |         testing_scores.append(regr.score(X_test,y_test))
 79 |     ax.plot(maxdepths,training_scores,label="Training Score")
 80 |     ax.plot(maxdepths,testing_scores,label="Testing Score")
 81 |     ax.set_xlabel("max_depth")
 82 |     ax.set_ylabel("score")
 83 |     ax.legend(loc="lower right")
 84 |     ax.set_ylim(0,1.05)
 85 |     plt.suptitle("RandomForestRegressor")
 86 |     plt.show()
 87 | def test_RandomForestRegressor_max_features(*data):
 88 |     '''
 89 |    测试 RandomForestRegressor 的预测性能随  max_features 参数的影响
 90 | 
 91 |     :param data:  可变参数。它是一个元组，这里要求其元素依次为：训练样本集、测试样本集、训练样本的值、测试样本的值
 92 |     :return: None
 93 |     '''
 94 |     X_train,X_test,y_train,y_test=data
 95 |     max_features=np.linspace(0.01,1.0)
 96 |     fig=plt.figure()
 97 |     ax=fig.add_subplot(1,1,1)
 98 |     testing_scores=[]
 99 |     training_scores=[]
100 |     for max_feature in max_features:
101 |         regr=ensemble.RandomForestRegressor(max_features=max_feature)
102 |         regr.fit(X_train,y_train)
103 |         training_scores.append(regr.score(X_train,y_train))
104 |         testing_scores.append(regr.score(X_test,y_test))
105 |     ax.plot(max_features,training_scores,label="Training Score")
106 |     ax.plot(max_features,testing_scores,label="Testing Score")
107 |     ax.set_xlabel("max_feature")
108 |     ax.set_ylabel("score")
109 |     ax.legend(loc="lower right")
110 |     ax.set_ylim(0,1.05)
111 |     plt.suptitle("RandomForestRegressor")
112 |     plt.show()
113 | if __name__=='__main__':
114 |     X_train,X_test,y_train,y_test=load_data_regression() # 获取回归数据
115 |     test_RandomForestRegressor(X_train,X_test,y_train,y_test) # 调用 test_RandomForestRegressor
116 |     # test_RandomForestRegressor_num(X_train,X_test,y_train,y_test) # 调用 test_RandomForestRegressor_num
117 |     # test_RandomForestRegressor_max_depth(X_train,X_test,y_train,y_test) # 调用 test_RandomForestRegressor_max_depth
118 |     # test_RandomForestRegressor_max_features(X_train,X_test,y_train,y_test) # 调用 test_RandomForestRegressor_max_features
119 | 
120 | 


--------------------------------------------------------------------------------
/chapters/KNN_Dimension_Reduction/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/huaxz1986/git_book/cf539065568c2588df87729c1c76303016933449/chapters/KNN_Dimension_Reduction/__init__.py


--------------------------------------------------------------------------------
/chapters/KNN_Dimension_Reduction/__pycache__/__init__.cpython-35.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/huaxz1986/git_book/cf539065568c2588df87729c1c76303016933449/chapters/KNN_Dimension_Reduction/__pycache__/__init__.cpython-35.pyc


--------------------------------------------------------------------------------
/chapters/KNN_Dimension_Reduction/__pycache__/isomap.cpython-35.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/huaxz1986/git_book/cf539065568c2588df87729c1c76303016933449/chapters/KNN_Dimension_Reduction/__pycache__/isomap.cpython-35.pyc


--------------------------------------------------------------------------------
/chapters/KNN_Dimension_Reduction/__pycache__/kneighbors_classifier.cpython-35.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/huaxz1986/git_book/cf539065568c2588df87729c1c76303016933449/chapters/KNN_Dimension_Reduction/__pycache__/kneighbors_classifier.cpython-35.pyc


--------------------------------------------------------------------------------
/chapters/KNN_Dimension_Reduction/__pycache__/kneighbors_regressor.cpython-35.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/huaxz1986/git_book/cf539065568c2588df87729c1c76303016933449/chapters/KNN_Dimension_Reduction/__pycache__/kneighbors_regressor.cpython-35.pyc


--------------------------------------------------------------------------------
/chapters/KNN_Dimension_Reduction/__pycache__/kpca.cpython-35.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/huaxz1986/git_book/cf539065568c2588df87729c1c76303016933449/chapters/KNN_Dimension_Reduction/__pycache__/kpca.cpython-35.pyc


--------------------------------------------------------------------------------
/chapters/KNN_Dimension_Reduction/__pycache__/lle.cpython-35.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/huaxz1986/git_book/cf539065568c2588df87729c1c76303016933449/chapters/KNN_Dimension_Reduction/__pycache__/lle.cpython-35.pyc


--------------------------------------------------------------------------------
/chapters/KNN_Dimension_Reduction/__pycache__/mds.cpython-35.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/huaxz1986/git_book/cf539065568c2588df87729c1c76303016933449/chapters/KNN_Dimension_Reduction/__pycache__/mds.cpython-35.pyc


--------------------------------------------------------------------------------
/chapters/KNN_Dimension_Reduction/__pycache__/pca.cpython-35.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/huaxz1986/git_book/cf539065568c2588df87729c1c76303016933449/chapters/KNN_Dimension_Reduction/__pycache__/pca.cpython-35.pyc


--------------------------------------------------------------------------------
/chapters/KNN_Dimension_Reduction/isomap.py:
--------------------------------------------------------------------------------
  1 | # -*- coding: utf-8 -*-
  2 | """
  3 |     kNN和降维
  4 |     ~~~~~~~~~~
  5 | 
  6 |     Isomap
  7 | 
  8 |     :copyright: (c) 2016 by the huaxz1986.
  9 |     :license: lgpl-3.0, see LICENSE for more details.
 10 | """
 11 | import numpy as np
 12 | import matplotlib.pyplot as plt
 13 | from sklearn import   datasets,manifold
 14 | 
 15 | def load_data():
 16 |     '''
 17 |     加载用于降维的数据
 18 | 
 19 |     :return: 一个元组，依次为训练样本集和样本集的标记
 20 |     '''
 21 |     iris=datasets.load_iris()# 使用 scikit-learn 自带的 iris 数据集
 22 |     return  iris.data,iris.target
 23 | 
 24 | def test_Isomap(*data):
 25 |     '''
 26 |     测试 Isomap 的用法
 27 | 
 28 |     :param data: 可变参数。它是一个元组，这里要求其元素依次为：训练样本集、训练样本的标记
 29 |     :return: None
 30 |     '''
 31 |     X,y=data
 32 |     for n in [4,3,2,1]: # 依次考察降维目标为 4维、3维、2维、1维
 33 |         isomap=manifold.Isomap(n_components=n)
 34 |         isomap.fit(X)
 35 |         print('reconstruction_error(n_components=%d) : %s'%
 36 |             (n, isomap.reconstruction_error()))
 37 | def plot_Isomap_k(*data):
 38 |     '''
 39 |     测试 Isomap 中 n_neighbors 参数的影响，其中降维至 2维
 40 | 
 41 |     :param data: 可变参数。它是一个元组，这里要求其元素依次为：训练样本集、训练样本的标记
 42 |     :return: None
 43 |     '''
 44 |     X,y=data
 45 |     Ks=[1,5,25,y.size-1] # n_neighbors参数的候选值的集合
 46 | 
 47 |     fig=plt.figure()
 48 |     for i, k in enumerate(Ks):
 49 |         isomap=manifold.Isomap(n_components=2,n_neighbors=k)
 50 |         X_r=isomap.fit_transform(X)#原始数据集转换到二维
 51 | 
 52 |         ax=fig.add_subplot(2,2,i+1)## 两行两列，每个单元显示不同 n_neighbors 参数的 Isomap 的效果图
 53 |         colors=((1,0,0),(0,1,0),(0,0,1),(0.5,0.5,0),(0,0.5,0.5),(0.5,0,0.5),
 54 |             (0.4,0.6,0),(0.6,0.4,0),(0,0.6,0.4),(0.5,0.3,0.2),)# 颜色集合，不同标记的样本染不同的颜色
 55 |         for label ,color in zip( np.unique(y),colors):
 56 |             position=y==label
 57 |             ax.scatter(X_r[position,0],X_r[position,1],label="target= %d"
 58 |             %label,color=color)
 59 | 
 60 |         ax.set_xlabel("X[0]")
 61 |         ax.set_ylabel("X[1]")
 62 |         ax.legend(loc="best")
 63 |         ax.set_title("k=%d"%k)
 64 |     plt.suptitle("Isomap")
 65 |     plt.show()
 66 | def plot_Isomap_k_d1(*data):
 67 |     '''
 68 |     测试 Isomap 中 n_neighbors 参数的影响，其中降维至 1维
 69 | 
 70 |     :param data: 可变参数。它是一个元组，这里要求其元素依次为：训练样本集、训练样本的标记
 71 |     :return: None
 72 |     '''
 73 |     X,y=data
 74 |     Ks=[1,5,25,y.size-1]# n_neighbors参数的候选值的集合
 75 | 
 76 |     fig=plt.figure()
 77 |     for i, k in enumerate(Ks):
 78 |         isomap=manifold.Isomap(n_components=1,n_neighbors=k)
 79 |         X_r=isomap.fit_transform(X)#原始数据集转换到 1 维
 80 | 
 81 |         ax=fig.add_subplot(2,2,i+1)## 两行两列，每个单元显示不同 n_neighbors 参数的 Isomap 的效果图
 82 |         colors=((1,0,0),(0,1,0),(0,0,1),(0.5,0.5,0),(0,0.5,0.5),(0.5,0,0.5),
 83 |             (0.4,0.6,0),(0.6,0.4,0),(0,0.6,0.4),(0.5,0.3,0.2),)# 颜色集合，不同标记的样本染不同的颜色
 84 |         for label ,color in zip( np.unique(y),colors):
 85 |             position=y==label
 86 |             ax.scatter(X_r[position],np.zeros_like(X_r[position]),
 87 |             label="target= %d"%label,color=color)
 88 | 
 89 |         ax.set_xlabel("X")
 90 |         ax.set_ylabel("Y")
 91 |         ax.legend(loc="best")
 92 |         ax.set_title("k=%d"%k)
 93 |     plt.suptitle("Isomap")
 94 |     plt.show()
 95 | if __name__=='__main__':
 96 |     X,y=load_data() # 产生用于降维的数据集
 97 |     test_Isomap(X,y)   # 调用 test_Isomap
 98 |     #plot_Isomap_k(X,y)   # 调用 plot_Isomap_k
 99 |     #plot_Isomap_k_d1(X,y)   # 调用 plot_Isomap_k_d1
100 | 


--------------------------------------------------------------------------------
/chapters/KNN_Dimension_Reduction/kneighbors_classifier.py:
--------------------------------------------------------------------------------
  1 | # -*- coding: utf-8 -*-
  2 | """
  3 |     kNN和降维
  4 |     ~~~~~~~~~~
  5 | 
  6 |     KNN分类和回归模型
  7 | 
  8 |     :copyright: (c) 2016 by the huaxz1986.
  9 |     :license: lgpl-3.0, see LICENSE for more details.
 10 | """
 11 | import numpy as np
 12 | import matplotlib.pyplot as plt
 13 | from sklearn import neighbors, datasets,cross_validation
 14 | 
 15 | def load_classification_data():
 16 |     '''
 17 |     加载分类模型使用的数据集。
 18 | 
 19 |     :return: 一个元组，依次为：训练样本集、测试样本集、训练样本的标记、测试样本的标记
 20 |     '''
 21 |     digits=datasets.load_digits() # 使用 scikit-learn 自带的手写识别数据集 Digit Dataset
 22 |     X_train=digits.data
 23 |     y_train=digits.target
 24 |     return cross_validation.train_test_split(X_train, y_train,test_size=0.25,
 25 |             random_state=0,stratify=y_train) # 进行分层采样拆分，测试集大小占 1/4
 26 | def test_KNeighborsClassifier(*data):
 27 |     '''
 28 |     测试 KNeighborsClassifier 的用法
 29 | 
 30 |     :param data: 可变参数。它是一个元组，这里要求其元素依次为：训练样本集、测试样本集、训练样本的标记、测试样本的标记
 31 |     :return: None
 32 |     '''
 33 |     X_train,X_test,y_train,y_test=data
 34 |     clf=neighbors.KNeighborsClassifier()
 35 |     clf.fit(X_train,y_train)
 36 |     print("Training Score:%f"%clf.score(X_train,y_train))
 37 |     print("Testing Score:%f"%clf.score(X_test,y_test))
 38 | def test_KNeighborsClassifier_k_w(*data):
 39 |     '''
 40 |     测试 KNeighborsClassifier 中 n_neighbors 和 weights 参数的影响
 41 | 
 42 |     :param data: 可变参数。它是一个元组，这里要求其元素依次为：训练样本集、测试样本集、训练样本的标记、测试样本的标记
 43 |     :return: None
 44 |     '''
 45 |     X_train,X_test,y_train,y_test=data
 46 |     Ks=np.linspace(1,y_train.size,num=100,endpoint=False,dtype='int')
 47 |     weights=['uniform','distance']
 48 | 
 49 |     fig=plt.figure()
 50 |     ax=fig.add_subplot(1,1,1)
 51 |     ### 绘制不同 weights 下， 预测得分随 n_neighbors 的曲线
 52 |     for weight in weights:
 53 |         training_scores=[]
 54 |         testing_scores=[]
 55 |         for K in Ks:
 56 |             clf=neighbors.KNeighborsClassifier(weights=weight,n_neighbors=K)
 57 |             clf.fit(X_train,y_train)
 58 |             testing_scores.append(clf.score(X_test,y_test))
 59 |             training_scores.append(clf.score(X_train,y_train))
 60 |         ax.plot(Ks,testing_scores,label="testing score:weight=%s"%weight)
 61 |         ax.plot(Ks,training_scores,label="training score:weight=%s"%weight)
 62 |     ax.legend(loc='best')
 63 |     ax.set_xlabel("K")
 64 |     ax.set_ylabel("score")
 65 |     ax.set_ylim(0,1.05)
 66 |     ax.set_title("KNeighborsClassifier")
 67 |     plt.show()
 68 | def test_KNeighborsClassifier_k_p(*data):
 69 |     '''
 70 |     测试 KNeighborsClassifier 中 n_neighbors 和 p 参数的影响
 71 | 
 72 |     :param data: 可变参数。它是一个元组，这里要求其元素依次为：训练样本集、测试样本集、训练样本的标记、测试样本的标记
 73 |     :return: None
 74 |     '''
 75 |     X_train,X_test,y_train,y_test=data
 76 |     Ks=np.linspace(1,y_train.size,endpoint=False,dtype='int')
 77 |     Ps=[1,2,10]
 78 | 
 79 |     fig=plt.figure()
 80 |     ax=fig.add_subplot(1,1,1)
 81 |     ### 绘制不同 p 下， 预测得分随 n_neighbors 的曲线
 82 |     for P in Ps:
 83 |         training_scores=[]
 84 |         testing_scores=[]
 85 |         for K in Ks:
 86 |             clf=neighbors.KNeighborsClassifier(p=P,n_neighbors=K)
 87 |             clf.fit(X_train,y_train)
 88 |             testing_scores.append(clf.score(X_test,y_test))
 89 |             training_scores.append(clf.score(X_train,y_train))
 90 |         ax.plot(Ks,testing_scores,label="testing score:p=%d"%P)
 91 |         ax.plot(Ks,training_scores,label="training score:p=%d"%P)
 92 |     ax.legend(loc='best')
 93 |     ax.set_xlabel("K")
 94 |     ax.set_ylabel("score")
 95 |     ax.set_ylim(0,1.05)
 96 |     ax.set_title("KNeighborsClassifier")
 97 |     plt.show()
 98 | 
 99 | if __name__=='__main__':
100 |     X_train,X_test,y_train,y_test=load_classification_data() # 获取分类模型的数据集
101 |     #test_KNeighborsClassifier(X_train,X_test,y_train,y_test) # 调用 test_KNeighborsClassifier
102 |     #test_KNeighborsClassifier_k_w(X_train,X_test,y_train,y_test)# 调用 test_KNeighborsClassifier_k_w
103 |     #test_KNeighborsClassifier_k_p(X_train,X_test,y_train,y_test)# 调用 test_KNeighborsClassifier_k_p


--------------------------------------------------------------------------------
/chapters/KNN_Dimension_Reduction/kneighbors_regressor.py:
--------------------------------------------------------------------------------
  1 | # -*- coding: utf-8 -*-
  2 | """
  3 |     kNN和降维
  4 |     ~~~~~~~~~~
  5 | 
  6 |     KNeighborsRegressor
  7 | 
  8 |     :copyright: (c) 2016 by the huaxz1986.
  9 |     :license: lgpl-3.0, see LICENSE for more details.
 10 | """
 11 | import numpy as np
 12 | import matplotlib.pyplot as plt
 13 | from sklearn import neighbors, cross_validation
 14 | 
 15 | def create_regression_data(n):
 16 |     '''
 17 |     创建回归模型使用的数据集
 18 | 
 19 |     :param n: 数据集大小
 20 |     :return: 一个元组，依次为：训练样本集、测试样本集、训练样本的值、测试样本的值
 21 |     '''
 22 |     X =5 * np.random.rand(n, 1)
 23 |     y = np.sin(X).ravel()
 24 |     y[::5] += 1 * (0.5 - np.random.rand(int(n/5))) # 每隔 5 个样本就在样本的值上添加噪音
 25 |     return cross_validation.train_test_split(X, y,test_size=0.25,random_state=0)# 进行简单拆分，测试集大小占 1/4
 26 | 
 27 | def test_KNeighborsRegressor(*data):
 28 |     '''
 29 |     测试 KNeighborsRegressor 的用法
 30 | 
 31 |     :param data: 可变参数。它是一个元组，这里要求其元素依次为：训练样本集、测试样本集、训练样本的值、测试样本的值
 32 |     :return: None
 33 |     '''
 34 |     X_train,X_test,y_train,y_test=data
 35 |     regr=neighbors.KNeighborsRegressor()
 36 |     regr.fit(X_train,y_train)
 37 |     print("Training Score:%f"%regr.score(X_train,y_train))
 38 |     print("Testing Score:%f"%regr.score(X_test,y_test))
 39 | def test_KNeighborsRegressor_k_w(*data):
 40 |     '''
 41 |     测试 KNeighborsRegressor 中 n_neighbors 和 weights 参数的影响
 42 | 
 43 |     :param data: 可变参数。它是一个元组，这里要求其元素依次为：训练样本集、测试样本集、训练样本的值、测试样本的值
 44 |     :return: None
 45 |     '''
 46 |     X_train,X_test,y_train,y_test=data
 47 |     Ks=np.linspace(1,y_train.size,num=100,endpoint=False,dtype='int')
 48 |     weights=['uniform','distance']
 49 | 
 50 |     fig=plt.figure()
 51 |     ax=fig.add_subplot(1,1,1)
 52 |     ### 绘制不同 weights 下， 预测得分随 n_neighbors 的曲线
 53 |     for weight in weights:
 54 |         training_scores=[]
 55 |         testing_scores=[]
 56 |         for K in Ks:
 57 |             regr=neighbors.KNeighborsRegressor(weights=weight,n_neighbors=K)
 58 |             regr.fit(X_train,y_train)
 59 |             testing_scores.append(regr.score(X_test,y_test))
 60 |             training_scores.append(regr.score(X_train,y_train))
 61 |         ax.plot(Ks,testing_scores,label="testing score:weight=%s"%weight)
 62 |         ax.plot(Ks,training_scores,label="training score:weight=%s"%weight)
 63 |     ax.legend(loc='best')
 64 |     ax.set_xlabel("K")
 65 |     ax.set_ylabel("score")
 66 |     ax.set_ylim(0,1.05)
 67 |     ax.set_title("KNeighborsRegressor")
 68 |     plt.show()
 69 | def test_KNeighborsRegressor_k_p(*data):
 70 |     '''
 71 |     测试 KNeighborsRegressor 中 n_neighbors 和 p 参数的影响
 72 | 
 73 |     :param data: 可变参数。它是一个元组，这里要求其元素依次为：训练样本集、测试样本集、训练样本的值、测试样本的值
 74 |     :return: None
 75 |     '''
 76 |     X_train,X_test,y_train,y_test=data
 77 |     Ks=np.linspace(1,y_train.size,endpoint=False,dtype='int')
 78 |     Ps=[1,2,10]
 79 | 
 80 |     fig=plt.figure()
 81 |     ax=fig.add_subplot(1,1,1)
 82 |     ### 绘制不同 p 下， 预测得分随 n_neighbors 的曲线
 83 |     for P in Ps:
 84 |         training_scores=[]
 85 |         testing_scores=[]
 86 |         for K in Ks:
 87 |             regr=neighbors.KNeighborsRegressor(p=P,n_neighbors=K)
 88 |             regr.fit(X_train,y_train)
 89 |             testing_scores.append(regr.score(X_test,y_test))
 90 |             training_scores.append(regr.score(X_train,y_train))
 91 |         ax.plot(Ks,testing_scores,label="testing score:p=%d"%P)
 92 |         ax.plot(Ks,training_scores,label="training score:p=%d"%P)
 93 |     ax.legend(loc='best')
 94 |     ax.set_xlabel("K")
 95 |     ax.set_ylabel("score")
 96 |     ax.set_ylim(0,1.05)
 97 |     ax.set_title("KNeighborsRegressor")
 98 |     plt.show()
 99 | 
100 | if __name__=='__main__':
101 |     X_train,X_test,y_train,y_test=create_regression_data(1000)# 获取回归模型的数据集
102 |     test_KNeighborsRegressor(X_train,X_test,y_train,y_test)# 调用 test_KNeighborsRegressor
103 |     #test_KNeighborsRegressor_k_w(X_train,X_test,y_train,y_test)# 调用 test_KNeighborsRegressor_k_w
104 |     #test_KNeighborsRegressor_k_p(X_train,X_test,y_train,y_test)# 调用 test_KNeighborsRegressor_k_p


--------------------------------------------------------------------------------
/chapters/KNN_Dimension_Reduction/lle.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | """
 3 |     kNN和降维
 4 |     ~~~~~~~~~~
 5 | 
 6 |     LocallyLinearEmbedding
 7 | 
 8 |     :copyright: (c) 2016 by the huaxz1986.
 9 |     :license: lgpl-3.0, see LICENSE for more details.
10 | """
11 | import numpy as np
12 | import matplotlib.pyplot as plt
13 | from sklearn import   datasets,manifold
14 | 
15 | def load_data():
16 |     '''
17 |     加载用于降维的数据
18 | 
19 |     :return: 一个元组，依次为训练样本集和样本集的标记
20 |     '''
21 |     iris=datasets.load_iris()# 使用 scikit-learn 自带的 iris 数据集
22 |     return  iris.data,iris.target
23 | def test_LocallyLinearEmbedding(*data):
24 |     '''
25 |     测试 LocallyLinearEmbedding 的用法
26 | 
27 |     :param data: 可变参数。它是一个元组，这里要求其元素依次为：训练样本集、训练样本的标记
28 |     :return: None
29 |     '''
30 |     X,y=data
31 |     for n in [4,3,2,1]:# 依次考察降维目标为 4维、3维、2维、1维
32 |         lle=manifold.LocallyLinearEmbedding(n_components=n)
33 |         lle.fit(X)
34 |         print('reconstruction_error(n_components=%d) : %s'%
35 |             (n, lle.reconstruction_error_))
36 | def plot_LocallyLinearEmbedding_k(*data):
37 |     '''
38 |     测试 LocallyLinearEmbedding 中 n_neighbors 参数的影响，其中降维至 2维
39 | 
40 |     :param data: 可变参数。它是一个元组，这里要求其元素依次为：训练样本集、训练样本的标记
41 |     :return: None
42 |     '''
43 |     X,y=data
44 |     Ks=[1,5,25,y.size-1]# n_neighbors参数的候选值的集合
45 | 
46 |     fig=plt.figure()
47 |     for i, k in enumerate(Ks):
48 |         lle=manifold.LocallyLinearEmbedding(n_components=2,n_neighbors=k)
49 |         X_r=lle.fit_transform(X)#原始数据集转换到二维
50 | 
51 |         ax=fig.add_subplot(2,2,i+1)## 两行两列，每个单元显示不同 n_neighbors 参数的 LocallyLinearEmbedding 的效果图
52 |         colors=((1,0,0),(0,1,0),(0,0,1),(0.5,0.5,0),(0,0.5,0.5),(0.5,0,0.5),
53 |             (0.4,0.6,0),(0.6,0.4,0),(0,0.6,0.4),(0.5,0.3,0.2),)# 颜色集合，不同标记的样本染不同的颜色
54 |         for label ,color in zip( np.unique(y),colors):
55 |             position=y==label
56 |             ax.scatter(X_r[position,0],X_r[position,1],label="target= %d"
57 |             %label,color=color)
58 | 
59 |         ax.set_xlabel("X[0]")
60 |         ax.set_ylabel("X[1]")
61 |         ax.legend(loc="best")
62 |         ax.set_title("k=%d"%k)
63 |     plt.suptitle("LocallyLinearEmbedding")
64 |     plt.show()
65 | def plot_LocallyLinearEmbedding_k_d1(*data):
66 |     '''
67 |     测试 LocallyLinearEmbedding 中 n_neighbors 参数的影响，其中降维至 1维
68 | 
69 |     :param data: 可变参数。它是一个元组，这里要求其元素依次为：训练样本集、训练样本的标记
70 |     :return: None
71 |     '''
72 |     X,y=data
73 |     Ks=[1,5,25,y.size-1]# n_neighbors参数的候选值的集合
74 | 
75 |     fig=plt.figure()
76 |     for i, k in enumerate(Ks):
77 |         lle=manifold.LocallyLinearEmbedding(n_components=1,n_neighbors=k)
78 |         X_r=lle.fit_transform(X)#原始数据集转换到 1 维
79 | 
80 |         ax=fig.add_subplot(2,2,i+1)## 两行两列，每个单元显示不同 n_neighbors 参数的 LocallyLinearEmbedding 的效果图
81 |         colors=((1,0,0),(0,1,0),(0,0,1),(0.5,0.5,0),(0,0.5,0.5),(0.5,0,0.5),
82 |             (0.4,0.6,0),(0.6,0.4,0),(0,0.6,0.4),(0.5,0.3,0.2),)# 颜色集合，不同标记的样本染不同的颜色
83 |         for label ,color in zip( np.unique(y),colors):
84 |             position=y==label
85 |             ax.scatter(X_r[position],np.zeros_like(X_r[position]),
86 |             label="target= %d"%label,color=color)
87 | 
88 |         ax.set_xlabel("X")
89 |         ax.set_ylabel("Y")
90 |         ax.legend(loc="best")
91 |         ax.set_title("k=%d"%k)
92 |     plt.suptitle("LocallyLinearEmbedding")
93 |     plt.show()
94 | if __name__=='__main__':
95 |     X,y=load_data() # 产生用于降维的数据集
96 |     test_LocallyLinearEmbedding(X,y)   # 调用 test_LocallyLinearEmbedding
97 |     #plot_LocallyLinearEmbedding_k(X,y)   # 调用 plot_LocallyLinearEmbedding_k
98 |     #plot_LocallyLinearEmbedding_k_d1(X,y)   # 调用 plot_LocallyLinearEmbedding_k_d1


--------------------------------------------------------------------------------
/chapters/KNN_Dimension_Reduction/mds.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | """
 3 |     kNN和降维
 4 |     ~~~~~~~~~~
 5 | 
 6 |     MDS
 7 | 
 8 |     :copyright: (c) 2016 by the huaxz1986.
 9 |     :license: lgpl-3.0, see LICENSE for more details.
10 | """
11 | import numpy as np
12 | import matplotlib.pyplot as plt
13 | from sklearn import   datasets,manifold
14 | 
15 | def load_data():
16 |     '''
17 |     加载用于降维的数据
18 | 
19 |     :return: 一个元组，依次为训练样本集和样本集的标记
20 |     '''
21 |     iris=datasets.load_iris()# 使用 scikit-learn 自带的 iris 数据集
22 |     return  iris.data,iris.target
23 | 
24 | def test_MDS(*data):
25 |     '''
26 |     测试 MDS 的用法
27 | 
28 |     :param data: 可变参数。它是一个元组，这里要求其元素依次为：训练样本集、训练样本的标记
29 |     :return: None
30 |     '''
31 |     X,y=data
32 |     for n in [4,3,2,1]: # 依次考察降维目标为 4维、3维、2维、1维
33 |         mds=manifold.MDS(n_components=n)
34 |         mds.fit(X)
35 |         print('stress(n_components=%d) : %s'% (n, str(mds.stress_)))
36 | def plot_MDS(*data):
37 |     '''
38 |     绘制经过 使用 MDS 降维到二维之后的样本点
39 | 
40 |     :param data: 可变参数。它是一个元组，这里要求其元素依次为：训练样本集、训练样本的标记
41 |     :return: None
42 |     '''
43 |     X,y=data
44 |     mds=manifold.MDS(n_components=2)
45 |     X_r=mds.fit_transform(X) #原始数据集转换到二维
46 | 
47 |     ### 绘制二维图形
48 |     fig=plt.figure()
49 |     ax=fig.add_subplot(1,1,1)
50 |     colors=((1,0,0),(0,1,0),(0,0,1),(0.5,0.5,0),(0,0.5,0.5),(0.5,0,0.5),
51 |         (0.4,0.6,0),(0.6,0.4,0),(0,0.6,0.4),(0.5,0.3,0.2),)# 颜色集合，不同标记的样本染不同的颜色
52 |     for label ,color in zip( np.unique(y),colors):
53 |         position=y==label
54 |         ax.scatter(X_r[position,0],X_r[position,1],label="target= %d"%label,color=color)
55 | 
56 |     ax.set_xlabel("X[0]")
57 |     ax.set_ylabel("X[1]")
58 |     ax.legend(loc="best")
59 |     ax.set_title("MDS")
60 |     plt.show()
61 | if __name__=='__main__':
62 |     X,y=load_data() # 产生用于降维的数据集
63 |     test_MDS(X,y)   # 调用 test_MDS
64 |     #plot_MDS(X,y)   # 调用 plot_MDS
65 | 


--------------------------------------------------------------------------------
/chapters/KNN_Dimension_Reduction/pca.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | """
 3 |     kNN和降维
 4 |     ~~~~~~~~~~
 5 | 
 6 |     PCA
 7 | 
 8 |     :copyright: (c) 2016 by the huaxz1986.
 9 |     :license: lgpl-3.0, see LICENSE for more details.
10 | """
11 | import numpy as np
12 | import matplotlib.pyplot as plt
13 | from sklearn import   datasets,decomposition
14 | 
15 | def load_data():
16 |     '''
17 |     加载用于降维的数据
18 | 
19 |     :return: 一个元组，依次为训练样本集和样本集的标记
20 |     '''
21 |     iris=datasets.load_iris()# 使用 scikit-learn 自带的 iris 数据集
22 |     return  iris.data,iris.target
23 | 
24 | def test_PCA(*data):
25 |     '''
26 |     测试 PCA 的用法
27 | 
28 |     :param data:  可变参数。它是一个元组，这里要求其元素依次为：训练样本集、训练样本的标记
29 |     :return: None
30 |     '''
31 |     X,y=data
32 |     pca=decomposition.PCA(n_components=None) # 使用默认的 n_components
33 |     pca.fit(X)
34 |     print('explained variance ratio : %s'% str(pca.explained_variance_ratio_))
35 | def plot_PCA(*data):
36 |     '''
37 |     绘制经过 PCA 降维到二维之后的样本点
38 | 
39 |     :param data: 可变参数。它是一个元组，这里要求其元素依次为：训练样本集、训练样本的标记
40 |     :return: None
41 |     '''
42 |     X,y=data
43 |     pca=decomposition.PCA(n_components=2) # 目标维度为2维
44 |     pca.fit(X)
45 |     X_r=pca.transform(X) # 原始数据集转换到二维
46 |     ###### 绘制二维数据 ########
47 |     fig=plt.figure()
48 |     ax=fig.add_subplot(1,1,1)
49 |     colors=((1,0,0),(0,1,0),(0,0,1),(0.5,0.5,0),(0,0.5,0.5),(0.5,0,0.5),
50 |         (0.4,0.6,0),(0.6,0.4,0),(0,0.6,0.4),(0.5,0.3,0.2),) # 颜色集合，不同标记的样本染不同的颜色
51 |     for label ,color in zip( np.unique(y),colors):
52 |         position=y==label
53 |         ax.scatter(X_r[position,0],X_r[position,1],label="target= %d"%label,color=color)
54 | 
55 |     ax.set_xlabel("X[0]")
56 |     ax.set_ylabel("Y[0]")
57 |     ax.legend(loc="best")
58 |     ax.set_title("PCA")
59 |     plt.show()
60 | if __name__=='__main__':
61 |     X,y=load_data() # 产生用于降维的数据集
62 |     test_PCA(X,y)   # 调用 test_PCA
63 |     #plot_PCA(X,y)   # 调用 plot_PCA
64 | 


--------------------------------------------------------------------------------
/chapters/Kaggle/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/huaxz1986/git_book/cf539065568c2588df87729c1c76303016933449/chapters/Kaggle/__init__.py


--------------------------------------------------------------------------------
/chapters/Kaggle/__pycache__/__init__.cpython-35.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/huaxz1986/git_book/cf539065568c2588df87729c1c76303016933449/chapters/Kaggle/__pycache__/__init__.cpython-35.pyc


--------------------------------------------------------------------------------
/chapters/Kaggle/__pycache__/data_clean.cpython-35.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/huaxz1986/git_book/cf539065568c2588df87729c1c76303016933449/chapters/Kaggle/__pycache__/data_clean.cpython-35.pyc


--------------------------------------------------------------------------------
/chapters/Kaggle/__pycache__/data_preprocess.cpython-35.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/huaxz1986/git_book/cf539065568c2588df87729c1c76303016933449/chapters/Kaggle/__pycache__/data_preprocess.cpython-35.pyc


--------------------------------------------------------------------------------
/chapters/Kaggle/__pycache__/grid_search.cpython-35.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/huaxz1986/git_book/cf539065568c2588df87729c1c76303016933449/chapters/Kaggle/__pycache__/grid_search.cpython-35.pyc


--------------------------------------------------------------------------------
/chapters/Kaggle/__pycache__/learning_validation_curve.cpython-35.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/huaxz1986/git_book/cf539065568c2588df87729c1c76303016933449/chapters/Kaggle/__pycache__/learning_validation_curve.cpython-35.pyc


--------------------------------------------------------------------------------
/chapters/Kaggle/grid_search.py:
--------------------------------------------------------------------------------
 1 | import  scipy
 2 | from sklearn.ensemble import GradientBoostingClassifier
 3 | from sklearn.model_selection import GridSearchCV
 4 | from sklearn.metrics import classification_report
 5 | from data_clean import current_time
 6 | from sklearn.model_selection import train_test_split
 7 | from data_preprocess import Data_Preprocesser,Data_Cleaner
 8 | 
 9 | def grid_search(tuned_parameters,data,train_size,seed):
10 |     '''
11 |     参数优化
12 | 
13 |     :param tuned_parameters: 待优化的参数字典
14 |     :param data: 数据集
15 |     :param train_size:训练集大小
16 |     :param seed:用于生成随机数种子
17 |     :return:
18 |     '''
19 | 
20 |     print("----- Begin run grid_search at %s -------"%current_time())
21 |     X=data[:,:-1]
22 |     y=data[:,-1]
23 |     X_train,X_test,y_train,y_test=train_test_split(X,y,train_size=train_size,stratify=data[:,-1],random_state=seed)
24 |     clf=GridSearchCV(GradientBoostingClassifier(),tuned_parameters,cv=10,scoring="roc_auc")
25 |     clf.fit(X_train,y_train)
26 |     print("Best parameters set found:",clf.best_params_)
27 |     print("Randomized Grid scores:")
28 |     for params, mean_score, scores in clf.grid_scores_:
29 |         print("\t%0.3f (+/-%0.03f) for %s" % (mean_score, scores.std() * 2, params))
30 |         print("Optimized Score:",clf.score(X_test,y_test))
31 |         print("Detailed classification report:")
32 |         y_true, y_pred = y_test, clf.predict(X_test)
33 |         print(classification_report(y_true, y_pred))
34 |     print("----- End run grid_search at %s -------"%current_time())
35 | 
36 | if __name__=='__main__':
37 |     clearner=Data_Cleaner("./data/people.csv",'./data/act_train.csv','./data/act_test.csv') # cleaner built from the three CSV paths
38 |     result=clearner.load_data()
39 |     preprocessor=Data_Preprocesser(*result) # the cleaner's output is unpacked into the preprocessor
40 |     train_datas,test_datas=preprocessor.load_data()
41 |     tuned_parameters={'subsample':[0.3,0.35,0.4,0.45,0.5,0.55,0.6], # parameter grid passed to grid_search
42 |                       'n_estimators':[30,35,50,100,150,200]
43 |         ,
44 |                       'max_depth':[2,4,8,16,32]}
45 |     grid_search(tuned_parameters,train_datas['type 7'],train_size=0.75,seed=0) # only the 'type 7' entry is tuned here


--------------------------------------------------------------------------------
/chapters/Linear/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/huaxz1986/git_book/cf539065568c2588df87729c1c76303016933449/chapters/Linear/__init__.py


--------------------------------------------------------------------------------
/chapters/Linear/__pycache__/__init__.cpython-35.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/huaxz1986/git_book/cf539065568c2588df87729c1c76303016933449/chapters/Linear/__pycache__/__init__.cpython-35.pyc


--------------------------------------------------------------------------------
/chapters/Linear/__pycache__/elasticnet.cpython-35.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/huaxz1986/git_book/cf539065568c2588df87729c1c76303016933449/chapters/Linear/__pycache__/elasticnet.cpython-35.pyc


--------------------------------------------------------------------------------
/chapters/Linear/__pycache__/lasso.cpython-35.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/huaxz1986/git_book/cf539065568c2588df87729c1c76303016933449/chapters/Linear/__pycache__/lasso.cpython-35.pyc


--------------------------------------------------------------------------------
/chapters/Linear/__pycache__/lda.cpython-35.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/huaxz1986/git_book/cf539065568c2588df87729c1c76303016933449/chapters/Linear/__pycache__/lda.cpython-35.pyc


--------------------------------------------------------------------------------
/chapters/Linear/__pycache__/linear_regression.cpython-35.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/huaxz1986/git_book/cf539065568c2588df87729c1c76303016933449/chapters/Linear/__pycache__/linear_regression.cpython-35.pyc


--------------------------------------------------------------------------------
/chapters/Linear/__pycache__/logistic_regression.cpython-35.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/huaxz1986/git_book/cf539065568c2588df87729c1c76303016933449/chapters/Linear/__pycache__/logistic_regression.cpython-35.pyc


--------------------------------------------------------------------------------
/chapters/Linear/__pycache__/ridge.cpython-35.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/huaxz1986/git_book/cf539065568c2588df87729c1c76303016933449/chapters/Linear/__pycache__/ridge.cpython-35.pyc


--------------------------------------------------------------------------------
/chapters/Linear/elasticnet.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | """
 3 |     广义线性模型
 4 |     ~~~~~~~~~~~~~~~~~~~~~~~~~~
 5 | 
 6 |     ElasticNet
 7 | 
 8 |     :copyright: (c) 2016 by the huaxz1986.
 9 |     :license: lgpl-3.0, see LICENSE for more details.
10 | """
11 | import matplotlib.pyplot as plt
12 | import numpy as np
13 | from sklearn import datasets, linear_model,cross_validation
14 | 
15 | def load_data():
16 |     '''
17 |     加载用于回归问题的数据集
18 | 
19 |     :return: 一个元组，用于回归问题。元组元素依次为：训练样本集、测试样本集、训练样本集对应的值、测试样本集对应的值
20 |     '''
21 |     diabetes = datasets.load_diabetes()#使用 scikit-learn 自带的一个糖尿病病人的数据集
22 |     return cross_validation.train_test_split(datasets.data,diabetes.target,
23 | 		test_size=0.25,random_state=0) # 拆分成训练集和测试集，测试集大小为原始数据集大小的 1/4
24 | 
25 | def test_ElasticNet(*data):
26 |     '''
27 |     测试 ElasticNet 的用法
28 | 
29 |     :param data: 可变参数。它是一个元组，这里要求其元素依次为：训练样本集、测试样本集、训练样本的值、测试样本的值
30 |     :return: None
31 |     '''
32 |     X_train,X_test,y_train,y_test=data
33 |     regr = linear_model.ElasticNet()
34 |     regr.fit(X_train, y_train)
35 |     print('Coefficients:%s, intercept %.2f'%(regr.coef_,regr.intercept_))
36 |     print("Residual sum of squares: %.2f"% np.mean((regr.predict(X_test) - y_test) ** 2))
37 |     print('Score: %.2f' % regr.score(X_test, y_test))
38 | def test_ElasticNet_alpha_rho(*data):
39 |     '''
40 |     测试 ElasticNet 的预测性能随 alpha 和 l1_ratio 的影响
41 | 
42 |     :param data: 可变参数。它是一个元组，这里要求其元素依次为：训练样本集、测试样本集、训练样本的值、测试样本的值
43 |     :return: None
44 |     '''
45 |     X_train,X_test,y_train,y_test=data
46 |     alphas=np.logspace(-2,2)
47 |     rhos=np.linspace(0.01,1)
48 |     scores=[]
49 |     for alpha in alphas:
50 |             for rho in rhos:
51 |                 regr = linear_model.ElasticNet(alpha=alpha,l1_ratio=rho)
52 |                 regr.fit(X_train, y_train)
53 |                 scores.append(regr.score(X_test, y_test))
54 |     ## 绘图
55 |     alphas, rhos = np.meshgrid(alphas, rhos)
56 |     scores=np.array(scores).reshape(alphas.shape)
57 |     from mpl_toolkits.mplot3d import Axes3D
58 |     from matplotlib import cm
59 |     fig=plt.figure()
60 |     ax=Axes3D(fig)
61 |     surf = ax.plot_surface(alphas, rhos, scores, rstride=1, cstride=1, cmap=cm.jet,
62 |         linewidth=0, antialiased=False)
63 |     fig.colorbar(surf, shrink=0.5, aspect=5)
64 |     ax.set_xlabel(r"$\alpha$")
65 |     ax.set_ylabel(r"$\rho$")
66 |     ax.set_zlabel("score")
67 |     ax.set_title("ElasticNet")
68 |     plt.show()
69 | if __name__=='__main__':
70 |     X_train,X_test,y_train,y_test=load_data() # build the regression data set
71 |     test_ElasticNet(X_train,X_test,y_train,y_test) # run test_ElasticNet
72 |     # test_ElasticNet_alpha_rho(X_train,X_test,y_train,y_test) # run test_ElasticNet_alpha_rho


--------------------------------------------------------------------------------
/chapters/Linear/lasso.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | """
 3 |     广义线性模型
 4 |     ~~~~~~~~~~~~~~~~~~~~~~~~~~
 5 | 
 6 |     Lasso
 7 | 
 8 |     :copyright: (c) 2016 by the huaxz1986.
 9 |     :license: lgpl-3.0, see LICENSE for more details.
10 | """
11 | import matplotlib.pyplot as plt
12 | import numpy as np
13 | from sklearn import datasets, linear_model,cross_validation
14 | def load_data():
15 |     '''
16 |     加载用于回归问题的数据集
17 | 
18 |     :return: 一个元组，用于回归问题。元组元素依次为：训练样本集、测试样本集、训练样本集对应的值、测试样本集对应的值
19 |     '''
20 |     diabetes = datasets.load_diabetes()#使用 scikit-learn 自带的一个糖尿病病人的数据集
21 |     return cross_validation.train_test_split(datasets.data,diabetes.target,
22 | 		test_size=0.25,random_state=0) # 拆分成训练集和测试集，测试集大小为原始数据集大小的 1/4
23 | def test_Lasso(*data):
24 |     '''
25 |     测试 Lasso 的用法
26 | 
27 |     :param data:  可变参数。它是一个元组，这里要求其元素依次为：训练样本集、测试样本集、训练样本的值、测试样本的值
28 |     :return: None
29 |     '''
30 |     X_train,X_test,y_train,y_test=data
31 |     regr = linear_model.Lasso()
32 |     regr.fit(X_train, y_train)
33 |     print('Coefficients:%s, intercept %.2f'%(regr.coef_,regr.intercept_))
34 |     print("Residual sum of squares: %.2f"% np.mean((regr.predict(X_test) - y_test) ** 2))
35 |     print('Score: %.2f' % regr.score(X_test, y_test))
36 | def test_Lasso_alpha(*data):
37 |     '''
38 |     测试 Lasso 的预测性能随 alpha 参数的影响
39 | 
40 |     :param data: 可变参数。它是一个元组，这里要求其元素依次为：训练样本集、测试样本集、训练样本的值、测试样本的值
41 |     :return: None
42 |     '''
43 |     X_train,X_test,y_train,y_test=data
44 |     alphas=[0.01,0.02,0.05,0.1,0.2,0.5,1,2,5,10,20,50,100,200,500,1000]
45 |     scores=[]
46 |     for i,alpha in enumerate(alphas):
47 |         regr = linear_model.Lasso(alpha=alpha)
48 |         regr.fit(X_train, y_train)
49 |         scores.append(regr.score(X_test, y_test))
50 |     ## 绘图
51 |     fig=plt.figure()
52 |     ax=fig.add_subplot(1,1,1)
53 |     ax.plot(alphas,scores)
54 |     ax.set_xlabel(r"$\alpha$")
55 |     ax.set_ylabel(r"score")
56 |     ax.set_xscale('log')
57 |     ax.set_title("Lasso")
58 |     plt.show()
59 | if __name__=='__main__':
60 |     X_train,X_test,y_train,y_test=load_data() # build the regression data set
61 |     test_Lasso(X_train,X_test,y_train,y_test) # run test_Lasso
62 |     # test_Lasso_alpha(X_train,X_test,y_train,y_test) # run test_Lasso_alpha
63 | 


--------------------------------------------------------------------------------
/chapters/Linear/lda.py:
--------------------------------------------------------------------------------
  1 | # -*- coding: utf-8 -*-
  2 | """
  3 |     广义线性模型
  4 |     ~~~~~~~~~~~~~~~~~~~~~~~~~~
  5 | 
  6 |     线性判别分析
  7 | 
  8 |     :copyright: (c) 2016 by the huaxz1986.
  9 |     :license: lgpl-3.0, see LICENSE for more details.
 10 | """
 11 | import matplotlib.pyplot as plt
 12 | import numpy as np
 13 | from sklearn import datasets, discriminant_analysis,cross_validation
 14 | 
 15 | def load_data():
 16 |     '''
 17 |     加载用于分类问题的数据集
 18 | 
 19 |     :return: 一个元组，用于分类问题。元组元素依次为：训练样本集、测试样本集、训练样本集对应的标记、测试样本集对应的标记
 20 |     '''
 21 |     iris=datasets.load_iris() # 使用 scikit-learn 自带的 iris 数据集
 22 |     X_train=iris.data
 23 |     y_train=iris.target
 24 |     return cross_validation.train_test_split(X_train, y_train,test_size=0.25,
 25 | 		random_state=0,stratify=y_train)# 分层采样拆分成训练集和测试集，测试集大小为原始数据集大小的 1/4
 26 | def test_LinearDiscriminantAnalysis(*data):
 27 |     '''
 28 |     测试 LinearDiscriminantAnalysis 的用法
 29 | 
 30 |     :param data: 可变参数。它是一个元组，这里要求其元素依次为：训练样本集、测试样本集、训练样本的标记、测试样本的标记
 31 |     :return:  None
 32 |     '''
 33 |     X_train,X_test,y_train,y_test=data
 34 |     lda = discriminant_analysis.LinearDiscriminantAnalysis()
 35 |     lda.fit(X_train, y_train)
 36 |     print('Coefficients:%s, intercept %s'%(lda.coef_,lda.intercept_))
 37 |     print('Score: %.2f' % lda.score(X_test, y_test))
 38 | def plot_LDA(converted_X,y):
 39 |     '''
 40 |     绘制经过 LDA 转换后的数据
 41 | 
 42 |     :param converted_X: 经过 LDA转换后的样本集
 43 |     :param y: 样本集的标记
 44 |     :return:  None
 45 |     '''
 46 |     from mpl_toolkits.mplot3d import Axes3D
 47 |     fig=plt.figure()
 48 |     ax=Axes3D(fig)
 49 |     colors='rgb'
 50 |     markers='o*s'
 51 |     for target,color,marker in zip([0,1,2],colors,markers):
 52 |         pos=(y==target).ravel()
 53 |         X=converted_X[pos,:]
 54 |         ax.scatter(X[:,0], X[:,1], X[:,2],color=color,marker=marker,
 55 | 			label="Label %d"%target)
 56 |     ax.legend(loc="best")
 57 |     fig.suptitle("Iris After LDA")
 58 |     plt.show()
 59 | def run_plot_LDA():
 60 |     '''
 61 |     执行 plot_LDA 。其中数据集来自于 load_data() 函数
 62 | 
 63 |     :return: None
 64 |     '''
 65 |     X_train,X_test,y_train,y_test=load_data()
 66 |     X=np.vstack((X_train,X_test))
 67 |     Y=np.vstack((y_train.reshape(y_train.size,1),y_test.reshape(y_test.size,1)))
 68 |     lda = discriminant_analysis.LinearDiscriminantAnalysis()
 69 |     lda.fit(X, Y)
 70 |     converted_X=np.dot(X,np.transpose(lda.coef_))+lda.intercept_
 71 |     plot_LDA(converted_X,Y)
 72 | def test_LinearDiscriminantAnalysis_solver(*data):
 73 |     '''
 74 |     测试 LinearDiscriminantAnalysis 的预测性能随 solver 参数的影响
 75 | 
 76 |     :param data: 可变参数。它是一个元组，这里要求其元素依次为：训练样本集、测试样本集、训练样本的标记、测试样本的标记
 77 |     :return:  None
 78 |     '''
 79 |     X_train,X_test,y_train,y_test=data
 80 |     solvers=['svd','lsqr','eigen']
 81 |     for solver in solvers:
 82 |         if(solver=='svd'):
 83 |             lda = discriminant_analysis.LinearDiscriminantAnalysis(solver=solver)
 84 |         else:
 85 |             lda = discriminant_analysis.LinearDiscriminantAnalysis(solver=solver,
 86 | 			shrinkage=None)
 87 |         lda.fit(X_train, y_train)
 88 |         print('Score at solver=%s: %.2f' %(solver, lda.score(X_test, y_test)))
 89 | def test_LinearDiscriminantAnalysis_shrinkage(*data):
 90 |     '''
 91 |     测试  LinearDiscriminantAnalysis 的预测性能随 shrinkage 参数的影响
 92 | 
 93 |     :param data: 可变参数。它是一个元组，这里要求其元素依次为：训练样本集、测试样本集、训练样本的标记、测试样本的标记
 94 |     :return:  None
 95 |     '''
 96 |     X_train,X_test,y_train,y_test=data
 97 |     shrinkages=np.linspace(0.0,1.0,num=20)
 98 |     scores=[]
 99 |     for shrinkage in shrinkages:
100 |         lda = discriminant_analysis.LinearDiscriminantAnalysis(solver='lsqr',
101 | 			shrinkage=shrinkage)
102 |         lda.fit(X_train, y_train)
103 |         scores.append(lda.score(X_test, y_test))
104 |     ## 绘图
105 |     fig=plt.figure()
106 |     ax=fig.add_subplot(1,1,1)
107 |     ax.plot(shrinkages,scores)
108 |     ax.set_xlabel(r"shrinkage")
109 |     ax.set_ylabel(r"score")
110 |     ax.set_ylim(0,1.05)
111 |     ax.set_title("LinearDiscriminantAnalysis")
112 |     plt.show()
113 | 
114 | if __name__=='__main__':
115 |     X_train,X_test,y_train,y_test=load_data() # build the classification data set
116 |     test_LinearDiscriminantAnalysis(X_train,X_test,y_train,y_test) # run test_LinearDiscriminantAnalysis
117 |     # run_plot_LDA() # run run_plot_LDA
118 |     # test_LinearDiscriminantAnalysis_solver(X_train,X_test,y_train,y_test) # run test_LinearDiscriminantAnalysis_solver
119 |     # test_LinearDiscriminantAnalysis_shrinkage(X_train,X_test,y_train,y_test) # run test_LinearDiscriminantAnalysis_shrinkage


--------------------------------------------------------------------------------
/chapters/Linear/linear_regression.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | """
 3 |     广义线性模型
 4 |     ~~~~~~~~~~~~~~~~~~~~~~~~~~
 5 | 
 6 |     LinearRegression
 7 | 
 8 |     :copyright: (c) 2016 by the huaxz1986.
 9 |     :license: lgpl-3.0, see LICENSE for more details.
10 | """
11 | import matplotlib.pyplot as plt
12 | import numpy as np
13 | from sklearn import datasets, linear_model,cross_validation
14 | 
15 | def load_data():
16 |     '''
17 |     加载用于回归问题的数据集
18 | 
19 |     :return: 一个元组，用于回归问题。元组元素依次为：训练样本集、测试样本集、训练样本集对应的值、测试样本集对应的值
20 |     '''
21 |     diabetes = datasets.load_diabetes()#使用 scikit-learn 自带的一个糖尿病病人的数据集
22 |     return cross_validation.train_test_split(diabetes.data,diabetes.target,
23 | 		test_size=0.25,random_state=0) # 拆分成训练集和测试集，测试集大小为原始数据集大小的 1/4
24 | def test_LinearRegression(*data):
25 |     '''
26 |     测试 LinearRegression 的用法
27 | 
28 |     :param data: 可变参数。它是一个元组，这里要求其元素依次为：训练样本集、测试样本集、训练样本的值、测试样本的值
29 |     :return: None
30 |     '''
31 |     X_train,X_test,y_train,y_test=data
32 |     regr = linear_model.LinearRegression()
33 |     regr.fit(X_train, y_train)
34 |     print('Coefficients:%s, intercept %.2f'%(regr.coef_,regr.intercept_))
35 |     print("Residual sum of squares: %.2f"% np.mean((regr.predict(X_test) - y_test) ** 2))
36 |     print('Score: %.2f' % regr.score(X_test, y_test))
37 | if __name__=='__main__':
38 |     X_train,X_test,y_train,y_test=load_data() # build the regression data set
39 |     test_LinearRegression(X_train,X_test,y_train,y_test) # run test_LinearRegression
40 | 


--------------------------------------------------------------------------------
/chapters/Linear/logistic_regression.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | """
 3 |     广义线性模型
 4 |     ~~~~~~~~~~~~~~~~~~~~~~~~~~
 5 | 
 6 |     Logistic 回归（也称作对数几率回归）
 7 | 
 8 |     :copyright: (c) 2016 by the huaxz1986.
 9 |     :license: lgpl-3.0, see LICENSE for more details.
10 | """
11 | import matplotlib.pyplot as plt
12 | import numpy as np
13 | from sklearn import datasets, linear_model,cross_validation
14 | 
15 | def load_data():
16 |     '''
17 |     加载用于分类问题的数据集
18 | 
19 |     :return: 一个元组，用于分类问题。元组元素依次为：训练样本集、测试样本集、训练样本集对应的标记、测试样本集对应的标记
20 |     '''
21 |     iris=datasets.load_iris() # 使用 scikit-learn 自带的 iris 数据集
22 |     X_train=iris.data
23 |     y_train=iris.target
24 |     return cross_validation.train_test_split(X_train, y_train,test_size=0.25,
25 | 		random_state=0,stratify=y_train)# 分层采样拆分成训练集和测试集，测试集大小为原始数据集大小的 1/4
26 | def test_LogisticRegression(*data):
27 |     '''
28 |     测试 LogisticRegression 的用法
29 | 
30 |     :param data: 可变参数。它是一个元组，这里要求其元素依次为：训练样本集、测试样本集、训练样本的标记、测试样本的标记
31 |     :return: None
32 |     '''
33 |     X_train,X_test,y_train,y_test=data
34 |     regr = linear_model.LogisticRegression()
35 |     regr.fit(X_train, y_train)
36 |     print('Coefficients:%s, intercept %s'%(regr.coef_,regr.intercept_))
37 |     print('Score: %.2f' % regr.score(X_test, y_test))
38 | def test_LogisticRegression_multinomial(*data):
39 |     '''
40 |     测试 LogisticRegression 的预测性能随 multi_class 参数的影响
41 | 
42 |     :param data: 可变参数。它是一个元组，这里要求其元素依次为：训练样本集、测试样本集、训练样本的标记、测试样本的标记
43 |     :return: None
44 |     '''
45 |     X_train,X_test,y_train,y_test=data
46 |     regr = linear_model.LogisticRegression(multi_class='multinomial',solver='lbfgs')
47 |     regr.fit(X_train, y_train)
48 |     print('Coefficients:%s, intercept %s'%(regr.coef_,regr.intercept_))
49 |     print('Score: %.2f' % regr.score(X_test, y_test))
50 | def test_LogisticRegression_C(*data):
51 |     '''
52 |     测试 LogisticRegression 的预测性能随  C  参数的影响
53 | 
54 |     :param data: 可变参数。它是一个元组，这里要求其元素依次为：训练样本集、测试样本集、训练样本的标记、测试样本的标记
55 |     :return: None
56 |     '''
57 |     X_train,X_test,y_train,y_test=data
58 |     Cs=np.logspace(-2,4,num=100)
59 |     scores=[]
60 |     for C in Cs:
61 |         regr = linear_model.LogisticRegression(C=C)
62 |         regr.fit(X_train, y_train)
63 |         scores.append(regr.score(X_test, y_test))
64 |     ## 绘图
65 |     fig=plt.figure()
66 |     ax=fig.add_subplot(1,1,1)
67 |     ax.plot(Cs,scores)
68 |     ax.set_xlabel(r"C")
69 |     ax.set_ylabel(r"score")
70 |     ax.set_xscale('log')
71 |     ax.set_title("LogisticRegression")
72 |     plt.show()
73 | 
74 | if __name__=='__main__':
75 |     X_train,X_test,y_train,y_test=load_data() # load the classification data set
76 |     test_LogisticRegression(X_train,X_test,y_train,y_test) # run test_LogisticRegression
77 |     # test_LogisticRegression_multinomial(X_train,X_test,y_train,y_test) # run test_LogisticRegression_multinomial
78 |     # test_LogisticRegression_C(X_train,X_test,y_train,y_test) # run test_LogisticRegression_C


--------------------------------------------------------------------------------
/chapters/Linear/ridge.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | """
 3 |     Generalized linear models
 4 |     ~~~~~~~~~~~~~~~~~~~~~~~~~~
 5 | 
 6 |     Ridge regression
 7 | 
 8 |     :copyright: (c) 2016 by the huaxz1986.
 9 |     :license: lgpl-3.0, see LICENSE for more details.
10 | """
11 | import matplotlib.pyplot as plt
12 | import numpy as np
13 | from sklearn import datasets, linear_model,cross_validation
14 | 
15 | def load_data():
16 |     '''
17 |     加载用于回归问题的数据集
18 | 
19 |     :return: 一个元组，用于回归问题。元组元素依次为：训练样本集、测试样本集、训练样本集对应的值、测试样本集对应的值
20 |     '''
21 |     diabetes = datasets.load_diabetes()#使用 scikit-learn 自带的一个糖尿病病人的数据集
22 |     return cross_validation.train_test_split(datasets.data,diabetes.target,
23 | 		test_size=0.25,random_state=0) # 拆分成训练集和测试集，测试集大小为原始数据集大小的 1/4
24 | 
25 | def test_Ridge(*data):
26 |     '''
27 |     测试 Ridge 的用法
28 | 
29 |     :param data: 可变参数。它是一个元组，这里要求其元素依次为：训练样本集、测试样本集、训练样本的值、测试样本的值
30 |     :return: None
31 |     '''
32 |     X_train,X_test,y_train,y_test=data
33 |     regr = linear_model.Ridge()
34 |     regr.fit(X_train, y_train)
35 |     print('Coefficients:%s, intercept %.2f'%(regr.coef_,regr.intercept_))
36 |     print("Residual sum of squares: %.2f"% np.mean((regr.predict(X_test) - y_test) ** 2))
37 |     print('Score: %.2f' % regr.score(X_test, y_test))
38 | def test_Ridge_alpha(*data):
39 |     '''
40 |     测试 Ridge 的预测性能随 alpha 参数的影响
41 | 
42 |     :param data: 可变参数。它是一个元组，这里要求其元素依次为：训练样本集、测试样本集、训练样本的值、测试样本的值
43 |     :return: None
44 |     '''
45 |     X_train,X_test,y_train,y_test=data
46 |     alphas=[0.01,0.02,0.05,0.1,0.2,0.5,1,2,5,10,20,50,100,200,500,1000]
47 |     scores=[]
48 |     for i,alpha in enumerate(alphas):
49 |         regr = linear_model.Ridge(alpha=alpha)
50 |         regr.fit(X_train, y_train)
51 |         scores.append(regr.score(X_test, y_test))
52 |     ## 绘图
53 |     fig=plt.figure()
54 |     ax=fig.add_subplot(1,1,1)
55 |     ax.plot(alphas,scores)
56 |     ax.set_xlabel(r"$\alpha$")
57 |     ax.set_ylabel(r"score")
58 |     ax.set_xscale('log')
59 |     ax.set_title("Ridge")
60 |     plt.show()
61 | if __name__=='__main__':
62 |     X_train,X_test,y_train,y_test=load_data() # build the regression data set
63 |     test_Ridge(X_train,X_test,y_train,y_test) # run test_Ridge
64 |     # test_Ridge_alpha(X_train,X_test,y_train,y_test) # run test_Ridge_alpha


--------------------------------------------------------------------------------
/chapters/Model_Selection/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/huaxz1986/git_book/cf539065568c2588df87729c1c76303016933449/chapters/Model_Selection/__init__.py


--------------------------------------------------------------------------------
/chapters/Model_Selection/__pycache__/__init__.cpython-35.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/huaxz1986/git_book/cf539065568c2588df87729c1c76303016933449/chapters/Model_Selection/__pycache__/__init__.cpython-35.pyc


--------------------------------------------------------------------------------
/chapters/Model_Selection/__pycache__/classification_metrics.cpython-35.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/huaxz1986/git_book/cf539065568c2588df87729c1c76303016933449/chapters/Model_Selection/__pycache__/classification_metrics.cpython-35.pyc


--------------------------------------------------------------------------------
/chapters/Model_Selection/__pycache__/data_splittion.cpython-35.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/huaxz1986/git_book/cf539065568c2588df87729c1c76303016933449/chapters/Model_Selection/__pycache__/data_splittion.cpython-35.pyc


--------------------------------------------------------------------------------
/chapters/Model_Selection/__pycache__/grid_search.cpython-35.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/huaxz1986/git_book/cf539065568c2588df87729c1c76303016933449/chapters/Model_Selection/__pycache__/grid_search.cpython-35.pyc


--------------------------------------------------------------------------------
/chapters/Model_Selection/__pycache__/learning_curve.cpython-35.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/huaxz1986/git_book/cf539065568c2588df87729c1c76303016933449/chapters/Model_Selection/__pycache__/learning_curve.cpython-35.pyc


--------------------------------------------------------------------------------
/chapters/Model_Selection/__pycache__/loss_function.cpython-35.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/huaxz1986/git_book/cf539065568c2588df87729c1c76303016933449/chapters/Model_Selection/__pycache__/loss_function.cpython-35.pyc


--------------------------------------------------------------------------------
/chapters/Model_Selection/__pycache__/regression_metrics.cpython-35.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/huaxz1986/git_book/cf539065568c2588df87729c1c76303016933449/chapters/Model_Selection/__pycache__/regression_metrics.cpython-35.pyc


--------------------------------------------------------------------------------
/chapters/Model_Selection/__pycache__/validation_curve.cpython-35.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/huaxz1986/git_book/cf539065568c2588df87729c1c76303016933449/chapters/Model_Selection/__pycache__/validation_curve.cpython-35.pyc


--------------------------------------------------------------------------------
/chapters/Model_Selection/data_splittion.py:
--------------------------------------------------------------------------------
  1 | # -*- coding: utf-8 -*-
  2 | """
  3 |     模型选择
  4 |     ~~~~~~~~~~~~~~~~~~~~~~~~~~
  5 | 
  6 |     数据集切分
  7 | 
  8 |     :copyright: (c) 2016 by the huaxz1986.
  9 |     :license: lgpl-3.0, see LICENSE for more details.
 10 | """
 11 | from sklearn.model_selection import train_test_split,KFold,StratifiedKFold,LeaveOneOut\
 12 |             ,cross_val_score
 13 | import  numpy as np
 14 | def test_train_test_split():
 15 |     '''
 16 |     测试  train_test_split 的用法
 17 | 
 18 |     :return:  None
 19 |     '''
 20 |     X=[[1,2,3,4],
 21 |        [11,12,13,14],
 22 |        [21,22,23,24],
 23 |        [31,32,33,34],
 24 |        [41,42,43,44],
 25 |        [51,52,53,54],
 26 |        [61,62,63,64],
 27 |        [71,72,73,74]]
 28 |     y=[1,1,0,0,1,1,0,0]
 29 | 
 30 |     X_train, X_test, y_train, y_test = train_test_split(X, y,test_size=0.4, random_state=0) # 切分，测试集大小为原始数据集大小的 40%
 31 |     print("X_train=",X_train)
 32 |     print("X_test=",X_test)
 33 |     print("y_train=",y_train)
 34 |     print("y_test=",y_test)
 35 |     X_train, X_test, y_train, y_test = train_test_split(X, y,test_size=0.4,
 36 |              random_state=0,stratify=y) # 分层采样切分，测试集大小为原始数据集大小的 40%
 37 |     print("Stratify:X_train=",X_train)
 38 |     print("Stratify:X_test=",X_test)
 39 |     print("Stratify:y_train=",y_train)
 40 |     print("Stratify:y_test=",y_test)
 41 | def test_KFold():
 42 |     '''
 43 |     测试  KFold 的用法
 44 | 
 45 |     :return: None
 46 |     '''
 47 |     X=np.array([[1,2,3,4],
 48 |        [11,12,13,14],
 49 |        [21,22,23,24],
 50 |        [31,32,33,34],
 51 |        [41,42,43,44],
 52 |        [51,52,53,54],
 53 |        [61,62,63,64],
 54 |        [71,72,73,74],
 55 |        [81,82,83,84]])
 56 |     y=np.array([1,1,0,0,1,1,0,0,1])
 57 | 
 58 |     folder=KFold(n_splits=3,random_state=0,shuffle=False) # 切分之前不混洗数据集
 59 |     for train_index,test_index in folder.split(X,y):
 60 |           print("Train Index:",train_index)
 61 |           print("Test Index:",test_index)
 62 |           print("X_train:",X[train_index])
 63 |           print("X_test:",X[test_index])
 64 |           print("")
 65 | 
 66 |     shuffle_folder=KFold(n_splits=3,random_state=0,shuffle=True) # 切分之前混洗数据集
 67 |     for train_index,test_index in shuffle_folder.split(X,y):
 68 |           print("Shuffled Train Index:",train_index)
 69 |           print("Shuffled Test Index:",test_index)
 70 |           print("Shuffled X_train:",X[train_index])
 71 |           print("Shuffled X_test:",X[test_index])
 72 |           print("")
 73 | def test_StratifiedKFold():
 74 |     '''
 75 |     测试  StratifiedKFold 的用法
 76 | 
 77 |     :return: None
 78 |     '''
 79 |     X=np.array([[1,2,3,4],
 80 |        [11,12,13,14],
 81 |        [21,22,23,24],
 82 |        [31,32,33,34],
 83 |        [41,42,43,44],
 84 |        [51,52,53,54],
 85 |        [61,62,63,64],
 86 |        [71,72,73,74]])
 87 | 
 88 |     y=np.array([1,1,0,0,1,1,0,0])
 89 | 
 90 |     folder=KFold(n_splits=4,random_state=0,shuffle=False)
 91 |     stratified_folder=StratifiedKFold(n_splits=4,random_state=0,shuffle=False)
 92 |     for train_index,test_index in folder.split(X,y):
 93 |           print("Train Index:",train_index)
 94 |           print("Test Index:",test_index)
 95 |           print("y_train:",y[train_index])
 96 |           print("y_test:",y[test_index])
 97 |           print("")
 98 | 
 99 |     for train_index,test_index in stratified_folder.split(X,y):
100 |           print("Stratified Train Index:",train_index)
101 |           print("Stratified Test Index:",test_index)
102 |           print("Stratified y_train:",y[train_index])
103 |           print("Stratified y_test:",y[test_index])
104 |           print("")
105 | def test_LeaveOneOut():
106 |     '''
107 |     测试  LeaveOneOut 的用法
108 | 
109 |     :return: None
110 |     '''
111 |     X=np.array([[1,2,3,4],
112 |        [11,12,13,14],
113 |        [21,22,23,24],
114 |        [31,32,33,34]]
115 |     )
116 |     y=np.array([1,1,0,0])
117 | 
118 |     lo=LeaveOneOut(len(y))
119 |     for train_index,test_index in lo:
120 |           print("Train Index:",train_index)
121 |           print("Test Index:",test_index)
122 |           print("X_train:",X[train_index])
123 |           print("X_test:",X[test_index])
124 |           print("")
125 | def test_cross_val_score():
126 |     '''
127 |     测试  cross_val_score 的用法
128 | 
129 |     :return: None
130 |     '''
131 |     from sklearn.datasets import  load_digits
132 |     from sklearn.svm import  LinearSVC
133 | 
134 |     digits=load_digits() # 加载用于分类问题的数据集
135 |     X=digits.data
136 |     y=digits.target
137 | 
138 |     result=cross_val_score(LinearSVC(),X,y,cv=10) # 使用 LinearSVC 作为分类器
139 |     print("Cross Val Score is:",result)
140 | 
141 | 
142 | if __name__=='__main__':
143 |     # test_train_test_split() # run test_train_test_split
144 |     # test_KFold()# run test_KFold
145 |     test_StratifiedKFold()# run test_StratifiedKFold
146 |     # test_LeaveOneOut()# run test_LeaveOneOut
147 |     # test_cross_val_score()# run test_cross_val_score


--------------------------------------------------------------------------------
/chapters/Model_Selection/grid_search.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | """
 3 |     模型选择
 4 |     ~~~~~~~~~~~~~~~~~~~~~~~~~~
 5 | 
 6 |     参数优化
 7 | 
 8 |     :copyright: (c) 2016 by the huaxz1986.
 9 |     :license: lgpl-3.0, see LICENSE for more details.
10 | """
11 | from sklearn.datasets import load_digits
12 | from sklearn.linear_model import  LogisticRegression
13 | from sklearn.model_selection import GridSearchCV,RandomizedSearchCV
14 | from sklearn.metrics import classification_report
15 | from sklearn.model_selection import train_test_split
16 | import scipy
17 | 
18 | def test_GridSearchCV():
19 |     '''
20 |     测试 GridSearchCV 的用法。使用 LogisticRegression 作为分类器，主要优化 C、penalty、multi_class 等参数
21 | 
22 |     :return: None
23 |     '''
24 |     ### 加载数据
25 |     digits = load_digits()
26 |     X_train,X_test,y_train,y_test=train_test_split(digits.data, digits.target,test_size=0.25,
27 |                 random_state=0,stratify=digits.target)
28 |     #### 参数优化 ######
29 |     tuned_parameters = [{'penalty': ['l1','l2'],
30 |                         'C': [0.01,0.05,0.1,0.5,1,5,10,50,100],
31 |                         'solver':['liblinear'],
32 |                         'multi_class': ['ovr']},
33 | 
34 |                         {'penalty': ['l2'],
35 |                         'C': [0.01,0.05,0.1,0.5,1,5,10,50,100],
36 |                          'solver':['lbfgs'],
37 |                         'multi_class': ['ovr','multinomial']},
38 |                         ]
39 |     clf=GridSearchCV(LogisticRegression(tol=1e-6),tuned_parameters,cv=10)
40 |     clf.fit(X_train,y_train)
41 |     print("Best parameters set found:",clf.best_params_)
42 |     print("Grid scores:")
43 |     for params, mean_score, scores in clf.grid_scores_:
44 |              print("\t%0.3f (+/-%0.03f) for %s" % (mean_score, scores.std() * 2, params))
45 | 
46 |     print("Optimized Score:",clf.score(X_test,y_test))
47 |     print("Detailed classification report:")
48 |     y_true, y_pred = y_test, clf.predict(X_test)
49 |     print(classification_report(y_true, y_pred))
50 | def test_RandomizedSearchCV():
51 |     '''
52 |     测试 RandomizedSearchCV 的用法。使用 LogisticRegression 作为分类器，主要优化 C、multi_class 等参数。其中 C 的分布函数为指数分布
53 | 
54 |     :return:  None
55 |     '''
56 |     ### 加载数据
57 |     digits = load_digits()
58 |     X_train,X_test,y_train,y_test=train_test_split(digits.data, digits.target,
59 |                 test_size=0.25,random_state=0,stratify=digits.target)
60 |     #### 参数优化 ######
61 |     tuned_parameters ={  'C': scipy.stats.expon(scale=100), # 指数分布
62 |                         'multi_class': ['ovr','multinomial']}
63 |     clf=RandomizedSearchCV(LogisticRegression(penalty='l2',solver='lbfgs',tol=1e-6),
64 |                         tuned_parameters,cv=10,scoring="accuracy",n_iter=100)
65 |     clf.fit(X_train,y_train)
66 |     print("Best parameters set found:",clf.best_params_)
67 |     print("Randomized Grid scores:")
68 |     for params, mean_score, scores in clf.grid_scores_:
69 |              print("\t%0.3f (+/-%0.03f) for %s" % (mean_score, scores.std() * 2, params))
70 | 
71 |     print("Optimized Score:",clf.score(X_test,y_test))
72 |     print("Detailed classification report:")
73 |     y_true, y_pred = y_test, clf.predict(X_test)
74 |     print(classification_report(y_true, y_pred))
75 | 
76 | if __name__=='__main__': # script entry point: only the un-commented demo runs
77 |     test_GridSearchCV()# run test_GridSearchCV
78 |     # test_RandomizedSearchCV() # run test_RandomizedSearchCV
79 | 


--------------------------------------------------------------------------------
/chapters/Model_Selection/learning_curve.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | """
 3 |     模型选择
 4 |     ~~~~~~~~~~~~~~~~~~~~~~~~~~
 5 | 
 6 |     学习曲线
 7 | 
 8 |     :copyright: (c) 2016 by the huaxz1986.
 9 |     :license: lgpl-3.0, see LICENSE for more details.
10 | """
11 | import matplotlib.pyplot as plt
12 | import numpy as np
13 | from sklearn.datasets import load_digits
14 | from sklearn.svm import LinearSVC
15 | from sklearn.learning_curve import learning_curve
16 | 
17 | def test_learning_curve():
18 |     '''
19 |     测试 learning_curve 的用法 。验证对于 LinearSVC 分类器 ，数据集的大小对于预测性能的影响
20 | 
21 |     :return:
22 |     '''
23 |     ### 加载数据
24 |     digits = load_digits()
25 |     X,y=digits.data,digits.target
26 |     #### 获取学习曲线 ######
27 |     train_sizes=np.linspace(0.1,1.0,endpoint=True,dtype='float')
28 |     abs_trains_sizes,train_scores, test_scores = learning_curve(LinearSVC(),
29 |             X, y,cv=10, scoring="accuracy",train_sizes=train_sizes)
30 |     ###### 对每个 C ，获取 10 折交叉上的预测得分上的均值和方差 #####
31 |     train_scores_mean = np.mean(train_scores, axis=1)
32 |     train_scores_std = np.std(train_scores, axis=1)
33 |     test_scores_mean = np.mean(test_scores, axis=1)
34 |     test_scores_std = np.std(test_scores, axis=1)
35 |     ####### 绘图 ######
36 |     fig=plt.figure()
37 |     ax=fig.add_subplot(1,1,1)
38 | 
39 |     ax.plot(abs_trains_sizes, train_scores_mean, label="Training Accuracy", color="r")
40 |     ax.fill_between(abs_trains_sizes, train_scores_mean - train_scores_std,
41 |                      train_scores_mean + train_scores_std, alpha=0.2, color="r")
42 |     ax.plot(abs_trains_sizes, test_scores_mean, label="Testing Accuracy", color="g")
43 |     ax.fill_between(abs_trains_sizes, test_scores_mean - test_scores_std,
44 |                      test_scores_mean + test_scores_std, alpha=0.2, color="g")
45 | 
46 |     ax.set_title("Learning Curve with LinearSVC")
47 |     ax.set_xlabel("Sample Nums")
48 |     ax.set_ylabel("Score")
49 |     ax.set_ylim(0,1.1)
50 |     ax.legend(loc='best')
51 |     plt.show()
52 | 
53 | if __name__=="__main__": # script entry point
54 |     test_learning_curve() # run test_learning_curve


--------------------------------------------------------------------------------
/chapters/Model_Selection/loss_function.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | """
 3 |     模型选择
 4 |     ~~~~~~~~~~~~~~~~~~~~~~~~~~
 5 | 
 6 |     损失函数
 7 | 
 8 |     :copyright: (c) 2016 by the huaxz1986.
 9 |     :license: lgpl-3.0, see LICENSE for more details.
10 | """
11 | 
12 | from sklearn.metrics import zero_one_loss,log_loss
13 | 
14 | 
15 | def test_zero_one_loss():
16 |     '''
17 |     测试 0-1 损失函数
18 | 
19 |     :return: None
20 |     '''
21 |     y_true=[1,1,1,1,1,0,0,0,0,0]
22 |     y_pred=[0,0,0,1,1,1,1,1,0,0]
23 |     print("zero_one_loss<fraction>:",zero_one_loss(y_true,y_pred,normalize=True))
24 |     print("zero_one_loss<num>:",zero_one_loss(y_true,y_pred,normalize=False))
25 | def test_log_loss():
26 |     '''
27 |     测试对数损失函数
28 | 
29 |     :return:  None
30 |     '''
31 |     y_true=[1, 1, 1, 0, 0, 0]
32 |     y_pred=[[0.1, 0.9],
33 |             [0.2, 0.8],
34 |             [0.3, 0.7],
35 |             [0.7, 0.3],
36 |             [0.8, 0.2],
37 |             [0.9, 0.1]]
38 |     print("log_loss<average>:",log_loss(y_true,y_pred,normalize=True))
39 |     print("log_loss<total>:",log_loss(y_true,y_pred,normalize=False))
40 | 
41 | if __name__=="__main__": # script entry point: only the un-commented demo runs
42 |     test_zero_one_loss() # run test_zero_one_loss
43 |     # test_log_loss() # run test_log_loss


--------------------------------------------------------------------------------
/chapters/Model_Selection/regression_metrics.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | """
 3 |     模型选择
 4 |     ~~~~~~~~~~~~~~~~~~~~~~~~~~
 5 | 
 6 |     回归问题性能度量
 7 | 
 8 |     :copyright: (c) 2016 by the huaxz1986.
 9 |     :license: lgpl-3.0, see LICENSE for more details.
10 | """
11 | from sklearn.metrics import mean_absolute_error,mean_squared_error
12 | 
13 | def test_mean_absolute_error():
14 |     '''
15 |     测试 mean_absolute_error 的用法
16 | 
17 |     :return: None
18 |     '''
19 |     y_true=[1,1,1,1,1,2,2,2,0,0]
20 |     y_pred=[0,0,0,1,1,1,0,0,0,0]
21 | 
22 |     print("Mean Absolute Error:",mean_absolute_error(y_true,y_pred))
23 | def test_mean_squared_error():
24 |     '''
25 |     测试 mean_squared_error 的用法
26 | 
27 |     :return: None
28 |     '''
29 |     y_true=[1,1,1,1,1,2,2,2,0,0]
30 |     y_pred=[0,0,0,1,1,1,0,0,0,0]
31 | 
32 |     print("Mean Absolute Error:",mean_absolute_error(y_true,y_pred))
33 |     print("Mean Square Error:",mean_squared_error(y_true,y_pred))
34 | 
35 | if __name__=="__main__": # script entry point: only the un-commented demo runs
36 |     test_mean_absolute_error() # run test_mean_absolute_error()
37 |     # test_mean_squared_error() # run test_mean_squared_error()


--------------------------------------------------------------------------------
/chapters/Model_Selection/validation_curve.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | """
 3 |     模型选择
 4 |     ~~~~~~~~~~~~~~~~~~~~~~~~~~
 5 | 
 6 |     验证曲线
 7 | 
 8 |     :copyright: (c) 2016 by the huaxz1986.
 9 |     :license: lgpl-3.0, see LICENSE for more details.
10 | """
11 | import matplotlib.pyplot as plt
12 | import numpy as np
13 | from sklearn.datasets import load_digits
14 | from sklearn.svm import LinearSVC
15 | from sklearn.learning_curve import validation_curve
16 | 
17 | def test_validation_curve():
18 |     '''
19 |     测试 validation_curve 的用法 。验证对于 LinearSVC 分类器 ， C 参数对于预测准确率的影响
20 | 
21 |     :return:  None
22 |     '''
23 |     ### 加载数据
24 |     digits = load_digits()
25 |     X,y=digits.data,digits.target
26 |     #### 获取验证曲线 ######
27 |     param_name="C"
28 |     param_range = np.logspace(-2, 2)
29 |     train_scores, test_scores = validation_curve(LinearSVC(), X, y, param_name=param_name,
30 |              param_range=param_range,cv=10, scoring="accuracy")
31 |     ###### 对每个 C ，获取 10 折交叉上的预测得分上的均值和方差 #####
32 |     train_scores_mean = np.mean(train_scores, axis=1)
33 |     train_scores_std = np.std(train_scores, axis=1)
34 |     test_scores_mean = np.mean(test_scores, axis=1)
35 |     test_scores_std = np.std(test_scores, axis=1)
36 |     ####### 绘图 ######
37 |     fig=plt.figure()
38 |     ax=fig.add_subplot(1,1,1)
39 | 
40 |     ax.semilogx(param_range, train_scores_mean, label="Training Accuracy", color="r")
41 |     ax.fill_between(param_range, train_scores_mean - train_scores_std,
42 |                      train_scores_mean + train_scores_std, alpha=0.2, color="r")
43 |     ax.semilogx(param_range, test_scores_mean, label="Testing Accuracy", color="g")
44 |     ax.fill_between(param_range, test_scores_mean - test_scores_std,
45 |                      test_scores_mean + test_scores_std, alpha=0.2, color="g")
46 | 
47 |     ax.set_title("Validation Curve with LinearSVC")
48 |     ax.set_xlabel("C")
49 |     ax.set_ylabel("Score")
50 |     ax.set_ylim(0,1.1)
51 |     ax.legend(loc='best')
52 |     plt.show()
53 | 
54 | if __name__=='__main__': # script entry point
55 |     test_validation_curve() # run test_validation_curve


--------------------------------------------------------------------------------
/chapters/Perceptron_Neural_Network/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/huaxz1986/git_book/cf539065568c2588df87729c1c76303016933449/chapters/Perceptron_Neural_Network/__init__.py


--------------------------------------------------------------------------------
/chapters/Perceptron_Neural_Network/__pycache__/__init__.cpython-35.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/huaxz1986/git_book/cf539065568c2588df87729c1c76303016933449/chapters/Perceptron_Neural_Network/__pycache__/__init__.cpython-35.pyc


--------------------------------------------------------------------------------
/chapters/Perceptron_Neural_Network/__pycache__/neural_network.cpython-35.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/huaxz1986/git_book/cf539065568c2588df87729c1c76303016933449/chapters/Perceptron_Neural_Network/__pycache__/neural_network.cpython-35.pyc


--------------------------------------------------------------------------------
/chapters/Perceptron_Neural_Network/__pycache__/neural_network_iris.cpython-35.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/huaxz1986/git_book/cf539065568c2588df87729c1c76303016933449/chapters/Perceptron_Neural_Network/__pycache__/neural_network_iris.cpython-35.pyc


--------------------------------------------------------------------------------
/chapters/Perceptron_Neural_Network/__pycache__/perceptron.cpython-35.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/huaxz1986/git_book/cf539065568c2588df87729c1c76303016933449/chapters/Perceptron_Neural_Network/__pycache__/perceptron.cpython-35.pyc


--------------------------------------------------------------------------------
/chapters/Perceptron_Neural_Network/neural_network.py:
--------------------------------------------------------------------------------
  1 | # -*- coding: utf-8 -*-
  2 | """
  3 |     感知机和神经网络
  4 |     ~~~~~~~~~~~~~~~~~~
  5 | 
  6 |     神经网络模型。注意 MLPClassifier 是 scikit-learn version 0.18 版本才出现的。截止目前，该版本还是开发版，官方提供的稳定版为 0.17。
  7 |     所以为了运行本示例，需要手动下载编译安装 scikit-learn version 0.18 版
  8 | 
  9 |     :copyright: (c) 2016 by the huaxz1986.
 10 |     :license: lgpl-3.0, see LICENSE for more details.
 11 | """
 12 | import numpy as np
 13 | from matplotlib import  pyplot as plt
 14 | from sklearn.neural_network import MLPClassifier
 15 | 
 16 | 
 17 | def creat_data_no_linear_2d(n):
 18 |         '''
 19 |         创建二维的线性不可分数据集
 20 | 
 21 |         :param n: 负例的数量
 22 |         :return: 线性不可分数据集，数据集大小为 2*n+n/10 （ n/10 是误差点的数量，误差点导致了线性不可分）
 23 |         '''
 24 |         np.random.seed(1)
 25 |         x_11=np.random.randint(0,100,(n,1)) #  第一组：第一维坐标值
 26 |         x_12=10+np.random.randint(-5,5,(n,1,))#  第一组：第二维坐标值
 27 |         x_21=np.random.randint(0,100,(n,1))#  第二组：第一维坐标值
 28 |         x_22=20+np.random.randint(0,10,(n,1))#  第二组：第二维坐标值
 29 | 
 30 |         x_31=np.random.randint(0,100,(int(n/10),1))#  第三组：第一维坐标值
 31 |         x_32=20+np.random.randint(0,10,(int(n/10),1))#  第三组：第二维坐标值
 32 | 
 33 |         new_x_11=x_11*np.sqrt(2)/2-x_12*np.sqrt(2)/2## 沿第一维轴旋转45度
 34 |         new_x_12=x_11*np.sqrt(2)/2+x_12*np.sqrt(2)/2## 沿第一维轴旋转45度
 35 |         new_x_21=x_21*np.sqrt(2)/2-x_22*np.sqrt(2)/2## 沿第一维轴旋转45度
 36 |         new_x_22=x_21*np.sqrt(2)/2+x_22*np.sqrt(2)/2## 沿第一维轴旋转45度
 37 |         new_x_31=x_31*np.sqrt(2)/2-x_32*np.sqrt(2)/2## 沿第一维轴旋转45度
 38 |         new_x_32=x_31*np.sqrt(2)/2+x_32*np.sqrt(2)/2## 沿第一维轴旋转45度
 39 | 
 40 |         plus_samples=np.hstack([new_x_11,new_x_12,np.ones((n,1))]) # 拼接成正例数据集
 41 |         minus_samples=np.hstack([new_x_21,new_x_22,-np.ones((n,1))])# 拼接成负例数据集
 42 |         err_samples=np.hstack([new_x_31,new_x_32,np.ones((int(n/10),1))])# 拼接成正例数据集，它导致了线性不可分
 43 |         samples=np.vstack([plus_samples,minus_samples,err_samples]) # 拼接成数据集
 44 |         np.random.shuffle(samples)  # 混洗数据
 45 |         return samples
 46 | def plot_samples_2d(ax,samples):
 47 |             '''
 48 |             绘制二维数据集
 49 | 
 50 |             :param ax: Axes 实例，用于绘制图形
 51 |             :param samples: 二维数据集
 52 |             :return: None
 53 |             '''
 54 |             Y=samples[:,-1]
 55 |             position_p=Y==1 ## 正类位置
 56 |             position_m=Y==-1 ## 负类位置
 57 |             ax.scatter(samples[position_p,0],samples[position_p,1],
 58 |                 marker='+',label='+',color='b')
 59 |             ax.scatter(samples[position_m,0],samples[position_m,1],
 60 |                 marker='^',label='-',color='y')
 61 | def run_plot_samples_2d():
 62 |     '''
 63 |     绘制二维线性不可分数据集
 64 | 
 65 |     :return: None
 66 |     '''
 67 |     fig=plt.figure()
 68 |     ax=fig.add_subplot(1,1,1)
 69 |     data=creat_data_no_linear_2d(100) # 生成二维线性不可分数据集
 70 |     plot_samples_2d(ax,data)
 71 |     ax.legend(loc='best')
 72 |     plt.show()
 73 | def predict_with_MLPClassifier(ax,train_data):
 74 |         '''
 75 |         使用 MLPClassifier绘制预测结果
 76 | 
 77 |         :param ax: Axes 实例，用于绘制图形
 78 |         :param train_data: 训练数据集
 79 |         :return: None
 80 |         '''
 81 |         train_x=train_data[:,:-1]
 82 |         train_y=train_data[:,-1]
 83 |         clf=MLPClassifier(activation='logistic',max_iter=1000)# 构造分类器实例
 84 |         clf.fit(train_x,train_y) # 训练分类器
 85 |         print(clf.score(train_x,train_y)) # 查看在训练集上的评价预测精度
 86 | 
 87 |         ## 用训练好的训练集预测平面上每一点的输出##
 88 |         x_min, x_max = train_x[:, 0].min() - 1, train_x[:, 0].max() + 2
 89 |         y_min, y_max = train_x[:, 1].min() - 1, train_x[:, 1].max() + 2
 90 |         plot_step=1
 91 |         xx, yy = np.meshgrid(np.arange(x_min, x_max, plot_step),
 92 |             np.arange(y_min, y_max, plot_step))
 93 |         Z = clf.predict(np.c_[xx.ravel(), yy.ravel()])
 94 |         Z = Z.reshape(xx.shape)
 95 |         ax.contourf(xx, yy, Z, cmap=plt.cm.Paired)
 96 | def run_predict_with_MLPClassifier():
 97 |     '''
 98 |     用 MLPClassifier 预测线性不可分数据集
 99 | 
100 |     :return: None
101 |     '''
102 |     data=creat_data_no_linear_2d(500) #生成线性不可分数据集
103 |     fig=plt.figure()
104 |     ax=fig.add_subplot(1,1,1)
105 |     predict_with_MLPClassifier(ax,data)
106 |     plot_samples_2d(ax,data)
107 |     ax.legend(loc='best')
108 |     plt.show()
109 | 
110 | if __name__=='__main__': # script entry point: only the un-commented demo runs
111 |     run_plot_samples_2d() # run run_plot_samples_2d
112 |     #run_predict_with_MLPClassifier() # run run_predict_with_MLPClassifier


--------------------------------------------------------------------------------
/chapters/PreProcessing/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/huaxz1986/git_book/cf539065568c2588df87729c1c76303016933449/chapters/PreProcessing/__init__.py


--------------------------------------------------------------------------------
/chapters/PreProcessing/__pycache__/__init__.cpython-35.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/huaxz1986/git_book/cf539065568c2588df87729c1c76303016933449/chapters/PreProcessing/__pycache__/__init__.cpython-35.pyc


--------------------------------------------------------------------------------
/chapters/PreProcessing/__pycache__/binarize.cpython-35.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/huaxz1986/git_book/cf539065568c2588df87729c1c76303016933449/chapters/PreProcessing/__pycache__/binarize.cpython-35.pyc


--------------------------------------------------------------------------------
/chapters/PreProcessing/__pycache__/dictionary_learning.cpython-35.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/huaxz1986/git_book/cf539065568c2588df87729c1c76303016933449/chapters/PreProcessing/__pycache__/dictionary_learning.cpython-35.pyc


--------------------------------------------------------------------------------
/chapters/PreProcessing/__pycache__/feature_selection_bagging.cpython-35.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/huaxz1986/git_book/cf539065568c2588df87729c1c76303016933449/chapters/PreProcessing/__pycache__/feature_selection_bagging.cpython-35.pyc


--------------------------------------------------------------------------------
/chapters/PreProcessing/__pycache__/feature_selection_embeded.cpython-35.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/huaxz1986/git_book/cf539065568c2588df87729c1c76303016933449/chapters/PreProcessing/__pycache__/feature_selection_embeded.cpython-35.pyc


--------------------------------------------------------------------------------
/chapters/PreProcessing/__pycache__/feature_selection_filter.cpython-35.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/huaxz1986/git_book/cf539065568c2588df87729c1c76303016933449/chapters/PreProcessing/__pycache__/feature_selection_filter.cpython-35.pyc


--------------------------------------------------------------------------------
/chapters/PreProcessing/__pycache__/normalize.cpython-35.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/huaxz1986/git_book/cf539065568c2588df87729c1c76303016933449/chapters/PreProcessing/__pycache__/normalize.cpython-35.pyc


--------------------------------------------------------------------------------
/chapters/PreProcessing/__pycache__/onehot_encode.cpython-35.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/huaxz1986/git_book/cf539065568c2588df87729c1c76303016933449/chapters/PreProcessing/__pycache__/onehot_encode.cpython-35.pyc


--------------------------------------------------------------------------------
/chapters/PreProcessing/__pycache__/pipeline.cpython-35.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/huaxz1986/git_book/cf539065568c2588df87729c1c76303016933449/chapters/PreProcessing/__pycache__/pipeline.cpython-35.pyc


--------------------------------------------------------------------------------
/chapters/PreProcessing/__pycache__/standardize.cpython-35.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/huaxz1986/git_book/cf539065568c2588df87729c1c76303016933449/chapters/PreProcessing/__pycache__/standardize.cpython-35.pyc


--------------------------------------------------------------------------------
/chapters/PreProcessing/binarize.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | """
 3 |     数据预处理
 4 |     ~~~~~~~~~~~~~~~~
 5 | 
 6 |     二元化
 7 | 
 8 |     :copyright: (c) 2016 by the huaxz1986.
 9 |     :license: lgpl-3.0, see LICENSE for more details.
10 | """
11 | from sklearn.preprocessing import Binarizer
12 | def test_Binarizer():
13 |     '''
14 |     测试 Binarizer 的用法
15 | 
16 |     :return: None
17 |     '''
18 |     X=[   [1,2,3,4,5],
19 |           [5,4,3,2,1],
20 |           [3,3,3,3,3,],
21 |           [1,1,1,1,1] ]
22 |     print("before transform:",X)
23 |     binarizer=Binarizer(threshold=2.5)
24 |     print("after transform:",binarizer.transform(X))
25 | 
26 | if __name__=='__main__': # script entry point
27 |     test_Binarizer() # run test_Binarizer


--------------------------------------------------------------------------------
/chapters/PreProcessing/dictionary_learning.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | """
 3 |     数据预处理
 4 |     ~~~~~~~~~~~~~~~~
 5 | 
 6 |     字典学习
 7 | 
 8 |     :copyright: (c) 2016 by the huaxz1986.
 9 |     :license: lgpl-3.0, see LICENSE for more details.
10 | """
11 | from sklearn.decomposition import DictionaryLearning
12 | 
13 | def test_DictionaryLearning():
14 |     '''
15 |     测试 DictionaryLearning 的用法
16 | 
17 |     :return: None
18 |     '''
19 |     X=[[1,2,3,4,5],
20 |        [6,7,8,9,10],
21 |        [10,9,8,7,6,],
22 |        [5,4,3,2,1] ]
23 |     print("before transform:",X)
24 |     dct=DictionaryLearning(n_components=3)
25 |     dct.fit(X)
26 |     print("components is :",dct.components_)
27 |     print("after transform:",dct.transform(X))
28 | 
29 | if __name__=='__main__': # script entry point
30 |     test_DictionaryLearning() # run test_DictionaryLearning
31 | 


--------------------------------------------------------------------------------
/chapters/PreProcessing/feature_selection_bagging.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | """
 3 |     数据预处理
 4 |     ~~~~~~~~~~~~~~~~
 5 | 
 6 |     包裹式特征选择
 7 | 
 8 |     :copyright: (c) 2016 by the huaxz1986.
 9 |     :license: lgpl-3.0, see LICENSE for more details.
10 | """
11 | 
12 | from sklearn.feature_selection import  RFE,RFECV
13 | from sklearn.svm import LinearSVC
14 | from sklearn.datasets import  load_iris
15 | from  sklearn import  cross_validation
16 | 
17 | def test_RFE():
18 |     '''
19 |     测试 RFE 的用法，其中目标特征数量为 2
20 | 
21 |     :return: None
22 |     '''
23 |     iris=load_iris()
24 |     X=iris.data
25 |     y=iris.target
26 |     estimator=LinearSVC()
27 |     selector=RFE(estimator=estimator,n_features_to_select=2)
28 |     selector.fit(X,y)
29 |     print("N_features %s"%selector.n_features_)
30 |     print("Support is %s"%selector.support_)
31 |     print("Ranking %s"%selector.ranking_)
32 | def test_RFECV():
33 |     '''
34 |     测试 RFECV 的用法
35 | 
36 |     :return:  None
37 |     '''
38 |     iris=load_iris()
39 |     X=iris.data
40 |     y=iris.target
41 |     estimator=LinearSVC()
42 |     selector=RFECV(estimator=estimator,cv=3)
43 |     selector.fit(X,y)
44 |     print("N_features %s"%selector.n_features_)
45 |     print("Support is %s"%selector.support_)
46 |     print("Ranking %s"%selector.ranking_)
47 |     print("Grid Scores %s"%selector.grid_scores_)
48 | def test_compare_with_no_feature_selection():
49 |     '''
50 |     比较经过特征选择和未经特征选择的数据集，对 LinearSVC 的预测性能的区别
51 | 
52 |     :return: None
53 |     '''
54 |     ### 加载数据
55 |     iris=load_iris()
56 |     X,y=iris.data,iris.target
57 |     ### 特征提取
58 |     estimator=LinearSVC()
59 |     selector=RFE(estimator=estimator,n_features_to_select=2)
60 |     X_t=selector.fit_transform(X,y)
61 |     #### 切分测试集与验证集
62 |     X_train,X_test,y_train,y_test=cross_validation.train_test_split(X, y,
63 |                 test_size=0.25,random_state=0,stratify=y)
64 |     X_train_t,X_test_t,y_train_t,y_test_t=cross_validation.train_test_split(X_t, y,
65 |                 test_size=0.25,random_state=0,stratify=y)
66 |     ### 测试与验证
67 |     clf=LinearSVC()
68 |     clf_t=LinearSVC()
69 |     clf.fit(X_train,y_train)
70 |     clf_t.fit(X_train_t,y_train_t)
71 |     print("Original DataSet: test score=%s"%(clf.score(X_test,y_test)))
72 |     print("Selected DataSet: test score=%s"%(clf_t.score(X_test_t,y_test_t)))
73 | if __name__=='__main__': # script entry point: all three demos run in order
74 |     test_RFE() # run test_RFE
75 |     test_compare_with_no_feature_selection() # run test_compare_with_no_feature_selection
76 |     test_RFECV() # run test_RFECV


--------------------------------------------------------------------------------
/chapters/PreProcessing/feature_selection_embeded.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | """
 3 |     数据预处理
 4 |     ~~~~~~~~~~~~~~~~
 5 | 
 6 |     嵌入式特征选择
 7 | 
 8 |     :copyright: (c) 2016 by the huaxz1986.
 9 |     :license: lgpl-3.0, see LICENSE for more details.
10 | """
11 | 
12 | from sklearn.feature_selection import  SelectFromModel
13 | from sklearn.svm import LinearSVC
14 | from sklearn.datasets import  load_digits,load_diabetes
15 | import numpy as np
16 | import  matplotlib.pyplot as plt
17 | from sklearn.linear_model import Lasso
18 | 
19 | def test_SelectFromModel():
20 |     '''
21 |     测试 SelectFromModel 的用法。
22 | 
23 |     :return: None
24 |     '''
25 |     digits=load_digits()
26 |     X=digits.data
27 |     y=digits.target
28 |     estimator=LinearSVC(penalty='l1',dual=False)
29 |     selector=SelectFromModel(estimator=estimator,threshold='mean')
30 |     selector.fit(X,y)
31 |     selector.transform(X)
32 |     print("Threshold %s"%selector.threshold_)
33 |     print("Support is %s"%selector.get_support(indices=True))
34 | def test_Lasso(*data):
35 |     '''
36 |     测试 alpha 与稀疏性的关系
37 | 
38 |     :param data: 可变参数。它是一个元组，这里要求其元素依次为：训练样本集、测试样本集、训练样本的值、测试样本的值
39 |     :return: None
40 |     '''
41 |     X,y=data
42 |     alphas=np.logspace(-2,2)
43 |     zeros=[]
44 |     for alpha in alphas:
45 |         regr=Lasso(alpha=alpha)
46 |         regr.fit(X,y)
47 |         ### 计算零的个数 ###
48 |         num=0
49 |         for ele in regr.coef_:
50 |             if abs(ele) < 1e-5:num+=1
51 |         zeros.append(num)
52 |     ##### 绘图
53 |     fig=plt.figure()
54 |     ax=fig.add_subplot(1,1,1)
55 |     ax.plot(alphas,zeros)
56 |     ax.set_xlabel(r"$\alpha$")
57 |     ax.set_xscale("log")
58 |     ax.set_ylim(0,X.shape[1]+1)
59 |     ax.set_ylabel("zeros in coef")
60 |     ax.set_title("Sparsity In Lasso")
61 |     plt.show()
62 | def test_LinearSVC(*data):
63 |     '''
64 |     测试 C  与 稀疏性的关系
65 | 
66 |     :param data: 可变参数。它是一个元组，这里要求其元素依次为：训练样本集、测试样本集、训练样本的标记、测试样本的标记
67 |     :return: None
68 |     '''
69 |     X,y=data
70 |     Cs=np.logspace(-2,2)
71 |     zeros=[]
72 |     for C in Cs:
73 |         clf=LinearSVC(C=C,penalty='l1',dual=False)
74 |         clf.fit(X,y)
75 |          ### 计算零的个数 ###
76 |         num=0
77 |         for row in clf.coef_:
78 |             for ele in row:
79 |                 if abs(ele) < 1e-5:num+=1
80 |         zeros.append(num)
81 |     ##### 绘图
82 |     fig=plt.figure()
83 |     ax=fig.add_subplot(1,1,1)
84 |     ax.plot(Cs,zeros)
85 |     ax.set_xlabel("C")
86 |     ax.set_xscale("log")
87 |     ax.set_ylabel("zeros in coef")
88 |     ax.set_title("Sparsity In SVM")
89 |     plt.show()
90 | if __name__=='__main__': # script entry point: only the un-commented demo runs
91 |     test_SelectFromModel() # run test_SelectFromModel
92 |     # data=load_diabetes() # data set for the regression problem
93 |     # test_Lasso(data.data,data.target) # run test_Lasso
94 |     # data=load_digits() # data set for the classification problem
95 |     # test_LinearSVC(data.data,data.target) # run test_LinearSVC


--------------------------------------------------------------------------------
/chapters/PreProcessing/feature_selection_filter.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | """
 3 |     数据预处理
 4 |     ~~~~~~~~~~~~~~~~
 5 | 
 6 |     过滤式特征选择
 7 | 
 8 |     :copyright: (c) 2016 by the huaxz1986.
 9 |     :license: lgpl-3.0, see LICENSE for more details.
10 | """
11 | 
12 | from sklearn.feature_selection import  VarianceThreshold,SelectKBest,f_classif
13 | 
14 | def test_VarianceThreshold():
15 |     '''
16 |     测试 VarianceThreshold  的用法
17 | 
18 |     :return:  None
19 |     '''
20 |     X=[[100,1,2,3],
21 |        [100,4,5,6],
22 |        [100,7,8,9],
23 |        [101,11,12,13]]
24 |     selector=VarianceThreshold(1)
25 |     selector.fit(X)
26 |     print("Variances is %s"%selector.variances_)
27 |     print("After transform is %s"%selector.transform(X))
28 |     print("The surport is %s"%selector.get_support(True))
29 |     print("After reverse transform is %s"%
30 |             selector.inverse_transform(selector.transform(X)))
31 | def test_SelectKBest():
32 |     '''
33 |     测试 SelectKBest  的用法，其中考察的特征指标是 f_classif
34 | 
35 |     :return:  None
36 |     '''
37 |     X=[   [1,2,3,4,5],
38 |           [5,4,3,2,1],
39 |           [3,3,3,3,3,],
40 |           [1,1,1,1,1] ]
41 |     y=[0,1,0,1]
42 |     print("before transform:",X)
43 |     selector=SelectKBest(score_func=f_classif,k=3)
44 |     selector.fit(X,y)
45 |     print("scores_:",selector.scores_)
46 |     print("pvalues_:",selector.pvalues_)
47 |     print("selected index:",selector.get_support(True))
48 |     print("after transform:",selector.transform(X))
49 | if __name__=='__main__':
50 |     test_VarianceThreshold() # call test_VarianceThreshold
51 |     # test_SelectKBest() # call test_SelectKBest


--------------------------------------------------------------------------------
/chapters/PreProcessing/normalize.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | """
 3 |     数据预处理
 4 |     ~~~~~~~~~~~~~~~~
 5 | 
 6 |     数据正则化
 7 | 
 8 |     :copyright: (c) 2016 by the huaxz1986.
 9 |     :license: lgpl-3.0, see LICENSE for more details.
10 | """
11 | 
12 | from sklearn.preprocessing import Normalizer
13 | def test_Normalizer():
14 |     '''
15 |     测试 Normalizer 的用法
16 | 
17 |     :return: None
18 |     '''
19 |     X=[   [1,2,3,4,5],
20 |           [5,4,3,2,1],
21 |           [1,3,5,2,4,],
22 |           [2,4,1,3,5] ]
23 |     print("before transform:",X)
24 |     normalizer=Normalizer(norm='l2')
25 |     print("after transform:",normalizer.transform(X))
26 | 
27 | if __name__=='__main__':
28 |     test_Normalizer() # call test_Normalizer


--------------------------------------------------------------------------------
/chapters/PreProcessing/onehot_encode.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | """
 3 |     数据预处理
 4 |     ~~~~~~~~~~~~~~~~
 5 | 
 6 |     独热码编码
 7 | 
 8 |     :copyright: (c) 2016 by the huaxz1986.
 9 |     :license: lgpl-3.0, see LICENSE for more details.
10 | """
11 | 
12 | from sklearn.preprocessing import OneHotEncoder
13 | def test_OneHotEncoder():
14 |     '''
15 |     测试 OneHotEncoder 的用法
16 | 
17 |     :return: None
18 |     '''
19 |     X=[   [1,2,3,4,5],
20 |           [5,4,3,2,1],
21 |           [3,3,3,3,3,],
22 |           [1,1,1,1,1] ]
23 |     print("before transform:",X)
24 |     encoder=OneHotEncoder(sparse=False)
25 |     encoder.fit(X)
26 |     print("active_features_:",encoder.active_features_)
27 |     print("feature_indices_:",encoder.feature_indices_)
28 |     print("n_values_:",encoder.n_values_)
29 |     print("after transform:",encoder.transform( [[1,2,3,4,5]]))
30 | if __name__=='__main__':
31 |     test_OneHotEncoder() # call test_OneHotEncoder


--------------------------------------------------------------------------------
/chapters/PreProcessing/pipeline.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | """
 3 |     数据预处理
 4 |     ~~~~~~~~~~~~~~~~
 5 | 
 6 |     流水线
 7 | 
 8 |     :copyright: (c) 2016 by the huaxz1986.
 9 |     :license: lgpl-3.0, see LICENSE for more details.
10 | """
11 | 
12 | from sklearn.svm import  LinearSVC
13 | from sklearn.datasets import  load_digits
14 | from sklearn import  cross_validation
15 | from sklearn.linear_model import LogisticRegression
16 | from  sklearn.pipeline import Pipeline
17 | def test_Pipeline(data):
18 |     '''
19 |     测试 Pipeline 的用法
20 | 
21 |     :param data:  一个元组，这里要求其元素依次为：训练样本集、测试样本集、训练样本的标记、测试样本的标记
22 |     :return: None
23 |     '''
24 |     X_train,X_test,y_train,y_test=data
25 |     steps=[("Linear_SVM",LinearSVC(C=1,penalty='l1',dual=False)),
26 |            ("LogisticRegression",LogisticRegression(C=1))]
27 |     pipeline=Pipeline(steps)
28 |     pipeline.fit(X_train,y_train)
29 |     print("Named steps:",pipeline.named_steps)
30 |     print("Pipeline Score:",pipeline.score(X_test,y_test))
31 | if __name__=='__main__':
32 |     data=load_digits() # load the data set used for the classification problem
33 |     test_Pipeline(cross_validation.train_test_split(data.data, data.target,test_size=0.25
34 | 			,random_state=0,stratify=data.target)) # call test_Pipeline
35 | 


--------------------------------------------------------------------------------
/chapters/PreProcessing/standardize.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | """
 3 |     数据预处理
 4 |     ~~~~~~~~~~~~~~~~
 5 | 
 6 |     数据标准化
 7 | 
 8 |     :copyright: (c) 2016 by the huaxz1986.
 9 |     :license: lgpl-3.0, see LICENSE for more details.
10 | """
11 | 
12 | from sklearn.preprocessing import MinMaxScaler,MaxAbsScaler,StandardScaler
13 | 
14 | def test_MinMaxScaler():
15 |     '''
16 |     测试 MinMaxScaler 的用法
17 | 
18 |     :return: None
19 |     '''
20 |     X=[   [1,5,1,2,10],
21 |       [2,6,3,2,7],
22 |       [3,7,5,6,4,],
23 |       [4,8,7,8,1] ]
24 |     print("before transform:",X)
25 |     scaler=MinMaxScaler(feature_range=(0,2))
26 |     scaler.fit(X)
27 |     print("min_ is :",scaler.min_)
28 |     print("scale_ is :",scaler.scale_)
29 |     print("data_max_ is :",scaler.data_max_)
30 |     print("data_min_ is :",scaler.data_min_)
31 |     print("data_range_ is :",scaler.data_range_)
32 |     print("after transform:",scaler.transform(X))
33 | def test_MaxAbsScaler():
34 |     '''
35 |     测试 MaxAbsScaler 的用法
36 | 
37 |     :return: None
38 |     '''
39 |     X=[   [1,5,1,2,10],
40 |       [2,6,3,2,7],
41 |       [3,7,5,6,4,],
42 |       [4,8,7,8,1] ]
43 |     print("before transform:",X)
44 |     scaler=MaxAbsScaler()
45 |     scaler.fit(X)
46 |     print("scale_ is :",scaler.scale_)
47 |     print("max_abs_ is :",scaler.max_abs_)
48 |     print("after transform:",scaler.transform(X))
49 | def test_StandardScaler():
50 |     '''
51 |     测试 StandardScaler 的用法
52 | 
53 |     :return: None
54 |     '''
55 |     X=[   [1,5,1,2,10],
56 |       [2,6,3,2,7],
57 |       [3,7,5,6,4,],
58 |       [4,8,7,8,1] ]
59 |     print("before transform:",X)
60 |     scaler=StandardScaler()
61 |     scaler.fit(X)
62 |     print("scale_ is :",scaler.scale_)
63 |     print("mean_ is :",scaler.mean_)
64 |     print("var_ is :",scaler.var_)
65 |     print("after transform:",scaler.transform(X))
66 | 
67 | if __name__=='__main__':
68 |     test_MinMaxScaler()  # call test_MinMaxScaler
69 |     # test_MaxAbsScaler()  # call test_MaxAbsScaler
70 |     # test_StandardScaler()  # call test_StandardScaler (this line used to repeat test_MaxAbsScaler)


--------------------------------------------------------------------------------
/chapters/SVM/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/huaxz1986/git_book/cf539065568c2588df87729c1c76303016933449/chapters/SVM/__init__.py


--------------------------------------------------------------------------------
/chapters/SVM/__pycache__/SVC.cpython-35.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/huaxz1986/git_book/cf539065568c2588df87729c1c76303016933449/chapters/SVM/__pycache__/SVC.cpython-35.pyc


--------------------------------------------------------------------------------
/chapters/SVM/__pycache__/SVR.cpython-35.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/huaxz1986/git_book/cf539065568c2588df87729c1c76303016933449/chapters/SVM/__pycache__/SVR.cpython-35.pyc


--------------------------------------------------------------------------------
/chapters/SVM/__pycache__/__init__.cpython-35.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/huaxz1986/git_book/cf539065568c2588df87729c1c76303016933449/chapters/SVM/__pycache__/__init__.cpython-35.pyc


--------------------------------------------------------------------------------
/chapters/SVM/__pycache__/linearSVC.cpython-35.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/huaxz1986/git_book/cf539065568c2588df87729c1c76303016933449/chapters/SVM/__pycache__/linearSVC.cpython-35.pyc


--------------------------------------------------------------------------------
/chapters/SVM/__pycache__/linearSVR.cpython-35.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/huaxz1986/git_book/cf539065568c2588df87729c1c76303016933449/chapters/SVM/__pycache__/linearSVR.cpython-35.pyc


--------------------------------------------------------------------------------
/chapters/SVM/linearSVC.py:
--------------------------------------------------------------------------------
  1 | # -*- coding: utf-8 -*-
  2 | """
  3 |     支持向量机
  4 |     ~~~~~~~~~~~~~~~~
  5 | 
  6 |     LinearSVC
  7 | 
  8 |     :copyright: (c) 2016 by the huaxz1986.
  9 |     :license: lgpl-3.0, see LICENSE for more details.
 10 | """
 11 | import matplotlib.pyplot as plt
 12 | import numpy as np
 13 | from sklearn import datasets, linear_model,cross_validation,svm
 14 | 
 15 | def load_data_classfication():
 16 |     '''
 17 |     加载用于分类问题的数据集
 18 | 
 19 |     :return: 一个元组，用于分类问题。元组元素依次为：训练样本集、测试样本集、训练样本集对应的标记、测试样本集对应的标记
 20 |     '''
 21 |     iris=datasets.load_iris() # 使用 scikit-learn 自带的 iris 数据集
 22 |     X_train=iris.data
 23 |     y_train=iris.target
 24 |     return cross_validation.train_test_split(X_train, y_train,test_size=0.25,
 25 | 		random_state=0,stratify=y_train) # 分层采样拆分成训练集和测试集，测试集大小为原始数据集大小的 1/4
 26 | 
 27 | def test_LinearSVC(*data):
 28 |     '''
 29 |     测试 LinearSVC 的用法
 30 | 
 31 |     :param data: 可变参数。它是一个元组，这里要求其元素依次为：训练样本集、测试样本集、训练样本的标记、测试样本的标记
 32 |     :return:  None
 33 |     '''
 34 |     X_train,X_test,y_train,y_test=data
 35 |     cls=svm.LinearSVC()
 36 |     cls.fit(X_train,y_train)
 37 |     print('Coefficients:%s, intercept %s'%(cls.coef_,cls.intercept_))
 38 |     print('Score: %.2f' % cls.score(X_test, y_test))
 39 | def test_LinearSVC_loss(*data):
 40 |     '''
 41 |     测试 LinearSVC 的预测性能随损失函数的影响
 42 | 
 43 |     :param data:  可变参数。它是一个元组，这里要求其元素依次为：训练样本集、测试样本集、训练样本的标记、测试样本的标记
 44 |     :return:  None
 45 |     '''
 46 |     X_train,X_test,y_train,y_test=data
 47 |     losses=['hinge','squared_hinge']
 48 |     for loss in losses:
 49 |         cls=svm.LinearSVC(loss=loss)
 50 |         cls.fit(X_train,y_train)
 51 |         print("Loss:%f"%loss)
 52 |         print('Coefficients:%s, intercept %s'%(cls.coef_,cls.intercept_))
 53 |         print('Score: %.2f' % cls.score(X_test, y_test))
 54 | def test_LinearSVC_L12(*data):
 55 |     '''
 56 |     测试 LinearSVC 的预测性能随正则化形式的影响
 57 | 
 58 |     :param data:  可变参数。它是一个元组，这里要求其元素依次为：训练样本集、测试样本集、训练样本的标记、测试样本的标记
 59 |     :return:  None
 60 |     '''
 61 |     X_train,X_test,y_train,y_test=data
 62 |     L12=['l1','l2']
 63 |     for p in L12:
 64 |         cls=svm.LinearSVC(penalty=p,dual=False)
 65 |         cls.fit(X_train,y_train)
 66 |         print("penalty:%s"%p)
 67 |         print('Coefficients:%s, intercept %s'%(cls.coef_,cls.intercept_))
 68 |         print('Score: %.2f' % cls.score(X_test, y_test))
 69 | def test_LinearSVC_C(*data):
 70 |     '''
 71 |     测试 LinearSVC 的预测性能随参数 C 的影响
 72 | 
 73 |     :param data: 可变参数。它是一个元组，这里要求其元素依次为：训练样本集、测试样本集、训练样本的标记、测试样本的标记
 74 |     :return:   None
 75 |     '''
 76 |     X_train,X_test,y_train,y_test=data
 77 |     Cs=np.logspace(-2,1)
 78 |     train_scores=[]
 79 |     test_scores=[]
 80 |     for C in Cs:
 81 |         cls=svm.LinearSVC(C=C)
 82 |         cls.fit(X_train,y_train)
 83 |         train_scores.append(cls.score(X_train,y_train))
 84 |         test_scores.append(cls.score(X_test,y_test))
 85 | 
 86 |     ## 绘图
 87 |     fig=plt.figure()
 88 |     ax=fig.add_subplot(1,1,1)
 89 |     ax.plot(Cs,train_scores,label="Traing score")
 90 |     ax.plot(Cs,test_scores,label="Testing score")
 91 |     ax.set_xlabel(r"C")
 92 |     ax.set_ylabel(r"score")
 93 |     ax.set_xscale('log')
 94 |     ax.set_title("LinearSVC")
 95 |     ax.legend(loc='best')
 96 |     plt.show()
 97 | if __name__=="__main__":
 98 |     X_train,X_test,y_train,y_test=load_data_classfication() # build the data set used for classification
 99 |     test_LinearSVC(X_train,X_test,y_train,y_test) # call test_LinearSVC
100 |     # test_LinearSVC_loss(X_train,X_test,y_train,y_test) # call test_LinearSVC_loss
101 |     # test_LinearSVC_L12(X_train,X_test,y_train,y_test) # call test_LinearSVC_L12
102 |     # test_LinearSVC_C(X_train,X_test,y_train,y_test) # call test_LinearSVC_C


--------------------------------------------------------------------------------
/chapters/SVM/linearSVR.py:
--------------------------------------------------------------------------------
  1 | # -*- coding: utf-8 -*-
  2 | """
  3 |     支持向量机
  4 |     ~~~~~~~~~~~~~~~~
  5 | 
  6 |     LinearSVR
  7 | 
  8 |     :copyright: (c) 2016 by the huaxz1986.
  9 |     :license: lgpl-3.0, see LICENSE for more details.
 10 | """
 11 | import matplotlib.pyplot as plt
 12 | import numpy as np
 13 | from sklearn import datasets, linear_model,cross_validation,svm
 14 | def load_data_regression():
 15 |     '''
 16 |     加载用于回归问题的数据集
 17 | 
 18 |     :return: 一个元组，用于回归问题。元组元素依次为：训练样本集、测试样本集、训练样本集对应的值、测试样本集对应的值
 19 |     '''
 20 |     diabetes = datasets.load_diabetes() #使用 scikit-learn 自带的一个糖尿病病人的数据集
 21 |     return cross_validation.train_test_split(diabetes.data,diabetes.target,
 22 | 		test_size=0.25,random_state=0)# 拆分成训练集和测试集，测试集大小为原始数据集大小的 1/4
 23 | 
 24 | def test_LinearSVR(*data):
 25 |     '''
 26 |     测试 LinearSVR 的用法
 27 | 
 28 |     :param data: 可变参数。它是一个元组，这里要求其元素依次为：训练样本集、测试样本集、训练样本的值、测试样本的值
 29 |     :return: None
 30 |     '''
 31 |     X_train,X_test,y_train,y_test=data
 32 |     regr=svm.LinearSVR()
 33 |     regr.fit(X_train,y_train)
 34 |     print('Coefficients:%s, intercept %s'%(regr.coef_,regr.intercept_))
 35 |     print('Score: %.2f' % regr.score(X_test, y_test))
 36 | def test_LinearSVR_loss(*data):
 37 |     '''
 38 |    测试 LinearSVR 的预测性能随不同损失函数的影响
 39 | 
 40 |     :param data:  可变参数。它是一个元组，这里要求其元素依次为：训练样本集、测试样本集、训练样本的值、测试样本的值
 41 |     :return:
 42 |     '''
 43 |     X_train,X_test,y_train,y_test=data
 44 |     losses=['epsilon_insensitive','squared_epsilon_insensitive']
 45 |     for loss in losses:
 46 |         regr=svm.LinearSVR(loss=loss)
 47 |         regr.fit(X_train,y_train)
 48 |         print("loss：%s"%loss)
 49 |         print('Coefficients:%s, intercept %s'%(regr.coef_,regr.intercept_))
 50 |         print('Score: %.2f' % regr.score(X_test, y_test))
 51 | def test_LinearSVR_epsilon(*data):
 52 |     '''
 53 |     测试 LinearSVR 的预测性能随 epsilon 参数的影响
 54 | 
 55 |     :param data:  可变参数。它是一个元组，这里要求其元素依次为：训练样本集、测试样本集、训练样本的值、测试样本的值
 56 |     :return: None
 57 |     '''
 58 |     X_train,X_test,y_train,y_test=data
 59 |     epsilons=np.logspace(-2,2)
 60 |     train_scores=[]
 61 |     test_scores=[]
 62 |     for  epsilon in  epsilons:
 63 |         regr=svm.LinearSVR(epsilon=epsilon,loss='squared_epsilon_insensitive')
 64 |         regr.fit(X_train,y_train)
 65 |         train_scores.append(regr.score(X_train, y_train))
 66 |         test_scores.append(regr.score(X_test, y_test))
 67 |     fig=plt.figure()
 68 |     ax=fig.add_subplot(1,1,1)
 69 |     ax.plot(epsilons,train_scores,label="Training score ",marker='+' )
 70 |     ax.plot(epsilons,test_scores,label= " Testing  score ",marker='o' )
 71 |     ax.set_title( "LinearSVR_epsilon ")
 72 |     ax.set_xscale("log")
 73 |     ax.set_xlabel(r"$\epsilon$")
 74 |     ax.set_ylabel("score")
 75 |     ax.set_ylim(-1,1.05)
 76 |     ax.legend(loc="best",framealpha=0.5)
 77 |     plt.show()
 78 | def test_LinearSVR_C(*data):
 79 |     '''
 80 |     测试 LinearSVR 的预测性能随 C 参数的影响
 81 | 
 82 |     :param data:  可变参数。它是一个元组，这里要求其元素依次为：训练样本集、测试样本集、训练样本的值、测试样本的值
 83 |     :return: None
 84 |     '''
 85 |     X_train,X_test,y_train,y_test=data
 86 |     Cs=np.logspace(-1,2)
 87 |     train_scores=[]
 88 |     test_scores=[]
 89 |     for  C in  Cs:
 90 |         regr=svm.LinearSVR(epsilon=0.1,loss='squared_epsilon_insensitive',C=C)
 91 |         regr.fit(X_train,y_train)
 92 |         train_scores.append(regr.score(X_train, y_train))
 93 |         test_scores.append(regr.score(X_test, y_test))
 94 |     fig=plt.figure()
 95 |     ax=fig.add_subplot(1,1,1)
 96 |     ax.plot(Cs,train_scores,label="Training score ",marker='+' )
 97 |     ax.plot(Cs,test_scores,label= " Testing  score ",marker='o' )
 98 |     ax.set_title( "LinearSVR_C ")
 99 |     ax.set_xscale("log")
100 |     ax.set_xlabel(r"C")
101 |     ax.set_ylabel("score")
102 |     ax.set_ylim(-1,1.05)
103 |     ax.legend(loc="best",framealpha=0.5)
104 |     plt.show()
105 | if __name__=="__main__":
106 |     X_train,X_test,y_train,y_test=load_data_regression() # build the data set used for the regression problem
107 |     test_LinearSVR(X_train,X_test,y_train,y_test) # call test_LinearSVR
108 |     # test_LinearSVR_loss(X_train,X_test,y_train,y_test) # call test_LinearSVR_loss
109 |     # test_LinearSVR_epsilon(X_train,X_test,y_train,y_test) # call test_LinearSVR_epsilon
110 |     # test_LinearSVR_C(X_train,X_test,y_train,y_test) # call test_LinearSVR_C


--------------------------------------------------------------------------------
/chapters/Semi_Supervised_Learning/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/huaxz1986/git_book/cf539065568c2588df87729c1c76303016933449/chapters/Semi_Supervised_Learning/__init__.py


--------------------------------------------------------------------------------
/chapters/Semi_Supervised_Learning/__pycache__/__init__.cpython-35.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/huaxz1986/git_book/cf539065568c2588df87729c1c76303016933449/chapters/Semi_Supervised_Learning/__pycache__/__init__.cpython-35.pyc


--------------------------------------------------------------------------------
/chapters/Semi_Supervised_Learning/__pycache__/labelPropagation.cpython-35.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/huaxz1986/git_book/cf539065568c2588df87729c1c76303016933449/chapters/Semi_Supervised_Learning/__pycache__/labelPropagation.cpython-35.pyc


--------------------------------------------------------------------------------
/chapters/Semi_Supervised_Learning/__pycache__/labelSpreading.cpython-35.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/huaxz1986/git_book/cf539065568c2588df87729c1c76303016933449/chapters/Semi_Supervised_Learning/__pycache__/labelSpreading.cpython-35.pyc


--------------------------------------------------------------------------------
/chapters/Semi_Supervised_Learning/labelPropagation.py:
--------------------------------------------------------------------------------
  1 | # -*- coding: utf-8 -*-
  2 | """
  3 |     半监督学习
  4 |     ~~~~~~~~~~~~~~~~
  5 | 
  6 |     LabelPropagation
  7 | 
  8 |     :copyright: (c) 2016 by the huaxz1986.
  9 |     :license: lgpl-3.0, see LICENSE for more details.
 10 | """
 11 | import numpy as np
 12 | import matplotlib.pyplot as plt
 13 | from sklearn import  metrics
 14 | from sklearn import datasets
 15 | from sklearn.semi_supervised import LabelPropagation
 16 | 
 17 | def load_data():
 18 |     '''
 19 |     加载数据集
 20 | 
 21 |     :return: 一个元组，依次为： 样本集合、样本标记集合、 未标记样本的下标集合
 22 |     '''
 23 |     digits = datasets.load_digits()
 24 |     ######   混洗样本　########
 25 |     rng = np.random.RandomState(0)
 26 |     indices = np.arange(len(digits.data)) # 样本下标集合
 27 |     rng.shuffle(indices) # 混洗样本下标集合
 28 |     X = digits.data[indices]
 29 |     y = digits.target[indices]
 30 |     ###### 生成未标记样本的下标集合 ####
 31 |     n_labeled_points = int(len(y)/10) # 只有 10% 的样本有标记
 32 |     unlabeled_indices = np.arange(len(y))[n_labeled_points:] # 后面 90% 的样本未标记
 33 | 
 34 |     return X,y,unlabeled_indices
 35 | 
 36 | def test_LabelPropagation(*data):
 37 |     '''
 38 |     测试 LabelPropagation 的用法
 39 | 
 40 |     :param data: 一个元组，依次为： 样本集合、样本标记集合、 未标记样本的下标集合
 41 |     :return: None
 42 |     '''
 43 |     X,y,unlabeled_indices=data
 44 |     y_train=np.copy(y) # 必须拷贝，后面要用到 y
 45 |     y_train[unlabeled_indices]=-1 # 未标记样本的标记设定为 -1
 46 |     clf=LabelPropagation(max_iter=100,kernel='rbf',gamma=0.1)
 47 |     clf.fit(X,y_train)
 48 |     ### 获取预测准确率
 49 |     predicted_labels = clf.transduction_[unlabeled_indices] # 预测标记
 50 |     true_labels = y[unlabeled_indices] # 真实标记
 51 |     print("Accuracy:%f"%metrics.accuracy_score(true_labels,predicted_labels))
 52 |     # 或者 print("Accuracy:%f"%clf.score(X[unlabeled_indices],true_labels))
 53 | def test_LabelPropagation_rbf(*data):
 54 |     '''
 55 |     测试 LabelPropagation 的 rbf 核时，预测性能随 alpha 和 gamma 的变化
 56 | 
 57 |     :param data: 一个元组，依次为： 样本集合、样本标记集合、 未标记样本的下标集合
 58 |     :return: None
 59 |     '''
 60 |     X,y,unlabeled_indices=data
 61 |     y_train=np.copy(y) # 必须拷贝，后面要用到 y
 62 |     y_train[unlabeled_indices]=-1 # 未标记样本的标记设定为 -1
 63 | 
 64 |     fig=plt.figure()
 65 |     ax=fig.add_subplot(1,1,1)
 66 |     alphas=np.linspace(0.01,1,num=10,endpoint=True)
 67 |     gammas=np.logspace(-2,2,num=50)
 68 |     colors=((1,0,0),(0,1,0),(0,0,1),(0.5,0.5,0),(0,0.5,0.5),(0.5,0,0.5),
 69 |         (0.4,0.6,0),(0.6,0.4,0),(0,0.6,0.4),(0.5,0.3,0.2),) # 颜色集合，不同曲线用不同颜色
 70 |     ## 训练并绘图
 71 |     for alpha,color in zip(alphas,colors):
 72 |         scores=[]
 73 |         for gamma in gammas:
 74 |             clf=LabelPropagation(max_iter=100,gamma=gamma,alpha=alpha,kernel='rbf')
 75 |             clf.fit(X,y_train)
 76 |             scores.append(clf.score(X[unlabeled_indices],y[unlabeled_indices]))
 77 |         ax.plot(gammas,scores,label=r"$\alpha=%s$"%alpha,color=color)
 78 | 
 79 |     ### 设置图形
 80 |     ax.set_xlabel(r"$\gamma$")
 81 |     ax.set_ylabel("score")
 82 |     ax.set_xscale("log")
 83 |     ax.legend(loc="best")
 84 |     ax.set_title("LabelPropagation rbf kernel")
 85 |     plt.show()
 86 | def test_LabelPropagation_knn(*data):
 87 |     '''
 88 |    测试 LabelPropagation 的 knn 核时，预测性能随 alpha 和 n_neighbors 的变化
 89 | 
 90 |     :param data:  一个元组，依次为： 样本集合、样本标记集合、 未标记样本的下标集合
 91 |     :return:  None
 92 |     '''
 93 |     X,y,unlabeled_indices=data
 94 |     y_train=np.copy(y) # 必须拷贝，后面要用到 y
 95 |     y_train[unlabeled_indices]=-1 # 未标记样本的标记设定为 -1
 96 | 
 97 |     fig=plt.figure()
 98 |     ax=fig.add_subplot(1,1,1)
 99 |     alphas=np.linspace(0.01,1,num=10,endpoint=True)
100 |     Ks=[1,2,3,4,5,8,10,15,20,25,30,35,40,50]
101 |     colors=((1,0,0),(0,1,0),(0,0,1),(0.5,0.5,0),(0,0.5,0.5),(0.5,0,0.5),
102 |         (0.4,0.6,0),(0.6,0.4,0),(0,0.6,0.4),(0.5,0.3,0.2),) # 颜色集合，不同曲线用不同颜色
103 |     ## 训练并绘图
104 |     for alpha,color in zip(alphas,colors):
105 |         scores=[]
106 |         for K in Ks:
107 |             clf=LabelPropagation(max_iter=100,n_neighbors=K,alpha=alpha,kernel='knn')
108 |             clf.fit(X,y_train)
109 |             scores.append(clf.score(X[unlabeled_indices],y[unlabeled_indices]))
110 |         ax.plot(Ks,scores,label=r"$\alpha=%s$"%alpha,color=color)
111 | 
112 |     ### 设置图形
113 |     ax.set_xlabel(r"$k$")
114 |     ax.set_ylabel("score")
115 |     ax.legend(loc="best")
116 |     ax.set_title("LabelPropagation knn kernel")
117 |     plt.show()
118 | if __name__=='__main__':
119 |     data=load_data() # load the semi-supervised classification data set
120 |     #test_LabelPropagation(*data) # call test_LabelPropagation
121 |     #test_LabelPropagation_rbf(*data)# call test_LabelPropagation_rbf
122 |     test_LabelPropagation_knn(*data)# call test_LabelPropagation_knn


--------------------------------------------------------------------------------
/chapters/Semi_Supervised_Learning/labelSpreading.py:
--------------------------------------------------------------------------------
  1 | # -*- coding: utf-8 -*-
  2 | """
  3 |     半监督学习
  4 |     ~~~~~~~~~~~~~~~~
  5 | 
  6 |     LabelSpreading
  7 | 
  8 |     :copyright: (c) 2016 by the huaxz1986.
  9 |     :license: lgpl-3.0, see LICENSE for more details.
 10 | """
 11 | import numpy as np
 12 | import matplotlib.pyplot as plt
 13 | from sklearn import  metrics
 14 | from sklearn import datasets
 15 | from sklearn.semi_supervised.label_propagation import LabelSpreading
 16 | 
 17 | def load_data():
 18 |     '''
 19 |     加载数据集
 20 | 
 21 |     :return: 一个元组，依次为： 样本集合、样本标记集合、 未标记样本的下标集合
 22 |     '''
 23 |     digits = datasets.load_digits()
 24 |     ######   混洗样本　########
 25 |     rng = np.random.RandomState(0)
 26 |     indices = np.arange(len(digits.data)) # 样本下标集合
 27 |     rng.shuffle(indices) # 混洗样本下标集合
 28 |     X = digits.data[indices]
 29 |     y = digits.target[indices]
 30 |     ###### 生成未标记样本的下标集合 ####
 31 |     n_labeled_points = int(len(y)/10) # 只有 10% 的样本有标记
 32 |     unlabeled_indices = np.arange(len(y))[n_labeled_points:] # 后面 90% 的样本未标记
 33 | 
 34 |     return X,y,unlabeled_indices
 35 | 
 36 | def test_LabelSpreading(*data):
 37 |     '''
 38 |     测试 LabelSpreading 的用法
 39 | 
 40 |     :param data: 一个元组，依次为： 样本集合、样本标记集合、 未标记样本的下标集合
 41 |     :return: None
 42 |     '''
 43 |     X,y,unlabeled_indices=data
 44 |     y_train=np.copy(y) # 必须拷贝，后面要用到 y
 45 |     y_train[unlabeled_indices]=-1 # 未标记样本的标记设定为 -1
 46 |     clf=LabelSpreading(max_iter=100,kernel='rbf',gamma=0.1)
 47 |     clf.fit(X,y_train)
 48 |     ### 获取预测准确率
 49 |     predicted_labels = clf.transduction_[unlabeled_indices] # 预测标记
 50 |     true_labels = y[unlabeled_indices] # 真实标记
 51 |     print("Accuracy:%f"%metrics.accuracy_score(true_labels,predicted_labels))
 52 |     # 或者 print("Accuracy:%f"%clf.score(X[unlabeled_indices],true_labels))
 53 | def test_LabelSpreading_rbf(*data):
 54 |     '''
 55 |     测试 LabelSpreading 的 rbf 核时，预测性能随 alpha 和 gamma 的变化
 56 | 
 57 |     :param data: 一个元组，依次为： 样本集合、样本标记集合、 未标记样本的下标集合
 58 |     :return: None
 59 |     '''
 60 |     X,y,unlabeled_indices=data
 61 |     y_train=np.copy(y) # 必须拷贝，后面要用到 y
 62 |     y_train[unlabeled_indices]=-1 # 未标记样本的标记设定为 -1
 63 | 
 64 |     fig=plt.figure()
 65 |     ax=fig.add_subplot(1,1,1)
 66 |     alphas=np.linspace(0.01,1,num=10,endpoint=True)
 67 |     gammas=np.logspace(-2,2,num=50)
 68 |     colors=((1,0,0),(0,1,0),(0,0,1),(0.5,0.5,0),(0,0.5,0.5),(0.5,0,0.5),
 69 |         (0.4,0.6,0),(0.6,0.4,0),(0,0.6,0.4),(0.5,0.3,0.2),) # 颜色集合，不同曲线用不同颜色
 70 |     ## 训练并绘图
 71 |     for alpha,color in zip(alphas,colors):
 72 |         scores=[]
 73 |         for gamma in gammas:
 74 |             clf=LabelSpreading(max_iter=100,gamma=gamma,alpha=alpha,kernel='rbf')
 75 |             clf.fit(X,y_train)
 76 |             scores.append(clf.score(X[unlabeled_indices],y[unlabeled_indices]))
 77 |         ax.plot(gammas,scores,label=r"$\alpha=%s$"%alpha,color=color)
 78 | 
 79 |     ### 设置图形
 80 |     ax.set_xlabel(r"$\gamma$")
 81 |     ax.set_ylabel("score")
 82 |     ax.set_xscale("log")
 83 |     ax.legend(loc="best")
 84 |     ax.set_title("LabelSpreading rbf kernel")
 85 |     plt.show()
 86 | def test_LabelSpreading_knn(*data):
 87 |     '''
 88 |    测试 LabelSpreading 的 knn 核时，预测性能随 alpha 和 n_neighbors 的变化
 89 | 
 90 | 
 91 |     :param data:  一个元组，依次为： 样本集合、样本标记集合、 未标记样本的下标集合
 92 |     :return:  None
 93 |     '''
 94 |     X,y,unlabeled_indices=data
 95 |     y_train=np.copy(y) # 必须拷贝，后面要用到 y
 96 |     y_train[unlabeled_indices]=-1 # 未标记样本的标记设定为 -1
 97 | 
 98 |     fig=plt.figure()
 99 |     ax=fig.add_subplot(1,1,1)
100 |     alphas=np.linspace(0.01,1,num=10,endpoint=True)
101 |     Ks=[1,2,3,4,5,8,10,15,20,25,30,35,40,50]
102 |     colors=((1,0,0),(0,1,0),(0,0,1),(0.5,0.5,0),(0,0.5,0.5),(0.5,0,0.5),
103 |         (0.4,0.6,0),(0.6,0.4,0),(0,0.6,0.4),(0.5,0.3,0.2),) # 颜色集合，不同曲线用不同颜色
104 |     ## 训练并绘图
105 |     for alpha,color in zip(alphas,colors):
106 |         scores=[]
107 |         for K in Ks:
108 |             clf=LabelSpreading(kernel='knn',max_iter=100,n_neighbors=K,alpha=alpha)
109 |             clf.fit(X,y_train)
110 |             scores.append(clf.score(X[unlabeled_indices],y[unlabeled_indices]))
111 |         ax.plot(Ks,scores,label=r"$\alpha=%s$"%alpha,color=color)
112 | 
113 |     ### 设置图形
114 |     ax.set_xlabel(r"$k$")
115 |     ax.set_ylabel("score")
116 |     ax.legend(loc="best")
117 |     ax.set_title("LabelSpreading knn kernel")
118 |     plt.show()
119 | if __name__=='__main__':
120 |     data=load_data() # load the semi-supervised classification data set
121 |     #test_LabelSpreading(*data) # call test_LabelSpreading
122 |     #test_LabelSpreading_rbf(*data)# call test_LabelSpreading_rbf
123 |     test_LabelSpreading_knn(*data)# call test_LabelSpreading_knn
124 | 
125 | 


--------------------------------------------------------------------------------
/chapters/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/huaxz1986/git_book/cf539065568c2588df87729c1c76303016933449/chapters/__init__.py


--------------------------------------------------------------------------------
/chapters/__pycache__/__init__.cpython-35.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/huaxz1986/git_book/cf539065568c2588df87729c1c76303016933449/chapters/__pycache__/__init__.cpython-35.pyc


--------------------------------------------------------------------------------
/docs/build/doctrees/chapters.Bayesian.doctree:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/huaxz1986/git_book/cf539065568c2588df87729c1c76303016933449/docs/build/doctrees/chapters.Bayesian.doctree


--------------------------------------------------------------------------------
/docs/build/doctrees/chapters.Cluster_EM.doctree:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/huaxz1986/git_book/cf539065568c2588df87729c1c76303016933449/docs/build/doctrees/chapters.Cluster_EM.doctree


--------------------------------------------------------------------------------
/docs/build/doctrees/chapters.Decision_Tree.doctree:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/huaxz1986/git_book/cf539065568c2588df87729c1c76303016933449/docs/build/doctrees/chapters.Decision_Tree.doctree


--------------------------------------------------------------------------------
/docs/build/doctrees/chapters.Ensemble.doctree:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/huaxz1986/git_book/cf539065568c2588df87729c1c76303016933449/docs/build/doctrees/chapters.Ensemble.doctree


--------------------------------------------------------------------------------
/docs/build/doctrees/chapters.KNN_Dimension_Reduction.doctree:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/huaxz1986/git_book/cf539065568c2588df87729c1c76303016933449/docs/build/doctrees/chapters.KNN_Dimension_Reduction.doctree


--------------------------------------------------------------------------------
/docs/build/doctrees/chapters.Kaggle.doctree:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/huaxz1986/git_book/cf539065568c2588df87729c1c76303016933449/docs/build/doctrees/chapters.Kaggle.doctree


--------------------------------------------------------------------------------
/docs/build/doctrees/chapters.Linear.doctree:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/huaxz1986/git_book/cf539065568c2588df87729c1c76303016933449/docs/build/doctrees/chapters.Linear.doctree


--------------------------------------------------------------------------------
/docs/build/doctrees/chapters.Model_Selection.doctree:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/huaxz1986/git_book/cf539065568c2588df87729c1c76303016933449/docs/build/doctrees/chapters.Model_Selection.doctree


--------------------------------------------------------------------------------
/docs/build/doctrees/chapters.Perceptron_Neural_Network.doctree:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/huaxz1986/git_book/cf539065568c2588df87729c1c76303016933449/docs/build/doctrees/chapters.Perceptron_Neural_Network.doctree


--------------------------------------------------------------------------------
/docs/build/doctrees/chapters.PreProcessing.doctree:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/huaxz1986/git_book/cf539065568c2588df87729c1c76303016933449/docs/build/doctrees/chapters.PreProcessing.doctree


--------------------------------------------------------------------------------
/docs/build/doctrees/chapters.SVM.doctree:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/huaxz1986/git_book/cf539065568c2588df87729c1c76303016933449/docs/build/doctrees/chapters.SVM.doctree


--------------------------------------------------------------------------------
/docs/build/doctrees/chapters.Semi_Supervised_Learning.doctree:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/huaxz1986/git_book/cf539065568c2588df87729c1c76303016933449/docs/build/doctrees/chapters.Semi_Supervised_Learning.doctree


--------------------------------------------------------------------------------
/docs/build/doctrees/chapters.doctree:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/huaxz1986/git_book/cf539065568c2588df87729c1c76303016933449/docs/build/doctrees/chapters.doctree


--------------------------------------------------------------------------------
/docs/build/doctrees/environment.pickle:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/huaxz1986/git_book/cf539065568c2588df87729c1c76303016933449/docs/build/doctrees/environment.pickle


--------------------------------------------------------------------------------
/docs/build/doctrees/index.doctree:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/huaxz1986/git_book/cf539065568c2588df87729c1c76303016933449/docs/build/doctrees/index.doctree


--------------------------------------------------------------------------------
/docs/build/doctrees/modules.doctree:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/huaxz1986/git_book/cf539065568c2588df87729c1c76303016933449/docs/build/doctrees/modules.doctree


--------------------------------------------------------------------------------
/docs/build/html/.buildinfo:
--------------------------------------------------------------------------------
1 | # Sphinx build info version 1
2 | # This file hashes the configuration used when building these files. When it is not found, a full rebuild will be done.
3 | config: 3b6ff7cf1b72d4e2c1632fab2716f079
4 | tags: 645f666f9bcd5a90fca523b33c5a78b7
5 | 


--------------------------------------------------------------------------------
/docs/build/html/_sources/chapters.Bayesian.txt:
--------------------------------------------------------------------------------
 1 | chapters.Bayesian package
 2 | =========================
 3 | 
 4 | Submodules
 5 | ----------
 6 | 
 7 | chapters.Bayesian.bayesian module
 8 | ---------------------------------
 9 | 
10 | .. automodule:: chapters.Bayesian.bayesian
11 |     :members:
12 |     :undoc-members:
13 |     :show-inheritance:
14 | 
15 | chapters.Bayesian.bernoulliNB module
16 | ------------------------------------
17 | 
18 | .. automodule:: chapters.Bayesian.bernoulliNB
19 |     :members:
20 |     :undoc-members:
21 |     :show-inheritance:
22 | 
23 | chapters.Bayesian.gaussianNB module
24 | -----------------------------------
25 | 
26 | .. automodule:: chapters.Bayesian.gaussianNB
27 |     :members:
28 |     :undoc-members:
29 |     :show-inheritance:
30 | 
31 | chapters.Bayesian.multinomialNB module
32 | --------------------------------------
33 | 
34 | .. automodule:: chapters.Bayesian.multinomialNB
35 |     :members:
36 |     :undoc-members:
37 |     :show-inheritance:
38 | 
39 | 
40 | Module contents
41 | ---------------
42 | 
43 | .. automodule:: chapters.Bayesian
44 |     :members:
45 |     :undoc-members:
46 |     :show-inheritance:
47 | 


--------------------------------------------------------------------------------
/docs/build/html/_sources/chapters.Cluster_EM.txt:
--------------------------------------------------------------------------------
 1 | chapters.Cluster_EM package
 2 | ===========================
 3 | 
 4 | Submodules
 5 | ----------
 6 | 
 7 | chapters.Cluster_EM.agglomerative_clustering module
 8 | ---------------------------------------------------
 9 | 
10 | .. automodule:: chapters.Cluster_EM.agglomerative_clustering
11 |     :members:
12 |     :undoc-members:
13 |     :show-inheritance:
14 | 
15 | chapters.Cluster_EM.cluster module
16 | ----------------------------------
17 | 
18 | .. automodule:: chapters.Cluster_EM.cluster
19 |     :members:
20 |     :undoc-members:
21 |     :show-inheritance:
22 | 
23 | chapters.Cluster_EM.dbscan module
24 | ---------------------------------
25 | 
26 | .. automodule:: chapters.Cluster_EM.dbscan
27 |     :members:
28 |     :undoc-members:
29 |     :show-inheritance:
30 | 
31 | chapters.Cluster_EM.gmm module
32 | ------------------------------
33 | 
34 | .. automodule:: chapters.Cluster_EM.gmm
35 |     :members:
36 |     :undoc-members:
37 |     :show-inheritance:
38 | 
39 | chapters.Cluster_EM.kmeans module
40 | ---------------------------------
41 | 
42 | .. automodule:: chapters.Cluster_EM.kmeans
43 |     :members:
44 |     :undoc-members:
45 |     :show-inheritance:
46 | 
47 | 
48 | Module contents
49 | ---------------
50 | 
51 | .. automodule:: chapters.Cluster_EM
52 |     :members:
53 |     :undoc-members:
54 |     :show-inheritance:
55 | 


--------------------------------------------------------------------------------
/docs/build/html/_sources/chapters.Decision_Tree.txt:
--------------------------------------------------------------------------------
 1 | chapters.Decision_Tree package
 2 | ==============================
 3 | 
 4 | Submodules
 5 | ----------
 6 | 
 7 | chapters.Decision_Tree.decisiontree_classifier module
 8 | -----------------------------------------------------
 9 | 
10 | .. automodule:: chapters.Decision_Tree.decisiontree_classifier
11 |     :members:
12 |     :undoc-members:
13 |     :show-inheritance:
14 | 
15 | chapters.Decision_Tree.decisiontree_regressor module
16 | ----------------------------------------------------
17 | 
18 | .. automodule:: chapters.Decision_Tree.decisiontree_regressor
19 |     :members:
20 |     :undoc-members:
21 |     :show-inheritance:
22 | 
23 | 
24 | Module contents
25 | ---------------
26 | 
27 | .. automodule:: chapters.Decision_Tree
28 |     :members:
29 |     :undoc-members:
30 |     :show-inheritance:
31 | 


--------------------------------------------------------------------------------
/docs/build/html/_sources/chapters.Ensemble.txt:
--------------------------------------------------------------------------------
 1 | chapters.Ensemble package
 2 | =========================
 3 | 
 4 | Submodules
 5 | ----------
 6 | 
 7 | chapters.Ensemble.adaboost_classifier module
 8 | --------------------------------------------
 9 | 
10 | .. automodule:: chapters.Ensemble.adaboost_classifier
11 |     :members:
12 |     :undoc-members:
13 |     :show-inheritance:
14 | 
15 | chapters.Ensemble.adaboost_regressor module
16 | -------------------------------------------
17 | 
18 | .. automodule:: chapters.Ensemble.adaboost_regressor
19 |     :members:
20 |     :undoc-members:
21 |     :show-inheritance:
22 | 
23 | chapters.Ensemble.gradientboosting_classifier module
24 | ----------------------------------------------------
25 | 
26 | .. automodule:: chapters.Ensemble.gradientboosting_classifier
27 |     :members:
28 |     :undoc-members:
29 |     :show-inheritance:
30 | 
31 | chapters.Ensemble.gradientboosting_regressor module
32 | ---------------------------------------------------
33 | 
34 | .. automodule:: chapters.Ensemble.gradientboosting_regressor
35 |     :members:
36 |     :undoc-members:
37 |     :show-inheritance:
38 | 
39 | chapters.Ensemble.randomforest_classifier module
40 | ------------------------------------------------
41 | 
42 | .. automodule:: chapters.Ensemble.randomforest_classifier
43 |     :members:
44 |     :undoc-members:
45 |     :show-inheritance:
46 | 
47 | chapters.Ensemble.randomforest_regressor module
48 | -----------------------------------------------
49 | 
50 | .. automodule:: chapters.Ensemble.randomforest_regressor
51 |     :members:
52 |     :undoc-members:
53 |     :show-inheritance:
54 | 
55 | 
56 | Module contents
57 | ---------------
58 | 
59 | .. automodule:: chapters.Ensemble
60 |     :members:
61 |     :undoc-members:
62 |     :show-inheritance:
63 | 


--------------------------------------------------------------------------------
/docs/build/html/_sources/chapters.KNN_Dimension_Reduction.txt:
--------------------------------------------------------------------------------
 1 | chapters.KNN_Dimension_Reduction package
 2 | ========================================
 3 | 
 4 | Submodules
 5 | ----------
 6 | 
 7 | chapters.KNN_Dimension_Reduction.isomap module
 8 | ----------------------------------------------
 9 | 
10 | .. automodule:: chapters.KNN_Dimension_Reduction.isomap
11 |     :members:
12 |     :undoc-members:
13 |     :show-inheritance:
14 | 
15 | chapters.KNN_Dimension_Reduction.kneighbors_classifier module
16 | -------------------------------------------------------------
17 | 
18 | .. automodule:: chapters.KNN_Dimension_Reduction.kneighbors_classifier
19 |     :members:
20 |     :undoc-members:
21 |     :show-inheritance:
22 | 
23 | chapters.KNN_Dimension_Reduction.kneighbors_regressor module
24 | ------------------------------------------------------------
25 | 
26 | .. automodule:: chapters.KNN_Dimension_Reduction.kneighbors_regressor
27 |     :members:
28 |     :undoc-members:
29 |     :show-inheritance:
30 | 
31 | chapters.KNN_Dimension_Reduction.kpca module
32 | --------------------------------------------
33 | 
34 | .. automodule:: chapters.KNN_Dimension_Reduction.kpca
35 |     :members:
36 |     :undoc-members:
37 |     :show-inheritance:
38 | 
39 | chapters.KNN_Dimension_Reduction.lle module
40 | -------------------------------------------
41 | 
42 | .. automodule:: chapters.KNN_Dimension_Reduction.lle
43 |     :members:
44 |     :undoc-members:
45 |     :show-inheritance:
46 | 
47 | chapters.KNN_Dimension_Reduction.mds module
48 | -------------------------------------------
49 | 
50 | .. automodule:: chapters.KNN_Dimension_Reduction.mds
51 |     :members:
52 |     :undoc-members:
53 |     :show-inheritance:
54 | 
55 | chapters.KNN_Dimension_Reduction.pca module
56 | -------------------------------------------
57 | 
58 | .. automodule:: chapters.KNN_Dimension_Reduction.pca
59 |     :members:
60 |     :undoc-members:
61 |     :show-inheritance:
62 | 
63 | 
64 | Module contents
65 | ---------------
66 | 
67 | .. automodule:: chapters.KNN_Dimension_Reduction
68 |     :members:
69 |     :undoc-members:
70 |     :show-inheritance:
71 | 


--------------------------------------------------------------------------------
/docs/build/html/_sources/chapters.Kaggle.txt:
--------------------------------------------------------------------------------
 1 | chapters.Kaggle package
 2 | =======================
 3 | 
 4 | Submodules
 5 | ----------
 6 | 
 7 | chapters.Kaggle.data_clean module
 8 | ---------------------------------
 9 | 
10 | .. automodule:: chapters.Kaggle.data_clean
11 |     :members:
12 |     :undoc-members:
13 |     :show-inheritance:
14 | 
15 | chapters.Kaggle.data_preprocess module
16 | --------------------------------------
17 | 
18 | .. automodule:: chapters.Kaggle.data_preprocess
19 |     :members:
20 |     :undoc-members:
21 |     :show-inheritance:
22 | 
23 | chapters.Kaggle.grid_search module
24 | ----------------------------------
25 | 
26 | .. automodule:: chapters.Kaggle.grid_search
27 |     :members:
28 |     :undoc-members:
29 |     :show-inheritance:
30 | 
31 | chapters.Kaggle.learning_validation_curve module
32 | ------------------------------------------------
33 | 
34 | .. automodule:: chapters.Kaggle.learning_validation_curve
35 |     :members:
36 |     :undoc-members:
37 |     :show-inheritance:
38 | 
39 | 
40 | Module contents
41 | ---------------
42 | 
43 | .. automodule:: chapters.Kaggle
44 |     :members:
45 |     :undoc-members:
46 |     :show-inheritance:
47 | 


--------------------------------------------------------------------------------
/docs/build/html/_sources/chapters.Linear.txt:
--------------------------------------------------------------------------------
 1 | chapters.Linear package
 2 | =======================
 3 | 
 4 | Submodules
 5 | ----------
 6 | 
 7 | chapters.Linear.elasticnet module
 8 | ---------------------------------
 9 | 
10 | .. automodule:: chapters.Linear.elasticnet
11 |     :members:
12 |     :undoc-members:
13 |     :show-inheritance:
14 | 
15 | chapters.Linear.lasso module
16 | ----------------------------
17 | 
18 | .. automodule:: chapters.Linear.lasso
19 |     :members:
20 |     :undoc-members:
21 |     :show-inheritance:
22 | 
23 | chapters.Linear.lda module
24 | --------------------------
25 | 
26 | .. automodule:: chapters.Linear.lda
27 |     :members:
28 |     :undoc-members:
29 |     :show-inheritance:
30 | 
31 | chapters.Linear.linear_regression module
32 | ----------------------------------------
33 | 
34 | .. automodule:: chapters.Linear.linear_regression
35 |     :members:
36 |     :undoc-members:
37 |     :show-inheritance:
38 | 
39 | chapters.Linear.logistic_regression module
40 | ------------------------------------------
41 | 
42 | .. automodule:: chapters.Linear.logistic_regression
43 |     :members:
44 |     :undoc-members:
45 |     :show-inheritance:
46 | 
47 | chapters.Linear.ridge module
48 | ----------------------------
49 | 
50 | .. automodule:: chapters.Linear.ridge
51 |     :members:
52 |     :undoc-members:
53 |     :show-inheritance:
54 | 
55 | 
56 | Module contents
57 | ---------------
58 | 
59 | .. automodule:: chapters.Linear
60 |     :members:
61 |     :undoc-members:
62 |     :show-inheritance:
63 | 


--------------------------------------------------------------------------------
/docs/build/html/_sources/chapters.Model_Selection.txt:
--------------------------------------------------------------------------------
 1 | chapters.Model_Selection package
 2 | ================================
 3 | 
 4 | Submodules
 5 | ----------
 6 | 
 7 | chapters.Model_Selection.classification_metrics module
 8 | ------------------------------------------------------
 9 | 
10 | .. automodule:: chapters.Model_Selection.classification_metrics
11 |     :members:
12 |     :undoc-members:
13 |     :show-inheritance:
14 | 
15 | chapters.Model_Selection.data_splittion module
16 | ----------------------------------------------
17 | 
18 | .. automodule:: chapters.Model_Selection.data_splittion
19 |     :members:
20 |     :undoc-members:
21 |     :show-inheritance:
22 | 
23 | chapters.Model_Selection.grid_search module
24 | -------------------------------------------
25 | 
26 | .. automodule:: chapters.Model_Selection.grid_search
27 |     :members:
28 |     :undoc-members:
29 |     :show-inheritance:
30 | 
31 | chapters.Model_Selection.learning_curve module
32 | ----------------------------------------------
33 | 
34 | .. automodule:: chapters.Model_Selection.learning_curve
35 |     :members:
36 |     :undoc-members:
37 |     :show-inheritance:
38 | 
39 | chapters.Model_Selection.loss_function module
40 | ---------------------------------------------
41 | 
42 | .. automodule:: chapters.Model_Selection.loss_function
43 |     :members:
44 |     :undoc-members:
45 |     :show-inheritance:
46 | 
47 | chapters.Model_Selection.regression_metrics module
48 | --------------------------------------------------
49 | 
50 | .. automodule:: chapters.Model_Selection.regression_metrics
51 |     :members:
52 |     :undoc-members:
53 |     :show-inheritance:
54 | 
55 | chapters.Model_Selection.validation_curve module
56 | ------------------------------------------------
57 | 
58 | .. automodule:: chapters.Model_Selection.validation_curve
59 |     :members:
60 |     :undoc-members:
61 |     :show-inheritance:
62 | 
63 | 
64 | Module contents
65 | ---------------
66 | 
67 | .. automodule:: chapters.Model_Selection
68 |     :members:
69 |     :undoc-members:
70 |     :show-inheritance:
71 | 


--------------------------------------------------------------------------------
/docs/build/html/_sources/chapters.Perceptron_Neural_Network.txt:
--------------------------------------------------------------------------------
 1 | chapters.Perceptron_Neural_Network package
 2 | ==========================================
 3 | 
 4 | Submodules
 5 | ----------
 6 | 
 7 | chapters.Perceptron_Neural_Network.neural_network module
 8 | --------------------------------------------------------
 9 | 
10 | .. automodule:: chapters.Perceptron_Neural_Network.neural_network
11 |     :members:
12 |     :undoc-members:
13 |     :show-inheritance:
14 | 
15 | chapters.Perceptron_Neural_Network.neural_network_iris module
16 | -------------------------------------------------------------
17 | 
18 | .. automodule:: chapters.Perceptron_Neural_Network.neural_network_iris
19 |     :members:
20 |     :undoc-members:
21 |     :show-inheritance:
22 | 
23 | chapters.Perceptron_Neural_Network.perceptron module
24 | ----------------------------------------------------
25 | 
26 | .. automodule:: chapters.Perceptron_Neural_Network.perceptron
27 |     :members:
28 |     :undoc-members:
29 |     :show-inheritance:
30 | 
31 | 
32 | Module contents
33 | ---------------
34 | 
35 | .. automodule:: chapters.Perceptron_Neural_Network
36 |     :members:
37 |     :undoc-members:
38 |     :show-inheritance:
39 | 


--------------------------------------------------------------------------------
/docs/build/html/_sources/chapters.PreProcessing.txt:
--------------------------------------------------------------------------------
 1 | chapters.PreProcessing package
 2 | ==============================
 3 | 
 4 | Submodules
 5 | ----------
 6 | 
 7 | chapters.PreProcessing.binarize module
 8 | --------------------------------------
 9 | 
10 | .. automodule:: chapters.PreProcessing.binarize
11 |     :members:
12 |     :undoc-members:
13 |     :show-inheritance:
14 | 
15 | chapters.PreProcessing.dictionary_learning module
16 | -------------------------------------------------
17 | 
18 | .. automodule:: chapters.PreProcessing.dictionary_learning
19 |     :members:
20 |     :undoc-members:
21 |     :show-inheritance:
22 | 
23 | chapters.PreProcessing.feature_selection_bagging module
24 | -------------------------------------------------------
25 | 
26 | .. automodule:: chapters.PreProcessing.feature_selection_bagging
27 |     :members:
28 |     :undoc-members:
29 |     :show-inheritance:
30 | 
31 | chapters.PreProcessing.feature_selection_embeded module
32 | -------------------------------------------------------
33 | 
34 | .. automodule:: chapters.PreProcessing.feature_selection_embeded
35 |     :members:
36 |     :undoc-members:
37 |     :show-inheritance:
38 | 
39 | chapters.PreProcessing.feature_selection_filter module
40 | ------------------------------------------------------
41 | 
42 | .. automodule:: chapters.PreProcessing.feature_selection_filter
43 |     :members:
44 |     :undoc-members:
45 |     :show-inheritance:
46 | 
47 | chapters.PreProcessing.normalize module
48 | ---------------------------------------
49 | 
50 | .. automodule:: chapters.PreProcessing.normalize
51 |     :members:
52 |     :undoc-members:
53 |     :show-inheritance:
54 | 
55 | chapters.PreProcessing.onehot_encode module
56 | -------------------------------------------
57 | 
58 | .. automodule:: chapters.PreProcessing.onehot_encode
59 |     :members:
60 |     :undoc-members:
61 |     :show-inheritance:
62 | 
63 | chapters.PreProcessing.pipeline module
64 | --------------------------------------
65 | 
66 | .. automodule:: chapters.PreProcessing.pipeline
67 |     :members:
68 |     :undoc-members:
69 |     :show-inheritance:
70 | 
71 | chapters.PreProcessing.standardize module
72 | -----------------------------------------
73 | 
74 | .. automodule:: chapters.PreProcessing.standardize
75 |     :members:
76 |     :undoc-members:
77 |     :show-inheritance:
78 | 
79 | 
80 | Module contents
81 | ---------------
82 | 
83 | .. automodule:: chapters.PreProcessing
84 |     :members:
85 |     :undoc-members:
86 |     :show-inheritance:
87 | 


--------------------------------------------------------------------------------
/docs/build/html/_sources/chapters.SVM.txt:
--------------------------------------------------------------------------------
 1 | chapters.SVM package
 2 | ====================
 3 | 
 4 | Submodules
 5 | ----------
 6 | 
 7 | chapters.SVM.SVC module
 8 | -----------------------
 9 | 
10 | .. automodule:: chapters.SVM.SVC
11 |     :members:
12 |     :undoc-members:
13 |     :show-inheritance:
14 | 
15 | chapters.SVM.SVR module
16 | -----------------------
17 | 
18 | .. automodule:: chapters.SVM.SVR
19 |     :members:
20 |     :undoc-members:
21 |     :show-inheritance:
22 | 
23 | chapters.SVM.linearSVC module
24 | -----------------------------
25 | 
26 | .. automodule:: chapters.SVM.linearSVC
27 |     :members:
28 |     :undoc-members:
29 |     :show-inheritance:
30 | 
31 | chapters.SVM.linearSVR module
32 | -----------------------------
33 | 
34 | .. automodule:: chapters.SVM.linearSVR
35 |     :members:
36 |     :undoc-members:
37 |     :show-inheritance:
38 | 
39 | 
40 | Module contents
41 | ---------------
42 | 
43 | .. automodule:: chapters.SVM
44 |     :members:
45 |     :undoc-members:
46 |     :show-inheritance:
47 | 


--------------------------------------------------------------------------------
/docs/build/html/_sources/chapters.Semi_Supervised_Learning.txt:
--------------------------------------------------------------------------------
 1 | chapters.Semi_Supervised_Learning package
 2 | =========================================
 3 | 
 4 | Submodules
 5 | ----------
 6 | 
 7 | chapters.Semi_Supervised_Learning.labelPropagation module
 8 | ---------------------------------------------------------
 9 | 
10 | .. automodule:: chapters.Semi_Supervised_Learning.labelPropagation
11 |     :members:
12 |     :undoc-members:
13 |     :show-inheritance:
14 | 
15 | chapters.Semi_Supervised_Learning.labelSpreading module
16 | -------------------------------------------------------
17 | 
18 | .. automodule:: chapters.Semi_Supervised_Learning.labelSpreading
19 |     :members:
20 |     :undoc-members:
21 |     :show-inheritance:
22 | 
23 | 
24 | Module contents
25 | ---------------
26 | 
27 | .. automodule:: chapters.Semi_Supervised_Learning
28 |     :members:
29 |     :undoc-members:
30 |     :show-inheritance:
31 | 


--------------------------------------------------------------------------------
/docs/build/html/_sources/chapters.txt:
--------------------------------------------------------------------------------
 1 | chapters package
 2 | ================
 3 | 
 4 | Subpackages
 5 | -----------
 6 | 
 7 | .. toctree::
 8 | 
 9 |     chapters.Bayesian
10 |     chapters.Cluster_EM
11 |     chapters.Decision_Tree
12 |     chapters.Ensemble
13 |     chapters.KNN_Dimension_Reduction
14 |     chapters.Kaggle
15 |     chapters.Linear
16 |     chapters.Model_Selection
17 |     chapters.Perceptron_Neural_Network
18 |     chapters.PreProcessing
19 |     chapters.SVM
20 |     chapters.Semi_Supervised_Learning
21 | 
22 | Module contents
23 | ---------------
24 | 
25 | .. automodule:: chapters
26 |     :members:
27 |     :undoc-members:
28 |     :show-inheritance:
29 | 


--------------------------------------------------------------------------------
/docs/build/html/_sources/index.txt:
--------------------------------------------------------------------------------
 1 | .. book documentation master file, created by
 2 |    sphinx-quickstart on Wed Aug 17 17:09:32 2016.
 3 |    You can adapt this file completely to your liking, but it should at least
 4 |    contain the root `toctree` directive.
 5 | 
 6 | Welcome to book's documentation!
 7 | ================================
 8 | 
 9 | Contents:
10 | 
11 | .. toctree::
12 |    :maxdepth: 2
13 | 
14 |    modules
15 | 
16 | Indices and tables
17 | ==================
18 | 
19 | * :ref:`genindex`
20 | * :ref:`modindex`
21 | * :ref:`search`
22 | 
23 | 


--------------------------------------------------------------------------------
/docs/build/html/_sources/modules.txt:
--------------------------------------------------------------------------------
1 | chapters
2 | ========
3 | 
4 | .. toctree::
5 |    :maxdepth: 4
6 | 
7 |    chapters
8 | 


--------------------------------------------------------------------------------
/docs/build/html/_static/ajax-loader.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/huaxz1986/git_book/cf539065568c2588df87729c1c76303016933449/docs/build/html/_static/ajax-loader.gif


--------------------------------------------------------------------------------
/docs/build/html/_static/comment-bright.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/huaxz1986/git_book/cf539065568c2588df87729c1c76303016933449/docs/build/html/_static/comment-bright.png


--------------------------------------------------------------------------------
/docs/build/html/_static/comment-close.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/huaxz1986/git_book/cf539065568c2588df87729c1c76303016933449/docs/build/html/_static/comment-close.png


--------------------------------------------------------------------------------
/docs/build/html/_static/comment.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/huaxz1986/git_book/cf539065568c2588df87729c1c76303016933449/docs/build/html/_static/comment.png


--------------------------------------------------------------------------------
/docs/build/html/_static/down-pressed.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/huaxz1986/git_book/cf539065568c2588df87729c1c76303016933449/docs/build/html/_static/down-pressed.png


--------------------------------------------------------------------------------
/docs/build/html/_static/down.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/huaxz1986/git_book/cf539065568c2588df87729c1c76303016933449/docs/build/html/_static/down.png


--------------------------------------------------------------------------------
/docs/build/html/_static/file.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/huaxz1986/git_book/cf539065568c2588df87729c1c76303016933449/docs/build/html/_static/file.png


--------------------------------------------------------------------------------
/docs/build/html/_static/minus.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/huaxz1986/git_book/cf539065568c2588df87729c1c76303016933449/docs/build/html/_static/minus.png


--------------------------------------------------------------------------------
/docs/build/html/_static/plus.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/huaxz1986/git_book/cf539065568c2588df87729c1c76303016933449/docs/build/html/_static/plus.png


--------------------------------------------------------------------------------
/docs/build/html/_static/pygments.css:
--------------------------------------------------------------------------------
 1 | .highlight .hll { background-color: #ffffcc }
 2 | .highlight  { background: #eeffcc; }
 3 | .highlight .c { color: #408090; font-style: italic } /* Comment */
 4 | .highlight .err { border: 1px solid #FF0000 } /* Error */
 5 | .highlight .k { color: #007020; font-weight: bold } /* Keyword */
 6 | .highlight .o { color: #666666 } /* Operator */
 7 | .highlight .ch { color: #408090; font-style: italic } /* Comment.Hashbang */
 8 | .highlight .cm { color: #408090; font-style: italic } /* Comment.Multiline */
 9 | .highlight .cp { color: #007020 } /* Comment.Preproc */
10 | .highlight .cpf { color: #408090; font-style: italic } /* Comment.PreprocFile */
11 | .highlight .c1 { color: #408090; font-style: italic } /* Comment.Single */
12 | .highlight .cs { color: #408090; background-color: #fff0f0 } /* Comment.Special */
13 | .highlight .gd { color: #A00000 } /* Generic.Deleted */
14 | .highlight .ge { font-style: italic } /* Generic.Emph */
15 | .highlight .gr { color: #FF0000 } /* Generic.Error */
16 | .highlight .gh { color: #000080; font-weight: bold } /* Generic.Heading */
17 | .highlight .gi { color: #00A000 } /* Generic.Inserted */
18 | .highlight .go { color: #333333 } /* Generic.Output */
19 | .highlight .gp { color: #c65d09; font-weight: bold } /* Generic.Prompt */
20 | .highlight .gs { font-weight: bold } /* Generic.Strong */
21 | .highlight .gu { color: #800080; font-weight: bold } /* Generic.Subheading */
22 | .highlight .gt { color: #0044DD } /* Generic.Traceback */
23 | .highlight .kc { color: #007020; font-weight: bold } /* Keyword.Constant */
24 | .highlight .kd { color: #007020; font-weight: bold } /* Keyword.Declaration */
25 | .highlight .kn { color: #007020; font-weight: bold } /* Keyword.Namespace */
26 | .highlight .kp { color: #007020 } /* Keyword.Pseudo */
27 | .highlight .kr { color: #007020; font-weight: bold } /* Keyword.Reserved */
28 | .highlight .kt { color: #902000 } /* Keyword.Type */
29 | .highlight .m { color: #208050 } /* Literal.Number */
30 | .highlight .s { color: #4070a0 } /* Literal.String */
31 | .highlight .na { color: #4070a0 } /* Name.Attribute */
32 | .highlight .nb { color: #007020 } /* Name.Builtin */
33 | .highlight .nc { color: #0e84b5; font-weight: bold } /* Name.Class */
34 | .highlight .no { color: #60add5 } /* Name.Constant */
35 | .highlight .nd { color: #555555; font-weight: bold } /* Name.Decorator */
36 | .highlight .ni { color: #d55537; font-weight: bold } /* Name.Entity */
37 | .highlight .ne { color: #007020 } /* Name.Exception */
38 | .highlight .nf { color: #06287e } /* Name.Function */
39 | .highlight .nl { color: #002070; font-weight: bold } /* Name.Label */
40 | .highlight .nn { color: #0e84b5; font-weight: bold } /* Name.Namespace */
41 | .highlight .nt { color: #062873; font-weight: bold } /* Name.Tag */
42 | .highlight .nv { color: #bb60d5 } /* Name.Variable */
43 | .highlight .ow { color: #007020; font-weight: bold } /* Operator.Word */
44 | .highlight .w { color: #bbbbbb } /* Text.Whitespace */
45 | .highlight .mb { color: #208050 } /* Literal.Number.Bin */
46 | .highlight .mf { color: #208050 } /* Literal.Number.Float */
47 | .highlight .mh { color: #208050 } /* Literal.Number.Hex */
48 | .highlight .mi { color: #208050 } /* Literal.Number.Integer */
49 | .highlight .mo { color: #208050 } /* Literal.Number.Oct */
50 | .highlight .sb { color: #4070a0 } /* Literal.String.Backtick */
51 | .highlight .sc { color: #4070a0 } /* Literal.String.Char */
52 | .highlight .sd { color: #4070a0; font-style: italic } /* Literal.String.Doc */
53 | .highlight .s2 { color: #4070a0 } /* Literal.String.Double */
54 | .highlight .se { color: #4070a0; font-weight: bold } /* Literal.String.Escape */
55 | .highlight .sh { color: #4070a0 } /* Literal.String.Heredoc */
56 | .highlight .si { color: #70a0d0; font-style: italic } /* Literal.String.Interpol */
57 | .highlight .sx { color: #c65d09 } /* Literal.String.Other */
58 | .highlight .sr { color: #235388 } /* Literal.String.Regex */
59 | .highlight .s1 { color: #4070a0 } /* Literal.String.Single */
60 | .highlight .ss { color: #517918 } /* Literal.String.Symbol */
61 | .highlight .bp { color: #007020 } /* Name.Builtin.Pseudo */
62 | .highlight .vc { color: #bb60d5 } /* Name.Variable.Class */
63 | .highlight .vg { color: #bb60d5 } /* Name.Variable.Global */
64 | .highlight .vi { color: #bb60d5 } /* Name.Variable.Instance */
65 | .highlight .il { color: #208050 } /* Literal.Number.Integer.Long */


--------------------------------------------------------------------------------
/docs/build/html/_static/up-pressed.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/huaxz1986/git_book/cf539065568c2588df87729c1c76303016933449/docs/build/html/_static/up-pressed.png


--------------------------------------------------------------------------------
/docs/build/html/_static/up.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/huaxz1986/git_book/cf539065568c2588df87729c1c76303016933449/docs/build/html/_static/up.png


--------------------------------------------------------------------------------
/docs/build/html/index.html:
--------------------------------------------------------------------------------
  1 | <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
  2 |   "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
  3 | 
  4 | 
  5 | <html xmlns="http://www.w3.org/1999/xhtml" lang="en">
  6 |   <head>
  7 |     <meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
  8 |     
  9 |     <title>Welcome to book’s documentation! &mdash; book 1.0 documentation</title>
 10 |     
 11 |     <link rel="stylesheet" href="_static/classic.css" type="text/css" />
 12 |     <link rel="stylesheet" href="_static/pygments.css" type="text/css" />
 13 |     
 14 |     <script type="text/javascript">
 15 |       var DOCUMENTATION_OPTIONS = {
 16 |         URL_ROOT:    './',
 17 |         VERSION:     '1.0',
 18 |         COLLAPSE_INDEX: false,
 19 |         FILE_SUFFIX: '.html',
 20 |         HAS_SOURCE:  true
 21 |       };
 22 |     </script>
 23 |     <script type="text/javascript" src="_static/jquery.js"></script>
 24 |     <script type="text/javascript" src="_static/underscore.js"></script>
 25 |     <script type="text/javascript" src="_static/doctools.js"></script>
 26 |     <link rel="top" title="book 1.0 documentation" href="#" /> 
 27 |   </head>
 28 |   <body role="document">
 29 |     <div class="related" role="navigation" aria-label="related navigation">
 30 |       <h3>Navigation</h3>
 31 |       <ul>
 32 |         <li class="right" style="margin-right: 10px">
 33 |           <a href="genindex.html" title="General Index"
 34 |              accesskey="I">index</a></li>
 35 |         <li class="right" >
 36 |           <a href="py-modindex.html" title="Python Module Index"
 37 |              >modules</a> |</li>
 38 |         <li class="nav-item nav-item-0"><a href="#">book 1.0 documentation</a> &raquo;</li> 
 39 |       </ul>
 40 |     </div>  
 41 | 
 42 |     <div class="document">
 43 |       <div class="documentwrapper">
 44 |         <div class="bodywrapper">
 45 |           <div class="body" role="main">
 46 |             
 47 |   <div class="section" id="welcome-to-book-s-documentation">
 48 | <h1>Welcome to book&#8217;s documentation!<a class="headerlink" href="#welcome-to-book-s-documentation" title="Permalink to this headline">¶</a></h1>
 49 | <p>Contents:</p>
 50 | <div class="toctree-wrapper compound">
 51 | <ul class="simple">
 52 | </ul>
 53 | </div>
 54 | </div>
 55 | <div class="section" id="indices-and-tables">
 56 | <h1>Indices and tables<a class="headerlink" href="#indices-and-tables" title="Permalink to this headline">¶</a></h1>
 57 | <ul class="simple">
 58 | <li><a class="reference internal" href="genindex.html"><span class="std std-ref">Index</span></a></li>
 59 | <li><a class="reference internal" href="py-modindex.html"><span class="std std-ref">Module Index</span></a></li>
 60 | <li><a class="reference internal" href="search.html"><span class="std std-ref">Search Page</span></a></li>
 61 | </ul>
 62 | </div>
 63 | 
 64 | 
 65 |           </div>
 66 |         </div>
 67 |       </div>
 68 |       <div class="sphinxsidebar" role="navigation" aria-label="main navigation">
 69 |         <div class="sphinxsidebarwrapper">
 70 |   <h3><a href="#">Table Of Contents</a></h3>
 71 |   <ul>
 72 | <li><a class="reference internal" href="#">Welcome to book&#8217;s documentation!</a></li>
 73 | <li><a class="reference internal" href="#indices-and-tables">Indices and tables</a></li>
 74 | </ul>
 75 | 
 76 |   <div role="note" aria-label="source link">
 77 |     <h3>This Page</h3>
 78 |     <ul class="this-page-menu">
 79 |       <li><a href="_sources/index.txt"
 80 |             rel="nofollow">Show Source</a></li>
 81 |     </ul>
 82 |    </div>
 83 | <div id="searchbox" style="display: none" role="search">
 84 |   <h3>Quick search</h3>
 85 |     <form class="search" action="search.html" method="get">
 86 |       <input type="text" name="q" />
 87 |       <input type="submit" value="Go" />
 88 |       <input type="hidden" name="check_keywords" value="yes" />
 89 |       <input type="hidden" name="area" value="default" />
 90 |     </form>
 91 | </div>
 92 | <script type="text/javascript">$('#searchbox').show(0);</script>
 93 |         </div>
 94 |       </div>
 95 |       <div class="clearer"></div>
 96 |     </div>
 97 |     <div class="related" role="navigation" aria-label="related navigation">
 98 |       <h3>Navigation</h3>
 99 |       <ul>
100 |         <li class="right" style="margin-right: 10px">
101 |           <a href="genindex.html" title="General Index"
102 |              >index</a></li>
103 |         <li class="right" >
104 |           <a href="py-modindex.html" title="Python Module Index"
105 |              >modules</a> |</li>
106 |         <li class="nav-item nav-item-0"><a href="#">book 1.0 documentation</a> &raquo;</li> 
107 |       </ul>
108 |     </div>
109 |     <div class="footer" role="contentinfo">
110 |         &copy; Copyright 2016, huaxz1986.
111 |       Created using <a href="http://sphinx-doc.org/">Sphinx</a> 1.4.1.
112 |     </div>
113 |   </body>
114 | </html>


--------------------------------------------------------------------------------
/docs/build/html/objects.inv:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/huaxz1986/git_book/cf539065568c2588df87729c1c76303016933449/docs/build/html/objects.inv


--------------------------------------------------------------------------------
/docs/build/html/search.html:
--------------------------------------------------------------------------------
  1 | <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
  2 |   "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
  3 | 
  4 | 
  5 | <html xmlns="http://www.w3.org/1999/xhtml" lang="en">
  6 |   <head>
  7 |     <meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
  8 |     
  9 |     <title>Search &mdash; book 1.0 documentation</title>
 10 |     
 11 |     <link rel="stylesheet" href="_static/classic.css" type="text/css" />
 12 |     <link rel="stylesheet" href="_static/pygments.css" type="text/css" />
 13 |     
 14 |     <script type="text/javascript">
 15 |       var DOCUMENTATION_OPTIONS = {
 16 |         URL_ROOT:    './',
 17 |         VERSION:     '1.0',
 18 |         COLLAPSE_INDEX: false,
 19 |         FILE_SUFFIX: '.html',
 20 |         HAS_SOURCE:  true
 21 |       };
 22 |     </script>
 23 |     <script type="text/javascript" src="_static/jquery.js"></script>
 24 |     <script type="text/javascript" src="_static/underscore.js"></script>
 25 |     <script type="text/javascript" src="_static/doctools.js"></script>
 26 |     <script type="text/javascript" src="_static/searchtools.js"></script>
 27 |     <link rel="top" title="book 1.0 documentation" href="index.html" />
 28 |   <script type="text/javascript">
 29 |     jQuery(function() { Search.loadIndex("searchindex.js"); });
 30 |   </script>
 31 |   
 32 |   <script type="text/javascript" id="searchindexloader"></script>
 33 |    
 34 | 
 35 |   </head>
 36 |   <body role="document">
 37 |     <div class="related" role="navigation" aria-label="related navigation">
 38 |       <h3>Navigation</h3>
 39 |       <ul>
 40 |         <li class="right" style="margin-right: 10px">
 41 |           <a href="genindex.html" title="General Index"
 42 |              accesskey="I">index</a></li>
 43 |         <li class="right" >
 44 |           <a href="py-modindex.html" title="Python Module Index"
 45 |              >modules</a> |</li>
 46 |         <li class="nav-item nav-item-0"><a href="index.html">book 1.0 documentation</a> &raquo;</li> 
 47 |       </ul>
 48 |     </div>  
 49 | 
 50 |     <div class="document">
 51 |       <div class="documentwrapper">
 52 |         <div class="bodywrapper">
 53 |           <div class="body" role="main">
 54 |             
 55 |   <h1 id="search-documentation">Search</h1>
 56 |   <div id="fallback" class="admonition warning">
 57 |   <script type="text/javascript">$('#fallback').hide();</script>
 58 |   <p>
 59 |     Please activate JavaScript to enable the search
 60 |     functionality.
 61 |   </p>
 62 |   </div>
 63 |   <p>
 64 |     From here you can search these documents. Enter your search
 65 |     words into the box below and click "search". Note that the search
 66 |     function will automatically search for all of the words. Pages
 67 |     containing fewer words won't appear in the result list.
 68 |   </p>
 69 |   <form action="" method="get">
 70 |     <input type="text" name="q" value="" />
 71 |     <input type="submit" value="search" />
 72 |     <span id="search-progress" style="padding-left: 10px"></span>
 73 |   </form>
 74 |   
 75 |   <div id="search-results">
 76 |   
 77 |   </div>
 78 | 
 79 |           </div>
 80 |         </div>
 81 |       </div>
 82 |       <div class="sphinxsidebar" role="navigation" aria-label="main navigation">
 83 |         <div class="sphinxsidebarwrapper">
 84 |         </div>
 85 |       </div>
 86 |       <div class="clearer"></div>
 87 |     </div>
 88 |     <div class="related" role="navigation" aria-label="related navigation">
 89 |       <h3>Navigation</h3>
 90 |       <ul>
 91 |         <li class="right" style="margin-right: 10px">
 92 |           <a href="genindex.html" title="General Index"
 93 |              >index</a></li>
 94 |         <li class="right" >
 95 |           <a href="py-modindex.html" title="Python Module Index"
 96 |              >modules</a> |</li>
 97 |         <li class="nav-item nav-item-0"><a href="index.html">book 1.0 documentation</a> &raquo;</li> 
 98 |       </ul>
 99 |     </div>
100 |     <div class="footer" role="contentinfo">
101 |         &copy; Copyright 2016, huaxz1986.
102 |       Created using <a href="http://sphinx-doc.org/">Sphinx</a> 1.4.1.
103 |     </div>
104 |   </body>
105 | </html>


--------------------------------------------------------------------------------
/docs/source/chapters.Bayesian.rst:
--------------------------------------------------------------------------------
 1 | chapters.Bayesian package
 2 | =========================
 3 | 
 4 | Submodules
 5 | ----------
 6 | 
 7 | chapters.Bayesian.bayesian module
 8 | ---------------------------------
 9 | 
10 | .. automodule:: chapters.Bayesian.bayesian
11 |     :members:
12 |     :undoc-members:
13 |     :show-inheritance:
14 | 
15 | chapters.Bayesian.bernoulliNB module
16 | ------------------------------------
17 | 
18 | .. automodule:: chapters.Bayesian.bernoulliNB
19 |     :members:
20 |     :undoc-members:
21 |     :show-inheritance:
22 | 
23 | chapters.Bayesian.gaussianNB module
24 | -----------------------------------
25 | 
26 | .. automodule:: chapters.Bayesian.gaussianNB
27 |     :members:
28 |     :undoc-members:
29 |     :show-inheritance:
30 | 
31 | chapters.Bayesian.multinomialNB module
32 | --------------------------------------
33 | 
34 | .. automodule:: chapters.Bayesian.multinomialNB
35 |     :members:
36 |     :undoc-members:
37 |     :show-inheritance:
38 | 
39 | 
40 | Module contents
41 | ---------------
42 | 
43 | .. automodule:: chapters.Bayesian
44 |     :members:
45 |     :undoc-members:
46 |     :show-inheritance:
47 | 


--------------------------------------------------------------------------------
/docs/source/chapters.Cluster_EM.rst:
--------------------------------------------------------------------------------
 1 | chapters.Cluster_EM package
 2 | ===========================
 3 | 
 4 | Submodules
 5 | ----------
 6 | 
 7 | chapters.Cluster_EM.agglomerative_clustering module
 8 | ---------------------------------------------------
 9 | 
10 | .. automodule:: chapters.Cluster_EM.agglomerative_clustering
11 |     :members:
12 |     :undoc-members:
13 |     :show-inheritance:
14 | 
15 | chapters.Cluster_EM.cluster module
16 | ----------------------------------
17 | 
18 | .. automodule:: chapters.Cluster_EM.cluster
19 |     :members:
20 |     :undoc-members:
21 |     :show-inheritance:
22 | 
23 | chapters.Cluster_EM.dbscan module
24 | ---------------------------------
25 | 
26 | .. automodule:: chapters.Cluster_EM.dbscan
27 |     :members:
28 |     :undoc-members:
29 |     :show-inheritance:
30 | 
31 | chapters.Cluster_EM.gmm module
32 | ------------------------------
33 | 
34 | .. automodule:: chapters.Cluster_EM.gmm
35 |     :members:
36 |     :undoc-members:
37 |     :show-inheritance:
38 | 
39 | chapters.Cluster_EM.kmeans module
40 | ---------------------------------
41 | 
42 | .. automodule:: chapters.Cluster_EM.kmeans
43 |     :members:
44 |     :undoc-members:
45 |     :show-inheritance:
46 | 
47 | 
48 | Module contents
49 | ---------------
50 | 
51 | .. automodule:: chapters.Cluster_EM
52 |     :members:
53 |     :undoc-members:
54 |     :show-inheritance:
55 | 


--------------------------------------------------------------------------------
/docs/source/chapters.Decision_Tree.rst:
--------------------------------------------------------------------------------
 1 | chapters.Decision_Tree package
 2 | ==============================
 3 | 
 4 | Submodules
 5 | ----------
 6 | 
 7 | chapters.Decision_Tree.decisiontree_classifier module
 8 | -----------------------------------------------------
 9 | 
10 | .. automodule:: chapters.Decision_Tree.decisiontree_classifier
11 |     :members:
12 |     :undoc-members:
13 |     :show-inheritance:
14 | 
15 | chapters.Decision_Tree.decisiontree_regressor module
16 | ----------------------------------------------------
17 | 
18 | .. automodule:: chapters.Decision_Tree.decisiontree_regressor
19 |     :members:
20 |     :undoc-members:
21 |     :show-inheritance:
22 | 
23 | 
24 | Module contents
25 | ---------------
26 | 
27 | .. automodule:: chapters.Decision_Tree
28 |     :members:
29 |     :undoc-members:
30 |     :show-inheritance:
31 | 


--------------------------------------------------------------------------------
/docs/source/chapters.Ensemble.rst:
--------------------------------------------------------------------------------
 1 | chapters.Ensemble package
 2 | =========================
 3 | 
 4 | Submodules
 5 | ----------
 6 | 
 7 | chapters.Ensemble.adaboost_classifier module
 8 | --------------------------------------------
 9 | 
10 | .. automodule:: chapters.Ensemble.adaboost_classifier
11 |     :members:
12 |     :undoc-members:
13 |     :show-inheritance:
14 | 
15 | chapters.Ensemble.adaboost_regressor module
16 | -------------------------------------------
17 | 
18 | .. automodule:: chapters.Ensemble.adaboost_regressor
19 |     :members:
20 |     :undoc-members:
21 |     :show-inheritance:
22 | 
23 | chapters.Ensemble.gradientboosting_classifier module
24 | ----------------------------------------------------
25 | 
26 | .. automodule:: chapters.Ensemble.gradientboosting_classifier
27 |     :members:
28 |     :undoc-members:
29 |     :show-inheritance:
30 | 
31 | chapters.Ensemble.gradientboosting_regressor module
32 | ---------------------------------------------------
33 | 
34 | .. automodule:: chapters.Ensemble.gradientboosting_regressor
35 |     :members:
36 |     :undoc-members:
37 |     :show-inheritance:
38 | 
39 | chapters.Ensemble.randomforest_classifier module
40 | ------------------------------------------------
41 | 
42 | .. automodule:: chapters.Ensemble.randomforest_classifier
43 |     :members:
44 |     :undoc-members:
45 |     :show-inheritance:
46 | 
47 | chapters.Ensemble.randomforest_regressor module
48 | -----------------------------------------------
49 | 
50 | .. automodule:: chapters.Ensemble.randomforest_regressor
51 |     :members:
52 |     :undoc-members:
53 |     :show-inheritance:
54 | 
55 | 
56 | Module contents
57 | ---------------
58 | 
59 | .. automodule:: chapters.Ensemble
60 |     :members:
61 |     :undoc-members:
62 |     :show-inheritance:
63 | 


--------------------------------------------------------------------------------
/docs/source/chapters.KNN_Dimension_Reduction.rst:
--------------------------------------------------------------------------------
 1 | chapters.KNN_Dimension_Reduction package
 2 | ========================================
 3 | 
 4 | Submodules
 5 | ----------
 6 | 
 7 | chapters.KNN_Dimension_Reduction.isomap module
 8 | ----------------------------------------------
 9 | 
10 | .. automodule:: chapters.KNN_Dimension_Reduction.isomap
11 |     :members:
12 |     :undoc-members:
13 |     :show-inheritance:
14 | 
15 | chapters.KNN_Dimension_Reduction.kneighbors_classifier module
16 | -------------------------------------------------------------
17 | 
18 | .. automodule:: chapters.KNN_Dimension_Reduction.kneighbors_classifier
19 |     :members:
20 |     :undoc-members:
21 |     :show-inheritance:
22 | 
23 | chapters.KNN_Dimension_Reduction.kneighbors_regressor module
24 | ------------------------------------------------------------
25 | 
26 | .. automodule:: chapters.KNN_Dimension_Reduction.kneighbors_regressor
27 |     :members:
28 |     :undoc-members:
29 |     :show-inheritance:
30 | 
31 | chapters.KNN_Dimension_Reduction.kpca module
32 | --------------------------------------------
33 | 
34 | .. automodule:: chapters.KNN_Dimension_Reduction.kpca
35 |     :members:
36 |     :undoc-members:
37 |     :show-inheritance:
38 | 
39 | chapters.KNN_Dimension_Reduction.lle module
40 | -------------------------------------------
41 | 
42 | .. automodule:: chapters.KNN_Dimension_Reduction.lle
43 |     :members:
44 |     :undoc-members:
45 |     :show-inheritance:
46 | 
47 | chapters.KNN_Dimension_Reduction.mds module
48 | -------------------------------------------
49 | 
50 | .. automodule:: chapters.KNN_Dimension_Reduction.mds
51 |     :members:
52 |     :undoc-members:
53 |     :show-inheritance:
54 | 
55 | chapters.KNN_Dimension_Reduction.pca module
56 | -------------------------------------------
57 | 
58 | .. automodule:: chapters.KNN_Dimension_Reduction.pca
59 |     :members:
60 |     :undoc-members:
61 |     :show-inheritance:
62 | 
63 | 
64 | Module contents
65 | ---------------
66 | 
67 | .. automodule:: chapters.KNN_Dimension_Reduction
68 |     :members:
69 |     :undoc-members:
70 |     :show-inheritance:
71 | 


--------------------------------------------------------------------------------
/docs/source/chapters.Kaggle.rst:
--------------------------------------------------------------------------------
 1 | chapters.Kaggle package
 2 | =======================
 3 | 
 4 | Submodules
 5 | ----------
 6 | 
 7 | chapters.Kaggle.data_clean module
 8 | ---------------------------------
 9 | 
10 | .. automodule:: chapters.Kaggle.data_clean
11 |     :members:
12 |     :undoc-members:
13 |     :show-inheritance:
14 | 
15 | chapters.Kaggle.data_preprocess module
16 | --------------------------------------
17 | 
18 | .. automodule:: chapters.Kaggle.data_preprocess
19 |     :members:
20 |     :undoc-members:
21 |     :show-inheritance:
22 | 
23 | chapters.Kaggle.grid_search module
24 | ----------------------------------
25 | 
26 | .. automodule:: chapters.Kaggle.grid_search
27 |     :members:
28 |     :undoc-members:
29 |     :show-inheritance:
30 | 
31 | chapters.Kaggle.learning_validation_curve module
32 | ------------------------------------------------
33 | 
34 | .. automodule:: chapters.Kaggle.learning_validation_curve
35 |     :members:
36 |     :undoc-members:
37 |     :show-inheritance:
38 | 
39 | 
40 | Module contents
41 | ---------------
42 | 
43 | .. automodule:: chapters.Kaggle
44 |     :members:
45 |     :undoc-members:
46 |     :show-inheritance:
47 | 


--------------------------------------------------------------------------------
/docs/source/chapters.Linear.rst:
--------------------------------------------------------------------------------
 1 | chapters.Linear package
 2 | =======================
 3 | 
 4 | Submodules
 5 | ----------
 6 | 
 7 | chapters.Linear.elasticnet module
 8 | ---------------------------------
 9 | 
10 | .. automodule:: chapters.Linear.elasticnet
11 |     :members:
12 |     :undoc-members:
13 |     :show-inheritance:
14 | 
15 | chapters.Linear.lasso module
16 | ----------------------------
17 | 
18 | .. automodule:: chapters.Linear.lasso
19 |     :members:
20 |     :undoc-members:
21 |     :show-inheritance:
22 | 
23 | chapters.Linear.lda module
24 | --------------------------
25 | 
26 | .. automodule:: chapters.Linear.lda
27 |     :members:
28 |     :undoc-members:
29 |     :show-inheritance:
30 | 
31 | chapters.Linear.linear_regression module
32 | ----------------------------------------
33 | 
34 | .. automodule:: chapters.Linear.linear_regression
35 |     :members:
36 |     :undoc-members:
37 |     :show-inheritance:
38 | 
39 | chapters.Linear.logistic_regression module
40 | ------------------------------------------
41 | 
42 | .. automodule:: chapters.Linear.logistic_regression
43 |     :members:
44 |     :undoc-members:
45 |     :show-inheritance:
46 | 
47 | chapters.Linear.ridge module
48 | ----------------------------
49 | 
50 | .. automodule:: chapters.Linear.ridge
51 |     :members:
52 |     :undoc-members:
53 |     :show-inheritance:
54 | 
55 | 
56 | Module contents
57 | ---------------
58 | 
59 | .. automodule:: chapters.Linear
60 |     :members:
61 |     :undoc-members:
62 |     :show-inheritance:
63 | 


--------------------------------------------------------------------------------
/docs/source/chapters.Model_Selection.rst:
--------------------------------------------------------------------------------
 1 | chapters.Model_Selection package
 2 | ================================
 3 | 
 4 | Submodules
 5 | ----------
 6 | 
 7 | chapters.Model_Selection.classification_metrics module
 8 | ------------------------------------------------------
 9 | 
10 | .. automodule:: chapters.Model_Selection.classification_metrics
11 |     :members:
12 |     :undoc-members:
13 |     :show-inheritance:
14 | 
15 | chapters.Model_Selection.data_splittion module
16 | ----------------------------------------------
17 | 
18 | .. automodule:: chapters.Model_Selection.data_splittion
19 |     :members:
20 |     :undoc-members:
21 |     :show-inheritance:
22 | 
23 | chapters.Model_Selection.grid_search module
24 | -------------------------------------------
25 | 
26 | .. automodule:: chapters.Model_Selection.grid_search
27 |     :members:
28 |     :undoc-members:
29 |     :show-inheritance:
30 | 
31 | chapters.Model_Selection.learning_curve module
32 | ----------------------------------------------
33 | 
34 | .. automodule:: chapters.Model_Selection.learning_curve
35 |     :members:
36 |     :undoc-members:
37 |     :show-inheritance:
38 | 
39 | chapters.Model_Selection.loss_function module
40 | ---------------------------------------------
41 | 
42 | .. automodule:: chapters.Model_Selection.loss_function
43 |     :members:
44 |     :undoc-members:
45 |     :show-inheritance:
46 | 
47 | chapters.Model_Selection.regression_metrics module
48 | --------------------------------------------------
49 | 
50 | .. automodule:: chapters.Model_Selection.regression_metrics
51 |     :members:
52 |     :undoc-members:
53 |     :show-inheritance:
54 | 
55 | chapters.Model_Selection.validation_curve module
56 | ------------------------------------------------
57 | 
58 | .. automodule:: chapters.Model_Selection.validation_curve
59 |     :members:
60 |     :undoc-members:
61 |     :show-inheritance:
62 | 
63 | 
64 | Module contents
65 | ---------------
66 | 
67 | .. automodule:: chapters.Model_Selection
68 |     :members:
69 |     :undoc-members:
70 |     :show-inheritance:
71 | 


--------------------------------------------------------------------------------
/docs/source/chapters.Perceptron_Neural_Network.rst:
--------------------------------------------------------------------------------
 1 | chapters.Perceptron_Neural_Network package
 2 | ==========================================
 3 | 
 4 | Submodules
 5 | ----------
 6 | 
 7 | chapters.Perceptron_Neural_Network.neural_network module
 8 | --------------------------------------------------------
 9 | 
10 | .. automodule:: chapters.Perceptron_Neural_Network.neural_network
11 |     :members:
12 |     :undoc-members:
13 |     :show-inheritance:
14 | 
15 | chapters.Perceptron_Neural_Network.neural_network_iris module
16 | -------------------------------------------------------------
17 | 
18 | .. automodule:: chapters.Perceptron_Neural_Network.neural_network_iris
19 |     :members:
20 |     :undoc-members:
21 |     :show-inheritance:
22 | 
23 | chapters.Perceptron_Neural_Network.perceptron module
24 | ----------------------------------------------------
25 | 
26 | .. automodule:: chapters.Perceptron_Neural_Network.perceptron
27 |     :members:
28 |     :undoc-members:
29 |     :show-inheritance:
30 | 
31 | 
32 | Module contents
33 | ---------------
34 | 
35 | .. automodule:: chapters.Perceptron_Neural_Network
36 |     :members:
37 |     :undoc-members:
38 |     :show-inheritance:
39 | 


--------------------------------------------------------------------------------
/docs/source/chapters.PreProcessing.rst:
--------------------------------------------------------------------------------
 1 | chapters.PreProcessing package
 2 | ==============================
 3 | 
 4 | Submodules
 5 | ----------
 6 | 
 7 | chapters.PreProcessing.binarize module
 8 | --------------------------------------
 9 | 
10 | .. automodule:: chapters.PreProcessing.binarize
11 |     :members:
12 |     :undoc-members:
13 |     :show-inheritance:
14 | 
15 | chapters.PreProcessing.dictionary_learning module
16 | -------------------------------------------------
17 | 
18 | .. automodule:: chapters.PreProcessing.dictionary_learning
19 |     :members:
20 |     :undoc-members:
21 |     :show-inheritance:
22 | 
23 | chapters.PreProcessing.feature_selection_bagging module
24 | -------------------------------------------------------
25 | 
26 | .. automodule:: chapters.PreProcessing.feature_selection_bagging
27 |     :members:
28 |     :undoc-members:
29 |     :show-inheritance:
30 | 
31 | chapters.PreProcessing.feature_selection_embeded module
32 | -------------------------------------------------------
33 | 
34 | .. automodule:: chapters.PreProcessing.feature_selection_embeded
35 |     :members:
36 |     :undoc-members:
37 |     :show-inheritance:
38 | 
39 | chapters.PreProcessing.feature_selection_filter module
40 | ------------------------------------------------------
41 | 
42 | .. automodule:: chapters.PreProcessing.feature_selection_filter
43 |     :members:
44 |     :undoc-members:
45 |     :show-inheritance:
46 | 
47 | chapters.PreProcessing.normalize module
48 | ---------------------------------------
49 | 
50 | .. automodule:: chapters.PreProcessing.normalize
51 |     :members:
52 |     :undoc-members:
53 |     :show-inheritance:
54 | 
55 | chapters.PreProcessing.onehot_encode module
56 | -------------------------------------------
57 | 
58 | .. automodule:: chapters.PreProcessing.onehot_encode
59 |     :members:
60 |     :undoc-members:
61 |     :show-inheritance:
62 | 
63 | chapters.PreProcessing.pipeline module
64 | --------------------------------------
65 | 
66 | .. automodule:: chapters.PreProcessing.pipeline
67 |     :members:
68 |     :undoc-members:
69 |     :show-inheritance:
70 | 
71 | chapters.PreProcessing.standardize module
72 | -----------------------------------------
73 | 
74 | .. automodule:: chapters.PreProcessing.standardize
75 |     :members:
76 |     :undoc-members:
77 |     :show-inheritance:
78 | 
79 | 
80 | Module contents
81 | ---------------
82 | 
83 | .. automodule:: chapters.PreProcessing
84 |     :members:
85 |     :undoc-members:
86 |     :show-inheritance:
87 | 


--------------------------------------------------------------------------------
/docs/source/chapters.SVM.rst:
--------------------------------------------------------------------------------
 1 | chapters.SVM package
 2 | ====================
 3 | 
 4 | Submodules
 5 | ----------
 6 | 
 7 | chapters.SVM.SVC module
 8 | -----------------------
 9 | 
10 | .. automodule:: chapters.SVM.SVC
11 |     :members:
12 |     :undoc-members:
13 |     :show-inheritance:
14 | 
15 | chapters.SVM.SVR module
16 | -----------------------
17 | 
18 | .. automodule:: chapters.SVM.SVR
19 |     :members:
20 |     :undoc-members:
21 |     :show-inheritance:
22 | 
23 | chapters.SVM.linearSVC module
24 | -----------------------------
25 | 
26 | .. automodule:: chapters.SVM.linearSVC
27 |     :members:
28 |     :undoc-members:
29 |     :show-inheritance:
30 | 
31 | chapters.SVM.linearSVR module
32 | -----------------------------
33 | 
34 | .. automodule:: chapters.SVM.linearSVR
35 |     :members:
36 |     :undoc-members:
37 |     :show-inheritance:
38 | 
39 | 
40 | Module contents
41 | ---------------
42 | 
43 | .. automodule:: chapters.SVM
44 |     :members:
45 |     :undoc-members:
46 |     :show-inheritance:
47 | 


--------------------------------------------------------------------------------
/docs/source/chapters.Semi_Supervised_Learning.rst:
--------------------------------------------------------------------------------
 1 | chapters.Semi_Supervised_Learning package
 2 | =========================================
 3 | 
 4 | Submodules
 5 | ----------
 6 | 
 7 | chapters.Semi_Supervised_Learning.labelPropagation module
 8 | ---------------------------------------------------------
 9 | 
10 | .. automodule:: chapters.Semi_Supervised_Learning.labelPropagation
11 |     :members:
12 |     :undoc-members:
13 |     :show-inheritance:
14 | 
15 | chapters.Semi_Supervised_Learning.labelSpreading module
16 | -------------------------------------------------------
17 | 
18 | .. automodule:: chapters.Semi_Supervised_Learning.labelSpreading
19 |     :members:
20 |     :undoc-members:
21 |     :show-inheritance:
22 | 
23 | 
24 | Module contents
25 | ---------------
26 | 
27 | .. automodule:: chapters.Semi_Supervised_Learning
28 |     :members:
29 |     :undoc-members:
30 |     :show-inheritance:
31 | 


--------------------------------------------------------------------------------
/docs/source/chapters.rst:
--------------------------------------------------------------------------------
 1 | chapters package
 2 | ================
 3 | 
 4 | Subpackages
 5 | -----------
 6 | 
 7 | .. toctree::
 8 | 
 9 |     chapters.Bayesian
10 |     chapters.Cluster_EM
11 |     chapters.Decision_Tree
12 |     chapters.Ensemble
13 |     chapters.KNN_Dimension_Reduction
14 |     chapters.Kaggle
15 |     chapters.Linear
16 |     chapters.Model_Selection
17 |     chapters.Perceptron_Neural_Network
18 |     chapters.PreProcessing
19 |     chapters.SVM
20 |     chapters.Semi_Supervised_Learning
21 | 
22 | Module contents
23 | ---------------
24 | 
25 | .. automodule:: chapters
26 |     :members:
27 |     :undoc-members:
28 |     :show-inheritance:
29 | 


--------------------------------------------------------------------------------
/docs/source/index.rst:
--------------------------------------------------------------------------------
 1 | .. book documentation master file, created by
 2 |    sphinx-quickstart on Wed Aug 17 17:09:32 2016.
 3 |    You can adapt this file completely to your liking, but it should at least
 4 |    contain the root `toctree` directive.
 5 | 
 6 | Welcome to book's documentation!
 7 | ================================
 8 | 
 9 | Contents:
10 | 
11 | .. toctree::
12 |    :maxdepth: 2
13 | 
14 |    modules
15 | 
16 | Indices and tables
17 | ==================
18 | 
19 | * :ref:`genindex`
20 | * :ref:`modindex`
21 | * :ref:`search`
22 | 
23 | 


--------------------------------------------------------------------------------
/docs/source/modules.rst:
--------------------------------------------------------------------------------
1 | chapters
2 | ========
3 | 
4 | .. toctree::
5 |    :maxdepth: 4
6 | 
7 |    chapters
8 | 


--------------------------------------------------------------------------------
/勘误.md:
--------------------------------------------------------------------------------
 1 | # 勘误
 2 | > 推荐采用一些第三方 Markdown 编辑器阅读。本人采用的是 Typora。
 3 | 
 4 | 1. 第6页第二段：符号错误：（感谢网友 周礼广 的提示）
 5 |    $$
 6 |     P(y=1/\mathbf{\vec x})=\frac{1}{1+e^{-z}},z=\mathbf{\vec w}\cdot \mathbf{\vec x}+b
 7 |    $$
 8 | 
 9 | 2. 第194页倒数第三段：公式未换行：（感谢网友 齐照辉 的提示）
10 | 
11 |    $$\nabla _{\mathbf {\vec w}}L(\mathbf{\vec w},b)=-\sum_{\mathbf{\vec x}_i \in M}y_i \mathbf{\vec x}_i\\ \nabla_bL(\mathbf{\vec w},b)=-\sum_{\mathbf{\vec x}_i \in M}y_i$$
12 | 
13 |    以及
14 | 
15 |    $$ \mathbf{\vec w}\leftarrow \mathbf{\vec w}+\eta y_i\mathbf{\vec x}_i\\ b\leftarrow b+\eta y_i$$
16 | 
17 | 3. 第248页最后一段第一句：分子分母互换：（感谢网友 飞雪剑魂 的提示）
18 |    $$
19 |    \exp(2\alpha_m)=\frac{1-e_m}{e_m}
20 |    $$
21 | 
22 | 4. 第36页倒数第二个公式：公式未换行：（感谢网友 张显 的提示）
23 |    $$
24 |    C_{\alpha}=...\\
25 |    H(t)=...
26 |    $$
27 | 
28 | 5. 第45页，`score` 的公式：分母缺少部分内容（后续关于`score`的公式也依次修改）：
29 |    $$
30 |    \text{score}=1-\frac{\sum_{T_{test}}(y_i-\hat y_i)^2}{\sum_{T_{test}}(y_i-\bar y)^2}
31 |    $$
32 | 
33 | 


--------------------------------------------------------------------------------