├── LICENSE ├── README.md ├── chapters ├── Bayesian │ ├── __init__.py │ ├── __pycache__ │ │ ├── __init__.cpython-35.pyc │ │ ├── bayesian.cpython-35.pyc │ │ ├── bernoulliNB.cpython-35.pyc │ │ ├── gaussianNB.cpython-35.pyc │ │ └── multinomialNB.cpython-35.pyc │ ├── bayesian.py │ ├── bernoulliNB.py │ ├── gaussianNB.py │ └── multinomialNB.py ├── Cluster_EM │ ├── __init__.py │ ├── __pycache__ │ │ ├── __init__.cpython-35.pyc │ │ ├── agglomerative_clustering.cpython-35.pyc │ │ ├── cluster.cpython-35.pyc │ │ ├── dbscan.cpython-35.pyc │ │ ├── gmm.cpython-35.pyc │ │ └── kmeans.cpython-35.pyc │ ├── agglomerative_clustering.py │ ├── cluster.py │ ├── dbscan.py │ ├── gmm.py │ └── kmeans.py ├── Decision_Tree │ ├── __init__.py │ ├── __pycache__ │ │ ├── __init__.cpython-35.pyc │ │ ├── decisiontree_classifier.cpython-35.pyc │ │ └── decisiontree_regressor.cpython-35.pyc │ ├── decisiontree_classifier.py │ └── decisiontree_regressor.py ├── Ensemble │ ├── __init__.py │ ├── __pycache__ │ │ ├── __init__.cpython-35.pyc │ │ ├── adaboost_classifier.cpython-35.pyc │ │ ├── adaboost_regressor.cpython-35.pyc │ │ ├── gradientboosting_classifier.cpython-35.pyc │ │ ├── gradientboosting_regressor.cpython-35.pyc │ │ ├── randomforest_classifier.cpython-35.pyc │ │ └── randomforest_regressor.cpython-35.pyc │ ├── adaboost_classifier.py │ ├── adaboost_regressor.py │ ├── gradientboosting_classifier.py │ ├── gradientboosting_regressor.py │ ├── randomforest_classifier.py │ └── randomforest_regressor.py ├── KNN_Dimension_Reduction │ ├── __init__.py │ ├── __pycache__ │ │ ├── __init__.cpython-35.pyc │ │ ├── isomap.cpython-35.pyc │ │ ├── kneighbors_classifier.cpython-35.pyc │ │ ├── kneighbors_regressor.cpython-35.pyc │ │ ├── kpca.cpython-35.pyc │ │ ├── lle.cpython-35.pyc │ │ ├── mds.cpython-35.pyc │ │ └── pca.cpython-35.pyc │ ├── isomap.py │ ├── kneighbors_classifier.py │ ├── kneighbors_regressor.py │ ├── kpca.py │ ├── lle.py │ ├── mds.py │ └── pca.py ├── Kaggle │ ├── __init__.py │ ├── __pycache__ │ │ ├── 
__init__.cpython-35.pyc │ │ ├── data_clean.cpython-35.pyc │ │ ├── data_preprocess.cpython-35.pyc │ │ ├── grid_search.cpython-35.pyc │ │ └── learning_validation_curve.cpython-35.pyc │ ├── check_data.ipynb │ ├── data_clean.py │ ├── data_preprocess.py │ ├── grid_search.py │ └── learning_validation_curve.py ├── Linear │ ├── __init__.py │ ├── __pycache__ │ │ ├── __init__.cpython-35.pyc │ │ ├── elasticnet.cpython-35.pyc │ │ ├── lasso.cpython-35.pyc │ │ ├── lda.cpython-35.pyc │ │ ├── linear_regression.cpython-35.pyc │ │ ├── logistic_regression.cpython-35.pyc │ │ └── ridge.cpython-35.pyc │ ├── elasticnet.py │ ├── lasso.py │ ├── lda.py │ ├── linear_regression.py │ ├── logistic_regression.py │ └── ridge.py ├── Model_Selection │ ├── __init__.py │ ├── __pycache__ │ │ ├── __init__.cpython-35.pyc │ │ ├── classification_metrics.cpython-35.pyc │ │ ├── data_splittion.cpython-35.pyc │ │ ├── grid_search.cpython-35.pyc │ │ ├── learning_curve.cpython-35.pyc │ │ ├── loss_function.cpython-35.pyc │ │ ├── regression_metrics.cpython-35.pyc │ │ └── validation_curve.cpython-35.pyc │ ├── classification_metrics.py │ ├── data_splittion.py │ ├── grid_search.py │ ├── learning_curve.py │ ├── loss_function.py │ ├── regression_metrics.py │ └── validation_curve.py ├── Perceptron_Neural_Network │ ├── __init__.py │ ├── __pycache__ │ │ ├── __init__.cpython-35.pyc │ │ ├── neural_network.cpython-35.pyc │ │ ├── neural_network_iris.cpython-35.pyc │ │ └── perceptron.cpython-35.pyc │ ├── neural_network.py │ ├── neural_network_iris.py │ └── perceptron.py ├── PreProcessing │ ├── __init__.py │ ├── __pycache__ │ │ ├── __init__.cpython-35.pyc │ │ ├── binarize.cpython-35.pyc │ │ ├── dictionary_learning.cpython-35.pyc │ │ ├── feature_selection_bagging.cpython-35.pyc │ │ ├── feature_selection_embeded.cpython-35.pyc │ │ ├── feature_selection_filter.cpython-35.pyc │ │ ├── normalize.cpython-35.pyc │ │ ├── onehot_encode.cpython-35.pyc │ │ ├── pipeline.cpython-35.pyc │ │ └── standardize.cpython-35.pyc │ ├── binarize.py │ 
├── dictionary_learning.py │ ├── feature_selection_bagging.py │ ├── feature_selection_embeded.py │ ├── feature_selection_filter.py │ ├── normalize.py │ ├── onehot_encode.py │ ├── pipeline.py │ └── standardize.py ├── SVM │ ├── SVC.py │ ├── SVR.py │ ├── __init__.py │ ├── __pycache__ │ │ ├── SVC.cpython-35.pyc │ │ ├── SVR.cpython-35.pyc │ │ ├── __init__.cpython-35.pyc │ │ ├── linearSVC.cpython-35.pyc │ │ └── linearSVR.cpython-35.pyc │ ├── linearSVC.py │ └── linearSVR.py ├── Semi_Supervised_Learning │ ├── __init__.py │ ├── __pycache__ │ │ ├── __init__.cpython-35.pyc │ │ ├── labelPropagation.cpython-35.pyc │ │ └── labelSpreading.cpython-35.pyc │ ├── labelPropagation.py │ └── labelSpreading.py ├── __init__.py └── __pycache__ │ └── __init__.cpython-35.pyc ├── docs ├── Makefile ├── build │ ├── doctrees │ │ ├── chapters.Bayesian.doctree │ │ ├── chapters.Cluster_EM.doctree │ │ ├── chapters.Decision_Tree.doctree │ │ ├── chapters.Ensemble.doctree │ │ ├── chapters.KNN_Dimension_Reduction.doctree │ │ ├── chapters.Kaggle.doctree │ │ ├── chapters.Linear.doctree │ │ ├── chapters.Model_Selection.doctree │ │ ├── chapters.Perceptron_Neural_Network.doctree │ │ ├── chapters.PreProcessing.doctree │ │ ├── chapters.SVM.doctree │ │ ├── chapters.Semi_Supervised_Learning.doctree │ │ ├── chapters.doctree │ │ ├── environment.pickle │ │ ├── index.doctree │ │ └── modules.doctree │ └── html │ │ ├── .buildinfo │ │ ├── _sources │ │ ├── chapters.Bayesian.txt │ │ ├── chapters.Cluster_EM.txt │ │ ├── chapters.Decision_Tree.txt │ │ ├── chapters.Ensemble.txt │ │ ├── chapters.KNN_Dimension_Reduction.txt │ │ ├── chapters.Kaggle.txt │ │ ├── chapters.Linear.txt │ │ ├── chapters.Model_Selection.txt │ │ ├── chapters.Perceptron_Neural_Network.txt │ │ ├── chapters.PreProcessing.txt │ │ ├── chapters.SVM.txt │ │ ├── chapters.Semi_Supervised_Learning.txt │ │ ├── chapters.txt │ │ ├── index.txt │ │ └── modules.txt │ │ ├── _static │ │ ├── ajax-loader.gif │ │ ├── basic.css │ │ ├── classic.css │ │ ├── comment-bright.png 
│ │ ├── comment-close.png │ │ ├── comment.png │ │ ├── doctools.js │ │ ├── down-pressed.png │ │ ├── down.png │ │ ├── file.png │ │ ├── jquery-1.11.1.js │ │ ├── jquery.js │ │ ├── minus.png │ │ ├── plus.png │ │ ├── pygments.css │ │ ├── searchtools.js │ │ ├── sidebar.js │ │ ├── underscore-1.3.1.js │ │ ├── underscore.js │ │ ├── up-pressed.png │ │ ├── up.png │ │ └── websupport.js │ │ ├── chapters.Bayesian.html │ │ ├── chapters.Cluster_EM.html │ │ ├── chapters.Decision_Tree.html │ │ ├── chapters.Ensemble.html │ │ ├── chapters.KNN_Dimension_Reduction.html │ │ ├── chapters.Kaggle.html │ │ ├── chapters.Linear.html │ │ ├── chapters.Model_Selection.html │ │ ├── chapters.Perceptron_Neural_Network.html │ │ ├── chapters.PreProcessing.html │ │ ├── chapters.SVM.html │ │ ├── chapters.Semi_Supervised_Learning.html │ │ ├── chapters.html │ │ ├── genindex.html │ │ ├── index.html │ │ ├── modules.html │ │ ├── objects.inv │ │ ├── py-modindex.html │ │ ├── search.html │ │ └── searchindex.js ├── make.bat └── source │ ├── chapters.Bayesian.rst │ ├── chapters.Cluster_EM.rst │ ├── chapters.Decision_Tree.rst │ ├── chapters.Ensemble.rst │ ├── chapters.KNN_Dimension_Reduction.rst │ ├── chapters.Kaggle.rst │ ├── chapters.Linear.rst │ ├── chapters.Model_Selection.rst │ ├── chapters.Perceptron_Neural_Network.rst │ ├── chapters.PreProcessing.rst │ ├── chapters.SVM.rst │ ├── chapters.Semi_Supervised_Learning.rst │ ├── chapters.rst │ ├── conf.py │ ├── index.rst │ └── modules.rst └── 勘误.md /README.md: -------------------------------------------------------------------------------- 1 | # ReadMe 2 | 3 | 另:本人搜集了个人笔记并整理成册,命名为《AI算法工程师手册》,详见:www.huaxiaozhuan.com 4 | 5 | ## 1. 
源码结构 6 | 7 | 这里给出主要的目录结构。其中 `sphinx` 自动生成的目录和文件未全部列出。 8 | 9 | ``` 10 | book/ 11 | docs/ .......................> 说明文档 12 | make.bat ...............> sphinx 脚本 13 | build/...................> sphinx 生成的文档所在目录 14 | html/............> sphinx 生成的 HTML文档的目录 15 | source/..................> sphinx 的配置文件以及生成的 .rst 文件 16 | conf.py..........> sphinx 的配置文件 17 | chapters/ ........................> 源代码 18 | Bayesian/...................> 朴素贝叶斯和贝叶斯网络 19 | Cluster_EM/.................> 聚类和 EM 算法 20 | Decision_Tree/..............> 决策树 21 | Ensemble/...................> 集成学习 22 | KNN_Dimension_Reduction/....> KNN和降维 23 | Linear/.....................> 线性模型 24 | Model_Selection/............> 模型选择 25 | Perceptron_Neural_Network/..> 感知机和神经网络 26 | PreProcessing/..............> 数据预处理 27 | Semi_Supervised_Learning/...> 半监督学习 28 | SVM/........................> 支持向量机 29 | Kaggle/.....................> Kaggle 实战 30 | ``` 31 | 32 | ## 2. 使用 sphinx 33 | 34 | 35 | 使用 `sphinx`自动生成文档主要利用了 `sphinx`的 `autodoc` 功能。这里的 `conf.py` 已经配置好。生成文档需要三步: 36 | 37 | 1. 进入命令行后,切换到 `book/`文件夹下 38 | 2. 在命令行中输入命令: 39 | 40 | ``` 41 | sphinx-apidoc -o docs/source chapters 42 | ``` 43 | 该命令将会从 `chapters`目录下的`.py`文件中抽取注释生成`.rst`文档(这些文档将被存放在 `docs/source/`目录下) 44 | 45 | 3. 在命令行中输入命令: 46 | 47 | ``` 48 | cd docs 49 | make html 50 | ``` 51 | 其中第一行命令是进入`docs/`目录。第二行命令是根据`.rst`文档生成 `html`文档(这些`html`文档位于`docs/build/html/`目录下) 52 | 53 | ## 3. 修改主题 54 | 55 | 你可以修改生成的`HTML`文件的样式,这是通过修改`sphinx`的主题来实现的。 56 | 57 | 修改 `conf.py`的 `html_theme = 'classic'` 就能实现修改主题。这里我采用经典主题`'classic'`。内建的主题有: 58 | 59 | ``` 60 | 'alabaster'、'sphinx_rtd_theme'、'classic'、'sphinxdoc'、'scrolls'、'agogo'、 61 | 'traditional'、 'nature'、 'haiku'、'pyramid'、'bizstyle' 62 | 63 | ``` 64 | 65 | ## 4. 
源码注释 66 | 67 | 源码注释的格式为: 68 | 69 | ``` 70 | def func(a,b): 71 | ''' 72 | 函数的描述 73 | 74 | :param a: 参数 a 的描述 75 | :param b: 参数 b 的描述 76 | :return: 返回值的描述 77 | ''' 78 | pass 79 | ``` 80 | 81 | 这里要注意空行的空格的存在。如果没有这些空格和空行,则 `sphinx`可能会误判这些注释的意义。 82 | -------------------------------------------------------------------------------- /chapters/Bayesian/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huaxz1986/git_book/cf539065568c2588df87729c1c76303016933449/chapters/Bayesian/__init__.py -------------------------------------------------------------------------------- /chapters/Bayesian/__pycache__/__init__.cpython-35.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huaxz1986/git_book/cf539065568c2588df87729c1c76303016933449/chapters/Bayesian/__pycache__/__init__.cpython-35.pyc -------------------------------------------------------------------------------- /chapters/Bayesian/__pycache__/bayesian.cpython-35.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huaxz1986/git_book/cf539065568c2588df87729c1c76303016933449/chapters/Bayesian/__pycache__/bayesian.cpython-35.pyc -------------------------------------------------------------------------------- /chapters/Bayesian/__pycache__/bernoulliNB.cpython-35.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huaxz1986/git_book/cf539065568c2588df87729c1c76303016933449/chapters/Bayesian/__pycache__/bernoulliNB.cpython-35.pyc -------------------------------------------------------------------------------- /chapters/Bayesian/__pycache__/gaussianNB.cpython-35.pyc: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/huaxz1986/git_book/cf539065568c2588df87729c1c76303016933449/chapters/Bayesian/__pycache__/gaussianNB.cpython-35.pyc -------------------------------------------------------------------------------- /chapters/Bayesian/__pycache__/multinomialNB.cpython-35.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huaxz1986/git_book/cf539065568c2588df87729c1c76303016933449/chapters/Bayesian/__pycache__/multinomialNB.cpython-35.pyc -------------------------------------------------------------------------------- /chapters/Bayesian/bayesian.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | 贝叶斯分类器和贝叶斯网络 4 | ~~~~~~~~~~~~~~~~~~~~~~~~~~ 5 | 6 | 贝叶斯分类器 7 | 8 | :copyright: (c) 2016 by the huaxz1986. 9 | :license: lgpl-3.0, see LICENSE for more details. 10 | """ 11 | 12 | from sklearn import datasets,cross_validation,naive_bayes 13 | import matplotlib.pyplot as plt 14 | from .gaussianNB import test_GaussianNB 15 | from .multinomialNB import test_MultinomialNB,test_MultinomialNB_alpha 16 | from .bernoulliNB import test_BernoulliNB,test_BernoulliNB_alpha,test_BernoulliNB_binarize 17 | def load_data(): 18 | ''' 19 | 加载用于分类问题的数据集。这里使用 scikit-learn 自带的 digits 数据集 20 | 21 | :return: 一个元组,用于分类问题。元组元素依次为:训练样本集、测试样本集、训练样本集对应的标记、测试样本集对应的标记 22 | ''' 23 | digits=datasets.load_digits() # 加载 scikit-learn 自带的 digits 数据集 24 | return cross_validation.train_test_split(digits.data,digits.target, 25 | test_size=0.25,random_state=0,stratify=digits.target) #分层采样拆分成训练集和测试集,测试集大小为原始数据集大小的 1/4 26 | def show_digits(): 27 | ''' 28 | 绘制 digits 数据集。这里只是绘制数据集中前 25 个样本的图片。 29 | 30 | :return: None 31 | ''' 32 | digits=datasets.load_digits() 33 | fig=plt.figure() 34 | print("vector from images 0:",digits.data[0]) 35 | for i in range(25): 36 | ax=fig.add_subplot(5,5,i+1) 37 | ax.imshow(digits.images[i],cmap=plt.cm.gray_r, 
interpolation='nearest') 38 | plt.show() 39 | 40 | if __name__=='__main__': 41 | X_train,X_test,y_train,y_test=load_data() # 产生用于分类问题的数据集 42 | test_GaussianNB(X_train,X_test,y_train,y_test) # 调用 test_GaussianNB 43 | test_MultinomialNB(X_train,X_test,y_train,y_test) # 调用 test_MultinomialNB 44 | test_MultinomialNB_alpha(X_train,X_test,y_train,y_test) # 调用 test_MultinomialNB_alpha 45 | test_BernoulliNB(X_train,X_test,y_train,y_test) # 调用 test_BernoulliNB 46 | test_BernoulliNB_alpha(X_train,X_test,y_train,y_test) # 调用 test_BernoulliNB_alpha 47 | test_BernoulliNB_binarize(X_train,X_test,y_train,y_test) # 调用 test_BernoulliNB_binarize 48 | -------------------------------------------------------------------------------- /chapters/Bayesian/bernoulliNB.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | 贝叶斯分类器和贝叶斯网络 4 | ~~~~~~~~~~~~~~~~~~~~~~~~~~ 5 | 6 | BernoulliNB 7 | 8 | :copyright: (c) 2016 by the huaxz1986. 9 | :license: lgpl-3.0, see LICENSE for more details. 
10 | """ 11 | 12 | from sklearn import naive_bayes 13 | import numpy as np 14 | import matplotlib.pyplot as plt 15 | 16 | def test_BernoulliNB(*data): 17 | ''' 18 | 测试 BernoulliNB 的用法 19 | 20 | :param data: 可变参数。它是一个元组,这里要求其元素依次为:训练样本集、测试样本集、训练样本的标记、测试样本的标记 21 | :return: None 22 | ''' 23 | X_train,X_test,y_train,y_test=data 24 | cls=naive_bayes.BernoulliNB() 25 | cls.fit(X_train,y_train) 26 | print('Training Score: %.2f' % cls.score(X_train,y_train)) 27 | print('Testing Score: %.2f' % cls.score(X_test, y_test)) 28 | def test_BernoulliNB_alpha(*data): 29 | ''' 30 | 测试 BernoulliNB 的预测性能随 alpha 参数的影响 31 | 32 | :param data: 可变参数。它是一个元组,这里要求其元素依次为:训练样本集、测试样本集、训练样本的标记、测试样本的标记 33 | :return: None 34 | ''' 35 | X_train,X_test,y_train,y_test=data 36 | alphas=np.logspace(-2,5,num=200) 37 | train_scores=[] 38 | test_scores=[] 39 | for alpha in alphas: 40 | cls=naive_bayes.BernoulliNB(alpha=alpha) 41 | cls.fit(X_train,y_train) 42 | train_scores.append(cls.score(X_train,y_train)) 43 | test_scores.append(cls.score(X_test, y_test)) 44 | 45 | ## 绘图 46 | fig=plt.figure() 47 | ax=fig.add_subplot(1,1,1) 48 | ax.plot(alphas,train_scores,label="Training Score") 49 | ax.plot(alphas,test_scores,label="Testing Score") 50 | ax.set_xlabel(r"$\alpha$") 51 | ax.set_ylabel("score") 52 | ax.set_ylim(0,1.0) 53 | ax.set_title("BernoulliNB") 54 | ax.set_xscale("log") 55 | ax.legend(loc="best") 56 | plt.show() 57 | def test_BernoulliNB_binarize(*data): 58 | ''' 59 | 测试 BernoulliNB 的预测性能随 binarize 参数的影响 60 | 61 | :param data: 可变参数。它是一个元组,这里要求其元素依次为:训练样本集、测试样本集、训练样本的标记、测试样本的标记 62 | :return: None 63 | ''' 64 | X_train,X_test,y_train,y_test=data 65 | min_x=min(np.min(X_train.ravel()),np.min(X_test.ravel()))-0.1 66 | max_x=max(np.max(X_train.ravel()),np.max(X_test.ravel()))+0.1 67 | binarizes=np.linspace(min_x,max_x,endpoint=True,num=100) 68 | train_scores=[] 69 | test_scores=[] 70 | for binarize in binarizes: 71 | cls=naive_bayes.BernoulliNB(binarize=binarize) 72 | cls.fit(X_train,y_train) 73 | 
train_scores.append(cls.score(X_train,y_train)) 74 | test_scores.append(cls.score(X_test, y_test)) 75 | 76 | ## 绘图 77 | fig=plt.figure() 78 | ax=fig.add_subplot(1,1,1) 79 | ax.plot(binarizes,train_scores,label="Training Score") 80 | ax.plot(binarizes,test_scores,label="Testing Score") 81 | ax.set_xlabel("binarize") 82 | ax.set_ylabel("score") 83 | ax.set_ylim(0,1.0) 84 | ax.set_xlim(min_x-1,max_x+1) 85 | ax.set_title("BernoulliNB") 86 | ax.legend(loc="best") 87 | plt.show() 88 | -------------------------------------------------------------------------------- /chapters/Bayesian/gaussianNB.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | 贝叶斯分类器和贝叶斯网络 4 | ~~~~~~~~~~~~~~~~~~~~~~~~~~ 5 | 6 | GaussianNB 7 | 8 | :copyright: (c) 2016 by the huaxz1986. 9 | :license: lgpl-3.0, see LICENSE for more details. 10 | """ 11 | from sklearn import naive_bayes 12 | 13 | def test_GaussianNB(*data): 14 | ''' 15 | 测试 GaussianNB 的用法 16 | 17 | :param data: 可变参数。它是一个元组,这里要求其元素依次为:训练样本集、测试样本集、训练样本的标记、测试样本的标记 18 | :return: None 19 | ''' 20 | X_train,X_test,y_train,y_test=data 21 | cls=naive_bayes.GaussianNB() 22 | cls.fit(X_train,y_train) 23 | print('Training Score: %.2f' % cls.score(X_train,y_train)) 24 | print('Testing Score: %.2f' % cls.score(X_test, y_test)) -------------------------------------------------------------------------------- /chapters/Bayesian/multinomialNB.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | 贝叶斯分类器和贝叶斯网络 4 | ~~~~~~~~~~~~~~~~~~~~~~~~~~ 5 | 6 | MultinomialNB 7 | 8 | :copyright: (c) 2016 by the huaxz1986. 9 | :license: lgpl-3.0, see LICENSE for more details. 
10 | """ 11 | 12 | from sklearn import naive_bayes 13 | import numpy as np 14 | import matplotlib.pyplot as plt 15 | 16 | def test_MultinomialNB(*data): 17 | ''' 18 | 测试 MultinomialNB 的用法 19 | 20 | :param data: 可变参数。它是一个元组,这里要求其元素依次为:训练样本集、测试样本集、训练样本的标记、测试样本的标记 21 | :return: None 22 | ''' 23 | X_train,X_test,y_train,y_test=data 24 | cls=naive_bayes.MultinomialNB() 25 | cls.fit(X_train,y_train) 26 | print('Training Score: %.2f' % cls.score(X_train,y_train)) 27 | print('Testing Score: %.2f' % cls.score(X_test, y_test)) 28 | def test_MultinomialNB_alpha(*data): 29 | ''' 30 | 测试 MultinomialNB 的预测性能随 alpha 参数的影响 31 | 32 | :param data: 可变参数。它是一个元组,这里要求其元素依次为:训练样本集、测试样本集、训练样本的标记、测试样本的标记 33 | :return: None 34 | ''' 35 | X_train,X_test,y_train,y_test=data 36 | alphas=np.logspace(-2,5,num=200) 37 | train_scores=[] 38 | test_scores=[] 39 | for alpha in alphas: 40 | cls=naive_bayes.MultinomialNB(alpha=alpha) 41 | cls.fit(X_train,y_train) 42 | train_scores.append(cls.score(X_train,y_train)) 43 | test_scores.append(cls.score(X_test, y_test)) 44 | 45 | ## 绘图 46 | fig=plt.figure() 47 | ax=fig.add_subplot(1,1,1) 48 | ax.plot(alphas,train_scores,label="Training Score") 49 | ax.plot(alphas,test_scores,label="Testing Score") 50 | ax.set_xlabel(r"$\alpha$") 51 | ax.set_ylabel("score") 52 | ax.set_ylim(0,1.0) 53 | ax.set_title("MultinomialNB") 54 | ax.set_xscale("log") 55 | plt.show() 56 | -------------------------------------------------------------------------------- /chapters/Cluster_EM/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huaxz1986/git_book/cf539065568c2588df87729c1c76303016933449/chapters/Cluster_EM/__init__.py -------------------------------------------------------------------------------- /chapters/Cluster_EM/__pycache__/__init__.cpython-35.pyc: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/huaxz1986/git_book/cf539065568c2588df87729c1c76303016933449/chapters/Cluster_EM/__pycache__/__init__.cpython-35.pyc -------------------------------------------------------------------------------- /chapters/Cluster_EM/__pycache__/agglomerative_clustering.cpython-35.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huaxz1986/git_book/cf539065568c2588df87729c1c76303016933449/chapters/Cluster_EM/__pycache__/agglomerative_clustering.cpython-35.pyc -------------------------------------------------------------------------------- /chapters/Cluster_EM/__pycache__/cluster.cpython-35.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huaxz1986/git_book/cf539065568c2588df87729c1c76303016933449/chapters/Cluster_EM/__pycache__/cluster.cpython-35.pyc -------------------------------------------------------------------------------- /chapters/Cluster_EM/__pycache__/dbscan.cpython-35.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huaxz1986/git_book/cf539065568c2588df87729c1c76303016933449/chapters/Cluster_EM/__pycache__/dbscan.cpython-35.pyc -------------------------------------------------------------------------------- /chapters/Cluster_EM/__pycache__/gmm.cpython-35.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huaxz1986/git_book/cf539065568c2588df87729c1c76303016933449/chapters/Cluster_EM/__pycache__/gmm.cpython-35.pyc -------------------------------------------------------------------------------- /chapters/Cluster_EM/__pycache__/kmeans.cpython-35.pyc: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/huaxz1986/git_book/cf539065568c2588df87729c1c76303016933449/chapters/Cluster_EM/__pycache__/kmeans.cpython-35.pyc -------------------------------------------------------------------------------- /chapters/Cluster_EM/agglomerative_clustering.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | 聚类和EM算法 4 | ~~~~~~~~~~~~~~~~ 5 | 6 | AgglomerativeClustering 7 | 8 | :copyright: (c) 2016 by the huaxz1986. 9 | :license: lgpl-3.0, see LICENSE for more details. 10 | """ 11 | from sklearn import cluster 12 | from sklearn.metrics import adjusted_rand_score 13 | import matplotlib.pyplot as plt 14 | 15 | def test_AgglomerativeClustering(*data): 16 | ''' 17 | 测试 AgglomerativeClustering 的用法 18 | 19 | :param data: 可变参数。它是一个元组。元组元素依次为:第一个元素为样本集,第二个元素为样本集的真实簇分类标记 20 | :return: None 21 | ''' 22 | X,labels_true=data 23 | clst=cluster.AgglomerativeClustering() 24 | predicted_labels=clst.fit_predict(X) 25 | print("ARI:%s"% adjusted_rand_score(labels_true,predicted_labels)) 26 | def test_AgglomerativeClustering_nclusters(*data): 27 | ''' 28 | 测试 AgglomerativeClustering 的聚类结果随 n_clusters 参数的影响 29 | 30 | :param data: 可变参数。它是一个元组。元组元素依次为:第一个元素为样本集,第二个元素为样本集的真实簇分类标记 31 | :return: None 32 | ''' 33 | X,labels_true=data 34 | nums=range(1,50) 35 | ARIs=[] 36 | for num in nums: 37 | clst=cluster.AgglomerativeClustering(n_clusters=num) 38 | predicted_labels=clst.fit_predict(X) 39 | ARIs.append(adjusted_rand_score(labels_true,predicted_labels)) 40 | 41 | ## 绘图 42 | fig=plt.figure() 43 | ax=fig.add_subplot(1,1,1) 44 | ax.plot(nums,ARIs,marker="+") 45 | ax.set_xlabel("n_clusters") 46 | ax.set_ylabel("ARI") 47 | fig.suptitle("AgglomerativeClustering") 48 | plt.show() 49 | def test_AgglomerativeClustering_linkage(*data): 50 | ''' 51 | 测试 AgglomerativeClustering 的聚类结果随链接方式的影响 52 | 53 | :param data: 可变参数。它是一个元组。元组元素依次为:第一个元素为样本集,第二个元素为样本集的真实簇分类标记 54 | :return: None 55 | ''' 56 | X,labels_true=data 57 
| nums=range(1,50) 58 | fig=plt.figure() 59 | ax=fig.add_subplot(1,1,1) 60 | 61 | linkages=['ward','complete','average'] 62 | markers="+o*" 63 | for i, linkage in enumerate(linkages): 64 | ARIs=[] 65 | for num in nums: 66 | clst=cluster.AgglomerativeClustering(n_clusters=num,linkage=linkage) 67 | predicted_labels=clst.fit_predict(X) 68 | ARIs.append(adjusted_rand_score(labels_true,predicted_labels)) 69 | ax.plot(nums,ARIs,marker=markers[i],label="linkage:%s"%linkage) 70 | 71 | ax.set_xlabel("n_clusters") 72 | ax.set_ylabel("ARI") 73 | ax.legend(loc="best") 74 | fig.suptitle("AgglomerativeClustering") 75 | plt.show() -------------------------------------------------------------------------------- /chapters/Cluster_EM/cluster.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | 聚类和EM算法 4 | ~~~~~~~~~~~~~~~~ 5 | 6 | 聚类 7 | 8 | :copyright: (c) 2016 by the huaxz1986. 9 | :license: lgpl-3.0, see LICENSE for more details. 10 | """ 11 | 12 | import numpy as np 13 | import matplotlib.pyplot as plt 14 | from sklearn.datasets.samples_generator import make_blobs 15 | # from .agglomerative_clustering import test_AgglomerativeClustering,test_AgglomerativeClustering_nclusters,test_AgglomerativeClustering_linkage 16 | # from .dbscan import test_DBSCAN,test_DBSCAN_epsilon,test_DBSCAN_min_samples 17 | from chapters.Cluster_EM.gmm import test_GMM,test_GMM_cov_type,test_GMM_n_components 18 | # from .kmeans import test_Kmeans,test_Kmeans_n_init,test_Kmeans_nclusters 19 | 20 | def create_data(centers,num=100,std=0.7): 21 | ''' 22 | 生成用于聚类的数据集 23 | 24 | :param centers: 聚类的中心点组成的数组。如果中心点是二维的,则产生的每个样本都是二维的。 25 | :param num: 样本数 26 | :param std: 每个簇中样本的标准差 27 | :return: 用于聚类的数据集。是一个元组,第一个元素为样本集,第二个元素为样本集的真实簇分类标记 28 | ''' 29 | X, labels_true = make_blobs(n_samples=num, centers=centers, cluster_std=std) 30 | return X,labels_true 31 | def plot_data(*data): 32 | ''' 33 | 绘制用于聚类的数据集 34 | 35 | :param data: 
可变参数。它是一个元组。元组元素依次为:第一个元素为样本集,第二个元素为样本集的真实簇分类标记 36 | :return: None 37 | ''' 38 | X,labels_true=data 39 | labels=np.unique(labels_true) 40 | fig=plt.figure() 41 | ax=fig.add_subplot(1,1,1) 42 | colors='rgbyckm' # 每个簇的样本标记不同的颜色 43 | for i,label in enumerate(labels): 44 | position=labels_true==label 45 | ax.scatter(X[position,0],X[position,1],label="cluster %d"%label, 46 | color=colors[i%len(colors)]) 47 | 48 | ax.legend(loc="best",framealpha=0.5) 49 | ax.set_xlabel("X[0]") 50 | ax.set_ylabel("Y[1]") 51 | ax.set_title("data") 52 | plt.show() 53 | 54 | if __name__=='__main__': 55 | centers=[[1,1],[2,2],[1,2],[10,20]] # 用于产生聚类的中心点 56 | X,labels_true=create_data(centers,1000,0.5) # 产生用于聚类的数据集 57 | # plot_data(X,labels_true) # 绘制用于聚类的数据集 58 | # test_Kmeans(X,labels_true) # 调用 test_Kmeans 函数 59 | # test_Kmeans_nclusters(X,labels_true) # 调用 test_Kmeans_nclusters 函数 60 | # test_Kmeans_n_init(X,labels_true) # 调用 test_Kmeans_n_init 函数 61 | # test_DBSCAN(X,labels_true) # 调用 test_DBSCAN 函数 62 | # test_DBSCAN_epsilon(X,labels_true) # 调用 test_DBSCAN_epsilon 函数 63 | # test_DBSCAN_min_samples(X,labels_true) # 调用 test_DBSCAN_min_samples 函数 64 | # test_AgglomerativeClustering(X,labels_true) # 调用 test_AgglomerativeClustering 函数 65 | # test_AgglomerativeClustering_nclusters(X,labels_true) # 调用 test_AgglomerativeClustering_nclusters 函数 66 | # test_AgglomerativeClustering_linkage(X,labels_true) # 调用 test_AgglomerativeClustering_linkage 函数 67 | # test_GMM(X,labels_true) # 调用 test_GMM 函数 68 | # test_GMM_n_components(X,labels_true) # 调用 test_GMM_n_components 函数 69 | test_GMM_cov_type(X,labels_true) # 调用 test_GMM_cov_type 函数 70 | 71 | -------------------------------------------------------------------------------- /chapters/Cluster_EM/dbscan.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | 聚类和EM算法 4 | ~~~~~~~~~~~~~~~~ 5 | 6 | DBSCAN 7 | 8 | :copyright: (c) 2016 by the huaxz1986. 
9 | :license: lgpl-3.0, see LICENSE for more details. 10 | """ 11 | from sklearn import cluster 12 | from sklearn.metrics import adjusted_rand_score 13 | import matplotlib.pyplot as plt 14 | 15 | def test_DBSCAN(*data): 16 | ''' 17 | 测试 DBSCAN 的用法 18 | 19 | :param data: 可变参数。它是一个元组。元组元素依次为:第一个元素为样本集,第二个元素为样本集的真实簇分类标记 20 | :return: None 21 | ''' 22 | X,labels_true=data 23 | clst=cluster.DBSCAN() 24 | predicted_labels=clst.fit_predict(X) 25 | print("ARI:%s"% adjusted_rand_score(labels_true,predicted_labels)) 26 | print("Core sample num:%d"%len(clst.core_sample_indices_)) 27 | def test_DBSCAN_epsilon(*data): 28 | ''' 29 | 测试 DBSCAN 的聚类结果随 eps 参数的影响 30 | 31 | :param data: 可变参数。它是一个元组。元组元素依次为:第一个元素为样本集,第二个元素为样本集的真实簇分类标记 32 | :return: None 33 | ''' 34 | X,labels_true=data 35 | epsilons=np.logspace(-1,1.5) 36 | ARIs=[] 37 | Core_nums=[] 38 | for epsilon in epsilons: 39 | clst=cluster.DBSCAN(eps=epsilon) 40 | predicted_labels=clst.fit_predict(X) 41 | ARIs.append( adjusted_rand_score(labels_true,predicted_labels)) 42 | Core_nums.append(len(clst.core_sample_indices_)) 43 | 44 | ## 绘图 45 | fig=plt.figure() 46 | ax=fig.add_subplot(1,2,1) 47 | ax.plot(epsilons,ARIs,marker='+') 48 | ax.set_xscale('log') 49 | ax.set_xlabel(r"$\epsilon$") 50 | ax.set_ylim(0,1) 51 | ax.set_ylabel('ARI') 52 | 53 | ax=fig.add_subplot(1,2,2) 54 | ax.plot(epsilons,Core_nums,marker='o') 55 | ax.set_xscale('log') 56 | ax.set_xlabel(r"$\epsilon$") 57 | ax.set_ylabel('Core_Nums') 58 | 59 | fig.suptitle("DBSCAN") 60 | plt.show() 61 | def test_DBSCAN_min_samples(*data): 62 | ''' 63 | 测试 DBSCAN 的聚类结果随 min_samples 参数的影响 64 | 65 | :param data: 可变参数。它是一个元组。元组元素依次为:第一个元素为样本集,第二个元素为样本集的真实簇分类标记 66 | :return: None 67 | ''' 68 | X,labels_true=data 69 | min_samples=range(1,100) 70 | ARIs=[] 71 | Core_nums=[] 72 | for num in min_samples: 73 | clst=cluster.DBSCAN(min_samples=num) 74 | predicted_labels=clst.fit_predict(X) 75 | ARIs.append( adjusted_rand_score(labels_true,predicted_labels)) 76 | 
Core_nums.append(len(clst.core_sample_indices_)) 77 | 78 | ## 绘图 79 | fig=plt.figure() 80 | ax=fig.add_subplot(1,2,1) 81 | ax.plot(min_samples,ARIs,marker='+') 82 | ax.set_xlabel( "min_samples") 83 | ax.set_ylim(0,1) 84 | ax.set_ylabel('ARI') 85 | 86 | ax=fig.add_subplot(1,2,2) 87 | ax.plot(min_samples,Core_nums,marker='o') 88 | ax.set_xlabel( "min_samples") 89 | ax.set_ylabel('Core_Nums') 90 | 91 | fig.suptitle("DBSCAN") 92 | plt.show() 93 | -------------------------------------------------------------------------------- /chapters/Cluster_EM/gmm.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | 聚类和EM算法 4 | ~~~~~~~~~~~~~~~~ 5 | 6 | GMM 7 | 8 | :copyright: (c) 2016 by the huaxz1986. 9 | :license: lgpl-3.0, see LICENSE for more details. 10 | """ 11 | from sklearn import mixture 12 | from sklearn.metrics import adjusted_rand_score 13 | import matplotlib.pyplot as plt 14 | 15 | def test_GMM(*data): 16 | ''' 17 | 测试 GMM 的用法 18 | 19 | :param data: 可变参数。它是一个元组。元组元素依次为:第一个元素为样本集,第二个元素为样本集的真实簇分类标记 20 | :return: None 21 | ''' 22 | X,labels_true=data 23 | clst=mixture.GaussianMixture() 24 | clst.fit(X) 25 | predicted_labels=clst.predict(X) 26 | print("ARI:%s"% adjusted_rand_score(labels_true,predicted_labels)) 27 | def test_GMM_n_components(*data): 28 | ''' 29 | 测试 GMM 的聚类结果随 n_components 参数的影响 30 | 31 | :param data: 可变参数。它是一个元组。元组元素依次为:第一个元素为样本集,第二个元素为样本集的真实簇分类标记 32 | :return: None 33 | ''' 34 | X,labels_true=data 35 | nums=range(1,50) 36 | ARIs=[] 37 | for num in nums: 38 | clst=mixture.GaussianMixture(n_components=num) 39 | clst.fit(X) 40 | predicted_labels=clst.predict(X) 41 | ARIs.append(adjusted_rand_score(labels_true,predicted_labels)) 42 | 43 | ## 绘图 44 | fig=plt.figure() 45 | ax=fig.add_subplot(1,1,1) 46 | ax.plot(nums,ARIs,marker="+") 47 | ax.set_xlabel("n_components") 48 | ax.set_ylabel("ARI") 49 | fig.suptitle("GMM") 50 | plt.show() 51 | def test_GMM_cov_type(*data): 52 | ''' 53 | 测试 GMM 
的聚类结果随协方差类型的影响 54 | 55 | :param data: 可变参数。它是一个元组。元组元素依次为:第一个元素为样本集,第二个元素为样本集的真实簇分类标记 56 | :return: None 57 | ''' 58 | X,labels_true=data 59 | nums=range(1,50) 60 | 61 | cov_types=['spherical','tied','diag','full'] 62 | markers="+o*s" 63 | fig=plt.figure() 64 | ax=fig.add_subplot(1,1,1) 65 | 66 | for i ,cov_type in enumerate(cov_types): 67 | ARIs=[] 68 | for num in nums: 69 | clst=mixture.GaussianMixture(n_components=num,covariance_type=cov_type) 70 | clst.fit(X) 71 | predicted_labels=clst.predict(X) 72 | ARIs.append(adjusted_rand_score(labels_true,predicted_labels)) 73 | ax.plot(nums,ARIs,marker=markers[i],label="covariance_type:%s"%cov_type) 74 | 75 | ax.set_xlabel("n_components") 76 | ax.legend(loc="best") 77 | ax.set_ylabel("ARI") 78 | fig.suptitle("GMM") 79 | plt.show() 80 | -------------------------------------------------------------------------------- /chapters/Cluster_EM/kmeans.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | 聚类和EM算法 4 | ~~~~~~~~~~~~~~~~ 5 | 6 | KMeans 7 | 8 | :copyright: (c) 2016 by the huaxz1986. 9 | :license: lgpl-3.0, see LICENSE for more details. 
10 | """ 11 | from sklearn import cluster 12 | from sklearn.metrics import adjusted_rand_score 13 | import matplotlib.pyplot as plt 14 | 15 | def test_Kmeans(*data): 16 | ''' 17 | 测试 KMeans 的用法 18 | 19 | :param data: 可变参数。它是一个元组。元组元素依次为:第一个元素为样本集,第二个元素为样本集的真实簇分类标记 20 | :return: None 21 | ''' 22 | X,labels_true=data 23 | clst=cluster.KMeans() 24 | clst.fit(X) 25 | predicted_labels=clst.predict(X) 26 | print("ARI:%s"% adjusted_rand_score(labels_true,predicted_labels)) 27 | print("Sum center distance %s"%clst.inertia_) 28 | def test_Kmeans_nclusters(*data): 29 | ''' 30 | 测试 KMeans 的聚类结果随 n_clusters 参数的影响 31 | 32 | :param data: 可变参数。它是一个元组。元组元素依次为:第一个元素为样本集,第二个元素为样本集的真实簇分类标记 33 | :return: None 34 | ''' 35 | X,labels_true=data 36 | nums=range(1,50) 37 | ARIs=[] 38 | Distances=[] 39 | for num in nums: 40 | clst=cluster.KMeans(n_clusters=num) 41 | clst.fit(X) 42 | predicted_labels=clst.predict(X) 43 | ARIs.append(adjusted_rand_score(labels_true,predicted_labels)) 44 | Distances.append(clst.inertia_) 45 | 46 | ## 绘图 47 | fig=plt.figure() 48 | ax=fig.add_subplot(1,2,1) 49 | ax.plot(nums,ARIs,marker="+") 50 | ax.set_xlabel("n_clusters") 51 | ax.set_ylabel("ARI") 52 | ax=fig.add_subplot(1,2,2) 53 | ax.plot(nums,Distances,marker='o') 54 | ax.set_xlabel("n_clusters") 55 | ax.set_ylabel("inertia_") 56 | fig.suptitle("KMeans") 57 | plt.show() 58 | def test_Kmeans_n_init(*data): 59 | ''' 60 | 测试 KMeans 的聚类结果随 n_init 和 init 参数的影响 61 | 62 | :param data: 可变参数。它是一个元组。元组元素依次为:第一个元素为样本集,第二个元素为样本集的真实簇分类标记 63 | :return: None 64 | ''' 65 | X,labels_true=data 66 | nums=range(1,50) 67 | ## 绘图 68 | fig=plt.figure() 69 | 70 | ARIs_k=[] 71 | Distances_k=[] 72 | ARIs_r=[] 73 | Distances_r=[] 74 | for num in nums: 75 | clst=cluster.KMeans(n_init=num,init='k-means++') 76 | clst.fit(X) 77 | predicted_labels=clst.predict(X) 78 | ARIs_k.append(adjusted_rand_score(labels_true,predicted_labels)) 79 | Distances_k.append(clst.inertia_) 80 | 81 | clst=cluster.KMeans(n_init=num,init='random') 82 | 
clst.fit(X) 83 | predicted_labels=clst.predict(X) 84 | ARIs_r.append(adjusted_rand_score(labels_true,predicted_labels)) 85 | Distances_r.append(clst.inertia_) 86 | 87 | ax=fig.add_subplot(1,2,1) 88 | ax.plot(nums,ARIs_k,marker="+",label="k-means++") 89 | ax.plot(nums,ARIs_r,marker="+",label="random") 90 | ax.set_xlabel("n_init") 91 | ax.set_ylabel("ARI") 92 | ax.set_ylim(0,1) 93 | ax.legend(loc='best') 94 | ax=fig.add_subplot(1,2,2) 95 | ax.plot(nums,Distances_k,marker='o',label="k-means++") 96 | ax.plot(nums,Distances_r,marker='o',label="random") 97 | ax.set_xlabel("n_init") 98 | ax.set_ylabel("inertia_") 99 | ax.legend(loc='best') 100 | 101 | fig.suptitle("KMeans") 102 | plt.show() 103 | -------------------------------------------------------------------------------- /chapters/Decision_Tree/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huaxz1986/git_book/cf539065568c2588df87729c1c76303016933449/chapters/Decision_Tree/__init__.py -------------------------------------------------------------------------------- /chapters/Decision_Tree/__pycache__/__init__.cpython-35.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huaxz1986/git_book/cf539065568c2588df87729c1c76303016933449/chapters/Decision_Tree/__pycache__/__init__.cpython-35.pyc -------------------------------------------------------------------------------- /chapters/Decision_Tree/__pycache__/decisiontree_classifier.cpython-35.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huaxz1986/git_book/cf539065568c2588df87729c1c76303016933449/chapters/Decision_Tree/__pycache__/decisiontree_classifier.cpython-35.pyc -------------------------------------------------------------------------------- /chapters/Decision_Tree/__pycache__/decisiontree_regressor.cpython-35.pyc: 
# --------------------------------------------------------------------------------
# https://raw.githubusercontent.com/huaxz1986/git_book/cf539065568c2588df87729c1c76303016933449/chapters/Decision_Tree/__pycache__/decisiontree_regressor.cpython-35.pyc
# --------------------------------------------------------------------------------
# /chapters/Decision_Tree/decisiontree_classifier.py:
# --------------------------------------------------------------------------------
# -*- coding: utf-8 -*-
"""
    Decision trees
    ~~~~~~~~~~~~~~~~

    DecisionTreeClassifier

    :copyright: (c) 2016 by the huaxz1986.
    :license: lgpl-3.0, see LICENSE for more details.
"""
import numpy as np
from sklearn.tree import DecisionTreeClassifier
from sklearn import datasets
import matplotlib.pyplot as plt

try:
    # ``cross_validation`` was deprecated in scikit-learn 0.18 and removed in
    # 0.20; ``model_selection`` provides the same ``train_test_split``.
    from sklearn import model_selection
except ImportError:  # scikit-learn older than 0.18
    from sklearn import cross_validation as model_selection


def load_data():
    '''
    Load the iris data set bundled with scikit-learn and split it for a
    classification task.

    :return: the result of ``train_test_split``, in order: training samples,
        test samples, training labels, test labels
    '''
    iris = datasets.load_iris()
    # Stratified split on the labels; a quarter of the samples form the test set.
    return model_selection.train_test_split(
        iris.data, iris.target, test_size=0.25,
        random_state=0, stratify=iris.target)


def test_DecisionTreeClassifier(*data):
    '''
    Fit a default DecisionTreeClassifier and print its train/test scores.

    :param data: positional arguments, in order: training samples, test
        samples, training labels, test labels
    :return: None
    '''
    X_train, X_test, y_train, y_test = data
    clf = DecisionTreeClassifier()
    clf.fit(X_train, y_train)

    print("Training score:%f" % (clf.score(X_train, y_train)))
    print("Testing score:%f" % (clf.score(X_test, y_test)))


def test_DecisionTreeClassifier_criterion(*data):
    '''
    Print the train/test scores of DecisionTreeClassifier for each value of
    the ``criterion`` parameter ('gini' and 'entropy').

    :param data: positional arguments, in order: training samples, test
        samples, training labels, test labels
    :return: None
    '''
    X_train, X_test, y_train, y_test = data
    for criterion in ['gini', 'entropy']:
        clf = DecisionTreeClassifier(criterion=criterion)
        clf.fit(X_train, y_train)
        print("criterion:%s" % criterion)
        print("Training score:%f" % (clf.score(X_train, y_train)))
        print("Testing score:%f" % (clf.score(X_test, y_test)))


def test_DecisionTreeClassifier_splitter(*data):
    '''
    Print the train/test scores of DecisionTreeClassifier for each value of
    the ``splitter`` parameter ('best' and 'random').

    :param data: positional arguments, in order: training samples, test
        samples, training labels, test labels
    :return: None
    '''
    X_train, X_test, y_train, y_test = data
    for splitter in ['best', 'random']:
        clf = DecisionTreeClassifier(splitter=splitter)
        clf.fit(X_train, y_train)
        print("splitter:%s" % splitter)
        print("Training score:%f" % (clf.score(X_train, y_train)))
        print("Testing score:%f" % (clf.score(X_test, y_test)))


def test_DecisionTreeClassifier_depth(*data, maxdepth):
    '''
    Plot the train/test scores of DecisionTreeClassifier against ``max_depth``.

    :param data: positional arguments, in order: training samples, test
        samples, training labels, test labels
    :param maxdepth: integer upper bound (exclusive) of the depths tried;
        the depths are ``np.arange(1, maxdepth)``
    :return: None
    '''
    X_train, X_test, y_train, y_test = data
    depths = np.arange(1, maxdepth)
    training_scores = []
    testing_scores = []
    for depth in depths:
        clf = DecisionTreeClassifier(max_depth=depth)
        clf.fit(X_train, y_train)
        training_scores.append(clf.score(X_train, y_train))
        testing_scores.append(clf.score(X_test, y_test))

    # Plot both score curves on a single axes.
    fig = plt.figure()
    ax = fig.add_subplot(1, 1, 1)
    ax.plot(depths, training_scores, label="traing score", marker='o')
    ax.plot(depths, testing_scores, label="testing score", marker='*')
    ax.set_xlabel("maxdepth")
    ax.set_ylabel("score")
    ax.set_title("Decision Tree Classification")
    ax.legend(framealpha=0.5, loc='best')
    plt.show()


if __name__ == '__main__':
    X_train, X_test, y_train, y_test = load_data()  # build the classification data set
    test_DecisionTreeClassifier(X_train, X_test, y_train, y_test)
    # Alternative demos; uncomment the one to run.
    # test_DecisionTreeClassifier_criterion(X_train,X_test,y_train,y_test)
    # test_DecisionTreeClassifier_splitter(X_train,X_test,y_train,y_test)
    # test_DecisionTreeClassifier_depth(X_train,X_test,y_train,y_test,maxdepth=100)

# --------------------------------------------------------------------------------
# /chapters/Decision_Tree/decisiontree_regressor.py:
# --------------------------------------------------------------------------------
# -*- coding: utf-8 -*-
"""
    Decision trees
    ~~~~~~~~~~~~~~~~

    DecisionTreeRegressor

    :copyright: (c) 2016 by the huaxz1986.
    :license: lgpl-3.0, see LICENSE for more details.
"""
import numpy as np
from sklearn.tree import DecisionTreeRegressor
import matplotlib.pyplot as plt

try:
    # ``cross_validation`` was deprecated in scikit-learn 0.18 and removed in
    # 0.20; ``model_selection`` provides the same ``train_test_split``.
    from sklearn import model_selection
except ImportError:  # scikit-learn older than 0.18
    from sklearn import cross_validation as model_selection


def creat_data(n):
    '''
    Generate a noisy sine data set for a regression task.

    :param n: number of samples to generate
    :return: the result of ``train_test_split``, in order: training samples,
        test samples, training targets, test targets
    '''
    np.random.seed(0)
    X = 5 * np.random.rand(n, 1)
    y = np.sin(X).ravel()
    # Every fifth target gets noise. The noise vector is sized from the slice
    # itself: y[::5] holds ceil(n / 5) values, so the former int(n / 5) only
    # matched (and only avoided a broadcast error) when n was a multiple of 5.
    noise_num = y[::5].size
    y[::5] += 3 * (0.5 - np.random.rand(noise_num))
    # A quarter of the samples form the test set.
    return model_selection.train_test_split(
        X, y, test_size=0.25, random_state=1)


def test_DecisionTreeRegressor(*data):
    '''
    Fit a default DecisionTreeRegressor, print its train/test scores and plot
    its predictions over [0, 5) together with the samples.

    :param data: positional arguments, in order: training samples, test
        samples, training targets, test targets
    :return: None
    '''
    X_train, X_test, y_train, y_test = data
    regr = DecisionTreeRegressor()
    regr.fit(X_train, y_train)
    print("Training score:%f" % (regr.score(X_train, y_train)))
    print("Testing score:%f" % (regr.score(X_test, y_test)))

    # Plot the samples and the prediction curve on a dense grid.
    fig = plt.figure()
    ax = fig.add_subplot(1, 1, 1)
    X = np.arange(0.0, 5.0, 0.01)[:, np.newaxis]
    Y = regr.predict(X)
    ax.scatter(X_train, y_train, label="train sample", c='g')
    ax.scatter(X_test, y_test, label="test sample", c='r')
    ax.plot(X, Y, label="predict_value", linewidth=2, alpha=0.5)
    ax.set_xlabel("data")
    ax.set_ylabel("target")
    ax.set_title("Decision Tree Regression")
    ax.legend(framealpha=0.5)
    plt.show()


def test_DecisionTreeRegressor_splitter(*data):
    '''
    Print the train/test scores of DecisionTreeRegressor for each value of
    the ``splitter`` parameter ('best' and 'random').

    :param data: positional arguments, in order: training samples, test
        samples, training targets, test targets
    :return: None
    '''
    X_train, X_test, y_train, y_test = data
    for splitter in ['best', 'random']:
        regr = DecisionTreeRegressor(splitter=splitter)
        regr.fit(X_train, y_train)
        print("Splitter %s" % splitter)
        print("Training score:%f" % (regr.score(X_train, y_train)))
        print("Testing score:%f" % (regr.score(X_test, y_test)))


def test_DecisionTreeRegressor_depth(*data, maxdepth):
    '''
    Plot the train/test scores of DecisionTreeRegressor against ``max_depth``.

    :param data: positional arguments, in order: training samples, test
        samples, training targets, test targets
    :param maxdepth: integer upper bound (exclusive) of the depths tried;
        the depths are ``np.arange(1, maxdepth)``
    :return: None
    '''
    X_train, X_test, y_train, y_test = data
    depths = np.arange(1, maxdepth)
    training_scores = []
    testing_scores = []
    for depth in depths:
        regr = DecisionTreeRegressor(max_depth=depth)
        regr.fit(X_train, y_train)
        training_scores.append(regr.score(X_train, y_train))
        testing_scores.append(regr.score(X_test, y_test))

    # Plot both score curves on a single axes.
    fig = plt.figure()
    ax = fig.add_subplot(1, 1, 1)
    ax.plot(depths, training_scores, label="traing score")
    ax.plot(depths, testing_scores, label="testing score")
    ax.set_xlabel("maxdepth")
    ax.set_ylabel("score")
    ax.set_title("Decision Tree Regression")
    ax.legend(framealpha=0.5)
    plt.show()


if __name__ == '__main__':
    X_train, X_test, y_train, y_test = creat_data(100)  # build the regression data set
    test_DecisionTreeRegressor(X_train, X_test, y_train, y_test)
    # Alternative demos; uncomment the one to run.
    # test_DecisionTreeRegressor_splitter(X_train,X_test,y_train,y_test)
    # test_DecisionTreeRegressor_depth(X_train,X_test,y_train,y_test,maxdepth=20)

# --------------------------------------------------------------------------------
# /chapters/Ensemble/__init__.py:
# --------------------------------------------------------------------------------
# https://raw.githubusercontent.com/huaxz1986/git_book/cf539065568c2588df87729c1c76303016933449/chapters/Ensemble/__init__.py
# --------------------------------------------------------------------------------
# /chapters/Ensemble/__pycache__/__init__.cpython-35.pyc:
# --------------------------------------------------------------------------------
# https://raw.githubusercontent.com/huaxz1986/git_book/cf539065568c2588df87729c1c76303016933449/chapters/Ensemble/__pycache__/__init__.cpython-35.pyc
# --------------------------------------------------------------------------------
# /chapters/Ensemble/__pycache__/adaboost_classifier.cpython-35.pyc:
# --------------------------------------------------------------------------------
# https://raw.githubusercontent.com/huaxz1986/git_book/cf539065568c2588df87729c1c76303016933449/chapters/Ensemble/__pycache__/adaboost_classifier.cpython-35.pyc
# --------------------------------------------------------------------------------
# /chapters/Ensemble/__pycache__/adaboost_regressor.cpython-35.pyc:
# --------------------------------------------------------------------------------
# https://raw.githubusercontent.com/huaxz1986/git_book/cf539065568c2588df87729c1c76303016933449/chapters/Ensemble/__pycache__/adaboost_regressor.cpython-35.pyc
# --------------------------------------------------------------------------------
# /chapters/Ensemble/__pycache__/gradientboosting_classifier.cpython-35.pyc:
# --------------------------------------------------------------------------------
# https://raw.githubusercontent.com/huaxz1986/git_book/cf539065568c2588df87729c1c76303016933449/chapters/Ensemble/__pycache__/gradientboosting_classifier.cpython-35.pyc
# --------------------------------------------------------------------------------
# /chapters/Ensemble/__pycache__/gradientboosting_regressor.cpython-35.pyc:
# --------------------------------------------------------------------------------
# https://raw.githubusercontent.com/huaxz1986/git_book/cf539065568c2588df87729c1c76303016933449/chapters/Ensemble/__pycache__/gradientboosting_regressor.cpython-35.pyc
# --------------------------------------------------------------------------------
# /chapters/Ensemble/__pycache__/randomforest_classifier.cpython-35.pyc:
# --------------------------------------------------------------------------------
# https://raw.githubusercontent.com/huaxz1986/git_book/cf539065568c2588df87729c1c76303016933449/chapters/Ensemble/__pycache__/randomforest_classifier.cpython-35.pyc
# --------------------------------------------------------------------------------
# /chapters/Ensemble/__pycache__/randomforest_regressor.cpython-35.pyc:
# --------------------------------------------------------------------------------
# https://raw.githubusercontent.com/huaxz1986/git_book/cf539065568c2588df87729c1c76303016933449/chapters/Ensemble/__pycache__/randomforest_regressor.cpython-35.pyc
# --------------------------------------------------------------------------------
# /chapters/Ensemble/adaboost_regressor.py:
# --------------------------------------------------------------------------------
# -*- coding: utf-8 -*-
"""
    Ensemble learning
    ~~~~~~~~~~~~~~~~

    AdaBoostRegressor

    :copyright: (c) 2016 by the huaxz1986.
    :license: lgpl-3.0, see LICENSE for more details.
"""

import matplotlib.pyplot as plt
import numpy as np
from sklearn import datasets, ensemble

try:
    # ``cross_validation`` was deprecated in scikit-learn 0.18 and removed in
    # 0.20; ``model_selection`` provides the same ``train_test_split``.
    from sklearn import model_selection
except ImportError:  # scikit-learn older than 0.18
    from sklearn import cross_validation as model_selection


def load_data_regression():
    '''
    Load the diabetes data set bundled with scikit-learn and split it for a
    regression task.

    :return: the result of ``train_test_split``, in order: training samples,
        test samples, training targets, test targets
    '''
    diabetes = datasets.load_diabetes()
    # A quarter of the samples form the test set.
    return model_selection.train_test_split(
        diabetes.data, diabetes.target, test_size=0.25, random_state=0)


def _make_adaboost_regressor(base):
    '''Build an AdaBoostRegressor around *base* on old and new scikit-learn.'''
    try:
        # scikit-learn >= 1.2 names the keyword ``estimator``.
        return ensemble.AdaBoostRegressor(estimator=base)
    except TypeError:
        # Older releases only know ``base_estimator``.
        return ensemble.AdaBoostRegressor(base_estimator=base)


def _plot_staged_scores(ax, regr, data, train_label="Traing score",
                        test_label="Testing score"):
    '''Draw the staged train/test scores of the fitted *regr* on *ax*.'''
    X_train, X_test, y_train, y_test = data
    stages = list(range(1, len(regr.estimators_) + 1))
    ax.plot(stages, list(regr.staged_score(X_train, y_train)), label=train_label)
    ax.plot(stages, list(regr.staged_score(X_test, y_test)), label=test_label)


def test_AdaBoostRegressor(*data):
    '''
    Fit a default AdaBoostRegressor and plot its staged train/test scores
    against the number of base regressors.

    :param data: positional arguments, in order: training samples, test
        samples, training targets, test targets
    :return: None
    '''
    X_train, X_test, y_train, y_test = data
    regr = ensemble.AdaBoostRegressor()
    regr.fit(X_train, y_train)

    fig = plt.figure()
    ax = fig.add_subplot(1, 1, 1)
    _plot_staged_scores(ax, regr, data)
    ax.set_xlabel("estimator num")
    ax.set_ylabel("score")
    ax.legend(loc="best")
    ax.set_title("AdaBoostRegressor")
    plt.show()


def test_AdaBoostRegressor_base_regr(*data):
    '''
    Plot the staged train/test scores of AdaBoostRegressor for two kinds of
    base regressor: the default one and a LinearSVR.

    :param data: positional arguments, in order: training samples, test
        samples, training targets, test targets
    :return: None
    '''
    from sklearn.svm import LinearSVR
    X_train, X_test, y_train, y_test = data
    fig = plt.figure()
    regrs = [ensemble.AdaBoostRegressor(),  # default base regressor
             _make_adaboost_regressor(LinearSVR(epsilon=0.01, C=100))]
    labels = ["Decision Tree Regressor", "Linear SVM Regressor"]
    for i, regr in enumerate(regrs):
        # One subplot (row) per base regressor.
        ax = fig.add_subplot(2, 1, i + 1)
        regr.fit(X_train, y_train)
        _plot_staged_scores(ax, regr, data)
        ax.set_xlabel("estimator num")
        ax.set_ylabel("score")
        ax.legend(loc="lower right")
        ax.set_ylim(-1, 1)
        ax.set_title("Base_Estimator:%s" % labels[i])
    plt.suptitle("AdaBoostRegressor")
    plt.show()


def test_AdaBoostRegressor_learning_rate(*data):
    '''
    Plot the train/test scores of AdaBoostRegressor (500 estimators) against
    the learning rate, for the rates in ``np.linspace(0.01, 1)``.

    :param data: positional arguments, in order: training samples, test
        samples, training targets, test targets
    :return: None
    '''
    X_train, X_test, y_train, y_test = data
    learning_rates = np.linspace(0.01, 1)
    fig = plt.figure()
    ax = fig.add_subplot(1, 1, 1)
    traing_scores = []
    testing_scores = []
    for learning_rate in learning_rates:
        regr = ensemble.AdaBoostRegressor(learning_rate=learning_rate,
                                          n_estimators=500)
        regr.fit(X_train, y_train)
        traing_scores.append(regr.score(X_train, y_train))
        testing_scores.append(regr.score(X_test, y_test))
    ax.plot(learning_rates, traing_scores, label="Traing score")
    ax.plot(learning_rates, testing_scores, label="Testing score")
    ax.set_xlabel("learning rate")
    ax.set_ylabel("score")
    ax.legend(loc="best")
    ax.set_title("AdaBoostRegressor")
    plt.show()


def test_AdaBoostRegressor_loss(*data):
    '''
    Plot the staged train/test scores of AdaBoostRegressor (30 estimators)
    for each loss type: 'linear', 'square' and 'exponential'.

    :param data: positional arguments, in order: training samples, test
        samples, training targets, test targets
    :return: None
    '''
    X_train, X_test, y_train, y_test = data
    losses = ['linear', 'square', 'exponential']
    fig = plt.figure()
    ax = fig.add_subplot(1, 1, 1)
    for loss in losses:
        regr = ensemble.AdaBoostRegressor(loss=loss, n_estimators=30)
        regr.fit(X_train, y_train)
        _plot_staged_scores(ax, regr, data,
                            train_label="Traing score:loss=%s" % loss,
                            test_label="Testing score:loss=%s" % loss)
    # All curves share one axes, so it is decorated once after the loop.
    ax.set_xlabel("estimator num")
    ax.set_ylabel("score")
    ax.legend(loc="lower right")
    ax.set_ylim(-1, 1)
    plt.suptitle("AdaBoostRegressor")
    plt.show()


if __name__ == '__main__':
    X_train, X_test, y_train, y_test = load_data_regression()  # load the regression data
    test_AdaBoostRegressor(X_train, X_test, y_train, y_test)
    # Alternative demos; uncomment the one to run.
    # test_AdaBoostRegressor_base_regr(X_train,X_test,y_train,y_test)
    # test_AdaBoostRegressor_learning_rate(X_train,X_test,y_train,y_test)
    # test_AdaBoostRegressor_loss(X_train,X_test,y_train,y_test)

# --------------------------------------------------------------------------------
# /chapters/Ensemble/randomforest_classifier.py:
# --------------------------------------------------------------------------------
"""
    Ensemble learning
    ~~~~~~~~~~~~~~~~

    RandomForestClassifier

    :copyright: (c) 2016 by the huaxz1986.
    :license: lgpl-3.0, see LICENSE for more details.
"""

import matplotlib.pyplot as plt
import numpy as np
from sklearn import datasets, ensemble

try:
    # ``cross_validation`` was deprecated in scikit-learn 0.18 and removed in
    # 0.20; ``model_selection`` provides the same ``train_test_split``.
    from sklearn import model_selection
except ImportError:  # scikit-learn older than 0.18
    from sklearn import cross_validation as model_selection


def load_data_classification():
    '''
    Load the digits data set bundled with scikit-learn and split it for a
    classification task.

    :return: the result of ``train_test_split``, in order: training samples,
        test samples, training labels, test labels
    '''
    digits = datasets.load_digits()
    # Stratified split on the labels; a quarter of the samples form the test set.
    return model_selection.train_test_split(
        digits.data, digits.target, test_size=0.25,
        random_state=0, stratify=digits.target)


def _sweep_forest_classifier(data, param, values, xlabel):
    '''Fit one RandomForestClassifier per value of *param* and plot the scores.'''
    X_train, X_test, y_train, y_test = data
    fig = plt.figure()
    ax = fig.add_subplot(1, 1, 1)
    testing_scores = []
    training_scores = []
    for value in values:
        clf = ensemble.RandomForestClassifier(**{param: value})
        clf.fit(X_train, y_train)
        training_scores.append(clf.score(X_train, y_train))
        testing_scores.append(clf.score(X_test, y_test))
    ax.plot(values, training_scores, label="Training Score")
    ax.plot(values, testing_scores, label="Testing Score")
    ax.set_xlabel(xlabel)
    ax.set_ylabel("score")
    ax.legend(loc="lower right")
    ax.set_ylim(0, 1.05)
    plt.suptitle("RandomForestClassifier")
    plt.show()


def test_RandomForestClassifier(*data):
    '''
    Fit a default RandomForestClassifier and print its train/test scores.

    :param data: positional arguments, in order: training samples, test
        samples, training labels, test labels
    :return: None
    '''
    X_train, X_test, y_train, y_test = data
    clf = ensemble.RandomForestClassifier()
    clf.fit(X_train, y_train)
    print("Traing Score:%f" % clf.score(X_train, y_train))
    print("Testing Score:%f" % clf.score(X_test, y_test))


def test_RandomForestClassifier_num(*data):
    '''
    Plot the train/test scores of RandomForestClassifier against
    ``n_estimators`` (odd values from 1 to 99).

    :param data: positional arguments, in order: training samples, test
        samples, training labels, test labels
    :return: None
    '''
    _sweep_forest_classifier(data, "n_estimators",
                             np.arange(1, 100, step=2), "estimator num")


def test_RandomForestClassifier_max_depth(*data):
    '''
    Plot the train/test scores of RandomForestClassifier against
    ``max_depth`` (1 to 19).

    :param data: positional arguments, in order: training samples, test
        samples, training labels, test labels
    :return: None
    '''
    _sweep_forest_classifier(data, "max_depth", range(1, 20), "max_depth")


def test_RandomForestClassifier_max_features(*data):
    '''
    Plot the train/test scores of RandomForestClassifier against
    ``max_features`` (fractions from ``np.linspace(0.01, 1.0)``).

    :param data: positional arguments, in order: training samples, test
        samples, training labels, test labels
    :return: None
    '''
    _sweep_forest_classifier(data, "max_features",
                             np.linspace(0.01, 1.0), "max_feature")


if __name__ == '__main__':
    X_train, X_test, y_train, y_test = load_data_classification()  # load the classification data
    test_RandomForestClassifier(X_train, X_test, y_train, y_test)
    # Alternative demos; uncomment the one to run.
    # test_RandomForestClassifier_num(X_train,X_test,y_train,y_test)
    # test_RandomForestClassifier_max_depth(X_train,X_test,y_train,y_test)
    # test_RandomForestClassifier_max_features(X_train,X_test,y_train,y_test)

# --------------------------------------------------------------------------------
# /chapters/Ensemble/randomforest_regressor.py:
# --------------------------------------------------------------------------------
# -*- coding: utf-8 -*-
"""
    Ensemble learning
    ~~~~~~~~~~~~~~~~

    RandomForestRegressor

    :copyright: (c) 2016 by the huaxz1986.
    :license: lgpl-3.0, see LICENSE for more details.
"""
import matplotlib.pyplot as plt
import numpy as np
from sklearn import datasets, ensemble

try:
    # ``cross_validation`` was deprecated in scikit-learn 0.18 and removed in
    # 0.20; ``model_selection`` provides the same ``train_test_split``.
    from sklearn import model_selection
except ImportError:  # scikit-learn older than 0.18
    from sklearn import cross_validation as model_selection


def load_data_regression():
    '''
    Load the diabetes data set bundled with scikit-learn and split it for a
    regression task.

    :return: the result of ``train_test_split``, in order: training samples,
        test samples, training targets, test targets
    '''
    diabetes = datasets.load_diabetes()
    # A quarter of the samples form the test set.
    return model_selection.train_test_split(
        diabetes.data, diabetes.target, test_size=0.25, random_state=0)


def _sweep_forest_regressor(data, param, values, xlabel, ylim):
    '''Fit one RandomForestRegressor per value of *param* and plot the scores.'''
    X_train, X_test, y_train, y_test = data
    fig = plt.figure()
    ax = fig.add_subplot(1, 1, 1)
    testing_scores = []
    training_scores = []
    for value in values:
        regr = ensemble.RandomForestRegressor(**{param: value})
        regr.fit(X_train, y_train)
        training_scores.append(regr.score(X_train, y_train))
        testing_scores.append(regr.score(X_test, y_test))
    ax.plot(values, training_scores, label="Training Score")
    ax.plot(values, testing_scores, label="Testing Score")
    ax.set_xlabel(xlabel)
    ax.set_ylabel("score")
    ax.legend(loc="lower right")
    ax.set_ylim(*ylim)
    plt.suptitle("RandomForestRegressor")
    plt.show()


def test_RandomForestRegressor(*data):
    '''
    Fit a default RandomForestRegressor and print its train/test scores.

    :param data: positional arguments, in order: training samples, test
        samples, training targets, test targets
    :return: None
    '''
    X_train, X_test, y_train, y_test = data
    regr = ensemble.RandomForestRegressor()
    regr.fit(X_train, y_train)
    print("Traing Score:%f" % regr.score(X_train, y_train))
    print("Testing Score:%f" % regr.score(X_test, y_test))


def test_RandomForestRegressor_num(*data):
    '''
    Plot the train/test scores of RandomForestRegressor against
    ``n_estimators`` (odd values from 1 to 99).

    :param data: positional arguments, in order: training samples, test
        samples, training targets, test targets
    :return: None
    '''
    _sweep_forest_regressor(data, "n_estimators",
                            np.arange(1, 100, step=2), "estimator num", (-1, 1))


def test_RandomForestRegressor_max_depth(*data):
    '''
    Plot the train/test scores of RandomForestRegressor against
    ``max_depth`` (1 to 19).

    :param data: positional arguments, in order: training samples, test
        samples, training targets, test targets
    :return: None
    '''
    _sweep_forest_regressor(data, "max_depth", range(1, 20),
                            "max_depth", (0, 1.05))


def test_RandomForestRegressor_max_features(*data):
    '''
    Plot the train/test scores of RandomForestRegressor against
    ``max_features`` (fractions from ``np.linspace(0.01, 1.0)``).

    :param data: positional arguments, in order: training samples, test
        samples, training targets, test targets
    :return: None
    '''
    _sweep_forest_regressor(data, "max_features", np.linspace(0.01, 1.0),
                            "max_feature", (0, 1.05))


if __name__ == '__main__':
    X_train, X_test, y_train, y_test = load_data_regression()  # load the regression data
    test_RandomForestRegressor(X_train, X_test, y_train, y_test)
    # Alternative demos; uncomment the one to run.
    # test_RandomForestRegressor_num(X_train,X_test,y_train,y_test)
    # test_RandomForestRegressor_max_depth(X_train,X_test,y_train,y_test)
    # test_RandomForestRegressor_max_features(X_train,X_test,y_train,y_test)

# --------------------------------------------------------------------------------
# /chapters/KNN_Dimension_Reduction/__init__.py:
# --------------------------------------------------------------------------------
# https://raw.githubusercontent.com/huaxz1986/git_book/cf539065568c2588df87729c1c76303016933449/chapters/KNN_Dimension_Reduction/__init__.py
# --------------------------------------------------------------------------------
# /chapters/KNN_Dimension_Reduction/__pycache__/__init__.cpython-35.pyc:
# --------------------------------------------------------------------------------
# https://raw.githubusercontent.com/huaxz1986/git_book/cf539065568c2588df87729c1c76303016933449/chapters/KNN_Dimension_Reduction/__pycache__/__init__.cpython-35.pyc
# --------------------------------------------------------------------------------
# /chapters/KNN_Dimension_Reduction/__pycache__/isomap.cpython-35.pyc:
# --------------------------------------------------------------------------------
https://raw.githubusercontent.com/huaxz1986/git_book/cf539065568c2588df87729c1c76303016933449/chapters/KNN_Dimension_Reduction/__pycache__/isomap.cpython-35.pyc -------------------------------------------------------------------------------- /chapters/KNN_Dimension_Reduction/__pycache__/kneighbors_classifier.cpython-35.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huaxz1986/git_book/cf539065568c2588df87729c1c76303016933449/chapters/KNN_Dimension_Reduction/__pycache__/kneighbors_classifier.cpython-35.pyc -------------------------------------------------------------------------------- /chapters/KNN_Dimension_Reduction/__pycache__/kneighbors_regressor.cpython-35.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huaxz1986/git_book/cf539065568c2588df87729c1c76303016933449/chapters/KNN_Dimension_Reduction/__pycache__/kneighbors_regressor.cpython-35.pyc -------------------------------------------------------------------------------- /chapters/KNN_Dimension_Reduction/__pycache__/kpca.cpython-35.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huaxz1986/git_book/cf539065568c2588df87729c1c76303016933449/chapters/KNN_Dimension_Reduction/__pycache__/kpca.cpython-35.pyc -------------------------------------------------------------------------------- /chapters/KNN_Dimension_Reduction/__pycache__/lle.cpython-35.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huaxz1986/git_book/cf539065568c2588df87729c1c76303016933449/chapters/KNN_Dimension_Reduction/__pycache__/lle.cpython-35.pyc -------------------------------------------------------------------------------- /chapters/KNN_Dimension_Reduction/__pycache__/mds.cpython-35.pyc: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/huaxz1986/git_book/cf539065568c2588df87729c1c76303016933449/chapters/KNN_Dimension_Reduction/__pycache__/mds.cpython-35.pyc -------------------------------------------------------------------------------- /chapters/KNN_Dimension_Reduction/__pycache__/pca.cpython-35.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huaxz1986/git_book/cf539065568c2588df87729c1c76303016933449/chapters/KNN_Dimension_Reduction/__pycache__/pca.cpython-35.pyc -------------------------------------------------------------------------------- /chapters/KNN_Dimension_Reduction/isomap.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | kNN和降维 4 | ~~~~~~~~~~ 5 | 6 | Isomap 7 | 8 | :copyright: (c) 2016 by the huaxz1986. 9 | :license: lgpl-3.0, see LICENSE for more details. 
import numpy as np
import matplotlib.pyplot as plt
from sklearn import datasets,manifold

# RGB colours paired (via zip) with the distinct labels when scattering samples.
_LABEL_COLORS=((1,0,0),(0,1,0),(0,0,1),(0.5,0.5,0),(0,0.5,0.5),(0.5,0,0.5),
    (0.4,0.6,0),(0.6,0.4,0),(0,0.6,0.4),(0.5,0.3,0.2),)

def load_data():
    '''
    Load the data used for dimensionality reduction.

    :return: a tuple ``(samples, labels)`` from scikit-learn's bundled iris data set
    '''
    dataset=datasets.load_iris()
    return dataset.data,dataset.target

def test_Isomap(*data):
    '''
    Fit Isomap for several target dimensions and print the reconstruction error.

    :param data: positional arguments, expected to be exactly: samples, labels
    :return: None
    '''
    samples,_labels=data
    for dim in (4,3,2,1): # target dimensions examined, highest first
        model=manifold.Isomap(n_components=dim)
        model.fit(samples)
        print('reconstruction_error(n_components=%d) : %s'%
            (dim, model.reconstruction_error()))

def plot_Isomap_k(*data):
    '''
    Show the effect of ``n_neighbors`` on Isomap when reducing to 2 dimensions.

    :param data: positional arguments, expected to be exactly: samples, labels
    :return: None
    '''
    samples,labels=data
    neighbor_counts=[1,5,25,labels.size-1] # candidate n_neighbors values

    figure=plt.figure()
    for cell,k in enumerate(neighbor_counts,start=1):
        embedded=manifold.Isomap(n_components=2,n_neighbors=k).fit_transform(samples)

        axes=figure.add_subplot(2,2,cell) # 2x2 grid, one cell per n_neighbors value
        for label,color in zip(np.unique(labels),_LABEL_COLORS):
            mask=labels==label
            axes.scatter(embedded[mask,0],embedded[mask,1],
                label="target= %d"%label,color=color)

        axes.set_xlabel("X[0]")
        axes.set_ylabel("X[1]")
        axes.legend(loc="best")
        axes.set_title("k=%d"%k)
    plt.suptitle("Isomap")
    plt.show()

def plot_Isomap_k_d1(*data):
    '''
    Show the effect of ``n_neighbors`` on Isomap when reducing to 1 dimension.

    :param data: positional arguments, expected to be exactly: samples, labels
    :return: None
    '''
    samples,labels=data
    neighbor_counts=[1,5,25,labels.size-1] # candidate n_neighbors values

    figure=plt.figure()
    for cell,k in enumerate(neighbor_counts,start=1):
        embedded=manifold.Isomap(n_components=1,n_neighbors=k).fit_transform(samples)

        axes=figure.add_subplot(2,2,cell) # 2x2 grid, one cell per n_neighbors value
        for label,color in zip(np.unique(labels),_LABEL_COLORS):
            mask=labels==label
            points=embedded[mask]
            # the single embedded coordinate is drawn along x; y is fixed at zero
            axes.scatter(points,np.zeros_like(points),
                label="target= %d"%label,color=color)

        axes.set_xlabel("X")
        axes.set_ylabel("Y")
        axes.legend(loc="best")
        axes.set_title("k=%d"%k)
    plt.suptitle("Isomap")
    plt.show()

if __name__=='__main__':
    X,y=load_data() # data set used for dimensionality reduction
    test_Isomap(X,y) # run test_Isomap
    #plot_Isomap_k(X,y) # run plot_Isomap_k
    #plot_Isomap_k_d1(X,y) # run plot_Isomap_k_d1
import numpy as np
import matplotlib.pyplot as plt
# sklearn.cross_validation was deprecated in scikit-learn 0.18 and removed in 0.20;
# train_test_split is provided by sklearn.model_selection.
from sklearn import neighbors, datasets, model_selection

def load_classification_data():
    '''
    Load the data set used by the classification demos.

    :return: a tuple: training samples, test samples, training labels, test labels
    '''
    digits=datasets.load_digits() # hand-written digits data set bundled with scikit-learn
    X_train=digits.data
    y_train=digits.target
    # stratified split; the test set takes 1/4 of the samples
    return model_selection.train_test_split(X_train, y_train,test_size=0.25,
        random_state=0,stratify=y_train)

def test_KNeighborsClassifier(*data):
    '''
    Fit a default KNeighborsClassifier and print its training and testing scores.

    :param data: positional arguments, expected to be exactly:
        training samples, test samples, training labels, test labels
    :return: None
    '''
    X_train,X_test,y_train,y_test=data
    clf=neighbors.KNeighborsClassifier()
    clf.fit(X_train,y_train)
    print("Training Score:%f"%clf.score(X_train,y_train))
    print("Testing Score:%f"%clf.score(X_test,y_test))

def test_KNeighborsClassifier_k_w(*data):
    '''
    Plot how the score varies with ``n_neighbors`` for each ``weights`` setting.

    :param data: positional arguments, expected to be exactly:
        training samples, test samples, training labels, test labels
    :return: None
    '''
    X_train,X_test,y_train,y_test=data
    # 100 integer K values in [1, number of training samples)
    Ks=np.linspace(1,y_train.size,num=100,endpoint=False,dtype='int')
    weights=['uniform','distance']

    fig=plt.figure()
    ax=fig.add_subplot(1,1,1)
    # one pair of curves (testing / training score versus K) per weights value
    for weight in weights:
        training_scores=[]
        testing_scores=[]
        for K in Ks:
            clf=neighbors.KNeighborsClassifier(weights=weight,n_neighbors=K)
            clf.fit(X_train,y_train)
            testing_scores.append(clf.score(X_test,y_test))
            training_scores.append(clf.score(X_train,y_train))
        ax.plot(Ks,testing_scores,label="testing score:weight=%s"%weight)
        ax.plot(Ks,training_scores,label="training score:weight=%s"%weight)
    ax.legend(loc='best')
    ax.set_xlabel("K")
    ax.set_ylabel("score")
    ax.set_ylim(0,1.05)
    ax.set_title("KNeighborsClassifier")
    plt.show()

def test_KNeighborsClassifier_k_p(*data):
    '''
    Plot how the score varies with ``n_neighbors`` for each ``p`` setting.

    :param data: positional arguments, expected to be exactly:
        training samples, test samples, training labels, test labels
    :return: None
    '''
    X_train,X_test,y_train,y_test=data
    # integer K values in [1, number of training samples); linspace's default count is used
    Ks=np.linspace(1,y_train.size,endpoint=False,dtype='int')
    Ps=[1,2,10]

    fig=plt.figure()
    ax=fig.add_subplot(1,1,1)
    # one pair of curves (testing / training score versus K) per p value
    for P in Ps:
        training_scores=[]
        testing_scores=[]
        for K in Ks:
            clf=neighbors.KNeighborsClassifier(p=P,n_neighbors=K)
            clf.fit(X_train,y_train)
            testing_scores.append(clf.score(X_test,y_test))
            training_scores.append(clf.score(X_train,y_train))
        ax.plot(Ks,testing_scores,label="testing score:p=%d"%P)
        ax.plot(Ks,training_scores,label="training score:p=%d"%P)
    ax.legend(loc='best')
    ax.set_xlabel("K")
    ax.set_ylabel("score")
    ax.set_ylim(0,1.05)
    ax.set_title("KNeighborsClassifier")
    plt.show()

if __name__=='__main__':
    X_train,X_test,y_train,y_test=load_classification_data() # data set for the classification demos
    #test_KNeighborsClassifier(X_train,X_test,y_train,y_test) # run test_KNeighborsClassifier
    #test_KNeighborsClassifier_k_w(X_train,X_test,y_train,y_test) # run test_KNeighborsClassifier_k_w
    #test_KNeighborsClassifier_k_p(X_train,X_test,y_train,y_test) # run test_KNeighborsClassifier_k_p
import numpy as np
import matplotlib.pyplot as plt
# sklearn.cross_validation was deprecated in scikit-learn 0.18 and removed in 0.20;
# train_test_split is provided by sklearn.model_selection.
from sklearn import neighbors, model_selection

def create_regression_data(n):
    '''
    Create the data set used by the regression demos: noisy samples of sin(x).

    :param n: number of samples to generate
    :return: a tuple: training samples, test samples, training values, test values
    '''
    X =5 * np.random.rand(n, 1)
    y = np.sin(X).ravel()
    # Add noise to every 5th value. The slice y[::5] holds ceil(n/5) elements, so the
    # noise vector is sized from the slice itself; int(n/5) is one short whenever n
    # is not a multiple of 5 and made the in-place addition fail.
    y[::5] += 1 * (0.5 - np.random.rand(y[::5].size))
    # plain (non-stratified) split; the test set takes 1/4 of the samples
    return model_selection.train_test_split(X, y,test_size=0.25,random_state=0)

def test_KNeighborsRegressor(*data):
    '''
    Fit a default KNeighborsRegressor and print its training and testing scores.

    :param data: positional arguments, expected to be exactly:
        training samples, test samples, training values, test values
    :return: None
    '''
    X_train,X_test,y_train,y_test=data
    regr=neighbors.KNeighborsRegressor()
    regr.fit(X_train,y_train)
    print("Training Score:%f"%regr.score(X_train,y_train))
    print("Testing Score:%f"%regr.score(X_test,y_test))

def test_KNeighborsRegressor_k_w(*data):
    '''
    Plot how the score varies with ``n_neighbors`` for each ``weights`` setting.

    :param data: positional arguments, expected to be exactly:
        training samples, test samples, training values, test values
    :return: None
    '''
    X_train,X_test,y_train,y_test=data
    # 100 integer K values in [1, number of training samples)
    Ks=np.linspace(1,y_train.size,num=100,endpoint=False,dtype='int')
    weights=['uniform','distance']

    fig=plt.figure()
    ax=fig.add_subplot(1,1,1)
    # one pair of curves (testing / training score versus K) per weights value
    for weight in weights:
        training_scores=[]
        testing_scores=[]
        for K in Ks:
            regr=neighbors.KNeighborsRegressor(weights=weight,n_neighbors=K)
            regr.fit(X_train,y_train)
            testing_scores.append(regr.score(X_test,y_test))
            training_scores.append(regr.score(X_train,y_train))
        ax.plot(Ks,testing_scores,label="testing score:weight=%s"%weight)
        ax.plot(Ks,training_scores,label="training score:weight=%s"%weight)
    ax.legend(loc='best')
    ax.set_xlabel("K")
    ax.set_ylabel("score")
    ax.set_ylim(0,1.05)
    ax.set_title("KNeighborsRegressor")
    plt.show()

def test_KNeighborsRegressor_k_p(*data):
    '''
    Plot how the score varies with ``n_neighbors`` for each ``p`` setting.

    :param data: positional arguments, expected to be exactly:
        training samples, test samples, training values, test values
    :return: None
    '''
    X_train,X_test,y_train,y_test=data
    # integer K values in [1, number of training samples); linspace's default count is used
    Ks=np.linspace(1,y_train.size,endpoint=False,dtype='int')
    Ps=[1,2,10]

    fig=plt.figure()
    ax=fig.add_subplot(1,1,1)
    # one pair of curves (testing / training score versus K) per p value
    for P in Ps:
        training_scores=[]
        testing_scores=[]
        for K in Ks:
            regr=neighbors.KNeighborsRegressor(p=P,n_neighbors=K)
            regr.fit(X_train,y_train)
            testing_scores.append(regr.score(X_test,y_test))
            training_scores.append(regr.score(X_train,y_train))
        ax.plot(Ks,testing_scores,label="testing score:p=%d"%P)
        ax.plot(Ks,training_scores,label="training score:p=%d"%P)
    ax.legend(loc='best')
    ax.set_xlabel("K")
    ax.set_ylabel("score")
    ax.set_ylim(0,1.05)
    ax.set_title("KNeighborsRegressor")
    plt.show()

if __name__=='__main__':
    X_train,X_test,y_train,y_test=create_regression_data(1000) # data set for the regression demos
    test_KNeighborsRegressor(X_train,X_test,y_train,y_test) # run test_KNeighborsRegressor
    #test_KNeighborsRegressor_k_w(X_train,X_test,y_train,y_test) # run test_KNeighborsRegressor_k_w
    #test_KNeighborsRegressor_k_p(X_train,X_test,y_train,y_test) # run test_KNeighborsRegressor_k_p
import numpy as np
import matplotlib.pyplot as plt
from sklearn import datasets,manifold

# RGB colours paired (via zip) with the distinct labels when scattering samples.
_LABEL_COLORS=((1,0,0),(0,1,0),(0,0,1),(0.5,0.5,0),(0,0.5,0.5),(0.5,0,0.5),
    (0.4,0.6,0),(0.6,0.4,0),(0,0.6,0.4),(0.5,0.3,0.2),)

def load_data():
    '''
    Load the data used for dimensionality reduction.

    :return: a tuple ``(samples, labels)`` from scikit-learn's bundled iris data set
    '''
    dataset=datasets.load_iris()
    return dataset.data,dataset.target

def test_LocallyLinearEmbedding(*data):
    '''
    Fit LocallyLinearEmbedding for several target dimensions and print the reconstruction error.

    :param data: positional arguments, expected to be exactly: samples, labels
    :return: None
    '''
    samples,_labels=data
    for dim in (4,3,2,1): # target dimensions examined, highest first
        model=manifold.LocallyLinearEmbedding(n_components=dim)
        model.fit(samples)
        print('reconstruction_error(n_components=%d) : %s'%
            (dim, model.reconstruction_error_))

def plot_LocallyLinearEmbedding_k(*data):
    '''
    Show the effect of ``n_neighbors`` on LocallyLinearEmbedding when reducing to 2 dimensions.

    :param data: positional arguments, expected to be exactly: samples, labels
    :return: None
    '''
    samples,labels=data
    neighbor_counts=[1,5,25,labels.size-1] # candidate n_neighbors values

    figure=plt.figure()
    for cell,k in enumerate(neighbor_counts,start=1):
        model=manifold.LocallyLinearEmbedding(n_components=2,n_neighbors=k)
        embedded=model.fit_transform(samples)

        axes=figure.add_subplot(2,2,cell) # 2x2 grid, one cell per n_neighbors value
        for label,color in zip(np.unique(labels),_LABEL_COLORS):
            mask=labels==label
            axes.scatter(embedded[mask,0],embedded[mask,1],
                label="target= %d"%label,color=color)

        axes.set_xlabel("X[0]")
        axes.set_ylabel("X[1]")
        axes.legend(loc="best")
        axes.set_title("k=%d"%k)
    plt.suptitle("LocallyLinearEmbedding")
    plt.show()

def plot_LocallyLinearEmbedding_k_d1(*data):
    '''
    Show the effect of ``n_neighbors`` on LocallyLinearEmbedding when reducing to 1 dimension.

    :param data: positional arguments, expected to be exactly: samples, labels
    :return: None
    '''
    samples,labels=data
    neighbor_counts=[1,5,25,labels.size-1] # candidate n_neighbors values

    figure=plt.figure()
    for cell,k in enumerate(neighbor_counts,start=1):
        model=manifold.LocallyLinearEmbedding(n_components=1,n_neighbors=k)
        embedded=model.fit_transform(samples)

        axes=figure.add_subplot(2,2,cell) # 2x2 grid, one cell per n_neighbors value
        for label,color in zip(np.unique(labels),_LABEL_COLORS):
            mask=labels==label
            points=embedded[mask]
            # the single embedded coordinate is drawn along x; y is fixed at zero
            axes.scatter(points,np.zeros_like(points),
                label="target= %d"%label,color=color)

        axes.set_xlabel("X")
        axes.set_ylabel("Y")
        axes.legend(loc="best")
        axes.set_title("k=%d"%k)
    plt.suptitle("LocallyLinearEmbedding")
    plt.show()

if __name__=='__main__':
    X,y=load_data() # data set used for dimensionality reduction
    test_LocallyLinearEmbedding(X,y) # run test_LocallyLinearEmbedding
    #plot_LocallyLinearEmbedding_k(X,y) # run plot_LocallyLinearEmbedding_k
    #plot_LocallyLinearEmbedding_k_d1(X,y) # run plot_LocallyLinearEmbedding_k_d1
import numpy as np
import matplotlib.pyplot as plt
from sklearn import datasets,manifold

# RGB colours paired (via zip) with the distinct labels when scattering samples.
_LABEL_COLORS=((1,0,0),(0,1,0),(0,0,1),(0.5,0.5,0),(0,0.5,0.5),(0.5,0,0.5),
    (0.4,0.6,0),(0.6,0.4,0),(0,0.6,0.4),(0.5,0.3,0.2),)

def load_data():
    '''
    Load the data used for dimensionality reduction.

    :return: a tuple ``(samples, labels)`` from scikit-learn's bundled iris data set
    '''
    dataset=datasets.load_iris()
    return dataset.data,dataset.target

def test_MDS(*data):
    '''
    Fit MDS for several target dimensions and print the resulting stress.

    :param data: positional arguments, expected to be exactly: samples, labels
    :return: None
    '''
    samples,_labels=data
    for dim in (4,3,2,1): # target dimensions examined, highest first
        model=manifold.MDS(n_components=dim)
        model.fit(samples)
        print('stress(n_components=%d) : %s'% (dim, str(model.stress_)))

def plot_MDS(*data):
    '''
    Scatter-plot the samples after reducing them to 2 dimensions with MDS.

    :param data: positional arguments, expected to be exactly: samples, labels
    :return: None
    '''
    samples,labels=data
    embedded=manifold.MDS(n_components=2).fit_transform(samples) # 2-D embedding of the samples

    figure=plt.figure()
    axes=figure.add_subplot(1,1,1)
    for label,color in zip(np.unique(labels),_LABEL_COLORS):
        mask=labels==label
        axes.scatter(embedded[mask,0],embedded[mask,1],label="target= %d"%label,color=color)

    axes.set_xlabel("X[0]")
    axes.set_ylabel("X[1]")
    axes.legend(loc="best")
    axes.set_title("MDS")
    plt.show()

if __name__=='__main__':
    X,y=load_data() # data set used for dimensionality reduction
    test_MDS(X,y) # run test_MDS
    #plot_MDS(X,y) # run plot_MDS
import numpy as np
import matplotlib.pyplot as plt
from sklearn import datasets,decomposition

def load_data():
    '''
    Load the data used for dimensionality reduction.

    :return: a tuple ``(samples, labels)`` from scikit-learn's bundled iris data set
    '''
    iris=datasets.load_iris()
    return iris.data,iris.target

def test_PCA(*data):
    '''
    Fit PCA with the default number of components and print the explained variance ratios.

    :param data: positional arguments, expected to be exactly: samples, labels
    :return: None
    '''
    X,y=data
    pca=decomposition.PCA(n_components=None) # n_components left at its default
    pca.fit(X)
    print('explained variance ratio : %s'% str(pca.explained_variance_ratio_))

def plot_PCA(*data):
    '''
    Scatter-plot the samples after reducing them to 2 dimensions with PCA.

    :param data: positional arguments, expected to be exactly: samples, labels
    :return: None
    '''
    X,y=data
    pca=decomposition.PCA(n_components=2) # target dimension is 2
    pca.fit(X)
    X_r=pca.transform(X) # samples projected onto the two fitted components

    fig=plt.figure()
    ax=fig.add_subplot(1,1,1)
    # RGB colours paired (via zip) with the distinct labels
    colors=((1,0,0),(0,1,0),(0,0,1),(0.5,0.5,0),(0,0.5,0.5),(0.5,0,0.5),
        (0.4,0.6,0),(0.6,0.4,0),(0,0.6,0.4),(0.5,0.3,0.2),)
    for label ,color in zip( np.unique(y),colors):
        position=y==label
        ax.scatter(X_r[position,0],X_r[position,1],label="target= %d"%label,color=color)

    ax.set_xlabel("X[0]")
    # The y-axis shows column 1 of the projected data; it was labelled "Y[0]", unlike
    # the sibling Isomap/LLE/MDS plots, which label it "X[1]".
    ax.set_ylabel("X[1]")
    ax.legend(loc="best")
    ax.set_title("PCA")
    plt.show()

if __name__=='__main__':
    X,y=load_data() # data set used for dimensionality reduction
    test_PCA(X,y) # run test_PCA
    #plot_PCA(X,y) # run plot_PCA
/chapters/Kaggle/__pycache__/__init__.cpython-35.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huaxz1986/git_book/cf539065568c2588df87729c1c76303016933449/chapters/Kaggle/__pycache__/__init__.cpython-35.pyc -------------------------------------------------------------------------------- /chapters/Kaggle/__pycache__/data_clean.cpython-35.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huaxz1986/git_book/cf539065568c2588df87729c1c76303016933449/chapters/Kaggle/__pycache__/data_clean.cpython-35.pyc -------------------------------------------------------------------------------- /chapters/Kaggle/__pycache__/data_preprocess.cpython-35.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huaxz1986/git_book/cf539065568c2588df87729c1c76303016933449/chapters/Kaggle/__pycache__/data_preprocess.cpython-35.pyc -------------------------------------------------------------------------------- /chapters/Kaggle/__pycache__/grid_search.cpython-35.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huaxz1986/git_book/cf539065568c2588df87729c1c76303016933449/chapters/Kaggle/__pycache__/grid_search.cpython-35.pyc -------------------------------------------------------------------------------- /chapters/Kaggle/__pycache__/learning_validation_curve.cpython-35.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huaxz1986/git_book/cf539065568c2588df87729c1c76303016933449/chapters/Kaggle/__pycache__/learning_validation_curve.cpython-35.pyc -------------------------------------------------------------------------------- /chapters/Kaggle/grid_search.py: -------------------------------------------------------------------------------- 1 | import scipy 
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import classification_report
from data_clean import current_time
from sklearn.model_selection import train_test_split
from data_preprocess import Data_Preprocesser,Data_Cleaner

def grid_search(tuned_parameters,data,train_size,seed):
    '''
    Tune a GradientBoostingClassifier with a cross-validated grid search and print a report.

    :param tuned_parameters: dictionary of parameters to tune, passed to GridSearchCV
    :param data: 2-D array; the last column is used as the label, the others as features
    :param train_size: size of the training split, passed to train_test_split
    :param seed: random_state used for the train/test split
    :return: None
    '''

    print("----- Begin run grid_search at %s -------"%current_time())
    X=data[:,:-1]
    y=data[:,-1]
    X_train,X_test,y_train,y_test=train_test_split(X,y,train_size=train_size,stratify=y,random_state=seed)
    clf=GridSearchCV(GradientBoostingClassifier(),tuned_parameters,cv=10,scoring="roc_auc")
    clf.fit(X_train,y_train)
    print("Best parameters set found:",clf.best_params_)
    print("Randomized Grid scores:")
    # grid_scores_ was removed from GridSearchCV; cv_results_ carries the same per-candidate
    # information as parallel arrays (mean test score, its standard deviation, parameters).
    results=clf.cv_results_
    for mean_score, std_score, params in zip(results['mean_test_score'],
                                             results['std_test_score'],
                                             results['params']):
        print("\t%0.3f (+/-%0.03f) for %s" % (mean_score, std_score * 2, params))
    print("Optimized Score:",clf.score(X_test,y_test))
    print("Detailed classification report:")
    y_true, y_pred = y_test, clf.predict(X_test)
    print(classification_report(y_true, y_pred))
    print("----- End run grid_search at %s -------"%current_time())

if __name__=='__main__':
    cleaner=Data_Cleaner("./data/people.csv",'./data/act_train.csv','./data/act_test.csv')
    result=cleaner.load_data()
    preprocessor=Data_Preprocesser(*result)
    train_datas,test_datas=preprocessor.load_data()
    tuned_parameters={'subsample':[0.3,0.35,0.4,0.45,0.5,0.55,0.6],
                      'n_estimators':[30,35,50,100,150,200],
                      'max_depth':[2,4,8,16,32]}
    grid_search(tuned_parameters,train_datas['type 7'],train_size=0.75,seed=0)
/chapters/Linear/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huaxz1986/git_book/cf539065568c2588df87729c1c76303016933449/chapters/Linear/__init__.py -------------------------------------------------------------------------------- /chapters/Linear/__pycache__/__init__.cpython-35.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huaxz1986/git_book/cf539065568c2588df87729c1c76303016933449/chapters/Linear/__pycache__/__init__.cpython-35.pyc -------------------------------------------------------------------------------- /chapters/Linear/__pycache__/elasticnet.cpython-35.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huaxz1986/git_book/cf539065568c2588df87729c1c76303016933449/chapters/Linear/__pycache__/elasticnet.cpython-35.pyc -------------------------------------------------------------------------------- /chapters/Linear/__pycache__/lasso.cpython-35.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huaxz1986/git_book/cf539065568c2588df87729c1c76303016933449/chapters/Linear/__pycache__/lasso.cpython-35.pyc -------------------------------------------------------------------------------- /chapters/Linear/__pycache__/lda.cpython-35.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huaxz1986/git_book/cf539065568c2588df87729c1c76303016933449/chapters/Linear/__pycache__/lda.cpython-35.pyc -------------------------------------------------------------------------------- /chapters/Linear/__pycache__/linear_regression.cpython-35.pyc: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/huaxz1986/git_book/cf539065568c2588df87729c1c76303016933449/chapters/Linear/__pycache__/linear_regression.cpython-35.pyc -------------------------------------------------------------------------------- /chapters/Linear/__pycache__/logistic_regression.cpython-35.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huaxz1986/git_book/cf539065568c2588df87729c1c76303016933449/chapters/Linear/__pycache__/logistic_regression.cpython-35.pyc -------------------------------------------------------------------------------- /chapters/Linear/__pycache__/ridge.cpython-35.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huaxz1986/git_book/cf539065568c2588df87729c1c76303016933449/chapters/Linear/__pycache__/ridge.cpython-35.pyc -------------------------------------------------------------------------------- /chapters/Linear/elasticnet.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | 广义线性模型 4 | ~~~~~~~~~~~~~~~~~~~~~~~~~~ 5 | 6 | ElasticNet 7 | 8 | :copyright: (c) 2016 by the huaxz1986. 9 | :license: lgpl-3.0, see LICENSE for more details. 
import matplotlib.pyplot as plt
import numpy as np
# sklearn.cross_validation was deprecated in scikit-learn 0.18 and removed in 0.20;
# train_test_split is provided by sklearn.model_selection.
from sklearn import datasets, linear_model, model_selection

def load_data():
    '''
    Load the data set used for the regression demos.

    :return: a tuple: training samples, test samples, training values, test values
    '''
    diabetes = datasets.load_diabetes() # diabetes data set bundled with scikit-learn
    # The samples come from the loaded data set (diabetes.data); the sklearn `datasets`
    # module itself has no `data` attribute. The test set takes 1/4 of the samples.
    return model_selection.train_test_split(diabetes.data,diabetes.target,
        test_size=0.25,random_state=0)

def test_ElasticNet(*data):
    '''
    Fit a default ElasticNet and print its coefficients, test error and test score.

    :param data: positional arguments, expected to be exactly:
        training samples, test samples, training values, test values
    :return: None
    '''
    X_train,X_test,y_train,y_test=data
    regr = linear_model.ElasticNet()
    regr.fit(X_train, y_train)
    print('Coefficients:%s, intercept %.2f'%(regr.coef_,regr.intercept_))
    # NOTE: despite the label, the printed value is the mean of the squared residuals
    print("Residual sum of squares: %.2f"% np.mean((regr.predict(X_test) - y_test) ** 2))
    print('Score: %.2f' % regr.score(X_test, y_test))

def test_ElasticNet_alpha_rho(*data):
    '''
    Plot the ElasticNet test score as a surface over ``alpha`` and ``l1_ratio``.

    :param data: positional arguments, expected to be exactly:
        training samples, test samples, training values, test values
    :return: None
    '''
    X_train,X_test,y_train,y_test=data
    alphas=np.logspace(-2,2)
    rhos=np.linspace(0.01,1)
    scores=[]
    for alpha in alphas:
        for rho in rhos:
            regr = linear_model.ElasticNet(alpha=alpha,l1_ratio=rho)
            regr.fit(X_train, y_train)
            scores.append(regr.score(X_test, y_test))
    # `scores` is alpha-major: entry i*len(rhos)+j belongs to (alphas[i], rhos[j]).
    # np.meshgrid's default layout puts rhos along rows and alphas along columns, so the
    # score matrix is transposed to line up with the grids; reshaping the flat list
    # straight to the grid shape plotted every score at the swapped (alpha, rho) position.
    score_grid=np.array(scores).reshape(len(alphas),len(rhos)).T
    alpha_grid, rho_grid = np.meshgrid(alphas, rhos)
    from mpl_toolkits.mplot3d import Axes3D
    from matplotlib import cm
    fig=plt.figure()
    ax=Axes3D(fig)
    surf = ax.plot_surface(alpha_grid, rho_grid, score_grid, rstride=1, cstride=1, cmap=cm.jet,
        linewidth=0, antialiased=False)
    fig.colorbar(surf, shrink=0.5, aspect=5)
    ax.set_xlabel(r"$\alpha$")
    ax.set_ylabel(r"$\rho$")
    ax.set_zlabel("score")
    ax.set_title("ElasticNet")
    plt.show()

if __name__=='__main__':
    X_train,X_test,y_train,y_test=load_data() # data set for the regression demos
    test_ElasticNet(X_train,X_test,y_train,y_test) # run test_ElasticNet
    # test_ElasticNet_alpha_rho(X_train,X_test,y_train,y_test) # run test_ElasticNet_alpha_rho
import matplotlib.pyplot as plt
import numpy as np
# sklearn.cross_validation was deprecated in scikit-learn 0.18 and removed in 0.20;
# train_test_split is provided by sklearn.model_selection.
from sklearn import datasets, discriminant_analysis, model_selection

def load_data():
    '''
    Load the data set used for the classification demos.

    :return: a tuple: training samples, test samples, training labels, test labels
    '''
    iris=datasets.load_iris() # iris data set bundled with scikit-learn
    X_train=iris.data
    y_train=iris.target
    # stratified split; the test set takes 1/4 of the samples
    return model_selection.train_test_split(X_train, y_train,test_size=0.25,
        random_state=0,stratify=y_train)

def test_LinearDiscriminantAnalysis(*data):
    '''
    Fit a default LinearDiscriminantAnalysis and print its coefficients and test score.

    :param data: positional arguments, expected to be exactly:
        training samples, test samples, training labels, test labels
    :return: None
    '''
    X_train,X_test,y_train,y_test=data
    lda = discriminant_analysis.LinearDiscriminantAnalysis()
    lda.fit(X_train, y_train)
    print('Coefficients:%s, intercept %s'%(lda.coef_,lda.intercept_))
    print('Score: %.2f' % lda.score(X_test, y_test))

def plot_LDA(converted_X,y):
    '''
    Draw a 3-D scatter plot of samples that were already transformed with LDA.

    :param converted_X: transformed samples; columns 0, 1 and 2 are plotted
    :param y: labels of the samples (1-D array or column vector); labels 0, 1, 2 are drawn
    :return: None
    '''
    from mpl_toolkits.mplot3d import Axes3D
    fig=plt.figure()
    ax=Axes3D(fig)
    colors='rgb'
    markers='o*s'
    for target,color,marker in zip([0,1,2],colors,markers):
        pos=(y==target).ravel() # flatten so a column-vector y also yields a 1-D row mask
        X=converted_X[pos,:]
        ax.scatter(X[:,0], X[:,1], X[:,2],color=color,marker=marker,
            label="Label %d"%target)
    ax.legend(loc="best")
    fig.suptitle("Iris After LDA")
    plt.show()

def run_plot_LDA():
    '''
    Run plot_LDA on the full data set returned by load_data().

    :return: None
    '''
    X_train,X_test,y_train,y_test=load_data()
    # re-join the split so the model is fitted on, and the plot shows, every sample
    X=np.vstack((X_train,X_test))
    # keep the labels 1-D, the shape scikit-learn estimators expect for y
    Y=np.concatenate((y_train,y_test))
    lda = discriminant_analysis.LinearDiscriminantAnalysis()
    lda.fit(X, Y)
    # apply the fitted linear map by hand: X * coef^T + intercept
    converted_X=np.dot(X,np.transpose(lda.coef_))+lda.intercept_
    plot_LDA(converted_X,Y)

def test_LinearDiscriminantAnalysis_solver(*data):
    '''
    Print the LinearDiscriminantAnalysis test score for each ``solver``.

    :param data: positional arguments, expected to be exactly:
        training samples, test samples, training labels, test labels
    :return: None
    '''
    X_train,X_test,y_train,y_test=data
    solvers=['svd','lsqr','eigen']
    for solver in solvers:
        if(solver=='svd'):
            lda = discriminant_analysis.LinearDiscriminantAnalysis(solver=solver)
        else:
            # shrinkage is passed explicitly (as None) only for the non-svd solvers
            lda = discriminant_analysis.LinearDiscriminantAnalysis(solver=solver,
                shrinkage=None)
        lda.fit(X_train, y_train)
        print('Score at solver=%s: %.2f' %(solver, lda.score(X_test, y_test)))

def test_LinearDiscriminantAnalysis_shrinkage(*data):
    '''
    Plot the LinearDiscriminantAnalysis test score against the ``shrinkage`` parameter.

    :param data: positional arguments, expected to be exactly:
        training samples, test samples, training labels, test labels
    :return: None
    '''
    X_train,X_test,y_train,y_test=data
    shrinkages=np.linspace(0.0,1.0,num=20)
    scores=[]
    for shrinkage in shrinkages:
        lda = discriminant_analysis.LinearDiscriminantAnalysis(solver='lsqr',
            shrinkage=shrinkage)
        lda.fit(X_train, y_train)
        scores.append(lda.score(X_test, y_test))
    fig=plt.figure()
    ax=fig.add_subplot(1,1,1)
    ax.plot(shrinkages,scores)
    ax.set_xlabel(r"shrinkage")
    ax.set_ylabel(r"score")
    ax.set_ylim(0,1.05)
    ax.set_title("LinearDiscriminantAnalysis")
    plt.show()

if __name__=='__main__':
    X_train,X_test,y_train,y_test=load_data() # data set for the classification demos
    test_LinearDiscriminantAnalysis(X_train,X_test,y_train,y_test) # run test_LinearDiscriminantAnalysis
    # run_plot_LDA() # run run_plot_LDA
    # test_LinearDiscriminantAnalysis_solver(X_train,X_test,y_train,y_test) # run test_LinearDiscriminantAnalysis_solver
    # test_LinearDiscriminantAnalysis_shrinkage(X_train,X_test,y_train,y_test) # run test_LinearDiscriminantAnalysis_shrinkage
def test_LinearRegression(*data):
    '''
    Fit a LinearRegression model and print its parameters and test-set quality.

    :param data: variadic; expected to hold, in order: training samples, test samples, training targets, test targets
    :return: None
    '''
    X_train,X_test,y_train,y_test=data
    regr = linear_model.LinearRegression()
    regr.fit(X_train, y_train)
    print('Coefficients:%s, intercept %.2f'%(regr.coef_,regr.intercept_))
    # np.mean of the squared residuals is the mean squared error, not a residual *sum*,
    # so the label is corrected to match the value that is printed.
    print("Mean squared error: %.2f"% np.mean((regr.predict(X_test) - y_test) ** 2))
    print('Score: %.2f' % regr.score(X_test, y_test))
def test_LogisticRegression(*data):
    '''
    Fit a default LogisticRegression classifier and report its parameters and test score.

    :param data: variadic; expected to hold, in order: training samples, test samples, training labels, test labels
    :return: None
    '''
    train_X,test_X,train_y,test_y=data
    model = linear_model.LogisticRegression()
    model.fit(train_X, train_y)
    # Build the report lines first, then print them in the original order.
    summary = 'Coefficients:%s, intercept %s'%(model.coef_,model.intercept_)
    print(summary)
    accuracy = model.score(test_X, test_y)
    print('Score: %.2f' % accuracy)
def load_data():
    '''
    Load the data set used for the regression examples.

    :return: the 4-tuple produced by train_test_split: training samples, test samples, training targets, test targets
    '''
    diabetes = datasets.load_diabetes() # the diabetes data set bundled with scikit-learn
    # The feature matrix lives on the loaded data set; ``datasets`` is the sklearn module and has no ``data`` attribute.
    return cross_validation.train_test_split(diabetes.data,diabetes.target,
        test_size=0.25,random_state=0) # hold out 1/4 of the data as the test set
可变参数。它是一个元组,这里要求其元素依次为:训练样本集、测试样本集、训练样本的值、测试样本的值 43 | :return: None 44 | ''' 45 | X_train,X_test,y_train,y_test=data 46 | alphas=[0.01,0.02,0.05,0.1,0.2,0.5,1,2,5,10,20,50,100,200,500,1000] 47 | scores=[] 48 | for i,alpha in enumerate(alphas): 49 | regr = linear_model.Ridge(alpha=alpha) 50 | regr.fit(X_train, y_train) 51 | scores.append(regr.score(X_test, y_test)) 52 | ## 绘图 53 | fig=plt.figure() 54 | ax=fig.add_subplot(1,1,1) 55 | ax.plot(alphas,scores) 56 | ax.set_xlabel(r"$\alpha$") 57 | ax.set_ylabel(r"score") 58 | ax.set_xscale('log') 59 | ax.set_title("Ridge") 60 | plt.show() 61 | if __name__=='__main__': 62 | X_train,X_test,y_train,y_test=load_data() # 产生用于回归问题的数据集 63 | test_Ridge(X_train,X_test,y_train,y_test) # 调用 test_Ridge 64 | # test_Ridge_alpha(X_train,X_test,y_train,y_test) # 调用 test_Ridge_alpha -------------------------------------------------------------------------------- /chapters/Model_Selection/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huaxz1986/git_book/cf539065568c2588df87729c1c76303016933449/chapters/Model_Selection/__init__.py -------------------------------------------------------------------------------- /chapters/Model_Selection/__pycache__/__init__.cpython-35.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huaxz1986/git_book/cf539065568c2588df87729c1c76303016933449/chapters/Model_Selection/__pycache__/__init__.cpython-35.pyc -------------------------------------------------------------------------------- /chapters/Model_Selection/__pycache__/classification_metrics.cpython-35.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huaxz1986/git_book/cf539065568c2588df87729c1c76303016933449/chapters/Model_Selection/__pycache__/classification_metrics.cpython-35.pyc 
-------------------------------------------------------------------------------- /chapters/Model_Selection/__pycache__/data_splittion.cpython-35.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huaxz1986/git_book/cf539065568c2588df87729c1c76303016933449/chapters/Model_Selection/__pycache__/data_splittion.cpython-35.pyc -------------------------------------------------------------------------------- /chapters/Model_Selection/__pycache__/grid_search.cpython-35.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huaxz1986/git_book/cf539065568c2588df87729c1c76303016933449/chapters/Model_Selection/__pycache__/grid_search.cpython-35.pyc -------------------------------------------------------------------------------- /chapters/Model_Selection/__pycache__/learning_curve.cpython-35.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huaxz1986/git_book/cf539065568c2588df87729c1c76303016933449/chapters/Model_Selection/__pycache__/learning_curve.cpython-35.pyc -------------------------------------------------------------------------------- /chapters/Model_Selection/__pycache__/loss_function.cpython-35.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huaxz1986/git_book/cf539065568c2588df87729c1c76303016933449/chapters/Model_Selection/__pycache__/loss_function.cpython-35.pyc -------------------------------------------------------------------------------- /chapters/Model_Selection/__pycache__/regression_metrics.cpython-35.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huaxz1986/git_book/cf539065568c2588df87729c1c76303016933449/chapters/Model_Selection/__pycache__/regression_metrics.cpython-35.pyc 
def test_train_test_split():
    '''
    Show train_test_split on a tiny data set, first plain and then stratified.

    :return: None
    '''
    # Eight rows: [1,2,3,4], [11,12,13,14], ..., [71,72,73,74]
    X=[[10*row+col for col in range(1,5)] for row in range(8)]
    y=[1,1,0,0]*2
    part_names=("X_train","X_test","y_train","y_test")

    # Plain split; the test set is 40% of the data.
    plain_parts=train_test_split(X, y,test_size=0.4, random_state=0)
    for part_name,part in zip(part_names,plain_parts):
        print(part_name+"=",part)

    # Stratified split with the same size and seed.
    stratified_parts=train_test_split(X, y,test_size=0.4,
            random_state=0,stratify=y)
    for part_name,part in zip(part_names,stratified_parts):
        print("Stratify:"+part_name+"=",part)
def test_LeaveOneOut():
    '''
    Show how LeaveOneOut splits a four-sample data set.

    :return: None
    '''
    X=np.array([[1,2,3,4],
            [11,12,13,14],
            [21,22,23,24],
            [31,32,33,34]]
            )
    y=np.array([1,1,0,0])

    # sklearn.model_selection.LeaveOneOut takes no constructor argument and is not iterable itself
    # (that was the old sklearn.cross_validation API); the folds are produced by split().
    lo=LeaveOneOut()
    for train_index,test_index in lo.split(X):
        print("Train Index:",train_index)
        print("Test Index:",test_index)
        print("X_train:",X[train_index])
        print("X_test:",X[test_index])
        print("")
def test_GridSearchCV():
    '''
    Show GridSearchCV with LogisticRegression as the classifier, searching over C, penalty, solver and multi_class.

    :return: None
    '''
    ### load the data
    digits = load_digits()
    X_train,X_test,y_train,y_test=train_test_split(digits.data, digits.target,test_size=0.25,
                random_state=0,stratify=digits.target)
    #### parameter search ######
    tuned_parameters = [{'penalty': ['l1','l2'],
                        'C': [0.01,0.05,0.1,0.5,1,5,10,50,100],
                        'solver':['liblinear'],
                        'multi_class': ['ovr']},

                        {'penalty': ['l2'],
                        'C': [0.01,0.05,0.1,0.5,1,5,10,50,100],
                         'solver':['lbfgs'],
                        'multi_class': ['ovr','multinomial']},
                        ]
    clf=GridSearchCV(LogisticRegression(tol=1e-6),tuned_parameters,cv=10)
    clf.fit(X_train,y_train)
    print("Best parameters set found:",clf.best_params_)
    print("Grid scores:")
    # grid_scores_ is not available on current sklearn.model_selection searchers;
    # cv_results_ carries the same per-candidate mean and standard deviation.
    results=clf.cv_results_
    for mean_score, std_score, params in zip(results['mean_test_score'],
                                             results['std_test_score'],
                                             results['params']):
        print("\t%0.3f (+/-%0.03f) for %s" % (mean_score, std_score * 2, params))

    print("Optimized Score:",clf.score(X_test,y_test))
    print("Detailed classification report:")
    y_true, y_pred = y_test, clf.predict(X_test)
    print(classification_report(y_true, y_pred))
['ovr','multinomial']} 63 | clf=RandomizedSearchCV(LogisticRegression(penalty='l2',solver='lbfgs',tol=1e-6), 64 | tuned_parameters,cv=10,scoring="accuracy",n_iter=100) 65 | clf.fit(X_train,y_train) 66 | print("Best parameters set found:",clf.best_params_) 67 | print("Randomized Grid scores:") 68 | for params, mean_score, scores in clf.grid_scores_: 69 | print("\t%0.3f (+/-%0.03f) for %s" % (mean_score, scores.std() * 2, params)) 70 | 71 | print("Optimized Score:",clf.score(X_test,y_test)) 72 | print("Detailed classification report:") 73 | y_true, y_pred = y_test, clf.predict(X_test) 74 | print(classification_report(y_true, y_pred)) 75 | 76 | if __name__=='__main__': 77 | test_GridSearchCV()# 调用 test_GridSearchCV 78 | # test_RandomizedSearchCV() # 调用 test_RandomizedSearchCV 79 | -------------------------------------------------------------------------------- /chapters/Model_Selection/learning_curve.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | 模型选择 4 | ~~~~~~~~~~~~~~~~~~~~~~~~~~ 5 | 6 | 学习曲线 7 | 8 | :copyright: (c) 2016 by the huaxz1986. 9 | :license: lgpl-3.0, see LICENSE for more details. 
def test_learning_curve():
    '''
    Show learning_curve: how the training-set size affects the accuracy of a LinearSVC classifier.

    :return: None
    '''
    ### load the data
    digits = load_digits()
    #### compute the learning curve ######
    train_sizes=np.linspace(0.1,1.0,endpoint=True,dtype='float')
    abs_trains_sizes,train_scores, test_scores = learning_curve(LinearSVC(),
            digits.data, digits.target,cv=10, scoring="accuracy",train_sizes=train_sizes)
    ####### plot ######
    fig=plt.figure()
    ax=fig.add_subplot(1,1,1)

    # For each training-set size, reduce the scores along axis 1 to a mean and a standard
    # deviation (axis 1 presumably holds one column per CV fold -- see the sklearn docs),
    # then draw the mean curve with a +/- one-std band around it.
    curves=((train_scores,"Training Accuracy","r"),
            (test_scores,"Testing Accuracy","g"))
    for fold_scores,label,color in curves:
        center=np.mean(fold_scores, axis=1)
        spread=np.std(fold_scores, axis=1)
        ax.plot(abs_trains_sizes, center, label=label, color=color)
        ax.fill_between(abs_trains_sizes, center - spread,
                center + spread, alpha=0.2, color=color)

    ax.set_title("Learning Curve with LinearSVC")
    ax.set_xlabel("Sample Nums")
    ax.set_ylabel("Score")
    ax.set_ylim(0,1.1)
    ax.legend(loc='best')
    plt.show()
def test_log_loss():
    '''
    Show the logarithmic loss, once with normalize=True and once with normalize=False.

    :return: None
    '''
    y_true=[1]*3+[0]*3
    # One row of two probabilities per sample.
    y_pred=[[0.1, 0.9],
            [0.2, 0.8],
            [0.3, 0.7],
            [0.7, 0.3],
            [0.8, 0.2],
            [0.9, 0.1]]
    for normalize in (True,False):
        print("log_loss:",log_loss(y_true,y_pred,normalize=normalize))
def test_mean_squared_error():
    '''
    Show mean_squared_error, printing mean_absolute_error on the same data first.

    :return: None
    '''
    y_true=[1]*5+[2]*3+[0]*2
    y_pred=[0]*3+[1]*3+[0]*4

    # (label, metric) pairs, printed in this order.
    reports=(("Mean Absolute Error:",mean_absolute_error),
             ("Mean Square Error:",mean_squared_error))
    for label,metric in reports:
        print(label,metric(y_true,y_pred))
def test_validation_curve():
    '''
    Show validation_curve: how the C parameter affects the accuracy of a LinearSVC classifier.

    :return: None
    '''
    ### load the data
    digits = load_digits()
    #### compute the validation curve ######
    param_name="C"
    param_range = np.logspace(-2, 2)
    train_scores, test_scores = validation_curve(LinearSVC(), digits.data, digits.target,
            param_name=param_name,param_range=param_range,cv=10, scoring="accuracy")
    ####### plot ######
    fig=plt.figure()
    ax=fig.add_subplot(1,1,1)

    # For each value of C, reduce the scores along axis 1 to a mean and a standard deviation
    # (axis 1 presumably holds one column per CV fold -- see the sklearn docs), then draw
    # the mean on a log-scaled x axis with a +/- one-std band around it.
    curves=((train_scores,"Training Accuracy","r"),
            (test_scores,"Testing Accuracy","g"))
    for fold_scores,label,color in curves:
        center=np.mean(fold_scores, axis=1)
        spread=np.std(fold_scores, axis=1)
        ax.semilogx(param_range, center, label=label, color=color)
        ax.fill_between(param_range, center - spread,
                center + spread, alpha=0.2, color=color)

    ax.set_title("Validation Curve with LinearSVC")
    ax.set_xlabel("C")
    ax.set_ylabel("Score")
    ax.set_ylim(0,1.1)
    ax.legend(loc='best')
    plt.show()
https://raw.githubusercontent.com/huaxz1986/git_book/cf539065568c2588df87729c1c76303016933449/chapters/Perceptron_Neural_Network/__init__.py -------------------------------------------------------------------------------- /chapters/Perceptron_Neural_Network/__pycache__/__init__.cpython-35.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huaxz1986/git_book/cf539065568c2588df87729c1c76303016933449/chapters/Perceptron_Neural_Network/__pycache__/__init__.cpython-35.pyc -------------------------------------------------------------------------------- /chapters/Perceptron_Neural_Network/__pycache__/neural_network.cpython-35.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huaxz1986/git_book/cf539065568c2588df87729c1c76303016933449/chapters/Perceptron_Neural_Network/__pycache__/neural_network.cpython-35.pyc -------------------------------------------------------------------------------- /chapters/Perceptron_Neural_Network/__pycache__/neural_network_iris.cpython-35.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huaxz1986/git_book/cf539065568c2588df87729c1c76303016933449/chapters/Perceptron_Neural_Network/__pycache__/neural_network_iris.cpython-35.pyc -------------------------------------------------------------------------------- /chapters/Perceptron_Neural_Network/__pycache__/perceptron.cpython-35.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huaxz1986/git_book/cf539065568c2588df87729c1c76303016933449/chapters/Perceptron_Neural_Network/__pycache__/perceptron.cpython-35.pyc -------------------------------------------------------------------------------- /chapters/Perceptron_Neural_Network/neural_network.py: -------------------------------------------------------------------------------- 1 | 
def creat_data_no_linear_2d(n):
    '''
    Build a two-dimensional data set that is not linearly separable.

    Three groups of points are drawn, rotated by 45 degrees, labelled and shuffled.
    The NumPy global random state is re-seeded with 1 on every call.

    :param n: number of samples in each of the two main groups
    :return: array with 2*n+int(n/10) rows and columns (coordinate 1, coordinate 2, label);
        the int(n/10) extra points carry label +1 but are drawn from the same ranges as the -1 group,
        which is what breaks linear separability
    '''
    np.random.seed(1)
    n_err=int(n/10)

    def rotate(first,second):
        # 45-degree rotation, written with the same arithmetic as the original expressions
        return (first*np.sqrt(2)/2-second*np.sqrt(2)/2,
                first*np.sqrt(2)/2+second*np.sqrt(2)/2)

    # The order of the six draws is fixed: changing it would change the random stream.
    plus_first=np.random.randint(0,100,(n,1))
    plus_second=10+np.random.randint(-5,5,(n,1,))
    minus_first=np.random.randint(0,100,(n,1))
    minus_second=20+np.random.randint(0,10,(n,1))
    err_first=np.random.randint(0,100,(n_err,1))
    err_second=20+np.random.randint(0,10,(n_err,1))

    # (first coordinate, second coordinate, label, group size), stacked in this order
    groups=((plus_first,plus_second,1,n),
            (minus_first,minus_second,-1,n),
            (err_first,err_second,1,n_err))
    labelled=[]
    for first,second,label,count in groups:
        rotated_first,rotated_second=rotate(first,second)
        labelled.append(np.hstack([rotated_first,rotated_second,label*np.ones((count,1))]))

    samples=np.vstack(labelled)
    np.random.shuffle(samples) # shuffle the rows in place
    return samples
def predict_with_MLPClassifier(ax,train_data):
    '''
    Train an MLPClassifier and draw its predictions over the plane as filled contours.

    :param ax: Axes instance to draw on
    :param train_data: training data set; the last column is the label, the columns before it are the features
    :return: None
    '''
    features,labels=train_data[:,:-1],train_data[:,-1]
    clf=MLPClassifier(activation='logistic',max_iter=1000) # build the classifier
    clf.fit(features,labels) # train it
    print(clf.score(features,labels)) # score on the training set

    ## predict the output at every grid point covering the training data ##
    plot_step=1
    first,second=features[:, 0],features[:, 1]
    x_axis=np.arange(first.min() - 1, first.max() + 2, plot_step)
    y_axis=np.arange(second.min() - 1, second.max() + 2, plot_step)
    xx, yy = np.meshgrid(x_axis,y_axis)
    grid_points=np.c_[xx.ravel(), yy.ravel()] # one (x, y) row per grid point
    Z = clf.predict(grid_points).reshape(xx.shape)
    ax.contourf(xx, yy, Z, cmap=plt.cm.Paired)
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/huaxz1986/git_book/cf539065568c2588df87729c1c76303016933449/chapters/PreProcessing/__init__.py -------------------------------------------------------------------------------- /chapters/PreProcessing/__pycache__/__init__.cpython-35.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huaxz1986/git_book/cf539065568c2588df87729c1c76303016933449/chapters/PreProcessing/__pycache__/__init__.cpython-35.pyc -------------------------------------------------------------------------------- /chapters/PreProcessing/__pycache__/binarize.cpython-35.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huaxz1986/git_book/cf539065568c2588df87729c1c76303016933449/chapters/PreProcessing/__pycache__/binarize.cpython-35.pyc -------------------------------------------------------------------------------- /chapters/PreProcessing/__pycache__/dictionary_learning.cpython-35.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huaxz1986/git_book/cf539065568c2588df87729c1c76303016933449/chapters/PreProcessing/__pycache__/dictionary_learning.cpython-35.pyc -------------------------------------------------------------------------------- /chapters/PreProcessing/__pycache__/feature_selection_bagging.cpython-35.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huaxz1986/git_book/cf539065568c2588df87729c1c76303016933449/chapters/PreProcessing/__pycache__/feature_selection_bagging.cpython-35.pyc -------------------------------------------------------------------------------- /chapters/PreProcessing/__pycache__/feature_selection_embeded.cpython-35.pyc: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/huaxz1986/git_book/cf539065568c2588df87729c1c76303016933449/chapters/PreProcessing/__pycache__/feature_selection_embeded.cpython-35.pyc -------------------------------------------------------------------------------- /chapters/PreProcessing/__pycache__/feature_selection_filter.cpython-35.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huaxz1986/git_book/cf539065568c2588df87729c1c76303016933449/chapters/PreProcessing/__pycache__/feature_selection_filter.cpython-35.pyc -------------------------------------------------------------------------------- /chapters/PreProcessing/__pycache__/normalize.cpython-35.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huaxz1986/git_book/cf539065568c2588df87729c1c76303016933449/chapters/PreProcessing/__pycache__/normalize.cpython-35.pyc -------------------------------------------------------------------------------- /chapters/PreProcessing/__pycache__/onehot_encode.cpython-35.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huaxz1986/git_book/cf539065568c2588df87729c1c76303016933449/chapters/PreProcessing/__pycache__/onehot_encode.cpython-35.pyc -------------------------------------------------------------------------------- /chapters/PreProcessing/__pycache__/pipeline.cpython-35.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huaxz1986/git_book/cf539065568c2588df87729c1c76303016933449/chapters/PreProcessing/__pycache__/pipeline.cpython-35.pyc -------------------------------------------------------------------------------- /chapters/PreProcessing/__pycache__/standardize.cpython-35.pyc: 
# --------------------------------------------------------------------------------
# https://raw.githubusercontent.com/huaxz1986/git_book/cf539065568c2588df87729c1c76303016933449/chapters/PreProcessing/__pycache__/standardize.cpython-35.pyc
# --------------------------------------------------------------------------------
# /chapters/PreProcessing/binarize.py:
# --------------------------------------------------------------------------------
# -*- coding: utf-8 -*-
"""
    Data preprocessing
    ~~~~~~~~~~~~~~~~~~

    Binarization

    :copyright: (c) 2016 by the huaxz1986.
    :license: lgpl-3.0, see LICENSE for more details.
"""
from sklearn.preprocessing import Binarizer


def test_Binarizer():
    '''
    Demonstrate Binarizer: print a small matrix before and after thresholding at 2.5.

    :return: None
    '''
    X = [[1, 2, 3, 4, 5],
         [5, 4, 3, 2, 1],
         [3, 3, 3, 3, 3],
         [1, 1, 1, 1, 1]]
    print("before transform:", X)
    binarizer = Binarizer(threshold=2.5)
    print("after transform:", binarizer.transform(X))


if __name__ == '__main__':
    test_Binarizer()  # run the Binarizer demo
# --------------------------------------------------------------------------------
# /chapters/PreProcessing/dictionary_learning.py:
# --------------------------------------------------------------------------------
# -*- coding: utf-8 -*-
"""
    Data preprocessing
    ~~~~~~~~~~~~~~~~~~

    Dictionary learning

    :copyright: (c) 2016 by the huaxz1986.
    :license: lgpl-3.0, see LICENSE for more details.
"""
from sklearn.decomposition import DictionaryLearning


def test_DictionaryLearning():
    '''
    Demonstrate DictionaryLearning: fit 3 components and print them and the transformed data.

    :return: None
    '''
    X = [[1, 2, 3, 4, 5],
         [6, 7, 8, 9, 10],
         [10, 9, 8, 7, 6],
         [5, 4, 3, 2, 1]]
    print("before transform:", X)
    dct = DictionaryLearning(n_components=3)
    dct.fit(X)
    print("components is :", dct.components_)
    print("after transform:", dct.transform(X))


if __name__ == '__main__':
    test_DictionaryLearning()  # run the DictionaryLearning demo
# --------------------------------------------------------------------------------
# /chapters/PreProcessing/feature_selection_bagging.py:
# --------------------------------------------------------------------------------
# -*- coding: utf-8 -*-
"""
    Data preprocessing
    ~~~~~~~~~~~~~~~~~~

    Wrapper-style feature selection

    :copyright: (c) 2016 by the huaxz1986.
    :license: lgpl-3.0, see LICENSE for more details.
"""

from sklearn.feature_selection import RFE, RFECV
from sklearn.svm import LinearSVC
from sklearn.datasets import load_iris
# NOTE(review): sklearn.cross_validation was deprecated in scikit-learn 0.18 and
# removed in 0.20; newer releases provide train_test_split in sklearn.model_selection.
from sklearn import cross_validation


def test_RFE():
    '''
    Demonstrate RFE on the iris data, selecting 2 features.

    :return: None
    '''
    iris = load_iris()
    X = iris.data
    y = iris.target
    estimator = LinearSVC()
    selector = RFE(estimator=estimator, n_features_to_select=2)
    selector.fit(X, y)
    print("N_features %s" % selector.n_features_)
    print("Support is %s" % selector.support_)
    print("Ranking %s" % selector.ranking_)


def test_RFECV():
    '''
    Demonstrate RFECV on the iris data with 3-fold cross validation.

    :return: None
    '''
    iris = load_iris()
    X = iris.data
    y = iris.target
    estimator = LinearSVC()
    selector = RFECV(estimator=estimator, cv=3)
    selector.fit(X, y)
    print("N_features %s" % selector.n_features_)
    print("Support is %s" % selector.support_)
    print("Ranking %s" % selector.ranking_)
    print("Grid Scores %s" % selector.grid_scores_)


def test_compare_with_no_feature_selection():
    '''
    Compare LinearSVC test accuracy on the full iris data and on the 2 features chosen by RFE.

    :return: None
    '''
    # load the data
    iris = load_iris()
    X, y = iris.data, iris.target
    # feature selection
    estimator = LinearSVC()
    selector = RFE(estimator=estimator, n_features_to_select=2)
    X_t = selector.fit_transform(X, y)
    # split into training and test sets; the same random_state keeps both splits aligned
    X_train, X_test, y_train, y_test = cross_validation.train_test_split(
        X, y, test_size=0.25, random_state=0, stratify=y)
    X_train_t, X_test_t, y_train_t, y_test_t = cross_validation.train_test_split(
        X_t, y, test_size=0.25, random_state=0, stratify=y)
    # train and evaluate
    clf = LinearSVC()
    clf_t = LinearSVC()
    clf.fit(X_train, y_train)
    clf_t.fit(X_train_t, y_train_t)
    print("Original DataSet: test score=%s" % (clf.score(X_test, y_test)))
    print("Selected DataSet: test score=%s" % (clf_t.score(X_test_t, y_test_t)))


if __name__ == '__main__':
    test_RFE()  # run the RFE demo
    test_compare_with_no_feature_selection()  # compare with / without feature selection
    test_RFECV()  # run the RFECV demo
# --------------------------------------------------------------------------------
# /chapters/PreProcessing/feature_selection_embeded.py:
# --------------------------------------------------------------------------------
# -*- coding: utf-8 -*-
"""
    Data preprocessing
    ~~~~~~~~~~~~~~~~~~

    Embedded feature selection

    :copyright: (c) 2016 by the huaxz1986.
    :license: lgpl-3.0, see LICENSE for more details.
"""

from sklearn.feature_selection import SelectFromModel
from sklearn.svm import LinearSVC
from sklearn.datasets import load_digits, load_diabetes
import numpy as np
import matplotlib.pyplot as plt
from sklearn.linear_model import Lasso


def test_SelectFromModel():
    '''
    Demonstrate SelectFromModel with an L1-penalised LinearSVC on the digits data.

    :return: None
    '''
    digits = load_digits()
    X = digits.data
    y = digits.target
    estimator = LinearSVC(penalty='l1', dual=False)
    selector = SelectFromModel(estimator=estimator, threshold='mean')
    selector.fit(X, y)
    selector.transform(X)  # result is discarded; only shows the call
    print("Threshold %s" % selector.threshold_)
    print("Support is %s" % selector.get_support(indices=True))


def test_Lasso(*data):
    '''
    Plot how the number of (near-)zero Lasso coefficients changes with alpha.

    :param data: two positional arguments: the sample matrix X and the target values y
    :return: None
    '''
    X, y = data
    alphas = np.logspace(-2, 2)
    zeros = []
    for alpha in alphas:
        regr = Lasso(alpha=alpha)
        regr.fit(X, y)
        # count coefficients whose magnitude is below 1e-5
        zeros.append(int(np.sum(np.abs(regr.coef_) < 1e-5)))
    # plot
    fig = plt.figure()
    ax = fig.add_subplot(1, 1, 1)
    ax.plot(alphas, zeros)
    ax.set_xlabel(r"$\alpha$")
    ax.set_xscale("log")
    ax.set_ylim(0, X.shape[1] + 1)
    ax.set_ylabel("zeros in coef")
    ax.set_title("Sparsity In Lasso")
    plt.show()


def test_LinearSVC(*data):
    '''
    Plot how the number of (near-)zero L1-LinearSVC coefficients changes with C.

    :param data: two positional arguments: the sample matrix X and the labels y
    :return: None
    '''
    X, y = data
    Cs = np.logspace(-2, 2)
    zeros = []
    for C in Cs:
        clf = LinearSVC(C=C, penalty='l1', dual=False)
        clf.fit(X, y)
        # count coefficients (over every row of coef_) whose magnitude is below 1e-5
        zeros.append(int(np.sum(np.abs(clf.coef_) < 1e-5)))
    # plot
    fig = plt.figure()
    ax = fig.add_subplot(1, 1, 1)
    ax.plot(Cs, zeros)
    ax.set_xlabel("C")
    ax.set_xscale("log")
    ax.set_ylabel("zeros in coef")
    ax.set_title("Sparsity In SVM")
    plt.show()


if __name__ == '__main__':
    test_SelectFromModel()  # run the SelectFromModel demo
    # data=load_diabetes() # regression data set
    # test_Lasso(data.data,data.target) # run test_Lasso
    # data=load_digits() # classification data set
    # test_LinearSVC(data.data,data.target) # run test_LinearSVC
# --------------------------------------------------------------------------------
# /chapters/PreProcessing/feature_selection_filter.py:
# --------------------------------------------------------------------------------
# -*- coding: utf-8 -*-
"""
    Data preprocessing
    ~~~~~~~~~~~~~~~~~~

    Filter-style feature selection

    :copyright: (c) 2016 by the huaxz1986.
    :license: lgpl-3.0, see LICENSE for more details.
"""

from sklearn.feature_selection import VarianceThreshold, SelectKBest, f_classif


def test_VarianceThreshold():
    '''
    Demonstrate VarianceThreshold with a threshold of 1.

    :return: None
    '''
    X = [[100, 1, 2, 3],
         [100, 4, 5, 6],
         [100, 7, 8, 9],
         [101, 11, 12, 13]]
    selector = VarianceThreshold(1)
    selector.fit(X)
    print("Variances is %s" % selector.variances_)
    print("After transform is %s" % selector.transform(X))
    print("The support is %s" % selector.get_support(True))
    print("After reverse transform is %s" %
          selector.inverse_transform(selector.transform(X)))


def test_SelectKBest():
    '''
    Demonstrate SelectKBest keeping the 3 best features, scored with f_classif.

    :return: None
    '''
    X = [[1, 2, 3, 4, 5],
         [5, 4, 3, 2, 1],
         [3, 3, 3, 3, 3],
         [1, 1, 1, 1, 1]]
    y = [0, 1, 0, 1]
    print("before transform:", X)
    selector = SelectKBest(score_func=f_classif, k=3)
    selector.fit(X, y)
    print("scores_:", selector.scores_)
    print("pvalues_:", selector.pvalues_)
    print("selected index:", selector.get_support(True))
    print("after transform:", selector.transform(X))


if __name__ == '__main__':
    test_VarianceThreshold()  # run the VarianceThreshold demo
    # test_SelectKBest() # run the SelectKBest demo
# --------------------------------------------------------------------------------
# /chapters/PreProcessing/normalize.py:
# --------------------------------------------------------------------------------
# -*- coding: utf-8 -*-
"""
    Data preprocessing
    ~~~~~~~~~~~~~~~~~~

    Data normalization

    :copyright: (c) 2016 by the huaxz1986.
    :license: lgpl-3.0, see LICENSE for more details.
"""

from sklearn.preprocessing import Normalizer


def test_Normalizer():
    '''
    Demonstrate Normalizer with the L2 norm.

    :return: None
    '''
    X = [[1, 2, 3, 4, 5],
         [5, 4, 3, 2, 1],
         [1, 3, 5, 2, 4],
         [2, 4, 1, 3, 5]]
    print("before transform:", X)
    normalizer = Normalizer(norm='l2')
    print("after transform:", normalizer.transform(X))


if __name__ == '__main__':
    test_Normalizer()  # run the Normalizer demo
# --------------------------------------------------------------------------------
# /chapters/PreProcessing/onehot_encode.py:
# --------------------------------------------------------------------------------
# -*- coding: utf-8 -*-
"""
    Data preprocessing
    ~~~~~~~~~~~~~~~~~~

    One-hot encoding

    :copyright: (c) 2016 by the huaxz1986.
    :license: lgpl-3.0, see LICENSE for more details.
"""

from sklearn.preprocessing import OneHotEncoder


def test_OneHotEncoder():
    '''
    Demonstrate OneHotEncoder: fit on a small matrix, print its attributes and encode one row.

    :return: None
    '''
    X = [[1, 2, 3, 4, 5],
         [5, 4, 3, 2, 1],
         [3, 3, 3, 3, 3],
         [1, 1, 1, 1, 1]]
    print("before transform:", X)
    encoder = OneHotEncoder(sparse=False)
    encoder.fit(X)
    print("active_features_:", encoder.active_features_)
    print("feature_indices_:", encoder.feature_indices_)
    print("n_values_:", encoder.n_values_)
    print("after transform:", encoder.transform([[1, 2, 3, 4, 5]]))


if __name__ == '__main__':
    test_OneHotEncoder()  # run the OneHotEncoder demo
# --------------------------------------------------------------------------------
# /chapters/PreProcessing/pipeline.py:
# --------------------------------------------------------------------------------
# -*- coding: utf-8 -*-
"""
    Data preprocessing
    ~~~~~~~~~~~~~~~~~~

    Pipeline

    :copyright: (c) 2016 by the huaxz1986.
    :license: lgpl-3.0, see LICENSE for more details.
"""

from sklearn.svm import LinearSVC
from sklearn.datasets import load_digits
# NOTE(review): sklearn.cross_validation was deprecated in scikit-learn 0.18 and
# removed in 0.20; newer releases provide train_test_split in sklearn.model_selection.
from sklearn import cross_validation
from sklearn.linear_model import LogisticRegression
from sklearn.pipeline import Pipeline


def test_Pipeline(data):
    '''
    Demonstrate Pipeline: an L1 LinearSVC step followed by LogisticRegression.

    :param data: a 4-tuple, in order: training samples, test samples, training labels, test labels
    :return: None
    '''
    X_train, X_test, y_train, y_test = data
    steps = [("Linear_SVM", LinearSVC(C=1, penalty='l1', dual=False)),
             ("LogisticRegression", LogisticRegression(C=1))]
    pipeline = Pipeline(steps)
    pipeline.fit(X_train, y_train)
    print("Named steps:", pipeline.named_steps)
    print("Pipeline Score:", pipeline.score(X_test, y_test))


if __name__ == '__main__':
    data = load_digits()  # classification data set
    test_Pipeline(cross_validation.train_test_split(
        data.data, data.target, test_size=0.25,
        random_state=0, stratify=data.target))  # run the Pipeline demo
# --------------------------------------------------------------------------------
# /chapters/PreProcessing/standardize.py:
# --------------------------------------------------------------------------------
# -*- coding: utf-8 -*-
"""
    Data preprocessing
    ~~~~~~~~~~~~~~~~~~

    Data standardization

    :copyright: (c) 2016 by the huaxz1986.
    :license: lgpl-3.0, see LICENSE for more details.
"""

from sklearn.preprocessing import MinMaxScaler, MaxAbsScaler, StandardScaler


def test_MinMaxScaler():
    '''
    Demonstrate MinMaxScaler scaling every feature into the range (0, 2).

    :return: None
    '''
    X = [[1, 5, 1, 2, 10],
         [2, 6, 3, 2, 7],
         [3, 7, 5, 6, 4],
         [4, 8, 7, 8, 1]]
    print("before transform:", X)
    scaler = MinMaxScaler(feature_range=(0, 2))
    scaler.fit(X)
    print("min_ is :", scaler.min_)
    print("scale_ is :", scaler.scale_)
    print("data_max_ is :", scaler.data_max_)
    print("data_min_ is :", scaler.data_min_)
    print("data_range_ is :", scaler.data_range_)
    print("after transform:", scaler.transform(X))


def test_MaxAbsScaler():
    '''
    Demonstrate MaxAbsScaler.

    :return: None
    '''
    X = [[1, 5, 1, 2, 10],
         [2, 6, 3, 2, 7],
         [3, 7, 5, 6, 4],
         [4, 8, 7, 8, 1]]
    print("before transform:", X)
    scaler = MaxAbsScaler()
    scaler.fit(X)
    print("scale_ is :", scaler.scale_)
    print("max_abs_ is :", scaler.max_abs_)
    print("after transform:", scaler.transform(X))


def test_StandardScaler():
    '''
    Demonstrate StandardScaler.

    :return: None
    '''
    X = [[1, 5, 1, 2, 10],
         [2, 6, 3, 2, 7],
         [3, 7, 5, 6, 4],
         [4, 8, 7, 8, 1]]
    print("before transform:", X)
    scaler = StandardScaler()
    scaler.fit(X)
    print("scale_ is :", scaler.scale_)
    print("mean_ is :", scaler.mean_)
    print("var_ is :", scaler.var_)
    print("after transform:", scaler.transform(X))


if __name__ == '__main__':
    test_MinMaxScaler()  # run the MinMaxScaler demo
    # test_MaxAbsScaler() # run the MaxAbsScaler demo
    # test_StandardScaler() # run the StandardScaler demo
# --------------------------------------------------------------------------------
# /chapters/SVM/__init__.py:
# --------------------------------------------------------------------------------
# https://raw.githubusercontent.com/huaxz1986/git_book/cf539065568c2588df87729c1c76303016933449/chapters/SVM/__init__.py
# --------------------------------------------------------------------------------
# /chapters/SVM/__pycache__/SVC.cpython-35.pyc:
# --------------------------------------------------------------------------------
# https://raw.githubusercontent.com/huaxz1986/git_book/cf539065568c2588df87729c1c76303016933449/chapters/SVM/__pycache__/SVC.cpython-35.pyc
# --------------------------------------------------------------------------------
# /chapters/SVM/__pycache__/SVR.cpython-35.pyc:
# --------------------------------------------------------------------------------
# https://raw.githubusercontent.com/huaxz1986/git_book/cf539065568c2588df87729c1c76303016933449/chapters/SVM/__pycache__/SVR.cpython-35.pyc
# --------------------------------------------------------------------------------
# /chapters/SVM/__pycache__/__init__.cpython-35.pyc:
# --------------------------------------------------------------------------------
# https://raw.githubusercontent.com/huaxz1986/git_book/cf539065568c2588df87729c1c76303016933449/chapters/SVM/__pycache__/__init__.cpython-35.pyc
# --------------------------------------------------------------------------------
# /chapters/SVM/__pycache__/linearSVC.cpython-35.pyc:
# --------------------------------------------------------------------------------
# https://raw.githubusercontent.com/huaxz1986/git_book/cf539065568c2588df87729c1c76303016933449/chapters/SVM/__pycache__/linearSVC.cpython-35.pyc
# --------------------------------------------------------------------------------
# /chapters/SVM/__pycache__/linearSVR.cpython-35.pyc:
# --------------------------------------------------------------------------------
# https://raw.githubusercontent.com/huaxz1986/git_book/cf539065568c2588df87729c1c76303016933449/chapters/SVM/__pycache__/linearSVR.cpython-35.pyc
# --------------------------------------------------------------------------------
# /chapters/SVM/linearSVC.py:
# --------------------------------------------------------------------------------
# -*- coding: utf-8 -*-
"""
    Support vector machines
    ~~~~~~~~~~~~~~~~~~~~~~~

    LinearSVC

    :copyright: (c) 2016 by the huaxz1986.
    :license: lgpl-3.0, see LICENSE for more details.
"""
import matplotlib.pyplot as plt
import numpy as np
# NOTE(review): sklearn.cross_validation was deprecated in scikit-learn 0.18 and
# removed in 0.20; newer releases provide train_test_split in sklearn.model_selection.
from sklearn import datasets, linear_model, cross_validation, svm


def load_data_classfication():
    '''
    Load the iris data set and split it for a classification task.

    :return: a 4-tuple, in order: training samples, test samples, training labels, test labels
    '''
    iris = datasets.load_iris()  # iris data set bundled with scikit-learn
    X_train = iris.data
    y_train = iris.target
    # stratified split; the test set is 1/4 of the original data
    return cross_validation.train_test_split(X_train, y_train, test_size=0.25,
                                             random_state=0, stratify=y_train)


def test_LinearSVC(*data):
    '''
    Demonstrate LinearSVC with default parameters.

    :param data: four positional arguments, in order: training samples, test samples, training labels, test labels
    :return: None
    '''
    X_train, X_test, y_train, y_test = data
    cls = svm.LinearSVC()
    cls.fit(X_train, y_train)
    print('Coefficients:%s, intercept %s' % (cls.coef_, cls.intercept_))
    print('Score: %.2f' % cls.score(X_test, y_test))


def test_LinearSVC_loss(*data):
    '''
    Show how the LinearSVC test score depends on the loss function.

    :param data: four positional arguments, in order: training samples, test samples, training labels, test labels
    :return: None
    '''
    X_train, X_test, y_train, y_test = data
    losses = ['hinge', 'squared_hinge']
    for loss in losses:
        cls = svm.LinearSVC(loss=loss)
        cls.fit(X_train, y_train)
        # loss is a string, so it must be formatted with %s (%f raises TypeError)
        print("Loss:%s" % loss)
        print('Coefficients:%s, intercept %s' % (cls.coef_, cls.intercept_))
        print('Score: %.2f' % cls.score(X_test, y_test))


def test_LinearSVC_L12(*data):
    '''
    Show how the LinearSVC test score depends on the penalty (l1 or l2).

    :param data: four positional arguments, in order: training samples, test samples, training labels, test labels
    :return: None
    '''
    X_train, X_test, y_train, y_test = data
    L12 = ['l1', 'l2']
    for p in L12:
        cls = svm.LinearSVC(penalty=p, dual=False)
        cls.fit(X_train, y_train)
        print("penalty:%s" % p)
        print('Coefficients:%s, intercept %s' % (cls.coef_, cls.intercept_))
        print('Score: %.2f' % cls.score(X_test, y_test))


def test_LinearSVC_C(*data):
    '''
    Plot the LinearSVC training and test scores as a function of C.

    :param data: four positional arguments, in order: training samples, test samples, training labels, test labels
    :return: None
    '''
    X_train, X_test, y_train, y_test = data
    Cs = np.logspace(-2, 1)
    train_scores = []
    test_scores = []
    for C in Cs:
        cls = svm.LinearSVC(C=C)
        cls.fit(X_train, y_train)
        train_scores.append(cls.score(X_train, y_train))
        test_scores.append(cls.score(X_test, y_test))

    # plot
    fig = plt.figure()
    ax = fig.add_subplot(1, 1, 1)
    ax.plot(Cs, train_scores, label="Training score")
    ax.plot(Cs, test_scores, label="Testing score")
    ax.set_xlabel(r"C")
    ax.set_ylabel(r"score")
    ax.set_xscale('log')
    ax.set_title("LinearSVC")
    ax.legend(loc='best')
    plt.show()


if __name__ == "__main__":
    X_train, X_test, y_train, y_test = load_data_classfication()  # classification data set
    test_LinearSVC(X_train, X_test, y_train, y_test)  # run test_LinearSVC
    # test_LinearSVC_loss(X_train,X_test,y_train,y_test) # run test_LinearSVC_loss
    # test_LinearSVC_L12(X_train,X_test,y_train,y_test) # run test_LinearSVC_L12
    # test_LinearSVC_C(X_train,X_test,y_train,y_test) # run test_LinearSVC_C
# --------------------------------------------------------------------------------
# /chapters/SVM/linearSVR.py:
# --------------------------------------------------------------------------------
# -*- coding: utf-8 -*-
"""
    Support vector machines
    ~~~~~~~~~~~~~~~~~~~~~~~

    LinearSVR

    :copyright: (c) 2016 by the huaxz1986.
    :license: lgpl-3.0, see LICENSE for more details.
"""
import matplotlib.pyplot as plt
import numpy as np
# NOTE(review): sklearn.cross_validation was deprecated in scikit-learn 0.18 and
# removed in 0.20; newer releases provide train_test_split in sklearn.model_selection.
from sklearn import datasets, linear_model, cross_validation, svm


def load_data_regression():
    '''
    Load the diabetes data set and split it for a regression task.

    :return: a 4-tuple, in order: training samples, test samples, training targets, test targets
    '''
    diabetes = datasets.load_diabetes()  # diabetes data set bundled with scikit-learn
    # the test set is 1/4 of the original data
    return cross_validation.train_test_split(diabetes.data, diabetes.target,
                                             test_size=0.25, random_state=0)


def test_LinearSVR(*data):
    '''
    Demonstrate LinearSVR with default parameters.

    :param data: four positional arguments, in order: training samples, test samples, training targets, test targets
    :return: None
    '''
    X_train, X_test, y_train, y_test = data
    regr = svm.LinearSVR()
    regr.fit(X_train, y_train)
    print('Coefficients:%s, intercept %s' % (regr.coef_, regr.intercept_))
    print('Score: %.2f' % regr.score(X_test, y_test))


def test_LinearSVR_loss(*data):
    '''
    Show how the LinearSVR test score depends on the loss function.

    :param data: four positional arguments, in order: training samples, test samples, training targets, test targets
    :return: None
    '''
    X_train, X_test, y_train, y_test = data
    losses = ['epsilon_insensitive', 'squared_epsilon_insensitive']
    for loss in losses:
        regr = svm.LinearSVR(loss=loss)
        regr.fit(X_train, y_train)
        print("loss:%s" % loss)
        print('Coefficients:%s, intercept %s' % (regr.coef_, regr.intercept_))
        print('Score: %.2f' % regr.score(X_test, y_test))


def test_LinearSVR_epsilon(*data):
    '''
    Plot the LinearSVR training and test scores as a function of epsilon.

    :param data: four positional arguments, in order: training samples, test samples, training targets, test targets
    :return: None
    '''
    X_train, X_test, y_train, y_test = data
    epsilons = np.logspace(-2, 2)
    train_scores = []
    test_scores = []
    for epsilon in epsilons:
        regr = svm.LinearSVR(epsilon=epsilon, loss='squared_epsilon_insensitive')
        regr.fit(X_train, y_train)
        train_scores.append(regr.score(X_train, y_train))
        test_scores.append(regr.score(X_test, y_test))
    fig = plt.figure()
    ax = fig.add_subplot(1, 1, 1)
    ax.plot(epsilons, train_scores, label="Training score ", marker='+')
    ax.plot(epsilons, test_scores, label=" Testing score ", marker='o')
    ax.set_title("LinearSVR_epsilon ")
    ax.set_xscale("log")
    ax.set_xlabel(r"$\epsilon$")
    ax.set_ylabel("score")
    ax.set_ylim(-1, 1.05)
    ax.legend(loc="best", framealpha=0.5)
    plt.show()


def test_LinearSVR_C(*data):
    '''
    Plot the LinearSVR training and test scores as a function of C.

    :param data: four positional arguments, in order: training samples, test samples, training targets, test targets
    :return: None
    '''
    X_train, X_test, y_train, y_test = data
    Cs = np.logspace(-1, 2)
    train_scores = []
    test_scores = []
    for C in Cs:
        regr = svm.LinearSVR(epsilon=0.1, loss='squared_epsilon_insensitive', C=C)
        regr.fit(X_train, y_train)
        train_scores.append(regr.score(X_train, y_train))
        test_scores.append(regr.score(X_test, y_test))
    fig = plt.figure()
    ax = fig.add_subplot(1, 1, 1)
    ax.plot(Cs, train_scores, label="Training score ", marker='+')
    ax.plot(Cs, test_scores, label=" Testing score ", marker='o')
    ax.set_title("LinearSVR_C ")
    ax.set_xscale("log")
    ax.set_xlabel(r"C")
    ax.set_ylabel("score")
    ax.set_ylim(-1, 1.05)
    ax.legend(loc="best", framealpha=0.5)
    plt.show()


if __name__ == "__main__":
    X_train, X_test, y_train, y_test = load_data_regression()  # regression data set
    test_LinearSVR(X_train, X_test, y_train, y_test)  # run test_LinearSVR
    # test_LinearSVR_loss(X_train,X_test,y_train,y_test) # run test_LinearSVR_loss
    # test_LinearSVR_epsilon(X_train,X_test,y_train,y_test) # run test_LinearSVR_epsilon
    # test_LinearSVR_C(X_train,X_test,y_train,y_test) # run test_LinearSVR_C
# --------------------------------------------------------------------------------
# /chapters/Semi_Supervised_Learning/__init__.py:
# --------------------------------------------------------------------------------
# https://raw.githubusercontent.com/huaxz1986/git_book/cf539065568c2588df87729c1c76303016933449/chapters/Semi_Supervised_Learning/__init__.py
# --------------------------------------------------------------------------------
# /chapters/Semi_Supervised_Learning/__pycache__/__init__.cpython-35.pyc:
# --------------------------------------------------------------------------------
# https://raw.githubusercontent.com/huaxz1986/git_book/cf539065568c2588df87729c1c76303016933449/chapters/Semi_Supervised_Learning/__pycache__/__init__.cpython-35.pyc
# --------------------------------------------------------------------------------
# /chapters/Semi_Supervised_Learning/__pycache__/labelPropagation.cpython-35.pyc:
# --------------------------------------------------------------------------------
# https://raw.githubusercontent.com/huaxz1986/git_book/cf539065568c2588df87729c1c76303016933449/chapters/Semi_Supervised_Learning/__pycache__/labelPropagation.cpython-35.pyc
# --------------------------------------------------------------------------------
# /chapters/Semi_Supervised_Learning/__pycache__/labelSpreading.cpython-35.pyc:
# --------------------------------------------------------------------------------
# https://raw.githubusercontent.com/huaxz1986/git_book/cf539065568c2588df87729c1c76303016933449/chapters/Semi_Supervised_Learning/__pycache__/labelSpreading.cpython-35.pyc
# --------------------------------------------------------------------------------
# /chapters/Semi_Supervised_Learning/labelPropagation.py:
# --------------------------------------------------------------------------------
# -*- coding: utf-8 -*-
"""
    Semi-supervised learning
    ~~~~~~~~~~~~~~~~~~~~~~~~

    LabelPropagation

    :copyright: (c) 2016 by the huaxz1986.
    :license: lgpl-3.0, see LICENSE for more details.
"""
import numpy as np
import matplotlib.pyplot as plt
from sklearn import metrics
from sklearn import datasets
from sklearn.semi_supervised import LabelPropagation


def load_data():
    '''
    Load the digits data set, shuffle it and mark the last 90% of samples as unlabeled.

    :return: a 3-tuple, in order: samples, true labels, indices of the unlabeled samples
    '''
    digits = datasets.load_digits()
    # shuffle the samples with a fixed seed
    rng = np.random.RandomState(0)
    indices = np.arange(len(digits.data))  # sample indices
    rng.shuffle(indices)                   # shuffle the indices in place
    X = digits.data[indices]
    y = digits.target[indices]
    # build the index set of unlabeled samples
    n_labeled_points = int(len(y) / 10)                       # only 10% of the samples keep a label
    unlabeled_indices = np.arange(len(y))[n_labeled_points:]  # the remaining 90% are unlabeled

    return X, y, unlabeled_indices


def _mask_unlabeled(y, unlabeled_indices):
    '''
    Return a copy of y in which the entries at unlabeled_indices are set to -1.

    :param y: array of true labels (left unmodified)
    :param unlabeled_indices: indices to mark as unlabeled
    :return: new label array for training
    '''
    y_train = np.copy(y)  # copy is required: the true labels in y are needed later for scoring
    y_train[unlabeled_indices] = -1
    return y_train


def test_LabelPropagation(*data):
    '''
    Demonstrate LabelPropagation with an rbf kernel and print the accuracy on the unlabeled samples.

    :param data: three positional arguments, in order: samples, true labels, indices of the unlabeled samples
    :return: None
    '''
    X, y, unlabeled_indices = data
    y_train = _mask_unlabeled(y, unlabeled_indices)
    clf = LabelPropagation(max_iter=100, kernel='rbf', gamma=0.1)
    clf.fit(X, y_train)
    # accuracy on the unlabeled samples
    predicted_labels = clf.transduction_[unlabeled_indices]  # predicted labels
    true_labels = y[unlabeled_indices]                        # true labels
    print("Accuracy:%f" % metrics.accuracy_score(true_labels, predicted_labels))
    # alternatively: print("Accuracy:%f"%clf.score(X[unlabeled_indices],true_labels))


def test_LabelPropagation_rbf(*data):
    '''
    Plot the LabelPropagation score (rbf kernel) as a function of gamma, one curve per alpha.

    :param data: three positional arguments, in order: samples, true labels, indices of the unlabeled samples
    :return: None
    '''
    X, y, unlabeled_indices = data
    y_train = _mask_unlabeled(y, unlabeled_indices)

    fig = plt.figure()
    ax = fig.add_subplot(1, 1, 1)
    alphas = np.linspace(0.01, 1, num=10, endpoint=True)
    gammas = np.logspace(-2, 2, num=50)
    # one colour per alpha curve
    colors = ((1, 0, 0), (0, 1, 0), (0, 0, 1), (0.5, 0.5, 0), (0, 0.5, 0.5), (0.5, 0, 0.5),
              (0.4, 0.6, 0), (0.6, 0.4, 0), (0, 0.6, 0.4), (0.5, 0.3, 0.2),)
    # train and plot
    for alpha, color in zip(alphas, colors):
        scores = []
        for gamma in gammas:
            clf = LabelPropagation(max_iter=100, gamma=gamma, alpha=alpha, kernel='rbf')
            clf.fit(X, y_train)
            scores.append(clf.score(X[unlabeled_indices], y[unlabeled_indices]))
        ax.plot(gammas, scores, label=r"$\alpha=%s$" % alpha, color=color)

    # figure settings
    ax.set_xlabel(r"$\gamma$")
    ax.set_ylabel("score")
    ax.set_xscale("log")
    ax.legend(loc="best")
    ax.set_title("LabelPropagation rbf kernel")
    plt.show()


def test_LabelPropagation_knn(*data):
    '''
    Plot the LabelPropagation score (knn kernel) as a function of n_neighbors, one curve per alpha.

    :param data: three positional arguments, in order: samples, true labels, indices of the unlabeled samples
    :return: None
    '''
    X, y, unlabeled_indices = data
    y_train = _mask_unlabeled(y, unlabeled_indices)

    fig = plt.figure()
    ax = fig.add_subplot(1, 1, 1)
    alphas = np.linspace(0.01, 1, num=10, endpoint=True)
    Ks = [1, 2, 3, 4, 5, 8, 10, 15, 20, 25, 30, 35, 40, 50]
    # one colour per alpha curve
    colors = ((1, 0, 0), (0, 1, 0), (0, 0, 1), (0.5, 0.5, 0), (0, 0.5, 0.5), (0.5, 0, 0.5),
              (0.4, 0.6, 0), (0.6, 0.4, 0), (0, 0.6, 0.4), (0.5, 0.3, 0.2),)
    # train and plot
    for alpha, color in zip(alphas, colors):
        scores = []
        for K in Ks:
            clf = LabelPropagation(max_iter=100, n_neighbors=K, alpha=alpha, kernel='knn')
            clf.fit(X, y_train)
            scores.append(clf.score(X[unlabeled_indices], y[unlabeled_indices]))
        ax.plot(Ks, scores, label=r"$\alpha=%s$" % alpha, color=color)

    # figure settings
    ax.set_xlabel(r"$k$")
    ax.set_ylabel("score")
    ax.legend(loc="best")
    ax.set_title("LabelPropagation knn kernel")
    plt.show()


if __name__ == '__main__':
    data = load_data()  # semi-supervised classification data set
    # test_LabelPropagation(*data) # run test_LabelPropagation
    # test_LabelPropagation_rbf(*data) # run test_LabelPropagation_rbf
    test_LabelPropagation_knn(*data)  # call
test_LabelPropagation_knn -------------------------------------------------------------------------------- /chapters/Semi_Supervised_Learning/labelSpreading.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | 半监督学习 4 | ~~~~~~~~~~~~~~~~ 5 | 6 | LabelSpreading 7 | 8 | :copyright: (c) 2016 by the huaxz1986. 9 | :license: lgpl-3.0, see LICENSE for more details. 10 | """ 11 | import numpy as np 12 | import matplotlib.pyplot as plt 13 | from sklearn import metrics 14 | from sklearn import datasets 15 | from sklearn.semi_supervised.label_propagation import LabelSpreading 16 | 17 | def load_data(): 18 | ''' 19 | 加载数据集 20 | 21 | :return: 一个元组,依次为: 样本集合、样本标记集合、 未标记样本的下标集合 22 | ''' 23 | digits = datasets.load_digits() 24 | ###### 混洗样本 ######## 25 | rng = np.random.RandomState(0) 26 | indices = np.arange(len(digits.data)) # 样本下标集合 27 | rng.shuffle(indices) # 混洗样本下标集合 28 | X = digits.data[indices] 29 | y = digits.target[indices] 30 | ###### 生成未标记样本的下标集合 #### 31 | n_labeled_points = int(len(y)/10) # 只有 10% 的样本有标记 32 | unlabeled_indices = np.arange(len(y))[n_labeled_points:] # 后面 90% 的样本未标记 33 | 34 | return X,y,unlabeled_indices 35 | 36 | def test_LabelSpreading(*data): 37 | ''' 38 | 测试 LabelSpreading 的用法 39 | 40 | :param data: 一个元组,依次为: 样本集合、样本标记集合、 未标记样本的下标集合 41 | :return: None 42 | ''' 43 | X,y,unlabeled_indices=data 44 | y_train=np.copy(y) # 必须拷贝,后面要用到 y 45 | y_train[unlabeled_indices]=-1 # 未标记样本的标记设定为 -1 46 | clf=LabelSpreading(max_iter=100,kernel='rbf',gamma=0.1) 47 | clf.fit(X,y_train) 48 | ### 获取预测准确率 49 | predicted_labels = clf.transduction_[unlabeled_indices] # 预测标记 50 | true_labels = y[unlabeled_indices] # 真实标记 51 | print("Accuracy:%f"%metrics.accuracy_score(true_labels,predicted_labels)) 52 | # 或者 print("Accuracy:%f"%clf.score(X[unlabeled_indices],true_labels)) 53 | def test_LabelSpreading_rbf(*data): 54 | ''' 55 | 测试 LabelSpreading 的 rbf 核时,预测性能随 alpha 和 gamma 的变化 56 | 57 | :param data: 
一个元组,依次为: 样本集合、样本标记集合、 未标记样本的下标集合 58 | :return: None 59 | ''' 60 | X,y,unlabeled_indices=data 61 | y_train=np.copy(y) # 必须拷贝,后面要用到 y 62 | y_train[unlabeled_indices]=-1 # 未标记样本的标记设定为 -1 63 | 64 | fig=plt.figure() 65 | ax=fig.add_subplot(1,1,1) 66 | alphas=np.linspace(0.01,1,num=10,endpoint=True) 67 | gammas=np.logspace(-2,2,num=50) 68 | colors=((1,0,0),(0,1,0),(0,0,1),(0.5,0.5,0),(0,0.5,0.5),(0.5,0,0.5), 69 | (0.4,0.6,0),(0.6,0.4,0),(0,0.6,0.4),(0.5,0.3,0.2),) # 颜色集合,不同曲线用不同颜色 70 | ## 训练并绘图 71 | for alpha,color in zip(alphas,colors): 72 | scores=[] 73 | for gamma in gammas: 74 | clf=LabelSpreading(max_iter=100,gamma=gamma,alpha=alpha,kernel='rbf') 75 | clf.fit(X,y_train) 76 | scores.append(clf.score(X[unlabeled_indices],y[unlabeled_indices])) 77 | ax.plot(gammas,scores,label=r"$\alpha=%s$"%alpha,color=color) 78 | 79 | ### 设置图形 80 | ax.set_xlabel(r"$\gamma$") 81 | ax.set_ylabel("score") 82 | ax.set_xscale("log") 83 | ax.legend(loc="best") 84 | ax.set_title("LabelSpreading rbf kernel") 85 | plt.show() 86 | def test_LabelSpreading_knn(*data): 87 | ''' 88 | 测试 LabelSpreading 的 knn 核时,预测性能随 alpha 和 n_neighbors 的变化 89 | 90 | 91 | :param data: 一个元组,依次为: 样本集合、样本标记集合、 未标记样本的下标集合 92 | :return: None 93 | ''' 94 | X,y,unlabeled_indices=data 95 | y_train=np.copy(y) # 必须拷贝,后面要用到 y 96 | y_train[unlabeled_indices]=-1 # 未标记样本的标记设定为 -1 97 | 98 | fig=plt.figure() 99 | ax=fig.add_subplot(1,1,1) 100 | alphas=np.linspace(0.01,1,num=10,endpoint=True) 101 | Ks=[1,2,3,4,5,8,10,15,20,25,30,35,40,50] 102 | colors=((1,0,0),(0,1,0),(0,0,1),(0.5,0.5,0),(0,0.5,0.5),(0.5,0,0.5), 103 | (0.4,0.6,0),(0.6,0.4,0),(0,0.6,0.4),(0.5,0.3,0.2),) # 颜色集合,不同曲线用不同颜色 104 | ## 训练并绘图 105 | for alpha,color in zip(alphas,colors): 106 | scores=[] 107 | for K in Ks: 108 | clf=LabelSpreading(kernel='knn',max_iter=100,n_neighbors=K,alpha=alpha) 109 | clf.fit(X,y_train) 110 | scores.append(clf.score(X[unlabeled_indices],y[unlabeled_indices])) 111 | ax.plot(Ks,scores,label=r"$\alpha=%s$"%alpha,color=color) 112 | 113 | ### 
设置图形 114 | ax.set_xlabel(r"$k$") 115 | ax.set_ylabel("score") 116 | ax.legend(loc="best") 117 | ax.set_title("LabelSpreading knn kernel") 118 | plt.show() 119 | if __name__=='__main__': 120 | data=load_data() # 获取半监督分类数据集 121 | #test_LabelSpreading(*data) # 调用 test_LabelSpreading 122 | #test_LabelSpreading_rbf(*data)# 调用 test_LabelSpreading_rbf 123 | test_LabelSpreading_knn(*data)# 调用 test_LabelSpreading_knn 124 | 125 | -------------------------------------------------------------------------------- /chapters/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huaxz1986/git_book/cf539065568c2588df87729c1c76303016933449/chapters/__init__.py -------------------------------------------------------------------------------- /chapters/__pycache__/__init__.cpython-35.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huaxz1986/git_book/cf539065568c2588df87729c1c76303016933449/chapters/__pycache__/__init__.cpython-35.pyc -------------------------------------------------------------------------------- /docs/build/doctrees/chapters.Bayesian.doctree: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huaxz1986/git_book/cf539065568c2588df87729c1c76303016933449/docs/build/doctrees/chapters.Bayesian.doctree -------------------------------------------------------------------------------- /docs/build/doctrees/chapters.Cluster_EM.doctree: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huaxz1986/git_book/cf539065568c2588df87729c1c76303016933449/docs/build/doctrees/chapters.Cluster_EM.doctree -------------------------------------------------------------------------------- /docs/build/doctrees/chapters.Decision_Tree.doctree: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/huaxz1986/git_book/cf539065568c2588df87729c1c76303016933449/docs/build/doctrees/chapters.Decision_Tree.doctree -------------------------------------------------------------------------------- /docs/build/doctrees/chapters.Ensemble.doctree: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huaxz1986/git_book/cf539065568c2588df87729c1c76303016933449/docs/build/doctrees/chapters.Ensemble.doctree -------------------------------------------------------------------------------- /docs/build/doctrees/chapters.KNN_Dimension_Reduction.doctree: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huaxz1986/git_book/cf539065568c2588df87729c1c76303016933449/docs/build/doctrees/chapters.KNN_Dimension_Reduction.doctree -------------------------------------------------------------------------------- /docs/build/doctrees/chapters.Kaggle.doctree: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huaxz1986/git_book/cf539065568c2588df87729c1c76303016933449/docs/build/doctrees/chapters.Kaggle.doctree -------------------------------------------------------------------------------- /docs/build/doctrees/chapters.Linear.doctree: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huaxz1986/git_book/cf539065568c2588df87729c1c76303016933449/docs/build/doctrees/chapters.Linear.doctree -------------------------------------------------------------------------------- /docs/build/doctrees/chapters.Model_Selection.doctree: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huaxz1986/git_book/cf539065568c2588df87729c1c76303016933449/docs/build/doctrees/chapters.Model_Selection.doctree 
-------------------------------------------------------------------------------- /docs/build/doctrees/chapters.Perceptron_Neural_Network.doctree: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huaxz1986/git_book/cf539065568c2588df87729c1c76303016933449/docs/build/doctrees/chapters.Perceptron_Neural_Network.doctree -------------------------------------------------------------------------------- /docs/build/doctrees/chapters.PreProcessing.doctree: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huaxz1986/git_book/cf539065568c2588df87729c1c76303016933449/docs/build/doctrees/chapters.PreProcessing.doctree -------------------------------------------------------------------------------- /docs/build/doctrees/chapters.SVM.doctree: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huaxz1986/git_book/cf539065568c2588df87729c1c76303016933449/docs/build/doctrees/chapters.SVM.doctree -------------------------------------------------------------------------------- /docs/build/doctrees/chapters.Semi_Supervised_Learning.doctree: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huaxz1986/git_book/cf539065568c2588df87729c1c76303016933449/docs/build/doctrees/chapters.Semi_Supervised_Learning.doctree -------------------------------------------------------------------------------- /docs/build/doctrees/chapters.doctree: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huaxz1986/git_book/cf539065568c2588df87729c1c76303016933449/docs/build/doctrees/chapters.doctree -------------------------------------------------------------------------------- /docs/build/doctrees/environment.pickle: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/huaxz1986/git_book/cf539065568c2588df87729c1c76303016933449/docs/build/doctrees/environment.pickle -------------------------------------------------------------------------------- /docs/build/doctrees/index.doctree: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huaxz1986/git_book/cf539065568c2588df87729c1c76303016933449/docs/build/doctrees/index.doctree -------------------------------------------------------------------------------- /docs/build/doctrees/modules.doctree: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huaxz1986/git_book/cf539065568c2588df87729c1c76303016933449/docs/build/doctrees/modules.doctree -------------------------------------------------------------------------------- /docs/build/html/.buildinfo: -------------------------------------------------------------------------------- 1 | # Sphinx build info version 1 2 | # This file hashes the configuration used when building these files. When it is not found, a full rebuild will be done. 3 | config: 3b6ff7cf1b72d4e2c1632fab2716f079 4 | tags: 645f666f9bcd5a90fca523b33c5a78b7 5 | -------------------------------------------------------------------------------- /docs/build/html/_sources/chapters.Bayesian.txt: -------------------------------------------------------------------------------- 1 | chapters.Bayesian package 2 | ========================= 3 | 4 | Submodules 5 | ---------- 6 | 7 | chapters.Bayesian.bayesian module 8 | --------------------------------- 9 | 10 | .. automodule:: chapters.Bayesian.bayesian 11 | :members: 12 | :undoc-members: 13 | :show-inheritance: 14 | 15 | chapters.Bayesian.bernoulliNB module 16 | ------------------------------------ 17 | 18 | .. 
automodule:: chapters.Bayesian.bernoulliNB 19 | :members: 20 | :undoc-members: 21 | :show-inheritance: 22 | 23 | chapters.Bayesian.gaussianNB module 24 | ----------------------------------- 25 | 26 | .. automodule:: chapters.Bayesian.gaussianNB 27 | :members: 28 | :undoc-members: 29 | :show-inheritance: 30 | 31 | chapters.Bayesian.multinomialNB module 32 | -------------------------------------- 33 | 34 | .. automodule:: chapters.Bayesian.multinomialNB 35 | :members: 36 | :undoc-members: 37 | :show-inheritance: 38 | 39 | 40 | Module contents 41 | --------------- 42 | 43 | .. automodule:: chapters.Bayesian 44 | :members: 45 | :undoc-members: 46 | :show-inheritance: 47 | -------------------------------------------------------------------------------- /docs/build/html/_sources/chapters.Cluster_EM.txt: -------------------------------------------------------------------------------- 1 | chapters.Cluster_EM package 2 | =========================== 3 | 4 | Submodules 5 | ---------- 6 | 7 | chapters.Cluster_EM.agglomerative_clustering module 8 | --------------------------------------------------- 9 | 10 | .. automodule:: chapters.Cluster_EM.agglomerative_clustering 11 | :members: 12 | :undoc-members: 13 | :show-inheritance: 14 | 15 | chapters.Cluster_EM.cluster module 16 | ---------------------------------- 17 | 18 | .. automodule:: chapters.Cluster_EM.cluster 19 | :members: 20 | :undoc-members: 21 | :show-inheritance: 22 | 23 | chapters.Cluster_EM.dbscan module 24 | --------------------------------- 25 | 26 | .. automodule:: chapters.Cluster_EM.dbscan 27 | :members: 28 | :undoc-members: 29 | :show-inheritance: 30 | 31 | chapters.Cluster_EM.gmm module 32 | ------------------------------ 33 | 34 | .. automodule:: chapters.Cluster_EM.gmm 35 | :members: 36 | :undoc-members: 37 | :show-inheritance: 38 | 39 | chapters.Cluster_EM.kmeans module 40 | --------------------------------- 41 | 42 | .. 
automodule:: chapters.Cluster_EM.kmeans 43 | :members: 44 | :undoc-members: 45 | :show-inheritance: 46 | 47 | 48 | Module contents 49 | --------------- 50 | 51 | .. automodule:: chapters.Cluster_EM 52 | :members: 53 | :undoc-members: 54 | :show-inheritance: 55 | -------------------------------------------------------------------------------- /docs/build/html/_sources/chapters.Decision_Tree.txt: -------------------------------------------------------------------------------- 1 | chapters.Decision_Tree package 2 | ============================== 3 | 4 | Submodules 5 | ---------- 6 | 7 | chapters.Decision_Tree.decisiontree_classifier module 8 | ----------------------------------------------------- 9 | 10 | .. automodule:: chapters.Decision_Tree.decisiontree_classifier 11 | :members: 12 | :undoc-members: 13 | :show-inheritance: 14 | 15 | chapters.Decision_Tree.decisiontree_regressor module 16 | ---------------------------------------------------- 17 | 18 | .. automodule:: chapters.Decision_Tree.decisiontree_regressor 19 | :members: 20 | :undoc-members: 21 | :show-inheritance: 22 | 23 | 24 | Module contents 25 | --------------- 26 | 27 | .. automodule:: chapters.Decision_Tree 28 | :members: 29 | :undoc-members: 30 | :show-inheritance: 31 | -------------------------------------------------------------------------------- /docs/build/html/_sources/chapters.Ensemble.txt: -------------------------------------------------------------------------------- 1 | chapters.Ensemble package 2 | ========================= 3 | 4 | Submodules 5 | ---------- 6 | 7 | chapters.Ensemble.adaboost_classifier module 8 | -------------------------------------------- 9 | 10 | .. automodule:: chapters.Ensemble.adaboost_classifier 11 | :members: 12 | :undoc-members: 13 | :show-inheritance: 14 | 15 | chapters.Ensemble.adaboost_regressor module 16 | ------------------------------------------- 17 | 18 | .. 
automodule:: chapters.Ensemble.adaboost_regressor 19 | :members: 20 | :undoc-members: 21 | :show-inheritance: 22 | 23 | chapters.Ensemble.gradientboosting_classifier module 24 | ---------------------------------------------------- 25 | 26 | .. automodule:: chapters.Ensemble.gradientboosting_classifier 27 | :members: 28 | :undoc-members: 29 | :show-inheritance: 30 | 31 | chapters.Ensemble.gradientboosting_regressor module 32 | --------------------------------------------------- 33 | 34 | .. automodule:: chapters.Ensemble.gradientboosting_regressor 35 | :members: 36 | :undoc-members: 37 | :show-inheritance: 38 | 39 | chapters.Ensemble.randomforest_classifier module 40 | ------------------------------------------------ 41 | 42 | .. automodule:: chapters.Ensemble.randomforest_classifier 43 | :members: 44 | :undoc-members: 45 | :show-inheritance: 46 | 47 | chapters.Ensemble.randomforest_regressor module 48 | ----------------------------------------------- 49 | 50 | .. automodule:: chapters.Ensemble.randomforest_regressor 51 | :members: 52 | :undoc-members: 53 | :show-inheritance: 54 | 55 | 56 | Module contents 57 | --------------- 58 | 59 | .. automodule:: chapters.Ensemble 60 | :members: 61 | :undoc-members: 62 | :show-inheritance: 63 | -------------------------------------------------------------------------------- /docs/build/html/_sources/chapters.KNN_Dimension_Reduction.txt: -------------------------------------------------------------------------------- 1 | chapters.KNN_Dimension_Reduction package 2 | ======================================== 3 | 4 | Submodules 5 | ---------- 6 | 7 | chapters.KNN_Dimension_Reduction.isomap module 8 | ---------------------------------------------- 9 | 10 | .. automodule:: chapters.KNN_Dimension_Reduction.isomap 11 | :members: 12 | :undoc-members: 13 | :show-inheritance: 14 | 15 | chapters.KNN_Dimension_Reduction.kneighbors_classifier module 16 | ------------------------------------------------------------- 17 | 18 | .. 
automodule:: chapters.KNN_Dimension_Reduction.kneighbors_classifier 19 | :members: 20 | :undoc-members: 21 | :show-inheritance: 22 | 23 | chapters.KNN_Dimension_Reduction.kneighbors_regressor module 24 | ------------------------------------------------------------ 25 | 26 | .. automodule:: chapters.KNN_Dimension_Reduction.kneighbors_regressor 27 | :members: 28 | :undoc-members: 29 | :show-inheritance: 30 | 31 | chapters.KNN_Dimension_Reduction.kpca module 32 | -------------------------------------------- 33 | 34 | .. automodule:: chapters.KNN_Dimension_Reduction.kpca 35 | :members: 36 | :undoc-members: 37 | :show-inheritance: 38 | 39 | chapters.KNN_Dimension_Reduction.lle module 40 | ------------------------------------------- 41 | 42 | .. automodule:: chapters.KNN_Dimension_Reduction.lle 43 | :members: 44 | :undoc-members: 45 | :show-inheritance: 46 | 47 | chapters.KNN_Dimension_Reduction.mds module 48 | ------------------------------------------- 49 | 50 | .. automodule:: chapters.KNN_Dimension_Reduction.mds 51 | :members: 52 | :undoc-members: 53 | :show-inheritance: 54 | 55 | chapters.KNN_Dimension_Reduction.pca module 56 | ------------------------------------------- 57 | 58 | .. automodule:: chapters.KNN_Dimension_Reduction.pca 59 | :members: 60 | :undoc-members: 61 | :show-inheritance: 62 | 63 | 64 | Module contents 65 | --------------- 66 | 67 | .. automodule:: chapters.KNN_Dimension_Reduction 68 | :members: 69 | :undoc-members: 70 | :show-inheritance: 71 | -------------------------------------------------------------------------------- /docs/build/html/_sources/chapters.Kaggle.txt: -------------------------------------------------------------------------------- 1 | chapters.Kaggle package 2 | ======================= 3 | 4 | Submodules 5 | ---------- 6 | 7 | chapters.Kaggle.data_clean module 8 | --------------------------------- 9 | 10 | .. 
automodule:: chapters.Kaggle.data_clean 11 | :members: 12 | :undoc-members: 13 | :show-inheritance: 14 | 15 | chapters.Kaggle.data_preprocess module 16 | -------------------------------------- 17 | 18 | .. automodule:: chapters.Kaggle.data_preprocess 19 | :members: 20 | :undoc-members: 21 | :show-inheritance: 22 | 23 | chapters.Kaggle.grid_search module 24 | ---------------------------------- 25 | 26 | .. automodule:: chapters.Kaggle.grid_search 27 | :members: 28 | :undoc-members: 29 | :show-inheritance: 30 | 31 | chapters.Kaggle.learning_validation_curve module 32 | ------------------------------------------------ 33 | 34 | .. automodule:: chapters.Kaggle.learning_validation_curve 35 | :members: 36 | :undoc-members: 37 | :show-inheritance: 38 | 39 | 40 | Module contents 41 | --------------- 42 | 43 | .. automodule:: chapters.Kaggle 44 | :members: 45 | :undoc-members: 46 | :show-inheritance: 47 | -------------------------------------------------------------------------------- /docs/build/html/_sources/chapters.Linear.txt: -------------------------------------------------------------------------------- 1 | chapters.Linear package 2 | ======================= 3 | 4 | Submodules 5 | ---------- 6 | 7 | chapters.Linear.elasticnet module 8 | --------------------------------- 9 | 10 | .. automodule:: chapters.Linear.elasticnet 11 | :members: 12 | :undoc-members: 13 | :show-inheritance: 14 | 15 | chapters.Linear.lasso module 16 | ---------------------------- 17 | 18 | .. automodule:: chapters.Linear.lasso 19 | :members: 20 | :undoc-members: 21 | :show-inheritance: 22 | 23 | chapters.Linear.lda module 24 | -------------------------- 25 | 26 | .. automodule:: chapters.Linear.lda 27 | :members: 28 | :undoc-members: 29 | :show-inheritance: 30 | 31 | chapters.Linear.linear_regression module 32 | ---------------------------------------- 33 | 34 | .. 
automodule:: chapters.Linear.linear_regression 35 | :members: 36 | :undoc-members: 37 | :show-inheritance: 38 | 39 | chapters.Linear.logistic_regression module 40 | ------------------------------------------ 41 | 42 | .. automodule:: chapters.Linear.logistic_regression 43 | :members: 44 | :undoc-members: 45 | :show-inheritance: 46 | 47 | chapters.Linear.ridge module 48 | ---------------------------- 49 | 50 | .. automodule:: chapters.Linear.ridge 51 | :members: 52 | :undoc-members: 53 | :show-inheritance: 54 | 55 | 56 | Module contents 57 | --------------- 58 | 59 | .. automodule:: chapters.Linear 60 | :members: 61 | :undoc-members: 62 | :show-inheritance: 63 | -------------------------------------------------------------------------------- /docs/build/html/_sources/chapters.Model_Selection.txt: -------------------------------------------------------------------------------- 1 | chapters.Model_Selection package 2 | ================================ 3 | 4 | Submodules 5 | ---------- 6 | 7 | chapters.Model_Selection.classification_metrics module 8 | ------------------------------------------------------ 9 | 10 | .. automodule:: chapters.Model_Selection.classification_metrics 11 | :members: 12 | :undoc-members: 13 | :show-inheritance: 14 | 15 | chapters.Model_Selection.data_splittion module 16 | ---------------------------------------------- 17 | 18 | .. automodule:: chapters.Model_Selection.data_splittion 19 | :members: 20 | :undoc-members: 21 | :show-inheritance: 22 | 23 | chapters.Model_Selection.grid_search module 24 | ------------------------------------------- 25 | 26 | .. automodule:: chapters.Model_Selection.grid_search 27 | :members: 28 | :undoc-members: 29 | :show-inheritance: 30 | 31 | chapters.Model_Selection.learning_curve module 32 | ---------------------------------------------- 33 | 34 | .. 
automodule:: chapters.Model_Selection.learning_curve 35 | :members: 36 | :undoc-members: 37 | :show-inheritance: 38 | 39 | chapters.Model_Selection.loss_function module 40 | --------------------------------------------- 41 | 42 | .. automodule:: chapters.Model_Selection.loss_function 43 | :members: 44 | :undoc-members: 45 | :show-inheritance: 46 | 47 | chapters.Model_Selection.regression_metrics module 48 | -------------------------------------------------- 49 | 50 | .. automodule:: chapters.Model_Selection.regression_metrics 51 | :members: 52 | :undoc-members: 53 | :show-inheritance: 54 | 55 | chapters.Model_Selection.validation_curve module 56 | ------------------------------------------------ 57 | 58 | .. automodule:: chapters.Model_Selection.validation_curve 59 | :members: 60 | :undoc-members: 61 | :show-inheritance: 62 | 63 | 64 | Module contents 65 | --------------- 66 | 67 | .. automodule:: chapters.Model_Selection 68 | :members: 69 | :undoc-members: 70 | :show-inheritance: 71 | -------------------------------------------------------------------------------- /docs/build/html/_sources/chapters.Perceptron_Neural_Network.txt: -------------------------------------------------------------------------------- 1 | chapters.Perceptron_Neural_Network package 2 | ========================================== 3 | 4 | Submodules 5 | ---------- 6 | 7 | chapters.Perceptron_Neural_Network.neural_network module 8 | -------------------------------------------------------- 9 | 10 | .. automodule:: chapters.Perceptron_Neural_Network.neural_network 11 | :members: 12 | :undoc-members: 13 | :show-inheritance: 14 | 15 | chapters.Perceptron_Neural_Network.neural_network_iris module 16 | ------------------------------------------------------------- 17 | 18 | .. 
automodule:: chapters.Perceptron_Neural_Network.neural_network_iris 19 | :members: 20 | :undoc-members: 21 | :show-inheritance: 22 | 23 | chapters.Perceptron_Neural_Network.perceptron module 24 | ---------------------------------------------------- 25 | 26 | .. automodule:: chapters.Perceptron_Neural_Network.perceptron 27 | :members: 28 | :undoc-members: 29 | :show-inheritance: 30 | 31 | 32 | Module contents 33 | --------------- 34 | 35 | .. automodule:: chapters.Perceptron_Neural_Network 36 | :members: 37 | :undoc-members: 38 | :show-inheritance: 39 | -------------------------------------------------------------------------------- /docs/build/html/_sources/chapters.PreProcessing.txt: -------------------------------------------------------------------------------- 1 | chapters.PreProcessing package 2 | ============================== 3 | 4 | Submodules 5 | ---------- 6 | 7 | chapters.PreProcessing.binarize module 8 | -------------------------------------- 9 | 10 | .. automodule:: chapters.PreProcessing.binarize 11 | :members: 12 | :undoc-members: 13 | :show-inheritance: 14 | 15 | chapters.PreProcessing.dictionary_learning module 16 | ------------------------------------------------- 17 | 18 | .. automodule:: chapters.PreProcessing.dictionary_learning 19 | :members: 20 | :undoc-members: 21 | :show-inheritance: 22 | 23 | chapters.PreProcessing.feature_selection_bagging module 24 | ------------------------------------------------------- 25 | 26 | .. automodule:: chapters.PreProcessing.feature_selection_bagging 27 | :members: 28 | :undoc-members: 29 | :show-inheritance: 30 | 31 | chapters.PreProcessing.feature_selection_embeded module 32 | ------------------------------------------------------- 33 | 34 | .. automodule:: chapters.PreProcessing.feature_selection_embeded 35 | :members: 36 | :undoc-members: 37 | :show-inheritance: 38 | 39 | chapters.PreProcessing.feature_selection_filter module 40 | ------------------------------------------------------ 41 | 42 | .. 
automodule:: chapters.PreProcessing.feature_selection_filter 43 | :members: 44 | :undoc-members: 45 | :show-inheritance: 46 | 47 | chapters.PreProcessing.normalize module 48 | --------------------------------------- 49 | 50 | .. automodule:: chapters.PreProcessing.normalize 51 | :members: 52 | :undoc-members: 53 | :show-inheritance: 54 | 55 | chapters.PreProcessing.onehot_encode module 56 | ------------------------------------------- 57 | 58 | .. automodule:: chapters.PreProcessing.onehot_encode 59 | :members: 60 | :undoc-members: 61 | :show-inheritance: 62 | 63 | chapters.PreProcessing.pipeline module 64 | -------------------------------------- 65 | 66 | .. automodule:: chapters.PreProcessing.pipeline 67 | :members: 68 | :undoc-members: 69 | :show-inheritance: 70 | 71 | chapters.PreProcessing.standardize module 72 | ----------------------------------------- 73 | 74 | .. automodule:: chapters.PreProcessing.standardize 75 | :members: 76 | :undoc-members: 77 | :show-inheritance: 78 | 79 | 80 | Module contents 81 | --------------- 82 | 83 | .. automodule:: chapters.PreProcessing 84 | :members: 85 | :undoc-members: 86 | :show-inheritance: 87 | -------------------------------------------------------------------------------- /docs/build/html/_sources/chapters.SVM.txt: -------------------------------------------------------------------------------- 1 | chapters.SVM package 2 | ==================== 3 | 4 | Submodules 5 | ---------- 6 | 7 | chapters.SVM.SVC module 8 | ----------------------- 9 | 10 | .. automodule:: chapters.SVM.SVC 11 | :members: 12 | :undoc-members: 13 | :show-inheritance: 14 | 15 | chapters.SVM.SVR module 16 | ----------------------- 17 | 18 | .. automodule:: chapters.SVM.SVR 19 | :members: 20 | :undoc-members: 21 | :show-inheritance: 22 | 23 | chapters.SVM.linearSVC module 24 | ----------------------------- 25 | 26 | .. 
automodule:: chapters.SVM.linearSVC 27 | :members: 28 | :undoc-members: 29 | :show-inheritance: 30 | 31 | chapters.SVM.linearSVR module 32 | ----------------------------- 33 | 34 | .. automodule:: chapters.SVM.linearSVR 35 | :members: 36 | :undoc-members: 37 | :show-inheritance: 38 | 39 | 40 | Module contents 41 | --------------- 42 | 43 | .. automodule:: chapters.SVM 44 | :members: 45 | :undoc-members: 46 | :show-inheritance: 47 | -------------------------------------------------------------------------------- /docs/build/html/_sources/chapters.Semi_Supervised_Learning.txt: -------------------------------------------------------------------------------- 1 | chapters.Semi_Supervised_Learning package 2 | ========================================= 3 | 4 | Submodules 5 | ---------- 6 | 7 | chapters.Semi_Supervised_Learning.labelPropagation module 8 | --------------------------------------------------------- 9 | 10 | .. automodule:: chapters.Semi_Supervised_Learning.labelPropagation 11 | :members: 12 | :undoc-members: 13 | :show-inheritance: 14 | 15 | chapters.Semi_Supervised_Learning.labelSpreading module 16 | ------------------------------------------------------- 17 | 18 | .. automodule:: chapters.Semi_Supervised_Learning.labelSpreading 19 | :members: 20 | :undoc-members: 21 | :show-inheritance: 22 | 23 | 24 | Module contents 25 | --------------- 26 | 27 | .. automodule:: chapters.Semi_Supervised_Learning 28 | :members: 29 | :undoc-members: 30 | :show-inheritance: 31 | -------------------------------------------------------------------------------- /docs/build/html/_sources/chapters.txt: -------------------------------------------------------------------------------- 1 | chapters package 2 | ================ 3 | 4 | Subpackages 5 | ----------- 6 | 7 | .. 
toctree:: 8 | 9 | chapters.Bayesian 10 | chapters.Cluster_EM 11 | chapters.Decision_Tree 12 | chapters.Ensemble 13 | chapters.KNN_Dimension_Reduction 14 | chapters.Kaggle 15 | chapters.Linear 16 | chapters.Model_Selection 17 | chapters.Perceptron_Neural_Network 18 | chapters.PreProcessing 19 | chapters.SVM 20 | chapters.Semi_Supervised_Learning 21 | 22 | Module contents 23 | --------------- 24 | 25 | .. automodule:: chapters 26 | :members: 27 | :undoc-members: 28 | :show-inheritance: 29 | -------------------------------------------------------------------------------- /docs/build/html/_sources/index.txt: -------------------------------------------------------------------------------- 1 | .. book documentation master file, created by 2 | sphinx-quickstart on Wed Aug 17 17:09:32 2016. 3 | You can adapt this file completely to your liking, but it should at least 4 | contain the root `toctree` directive. 5 | 6 | Welcome to book's documentation! 7 | ================================ 8 | 9 | Contents: 10 | 11 | .. toctree:: 12 | :maxdepth: 2 13 | 14 | 15 | 16 | Indices and tables 17 | ================== 18 | 19 | * :ref:`genindex` 20 | * :ref:`modindex` 21 | * :ref:`search` 22 | 23 | -------------------------------------------------------------------------------- /docs/build/html/_sources/modules.txt: -------------------------------------------------------------------------------- 1 | chapters 2 | ======== 3 | 4 | .. 
toctree:: 5 | :maxdepth: 4 6 | 7 | chapters 8 | -------------------------------------------------------------------------------- /docs/build/html/_static/ajax-loader.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huaxz1986/git_book/cf539065568c2588df87729c1c76303016933449/docs/build/html/_static/ajax-loader.gif -------------------------------------------------------------------------------- /docs/build/html/_static/comment-bright.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huaxz1986/git_book/cf539065568c2588df87729c1c76303016933449/docs/build/html/_static/comment-bright.png -------------------------------------------------------------------------------- /docs/build/html/_static/comment-close.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huaxz1986/git_book/cf539065568c2588df87729c1c76303016933449/docs/build/html/_static/comment-close.png -------------------------------------------------------------------------------- /docs/build/html/_static/comment.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huaxz1986/git_book/cf539065568c2588df87729c1c76303016933449/docs/build/html/_static/comment.png -------------------------------------------------------------------------------- /docs/build/html/_static/down-pressed.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huaxz1986/git_book/cf539065568c2588df87729c1c76303016933449/docs/build/html/_static/down-pressed.png -------------------------------------------------------------------------------- /docs/build/html/_static/down.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/huaxz1986/git_book/cf539065568c2588df87729c1c76303016933449/docs/build/html/_static/down.png -------------------------------------------------------------------------------- /docs/build/html/_static/file.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huaxz1986/git_book/cf539065568c2588df87729c1c76303016933449/docs/build/html/_static/file.png -------------------------------------------------------------------------------- /docs/build/html/_static/minus.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huaxz1986/git_book/cf539065568c2588df87729c1c76303016933449/docs/build/html/_static/minus.png -------------------------------------------------------------------------------- /docs/build/html/_static/plus.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huaxz1986/git_book/cf539065568c2588df87729c1c76303016933449/docs/build/html/_static/plus.png -------------------------------------------------------------------------------- /docs/build/html/_static/pygments.css: -------------------------------------------------------------------------------- 1 | .highlight .hll { background-color: #ffffcc } 2 | .highlight { background: #eeffcc; } 3 | .highlight .c { color: #408090; font-style: italic } /* Comment */ 4 | .highlight .err { border: 1px solid #FF0000 } /* Error */ 5 | .highlight .k { color: #007020; font-weight: bold } /* Keyword */ 6 | .highlight .o { color: #666666 } /* Operator */ 7 | .highlight .ch { color: #408090; font-style: italic } /* Comment.Hashbang */ 8 | .highlight .cm { color: #408090; font-style: italic } /* Comment.Multiline */ 9 | .highlight .cp { color: #007020 } /* Comment.Preproc */ 10 | .highlight .cpf { color: #408090; font-style: italic } /* Comment.PreprocFile */ 11 | .highlight .c1 { color: #408090; 
font-style: italic } /* Comment.Single */ 12 | .highlight .cs { color: #408090; background-color: #fff0f0 } /* Comment.Special */ 13 | .highlight .gd { color: #A00000 } /* Generic.Deleted */ 14 | .highlight .ge { font-style: italic } /* Generic.Emph */ 15 | .highlight .gr { color: #FF0000 } /* Generic.Error */ 16 | .highlight .gh { color: #000080; font-weight: bold } /* Generic.Heading */ 17 | .highlight .gi { color: #00A000 } /* Generic.Inserted */ 18 | .highlight .go { color: #333333 } /* Generic.Output */ 19 | .highlight .gp { color: #c65d09; font-weight: bold } /* Generic.Prompt */ 20 | .highlight .gs { font-weight: bold } /* Generic.Strong */ 21 | .highlight .gu { color: #800080; font-weight: bold } /* Generic.Subheading */ 22 | .highlight .gt { color: #0044DD } /* Generic.Traceback */ 23 | .highlight .kc { color: #007020; font-weight: bold } /* Keyword.Constant */ 24 | .highlight .kd { color: #007020; font-weight: bold } /* Keyword.Declaration */ 25 | .highlight .kn { color: #007020; font-weight: bold } /* Keyword.Namespace */ 26 | .highlight .kp { color: #007020 } /* Keyword.Pseudo */ 27 | .highlight .kr { color: #007020; font-weight: bold } /* Keyword.Reserved */ 28 | .highlight .kt { color: #902000 } /* Keyword.Type */ 29 | .highlight .m { color: #208050 } /* Literal.Number */ 30 | .highlight .s { color: #4070a0 } /* Literal.String */ 31 | .highlight .na { color: #4070a0 } /* Name.Attribute */ 32 | .highlight .nb { color: #007020 } /* Name.Builtin */ 33 | .highlight .nc { color: #0e84b5; font-weight: bold } /* Name.Class */ 34 | .highlight .no { color: #60add5 } /* Name.Constant */ 35 | .highlight .nd { color: #555555; font-weight: bold } /* Name.Decorator */ 36 | .highlight .ni { color: #d55537; font-weight: bold } /* Name.Entity */ 37 | .highlight .ne { color: #007020 } /* Name.Exception */ 38 | .highlight .nf { color: #06287e } /* Name.Function */ 39 | .highlight .nl { color: #002070; font-weight: bold } /* Name.Label */ 40 | .highlight .nn { color: 
#0e84b5; font-weight: bold } /* Name.Namespace */ 41 | .highlight .nt { color: #062873; font-weight: bold } /* Name.Tag */ 42 | .highlight .nv { color: #bb60d5 } /* Name.Variable */ 43 | .highlight .ow { color: #007020; font-weight: bold } /* Operator.Word */ 44 | .highlight .w { color: #bbbbbb } /* Text.Whitespace */ 45 | .highlight .mb { color: #208050 } /* Literal.Number.Bin */ 46 | .highlight .mf { color: #208050 } /* Literal.Number.Float */ 47 | .highlight .mh { color: #208050 } /* Literal.Number.Hex */ 48 | .highlight .mi { color: #208050 } /* Literal.Number.Integer */ 49 | .highlight .mo { color: #208050 } /* Literal.Number.Oct */ 50 | .highlight .sb { color: #4070a0 } /* Literal.String.Backtick */ 51 | .highlight .sc { color: #4070a0 } /* Literal.String.Char */ 52 | .highlight .sd { color: #4070a0; font-style: italic } /* Literal.String.Doc */ 53 | .highlight .s2 { color: #4070a0 } /* Literal.String.Double */ 54 | .highlight .se { color: #4070a0; font-weight: bold } /* Literal.String.Escape */ 55 | .highlight .sh { color: #4070a0 } /* Literal.String.Heredoc */ 56 | .highlight .si { color: #70a0d0; font-style: italic } /* Literal.String.Interpol */ 57 | .highlight .sx { color: #c65d09 } /* Literal.String.Other */ 58 | .highlight .sr { color: #235388 } /* Literal.String.Regex */ 59 | .highlight .s1 { color: #4070a0 } /* Literal.String.Single */ 60 | .highlight .ss { color: #517918 } /* Literal.String.Symbol */ 61 | .highlight .bp { color: #007020 } /* Name.Builtin.Pseudo */ 62 | .highlight .vc { color: #bb60d5 } /* Name.Variable.Class */ 63 | .highlight .vg { color: #bb60d5 } /* Name.Variable.Global */ 64 | .highlight .vi { color: #bb60d5 } /* Name.Variable.Instance */ 65 | .highlight .il { color: #208050 } /* Literal.Number.Integer.Long */ -------------------------------------------------------------------------------- /docs/build/html/_static/up-pressed.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/huaxz1986/git_book/cf539065568c2588df87729c1c76303016933449/docs/build/html/_static/up-pressed.png -------------------------------------------------------------------------------- /docs/build/html/_static/up.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huaxz1986/git_book/cf539065568c2588df87729c1c76303016933449/docs/build/html/_static/up.png -------------------------------------------------------------------------------- /docs/build/html/index.html: -------------------------------------------------------------------------------- 1 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | Welcome to book’s documentation! — book 1.0 documentation 10 | 11 | 12 | 13 | 14 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 41 | 42 |
43 |
44 |
45 |
46 | 47 |
48 |

Welcome to book’s documentation!

49 |

Contents:

50 |
51 |
    52 |
53 |
54 |
55 |
56 |

Indices and tables

57 | 62 |
63 | 64 | 65 |
66 |
67 |
68 | 95 |
96 |
97 | 109 | 113 | 114 | -------------------------------------------------------------------------------- /docs/build/html/objects.inv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huaxz1986/git_book/cf539065568c2588df87729c1c76303016933449/docs/build/html/objects.inv -------------------------------------------------------------------------------- /docs/build/html/search.html: -------------------------------------------------------------------------------- 1 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | Search — book 1.0 documentation 10 | 11 | 12 | 13 | 14 | 23 | 24 | 25 | 26 | 27 | 28 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 49 | 50 |
51 |
52 |
53 |
54 | 55 |

Search

56 |
57 | 58 |

59 | Please activate JavaScript to enable the search 60 | functionality. 61 |

62 |
63 |

64 | From here you can search these documents. Enter your search 65 | words into the box below and click "search". Note that the search 66 | function will automatically search for all of the words. Pages 67 | containing fewer words won't appear in the result list. 68 |

69 |
70 | 71 | 72 | 73 |
74 | 75 |
76 | 77 |
78 | 79 |
80 |
81 |
82 | 86 |
87 |
88 | 100 | 104 | 105 | -------------------------------------------------------------------------------- /docs/source/chapters.Bayesian.rst: -------------------------------------------------------------------------------- 1 | chapters.Bayesian package 2 | ========================= 3 | 4 | Submodules 5 | ---------- 6 | 7 | chapters.Bayesian.bayesian module 8 | --------------------------------- 9 | 10 | .. automodule:: chapters.Bayesian.bayesian 11 | :members: 12 | :undoc-members: 13 | :show-inheritance: 14 | 15 | chapters.Bayesian.bernoulliNB module 16 | ------------------------------------ 17 | 18 | .. automodule:: chapters.Bayesian.bernoulliNB 19 | :members: 20 | :undoc-members: 21 | :show-inheritance: 22 | 23 | chapters.Bayesian.gaussianNB module 24 | ----------------------------------- 25 | 26 | .. automodule:: chapters.Bayesian.gaussianNB 27 | :members: 28 | :undoc-members: 29 | :show-inheritance: 30 | 31 | chapters.Bayesian.multinomialNB module 32 | -------------------------------------- 33 | 34 | .. automodule:: chapters.Bayesian.multinomialNB 35 | :members: 36 | :undoc-members: 37 | :show-inheritance: 38 | 39 | 40 | Module contents 41 | --------------- 42 | 43 | .. automodule:: chapters.Bayesian 44 | :members: 45 | :undoc-members: 46 | :show-inheritance: 47 | -------------------------------------------------------------------------------- /docs/source/chapters.Cluster_EM.rst: -------------------------------------------------------------------------------- 1 | chapters.Cluster_EM package 2 | =========================== 3 | 4 | Submodules 5 | ---------- 6 | 7 | chapters.Cluster_EM.agglomerative_clustering module 8 | --------------------------------------------------- 9 | 10 | .. automodule:: chapters.Cluster_EM.agglomerative_clustering 11 | :members: 12 | :undoc-members: 13 | :show-inheritance: 14 | 15 | chapters.Cluster_EM.cluster module 16 | ---------------------------------- 17 | 18 | .. 
automodule:: chapters.Cluster_EM.cluster 19 | :members: 20 | :undoc-members: 21 | :show-inheritance: 22 | 23 | chapters.Cluster_EM.dbscan module 24 | --------------------------------- 25 | 26 | .. automodule:: chapters.Cluster_EM.dbscan 27 | :members: 28 | :undoc-members: 29 | :show-inheritance: 30 | 31 | chapters.Cluster_EM.gmm module 32 | ------------------------------ 33 | 34 | .. automodule:: chapters.Cluster_EM.gmm 35 | :members: 36 | :undoc-members: 37 | :show-inheritance: 38 | 39 | chapters.Cluster_EM.kmeans module 40 | --------------------------------- 41 | 42 | .. automodule:: chapters.Cluster_EM.kmeans 43 | :members: 44 | :undoc-members: 45 | :show-inheritance: 46 | 47 | 48 | Module contents 49 | --------------- 50 | 51 | .. automodule:: chapters.Cluster_EM 52 | :members: 53 | :undoc-members: 54 | :show-inheritance: 55 | -------------------------------------------------------------------------------- /docs/source/chapters.Decision_Tree.rst: -------------------------------------------------------------------------------- 1 | chapters.Decision_Tree package 2 | ============================== 3 | 4 | Submodules 5 | ---------- 6 | 7 | chapters.Decision_Tree.decisiontree_classifier module 8 | ----------------------------------------------------- 9 | 10 | .. automodule:: chapters.Decision_Tree.decisiontree_classifier 11 | :members: 12 | :undoc-members: 13 | :show-inheritance: 14 | 15 | chapters.Decision_Tree.decisiontree_regressor module 16 | ---------------------------------------------------- 17 | 18 | .. automodule:: chapters.Decision_Tree.decisiontree_regressor 19 | :members: 20 | :undoc-members: 21 | :show-inheritance: 22 | 23 | 24 | Module contents 25 | --------------- 26 | 27 | .. 
automodule:: chapters.Decision_Tree 28 | :members: 29 | :undoc-members: 30 | :show-inheritance: 31 | -------------------------------------------------------------------------------- /docs/source/chapters.Ensemble.rst: -------------------------------------------------------------------------------- 1 | chapters.Ensemble package 2 | ========================= 3 | 4 | Submodules 5 | ---------- 6 | 7 | chapters.Ensemble.adaboost_classifier module 8 | -------------------------------------------- 9 | 10 | .. automodule:: chapters.Ensemble.adaboost_classifier 11 | :members: 12 | :undoc-members: 13 | :show-inheritance: 14 | 15 | chapters.Ensemble.adaboost_regressor module 16 | ------------------------------------------- 17 | 18 | .. automodule:: chapters.Ensemble.adaboost_regressor 19 | :members: 20 | :undoc-members: 21 | :show-inheritance: 22 | 23 | chapters.Ensemble.gradientboosting_classifier module 24 | ---------------------------------------------------- 25 | 26 | .. automodule:: chapters.Ensemble.gradientboosting_classifier 27 | :members: 28 | :undoc-members: 29 | :show-inheritance: 30 | 31 | chapters.Ensemble.gradientboosting_regressor module 32 | --------------------------------------------------- 33 | 34 | .. automodule:: chapters.Ensemble.gradientboosting_regressor 35 | :members: 36 | :undoc-members: 37 | :show-inheritance: 38 | 39 | chapters.Ensemble.randomforest_classifier module 40 | ------------------------------------------------ 41 | 42 | .. automodule:: chapters.Ensemble.randomforest_classifier 43 | :members: 44 | :undoc-members: 45 | :show-inheritance: 46 | 47 | chapters.Ensemble.randomforest_regressor module 48 | ----------------------------------------------- 49 | 50 | .. automodule:: chapters.Ensemble.randomforest_regressor 51 | :members: 52 | :undoc-members: 53 | :show-inheritance: 54 | 55 | 56 | Module contents 57 | --------------- 58 | 59 | .. 
automodule:: chapters.Ensemble 60 | :members: 61 | :undoc-members: 62 | :show-inheritance: 63 | -------------------------------------------------------------------------------- /docs/source/chapters.KNN_Dimension_Reduction.rst: -------------------------------------------------------------------------------- 1 | chapters.KNN_Dimension_Reduction package 2 | ======================================== 3 | 4 | Submodules 5 | ---------- 6 | 7 | chapters.KNN_Dimension_Reduction.isomap module 8 | ---------------------------------------------- 9 | 10 | .. automodule:: chapters.KNN_Dimension_Reduction.isomap 11 | :members: 12 | :undoc-members: 13 | :show-inheritance: 14 | 15 | chapters.KNN_Dimension_Reduction.kneighbors_classifier module 16 | ------------------------------------------------------------- 17 | 18 | .. automodule:: chapters.KNN_Dimension_Reduction.kneighbors_classifier 19 | :members: 20 | :undoc-members: 21 | :show-inheritance: 22 | 23 | chapters.KNN_Dimension_Reduction.kneighbors_regressor module 24 | ------------------------------------------------------------ 25 | 26 | .. automodule:: chapters.KNN_Dimension_Reduction.kneighbors_regressor 27 | :members: 28 | :undoc-members: 29 | :show-inheritance: 30 | 31 | chapters.KNN_Dimension_Reduction.kpca module 32 | -------------------------------------------- 33 | 34 | .. automodule:: chapters.KNN_Dimension_Reduction.kpca 35 | :members: 36 | :undoc-members: 37 | :show-inheritance: 38 | 39 | chapters.KNN_Dimension_Reduction.lle module 40 | ------------------------------------------- 41 | 42 | .. automodule:: chapters.KNN_Dimension_Reduction.lle 43 | :members: 44 | :undoc-members: 45 | :show-inheritance: 46 | 47 | chapters.KNN_Dimension_Reduction.mds module 48 | ------------------------------------------- 49 | 50 | .. 
automodule:: chapters.KNN_Dimension_Reduction.mds 51 | :members: 52 | :undoc-members: 53 | :show-inheritance: 54 | 55 | chapters.KNN_Dimension_Reduction.pca module 56 | ------------------------------------------- 57 | 58 | .. automodule:: chapters.KNN_Dimension_Reduction.pca 59 | :members: 60 | :undoc-members: 61 | :show-inheritance: 62 | 63 | 64 | Module contents 65 | --------------- 66 | 67 | .. automodule:: chapters.KNN_Dimension_Reduction 68 | :members: 69 | :undoc-members: 70 | :show-inheritance: 71 | -------------------------------------------------------------------------------- /docs/source/chapters.Kaggle.rst: -------------------------------------------------------------------------------- 1 | chapters.Kaggle package 2 | ======================= 3 | 4 | Submodules 5 | ---------- 6 | 7 | chapters.Kaggle.data_clean module 8 | --------------------------------- 9 | 10 | .. automodule:: chapters.Kaggle.data_clean 11 | :members: 12 | :undoc-members: 13 | :show-inheritance: 14 | 15 | chapters.Kaggle.data_preprocess module 16 | -------------------------------------- 17 | 18 | .. automodule:: chapters.Kaggle.data_preprocess 19 | :members: 20 | :undoc-members: 21 | :show-inheritance: 22 | 23 | chapters.Kaggle.grid_search module 24 | ---------------------------------- 25 | 26 | .. automodule:: chapters.Kaggle.grid_search 27 | :members: 28 | :undoc-members: 29 | :show-inheritance: 30 | 31 | chapters.Kaggle.learning_validation_curve module 32 | ------------------------------------------------ 33 | 34 | .. automodule:: chapters.Kaggle.learning_validation_curve 35 | :members: 36 | :undoc-members: 37 | :show-inheritance: 38 | 39 | 40 | Module contents 41 | --------------- 42 | 43 | .. 
automodule:: chapters.Kaggle 44 | :members: 45 | :undoc-members: 46 | :show-inheritance: 47 | -------------------------------------------------------------------------------- /docs/source/chapters.Linear.rst: -------------------------------------------------------------------------------- 1 | chapters.Linear package 2 | ======================= 3 | 4 | Submodules 5 | ---------- 6 | 7 | chapters.Linear.elasticnet module 8 | --------------------------------- 9 | 10 | .. automodule:: chapters.Linear.elasticnet 11 | :members: 12 | :undoc-members: 13 | :show-inheritance: 14 | 15 | chapters.Linear.lasso module 16 | ---------------------------- 17 | 18 | .. automodule:: chapters.Linear.lasso 19 | :members: 20 | :undoc-members: 21 | :show-inheritance: 22 | 23 | chapters.Linear.lda module 24 | -------------------------- 25 | 26 | .. automodule:: chapters.Linear.lda 27 | :members: 28 | :undoc-members: 29 | :show-inheritance: 30 | 31 | chapters.Linear.linear_regression module 32 | ---------------------------------------- 33 | 34 | .. automodule:: chapters.Linear.linear_regression 35 | :members: 36 | :undoc-members: 37 | :show-inheritance: 38 | 39 | chapters.Linear.logistic_regression module 40 | ------------------------------------------ 41 | 42 | .. automodule:: chapters.Linear.logistic_regression 43 | :members: 44 | :undoc-members: 45 | :show-inheritance: 46 | 47 | chapters.Linear.ridge module 48 | ---------------------------- 49 | 50 | .. automodule:: chapters.Linear.ridge 51 | :members: 52 | :undoc-members: 53 | :show-inheritance: 54 | 55 | 56 | Module contents 57 | --------------- 58 | 59 | .. 
automodule:: chapters.Linear 60 | :members: 61 | :undoc-members: 62 | :show-inheritance: 63 | -------------------------------------------------------------------------------- /docs/source/chapters.Model_Selection.rst: -------------------------------------------------------------------------------- 1 | chapters.Model_Selection package 2 | ================================ 3 | 4 | Submodules 5 | ---------- 6 | 7 | chapters.Model_Selection.classification_metrics module 8 | ------------------------------------------------------ 9 | 10 | .. automodule:: chapters.Model_Selection.classification_metrics 11 | :members: 12 | :undoc-members: 13 | :show-inheritance: 14 | 15 | chapters.Model_Selection.data_splittion module 16 | ---------------------------------------------- 17 | 18 | .. automodule:: chapters.Model_Selection.data_splittion 19 | :members: 20 | :undoc-members: 21 | :show-inheritance: 22 | 23 | chapters.Model_Selection.grid_search module 24 | ------------------------------------------- 25 | 26 | .. automodule:: chapters.Model_Selection.grid_search 27 | :members: 28 | :undoc-members: 29 | :show-inheritance: 30 | 31 | chapters.Model_Selection.learning_curve module 32 | ---------------------------------------------- 33 | 34 | .. automodule:: chapters.Model_Selection.learning_curve 35 | :members: 36 | :undoc-members: 37 | :show-inheritance: 38 | 39 | chapters.Model_Selection.loss_function module 40 | --------------------------------------------- 41 | 42 | .. automodule:: chapters.Model_Selection.loss_function 43 | :members: 44 | :undoc-members: 45 | :show-inheritance: 46 | 47 | chapters.Model_Selection.regression_metrics module 48 | -------------------------------------------------- 49 | 50 | .. automodule:: chapters.Model_Selection.regression_metrics 51 | :members: 52 | :undoc-members: 53 | :show-inheritance: 54 | 55 | chapters.Model_Selection.validation_curve module 56 | ------------------------------------------------ 57 | 58 | .. 
automodule:: chapters.Model_Selection.validation_curve 59 | :members: 60 | :undoc-members: 61 | :show-inheritance: 62 | 63 | 64 | Module contents 65 | --------------- 66 | 67 | .. automodule:: chapters.Model_Selection 68 | :members: 69 | :undoc-members: 70 | :show-inheritance: 71 | -------------------------------------------------------------------------------- /docs/source/chapters.Perceptron_Neural_Network.rst: -------------------------------------------------------------------------------- 1 | chapters.Perceptron_Neural_Network package 2 | ========================================== 3 | 4 | Submodules 5 | ---------- 6 | 7 | chapters.Perceptron_Neural_Network.neural_network module 8 | -------------------------------------------------------- 9 | 10 | .. automodule:: chapters.Perceptron_Neural_Network.neural_network 11 | :members: 12 | :undoc-members: 13 | :show-inheritance: 14 | 15 | chapters.Perceptron_Neural_Network.neural_network_iris module 16 | ------------------------------------------------------------- 17 | 18 | .. automodule:: chapters.Perceptron_Neural_Network.neural_network_iris 19 | :members: 20 | :undoc-members: 21 | :show-inheritance: 22 | 23 | chapters.Perceptron_Neural_Network.perceptron module 24 | ---------------------------------------------------- 25 | 26 | .. automodule:: chapters.Perceptron_Neural_Network.perceptron 27 | :members: 28 | :undoc-members: 29 | :show-inheritance: 30 | 31 | 32 | Module contents 33 | --------------- 34 | 35 | .. 
automodule:: chapters.Perceptron_Neural_Network 36 | :members: 37 | :undoc-members: 38 | :show-inheritance: 39 | -------------------------------------------------------------------------------- /docs/source/chapters.PreProcessing.rst: -------------------------------------------------------------------------------- 1 | chapters.PreProcessing package 2 | ============================== 3 | 4 | Submodules 5 | ---------- 6 | 7 | chapters.PreProcessing.binarize module 8 | -------------------------------------- 9 | 10 | .. automodule:: chapters.PreProcessing.binarize 11 | :members: 12 | :undoc-members: 13 | :show-inheritance: 14 | 15 | chapters.PreProcessing.dictionary_learning module 16 | ------------------------------------------------- 17 | 18 | .. automodule:: chapters.PreProcessing.dictionary_learning 19 | :members: 20 | :undoc-members: 21 | :show-inheritance: 22 | 23 | chapters.PreProcessing.feature_selection_bagging module 24 | ------------------------------------------------------- 25 | 26 | .. automodule:: chapters.PreProcessing.feature_selection_bagging 27 | :members: 28 | :undoc-members: 29 | :show-inheritance: 30 | 31 | chapters.PreProcessing.feature_selection_embeded module 32 | ------------------------------------------------------- 33 | 34 | .. automodule:: chapters.PreProcessing.feature_selection_embeded 35 | :members: 36 | :undoc-members: 37 | :show-inheritance: 38 | 39 | chapters.PreProcessing.feature_selection_filter module 40 | ------------------------------------------------------ 41 | 42 | .. automodule:: chapters.PreProcessing.feature_selection_filter 43 | :members: 44 | :undoc-members: 45 | :show-inheritance: 46 | 47 | chapters.PreProcessing.normalize module 48 | --------------------------------------- 49 | 50 | .. automodule:: chapters.PreProcessing.normalize 51 | :members: 52 | :undoc-members: 53 | :show-inheritance: 54 | 55 | chapters.PreProcessing.onehot_encode module 56 | ------------------------------------------- 57 | 58 | .. 
automodule:: chapters.PreProcessing.onehot_encode 59 | :members: 60 | :undoc-members: 61 | :show-inheritance: 62 | 63 | chapters.PreProcessing.pipeline module 64 | -------------------------------------- 65 | 66 | .. automodule:: chapters.PreProcessing.pipeline 67 | :members: 68 | :undoc-members: 69 | :show-inheritance: 70 | 71 | chapters.PreProcessing.standardize module 72 | ----------------------------------------- 73 | 74 | .. automodule:: chapters.PreProcessing.standardize 75 | :members: 76 | :undoc-members: 77 | :show-inheritance: 78 | 79 | 80 | Module contents 81 | --------------- 82 | 83 | .. automodule:: chapters.PreProcessing 84 | :members: 85 | :undoc-members: 86 | :show-inheritance: 87 | -------------------------------------------------------------------------------- /docs/source/chapters.SVM.rst: -------------------------------------------------------------------------------- 1 | chapters.SVM package 2 | ==================== 3 | 4 | Submodules 5 | ---------- 6 | 7 | chapters.SVM.SVC module 8 | ----------------------- 9 | 10 | .. automodule:: chapters.SVM.SVC 11 | :members: 12 | :undoc-members: 13 | :show-inheritance: 14 | 15 | chapters.SVM.SVR module 16 | ----------------------- 17 | 18 | .. automodule:: chapters.SVM.SVR 19 | :members: 20 | :undoc-members: 21 | :show-inheritance: 22 | 23 | chapters.SVM.linearSVC module 24 | ----------------------------- 25 | 26 | .. automodule:: chapters.SVM.linearSVC 27 | :members: 28 | :undoc-members: 29 | :show-inheritance: 30 | 31 | chapters.SVM.linearSVR module 32 | ----------------------------- 33 | 34 | .. automodule:: chapters.SVM.linearSVR 35 | :members: 36 | :undoc-members: 37 | :show-inheritance: 38 | 39 | 40 | Module contents 41 | --------------- 42 | 43 | .. 
automodule:: chapters.SVM 44 | :members: 45 | :undoc-members: 46 | :show-inheritance: 47 | -------------------------------------------------------------------------------- /docs/source/chapters.Semi_Supervised_Learning.rst: -------------------------------------------------------------------------------- 1 | chapters.Semi_Supervised_Learning package 2 | ========================================= 3 | 4 | Submodules 5 | ---------- 6 | 7 | chapters.Semi_Supervised_Learning.labelPropagation module 8 | --------------------------------------------------------- 9 | 10 | .. automodule:: chapters.Semi_Supervised_Learning.labelPropagation 11 | :members: 12 | :undoc-members: 13 | :show-inheritance: 14 | 15 | chapters.Semi_Supervised_Learning.labelSpreading module 16 | ------------------------------------------------------- 17 | 18 | .. automodule:: chapters.Semi_Supervised_Learning.labelSpreading 19 | :members: 20 | :undoc-members: 21 | :show-inheritance: 22 | 23 | 24 | Module contents 25 | --------------- 26 | 27 | .. automodule:: chapters.Semi_Supervised_Learning 28 | :members: 29 | :undoc-members: 30 | :show-inheritance: 31 | -------------------------------------------------------------------------------- /docs/source/chapters.rst: -------------------------------------------------------------------------------- 1 | chapters package 2 | ================ 3 | 4 | Subpackages 5 | ----------- 6 | 7 | .. toctree:: 8 | 9 | chapters.Bayesian 10 | chapters.Cluster_EM 11 | chapters.Decision_Tree 12 | chapters.Ensemble 13 | chapters.KNN_Dimension_Reduction 14 | chapters.Kaggle 15 | chapters.Linear 16 | chapters.Model_Selection 17 | chapters.Perceptron_Neural_Network 18 | chapters.PreProcessing 19 | chapters.SVM 20 | chapters.Semi_Supervised_Learning 21 | 22 | Module contents 23 | --------------- 24 | 25 | .. 
automodule:: chapters 26 | :members: 27 | :undoc-members: 28 | :show-inheritance: 29 | -------------------------------------------------------------------------------- /docs/source/index.rst: -------------------------------------------------------------------------------- 1 | .. book documentation master file, created by 2 | sphinx-quickstart on Wed Aug 17 17:09:32 2016. 3 | You can adapt this file completely to your liking, but it should at least 4 | contain the root `toctree` directive. 5 | 6 | Welcome to book's documentation! 7 | ================================ 8 | 9 | Contents: 10 | 11 | .. toctree:: 12 | :maxdepth: 2 13 | 14 | 15 | 16 | Indices and tables 17 | ================== 18 | 19 | * :ref:`genindex` 20 | * :ref:`modindex` 21 | * :ref:`search` 22 | 23 | -------------------------------------------------------------------------------- /docs/source/modules.rst: -------------------------------------------------------------------------------- 1 | chapters 2 | ======== 3 | 4 | .. toctree:: 5 | :maxdepth: 4 6 | 7 | chapters 8 | -------------------------------------------------------------------------------- /勘误.md: -------------------------------------------------------------------------------- 1 | # 勘误 2 | >推荐采用一些第三方 markdown 编辑器阅读。本人采用的是 typora 3 | 4 | 1. 第6页第二段:符号错误:(感谢网友 周礼广 的提示) 5 | $$ 6 | P(y=1/\mathbf{\vec x})=\frac{1}{1+e^{-z}},z=\mathbf{\vec w}\cdot \mathbf{\vec x}+b 7 | $$ 8 | 9 | 2. 第194页倒数第三段:公式未换行:(感谢网友 齐照辉 的提示) 10 | 11 | $$\nabla _{\mathbf {\vec w}}L(\mathbf{\vec w},b)=-\sum_{\mathbf{\vec x}_i \in M}y_i \mathbf{\vec x}_i\\ \nabla_bL(\mathbf{\vec w},b)=-\sum_{\mathbf{\vec x}_i \in M}y_i​$$ 12 | 13 | 以及 14 | 15 | $$ \mathbf{\vec w}\leftarrow \mathbf{\vec w}+\eta y_i\mathbf{\vec x}_i\\ b\leftarrow b+\eta y_i$$ 16 | 17 | 3. 第248页最后一段第一句:分子分母互换:(感谢网友 飞雪剑魂 的提示) 18 | $$ 19 | \exp(2\alpha_m)=\frac{1-e_m}{e_m} 20 | $$ 21 | 22 | 4. 第36 页倒数第二个公式:公式未换行(感谢网友 张显 的提示) 23 | $$ 24 | C_{\alpha}=...\\ 25 | H(t)=... 26 | $$ 27 | 28 | 5. 
第45页,`score` 的公式:分母缺少部分内容(后续关于`score`的公式也依次修改): 29 | $$ 30 | \text{score}=1-\frac{\sum_{T_{test}}(y_i-\hat y_i)^2}{\sum_{T_{test}}(y_i-\bar y)^2} 31 | $$ 32 | 33 | --------------------------------------------------------------------------------