├── .DS_Store ├── .ipynb_checkpoints ├── 2-2-认识Jupyter-Notebook-checkpoint.ipynb ├── 3-1-使用Python进行描述统计单变量-checkpoint.ipynb ├── 3-2-使用Python进行描述统计多变量-checkpoint.ipynb ├── 5-1-一元回归-checkpoint.ipynb ├── 6-1-各种概率分布-checkpoint.ipynb └── 7-3-Python中的Ridge回归与Lasso回归-checkpoint.ipynb ├── 2-2-认识Jupyter-Notebook.ipynb ├── 2-3-Python编程基础.ipynb ├── 2-4-1-sample_data.csv ├── 2-4-认识numpy与pandas.ipynb ├── 3-1-使用Python进行描述统计单变量.ipynb ├── 3-10-1-click_data.csv ├── 3-10-列联表检验.ipynb ├── 3-2-1-fish_multi.csv ├── 3-2-2-shoes.csv ├── 3-2-3-cov.csv ├── 3-2-使用Python进行描述统计多变量.ipynb ├── 3-3-2-fish_multi_2.csv ├── 3-3-基于matplotlib-seaborn的数据可视化.ipynb ├── 3-4-1-fish_length_100000.csv ├── 3-4-用Python模拟抽样.ipynb ├── 3-5-样本统计量的性质.ipynb ├── 3-6-正态分布及其应用.ipynb ├── 3-7-1-fish_length.csv ├── 3-7-参数估计.ipynb ├── 3-8-1-junk-food-weight.csv ├── 3-8-假设检验.ipynb ├── 3-9-1-paired-t-test.csv ├── 3-9-均值差的检验.ipynb ├── 5-1-1-beer.csv ├── 5-1-一元回归.ipynb ├── 5-2-方差分析.ipynb ├── 5-3-1-lm-model.csv ├── 5-3-含有多个解释变量的模型.ipynb ├── 6-1-各种概率分布.ipynb ├── 6-3-1-logistic-regression.csv ├── 6-3-logistic回归.ipynb ├── 6-4-广义线性模型的评估.ipynb ├── 6-5-1-poisson-regression.csv ├── 6-5-泊松回归.ipynb ├── 7-3-1-large-data.csv ├── 7-3-Python中的Ridge回归与Lasso回归.ipynb ├── 7-4-线性模型与神经网络.ipynb ├── README.md ├── _build ├── .DS_Store ├── .doctrees │ ├── 2-2-认识Jupyter-Notebook.doctree │ ├── 2-3-Python编程基础.doctree │ ├── 2-4-认识numpy与pandas.doctree │ ├── 3-1-使用Python进行描述统计单变量.doctree │ ├── 3-1-使用Python进行描述统计:单变量.doctree │ ├── 3-10-列联表检验.doctree │ ├── 3-2-使用Python进行描述统计多变量.doctree │ ├── 3-2-使用Python进行描述统计:多变量.doctree │ ├── 3-3-基于matplotlib-seaborn的数据可视化.doctree │ ├── 3-4-用Python模拟抽样.doctree │ ├── 3-5-样本统计量的性质.doctree │ ├── 3-6-正态分布及其应用.doctree │ ├── 3-7-参数估计.doctree │ ├── 3-8-假设检验.doctree │ ├── 3-9-均值差的检验.doctree │ ├── 5-1-一元回归.doctree │ ├── 5-2-方差分析.doctree │ ├── 5-3-含有多个解释变量的模型.doctree │ ├── 6-1-各种概率分布.doctree │ ├── 6-3-logistic回归.doctree │ ├── 6-4-广义线性模型的评估.doctree │ ├── 6-5-泊松回归.doctree │ ├── 7-3-Python中的Ridge回归与Lasso回归.doctree │ ├── 
7-4-线性模型与神经网络.doctree │ ├── README.doctree │ ├── environment.pickle │ └── glue_cache.json ├── html │ ├── .DS_Store │ ├── .buildinfo │ ├── 2-2-认识Jupyter-Notebook.html │ ├── 2-3-Python编程基础.html │ ├── 2-4-认识numpy与pandas.html │ ├── 3-1-使用Python进行描述统计单变量.html │ ├── 3-10-列联表检验.html │ ├── 3-2-使用Python进行描述统计多变量.html │ ├── 3-3-基于matplotlib-seaborn的数据可视化.html │ ├── 3-4-用Python模拟抽样.html │ ├── 3-5-样本统计量的性质.html │ ├── 3-6-正态分布及其应用.html │ ├── 3-7-参数估计.html │ ├── 3-8-假设检验.html │ ├── 3-9-均值差的检验.html │ ├── 5-1-一元回归.html │ ├── 5-2-方差分析.html │ ├── 5-3-含有多个解释变量的模型.html │ ├── 6-1-各种概率分布.html │ ├── 6-3-logistic回归.html │ ├── 6-4-广义线性模型的评估.html │ ├── 6-5-泊松回归.html │ ├── 7-3-Python中的Ridge回归与Lasso回归.html │ ├── 7-4-线性模型与神经网络.html │ ├── README.html │ ├── _images │ │ ├── 3-3-基于matplotlib-seaborn的数据可视化_11_2.png │ │ ├── 3-3-基于matplotlib-seaborn的数据可视化_13_1.png │ │ ├── 3-3-基于matplotlib-seaborn的数据可视化_14_1.png │ │ ├── 3-3-基于matplotlib-seaborn的数据可视化_19_2.png │ │ ├── 3-3-基于matplotlib-seaborn的数据可视化_21_1.png │ │ ├── 3-3-基于matplotlib-seaborn的数据可视化_24_1.png │ │ ├── 3-3-基于matplotlib-seaborn的数据可视化_26_1.png │ │ ├── 3-3-基于matplotlib-seaborn的数据可视化_29_1.png │ │ ├── 3-3-基于matplotlib-seaborn的数据可视化_33_1.png │ │ ├── 3-3-基于matplotlib-seaborn的数据可视化_5_1.png │ │ ├── 3-3-基于matplotlib-seaborn的数据可视化_8_1.png │ │ ├── 3-4-用Python模拟抽样_20_2.png │ │ ├── 3-4-用Python模拟抽样_24_1.png │ │ ├── 3-4-用Python模拟抽样_25_2.png │ │ ├── 3-5-样本统计量的性质_11_2.png │ │ ├── 3-5-样本统计量的性质_16_1.png │ │ ├── 3-5-样本统计量的性质_22_1.png │ │ ├── 3-5-样本统计量的性质_27_1.png │ │ ├── 3-5-样本统计量的性质_30_1.png │ │ ├── 3-5-样本统计量的性质_41_1.png │ │ ├── 3-5-样本统计量的性质_43_3.png │ │ ├── 3-6-正态分布及其应用_23_2.png │ │ ├── 3-6-正态分布及其应用_25_1.png │ │ ├── 3-6-正态分布及其应用_26_2.png │ │ ├── 3-6-正态分布及其应用_9_1.png │ │ ├── 5-1-一元回归_18_1.png │ │ ├── 5-1-一元回归_40_2.png │ │ ├── 5-1-一元回归_41_2.png │ │ ├── 5-1-一元回归_43_0.png │ │ ├── 5-1-一元回归_48_1.png │ │ ├── 5-1-一元回归_5_1.png │ │ ├── 5-2-方差分析_5_1.png │ │ ├── 5-3-含有多个解释变量的模型_12_1.png │ │ ├── 5-3-含有多个解释变量的模型_5_1.png │ │ ├── 5-3-含有多个解释变量的模型_9_1.png │ │ ├── 
6-1-各种概率分布_10_2.png │ │ ├── 6-1-各种概率分布_11_1.png │ │ ├── 6-1-各种概率分布_6_2.png │ │ ├── 6-3-logistic回归_16_1.png │ │ ├── 6-3-logistic回归_5_1.png │ │ ├── 6-5-泊松回归_10_1.png │ │ ├── 7-3-Python中的Ridge回归与Lasso回归_12_1.png │ │ ├── 7-3-Python中的Ridge回归与Lasso回归_22_2.png │ │ ├── 7-3-Python中的Ridge回归与Lasso回归_23_2.png │ │ └── 7-3-Python中的Ridge回归与Lasso回归_30_1.png │ ├── _panels_static │ │ ├── panels-main.c949a650a448cc0ae9fd3441c0e17fb0.css │ │ └── panels-variables.06eb56fa6e07937060861dad626602ad.css │ ├── _sources │ │ ├── .DS_Store │ │ ├── 2-2-认识Jupyter-Notebook.ipynb │ │ ├── 2-3-Python编程基础.ipynb │ │ ├── 2-4-认识numpy与pandas.ipynb │ │ ├── 3-1-使用Python进行描述统计单变量.ipynb │ │ ├── 3-10-列联表检验.ipynb │ │ ├── 3-2-使用Python进行描述统计多变量.ipynb │ │ ├── 3-3-基于matplotlib-seaborn的数据可视化.ipynb │ │ ├── 3-4-用Python模拟抽样.ipynb │ │ ├── 3-5-样本统计量的性质.ipynb │ │ ├── 3-6-正态分布及其应用.ipynb │ │ ├── 3-7-参数估计.ipynb │ │ ├── 3-8-假设检验.ipynb │ │ ├── 3-9-均值差的检验.ipynb │ │ ├── 5-1-一元回归.ipynb │ │ ├── 5-2-方差分析.ipynb │ │ ├── 5-3-含有多个解释变量的模型.ipynb │ │ ├── 6-1-各种概率分布.ipynb │ │ ├── 6-3-logistic回归.ipynb │ │ ├── 6-4-广义线性模型的评估.ipynb │ │ ├── 6-5-泊松回归.ipynb │ │ ├── 7-3-Python中的Ridge回归与Lasso回归.ipynb │ │ ├── 7-4-线性模型与神经网络.ipynb │ │ └── README.md │ ├── _static │ │ ├── __init__.py │ │ ├── __pycache__ │ │ │ └── __init__.cpython-37.pyc │ │ ├── basic.css │ │ ├── clipboard.min.js │ │ ├── copy-button.svg │ │ ├── copybutton.css │ │ ├── copybutton.js │ │ ├── copybutton_funcs.js │ │ ├── css │ │ │ ├── blank.css │ │ │ ├── index.ff1ffe594081f20da1ef19478df9384b.css │ │ │ └── theme.css │ │ ├── doctools.js │ │ ├── documentation_options.js │ │ ├── file.png │ │ ├── images │ │ │ ├── logo_binder.svg │ │ │ ├── logo_colab.png │ │ │ └── logo_jupyterhub.svg │ │ ├── jquery-3.5.1.js │ │ ├── jquery.js │ │ ├── js │ │ │ └── index.be7d3bbb2ef33a8344ce.js │ │ ├── language_data.js │ │ ├── minus.png │ │ ├── mystnb.css │ │ ├── panels-main.c949a650a448cc0ae9fd3441c0e17fb0.css │ │ ├── panels-variables.06eb56fa6e07937060861dad626602ad.css │ │ ├── plus.png │ │ ├── pygments.css │ │ 
├── searchtools.js │ │ ├── sphinx-book-theme.12a9622fbb08dcb3a2a40b2c02b83a57.js │ │ ├── sphinx-book-theme.css │ │ ├── sphinx-book-theme.e2363ea40746bee74734a24ffefccd78.css │ │ ├── sphinx-thebe.css │ │ ├── sphinx-thebe.js │ │ ├── togglebutton.css │ │ ├── togglebutton.js │ │ ├── underscore-1.13.1.js │ │ ├── underscore.js │ │ ├── vendor │ │ │ └── fontawesome │ │ │ │ └── 5.13.0 │ │ │ │ ├── LICENSE.txt │ │ │ │ ├── css │ │ │ │ └── all.min.css │ │ │ │ └── webfonts │ │ │ │ ├── fa-brands-400.eot │ │ │ │ ├── fa-brands-400.svg │ │ │ │ ├── fa-brands-400.ttf │ │ │ │ ├── fa-brands-400.woff │ │ │ │ ├── fa-brands-400.woff2 │ │ │ │ ├── fa-regular-400.eot │ │ │ │ ├── fa-regular-400.svg │ │ │ │ ├── fa-regular-400.ttf │ │ │ │ ├── fa-regular-400.woff │ │ │ │ ├── fa-regular-400.woff2 │ │ │ │ ├── fa-solid-900.eot │ │ │ │ ├── fa-solid-900.svg │ │ │ │ ├── fa-solid-900.ttf │ │ │ │ ├── fa-solid-900.woff │ │ │ │ └── fa-solid-900.woff2 │ │ └── webpack-macros.html │ ├── genindex.html │ ├── index.html │ ├── objects.inv │ ├── reports │ │ ├── 2-3-Python编程基础.log │ │ ├── 3-7-参数估计.log │ │ └── 7-3-Python中的Ridge回归与Lasso回归.log │ ├── search.html │ └── searchindex.js └── jupyter_execute │ ├── 2-2-认识Jupyter-Notebook.ipynb │ ├── 2-2-认识Jupyter-Notebook.py │ ├── 2-3-Python编程基础.ipynb │ ├── 2-3-Python编程基础.py │ ├── 2-4-认识numpy与pandas.ipynb │ ├── 2-4-认识numpy与pandas.py │ ├── 3-1-使用Python进行描述统计单变量.ipynb │ ├── 3-1-使用Python进行描述统计单变量.py │ ├── 3-1-使用Python进行描述统计:单变量.ipynb │ ├── 3-1-使用Python进行描述统计:单变量.py │ ├── 3-10-列联表检验.ipynb │ ├── 3-10-列联表检验.py │ ├── 3-2-使用Python进行描述统计多变量.ipynb │ ├── 3-2-使用Python进行描述统计多变量.py │ ├── 3-2-使用Python进行描述统计:多变量.ipynb │ ├── 3-2-使用Python进行描述统计:多变量.py │ ├── 3-3-基于matplotlib-seaborn的数据可视化.ipynb │ ├── 3-3-基于matplotlib-seaborn的数据可视化.py │ ├── 3-3-基于matplotlib-seaborn的数据可视化_11_2.png │ ├── 3-3-基于matplotlib-seaborn的数据可视化_13_1.png │ ├── 3-3-基于matplotlib-seaborn的数据可视化_14_1.png │ ├── 3-3-基于matplotlib-seaborn的数据可视化_19_2.png │ ├── 3-3-基于matplotlib-seaborn的数据可视化_21_1.png │ ├── 
3-3-基于matplotlib-seaborn的数据可视化_24_1.png │ ├── 3-3-基于matplotlib-seaborn的数据可视化_26_1.png │ ├── 3-3-基于matplotlib-seaborn的数据可视化_29_1.png │ ├── 3-3-基于matplotlib-seaborn的数据可视化_33_1.png │ ├── 3-3-基于matplotlib-seaborn的数据可视化_5_1.png │ ├── 3-3-基于matplotlib-seaborn的数据可视化_8_1.png │ ├── 3-4-用Python模拟抽样.ipynb │ ├── 3-4-用Python模拟抽样.py │ ├── 3-4-用Python模拟抽样_20_2.png │ ├── 3-4-用Python模拟抽样_24_1.png │ ├── 3-4-用Python模拟抽样_25_2.png │ ├── 3-5-样本统计量的性质.ipynb │ ├── 3-5-样本统计量的性质.py │ ├── 3-5-样本统计量的性质_11_2.png │ ├── 3-5-样本统计量的性质_16_1.png │ ├── 3-5-样本统计量的性质_22_1.png │ ├── 3-5-样本统计量的性质_27_1.png │ ├── 3-5-样本统计量的性质_30_1.png │ ├── 3-5-样本统计量的性质_41_1.png │ ├── 3-5-样本统计量的性质_43_3.png │ ├── 3-6-正态分布及其应用.ipynb │ ├── 3-6-正态分布及其应用.py │ ├── 3-6-正态分布及其应用_23_2.png │ ├── 3-6-正态分布及其应用_25_1.png │ ├── 3-6-正态分布及其应用_26_2.png │ ├── 3-6-正态分布及其应用_9_1.png │ ├── 3-7-参数估计.ipynb │ ├── 3-7-参数估计.py │ ├── 3-8-假设检验.ipynb │ ├── 3-8-假设检验.py │ ├── 3-9-均值差的检验.ipynb │ ├── 3-9-均值差的检验.py │ ├── 5-1-一元回归.ipynb │ ├── 5-1-一元回归.py │ ├── 5-1-一元回归_18_1.png │ ├── 5-1-一元回归_40_2.png │ ├── 5-1-一元回归_41_2.png │ ├── 5-1-一元回归_43_0.png │ ├── 5-1-一元回归_48_1.png │ ├── 5-1-一元回归_5_1.png │ ├── 5-2-方差分析.ipynb │ ├── 5-2-方差分析.py │ ├── 5-2-方差分析_5_1.png │ ├── 5-3-含有多个解释变量的模型.ipynb │ ├── 5-3-含有多个解释变量的模型.py │ ├── 5-3-含有多个解释变量的模型_12_1.png │ ├── 5-3-含有多个解释变量的模型_5_1.png │ ├── 5-3-含有多个解释变量的模型_9_1.png │ ├── 6-1-各种概率分布.ipynb │ ├── 6-1-各种概率分布.py │ ├── 6-1-各种概率分布_10_2.png │ ├── 6-1-各种概率分布_11_1.png │ ├── 6-1-各种概率分布_6_2.png │ ├── 6-3-logistic回归.ipynb │ ├── 6-3-logistic回归.py │ ├── 6-3-logistic回归_16_1.png │ ├── 6-3-logistic回归_5_1.png │ ├── 6-4-广义线性模型的评估.ipynb │ ├── 6-4-广义线性模型的评估.py │ ├── 6-5-泊松回归.ipynb │ ├── 6-5-泊松回归.py │ ├── 6-5-泊松回归_10_1.png │ ├── 7-3-Python中的Ridge回归与Lasso回归.ipynb │ ├── 7-3-Python中的Ridge回归与Lasso回归.py │ ├── 7-3-Python中的Ridge回归与Lasso回归_12_1.png │ ├── 7-3-Python中的Ridge回归与Lasso回归_22_2.png │ ├── 7-3-Python中的Ridge回归与Lasso回归_23_2.png │ ├── 7-3-Python中的Ridge回归与Lasso回归_30_1.png │ ├── 7-4-线性模型与神经网络.ipynb │ └── 7-4-线性模型与神经网络.py ├── _config.yml ├── _toc.yml └── 
deploy.sh /.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SocratesClub/statsbook/c5a012a41d83a0b48ce414d1e7b418cddf5ee90a/.DS_Store -------------------------------------------------------------------------------- /.ipynb_checkpoints/2-2-认识Jupyter-Notebook-checkpoint.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# 第 2 章 Python 与 Jupyter Notebook 基础\n", 8 | "\n", 9 | "## 第 2 节 认识 Jupyter Notebook" 10 | ] 11 | }, 12 | { 13 | "cell_type": "markdown", 14 | "metadata": {}, 15 | "source": [ 16 | "### 3. 执行代码" 17 | ] 18 | }, 19 | { 20 | "cell_type": "code", 21 | "execution_count": null, 22 | "metadata": { 23 | "collapsed": true 24 | }, 25 | "outputs": [], 26 | "source": [ 27 | "1" 28 | ] 29 | }, 30 | { 31 | "cell_type": "markdown", 32 | "metadata": {}, 33 | "source": [ 34 | "### 5. Markdown 的用法" 35 | ] 36 | }, 37 | { 38 | "cell_type": "markdown", 39 | "metadata": { 40 | "collapsed": true 41 | }, 42 | "source": [ 43 | "```\n", 44 | "# 一级标题\n", 45 | "## 二级标题\n", 46 | "### 三级标题\n", 47 | "#### 四级标题\n", 48 | "\n", 49 | "-----------------\n", 50 | "- 列表条目\n", 51 | "- 列表条目\n", 52 | "\n", 53 | "-----------------\n", 54 | "\n", 55 | "1. 序号条目\n", 56 | "2. 
序号条目\n", 57 | "```" 58 | ] 59 | }, 60 | { 61 | "cell_type": "code", 62 | "execution_count": null, 63 | "metadata": { 64 | "collapsed": true 65 | }, 66 | "outputs": [], 67 | "source": [] 68 | } 69 | ], 70 | "metadata": { 71 | "kernelspec": { 72 | "display_name": "Python 3", 73 | "language": "python", 74 | "name": "python3" 75 | }, 76 | "language_info": { 77 | "codemirror_mode": { 78 | "name": "ipython", 79 | "version": 3 80 | }, 81 | "file_extension": ".py", 82 | "mimetype": "text/x-python", 83 | "name": "python", 84 | "nbconvert_exporter": "python", 85 | "pygments_lexer": "ipython3", 86 | "version": "3.7.6" 87 | }, 88 | "toc": { 89 | "base_numbering": 1, 90 | "nav_menu": {}, 91 | "number_sections": false, 92 | "sideBar": true, 93 | "skip_h1_title": false, 94 | "title_cell": "Table of Contents", 95 | "title_sidebar": "Contents", 96 | "toc_cell": false, 97 | "toc_position": {}, 98 | "toc_section_display": true, 99 | "toc_window_display": true 100 | } 101 | }, 102 | "nbformat": 4, 103 | "nbformat_minor": 2 104 | } 105 | -------------------------------------------------------------------------------- /2-2-认识Jupyter-Notebook.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# 第 2 章 Python 与 Jupyter Notebook 基础\n", 8 | "\n", 9 | "## 第 2 节 认识 Jupyter Notebook" 10 | ] 11 | }, 12 | { 13 | "cell_type": "markdown", 14 | "metadata": {}, 15 | "source": [ 16 | "### 3. 执行代码" 17 | ] 18 | }, 19 | { 20 | "cell_type": "code", 21 | "execution_count": null, 22 | "metadata": { 23 | "collapsed": true 24 | }, 25 | "outputs": [], 26 | "source": [ 27 | "1" 28 | ] 29 | }, 30 | { 31 | "cell_type": "markdown", 32 | "metadata": {}, 33 | "source": [ 34 | "### 5. 
Markdown 的用法" 35 | ] 36 | }, 37 | { 38 | "cell_type": "markdown", 39 | "metadata": { 40 | "collapsed": true 41 | }, 42 | "source": [ 43 | "```\n", 44 | "# 一级标题\n", 45 | "## 二级标题\n", 46 | "### 三级标题\n", 47 | "#### 四级标题\n", 48 | "\n", 49 | "-----------------\n", 50 | "- 列表条目\n", 51 | "- 列表条目\n", 52 | "\n", 53 | "-----------------\n", 54 | "\n", 55 | "1. 序号条目\n", 56 | "2. 序号条目\n", 57 | "```" 58 | ] 59 | }, 60 | { 61 | "cell_type": "code", 62 | "execution_count": null, 63 | "metadata": { 64 | "collapsed": true 65 | }, 66 | "outputs": [], 67 | "source": [] 68 | } 69 | ], 70 | "metadata": { 71 | "kernelspec": { 72 | "display_name": "Python 3", 73 | "language": "python", 74 | "name": "python3" 75 | }, 76 | "language_info": { 77 | "codemirror_mode": { 78 | "name": "ipython", 79 | "version": 3 80 | }, 81 | "file_extension": ".py", 82 | "mimetype": "text/x-python", 83 | "name": "python", 84 | "nbconvert_exporter": "python", 85 | "pygments_lexer": "ipython3", 86 | "version": "3.7.6" 87 | }, 88 | "toc": { 89 | "base_numbering": 1, 90 | "nav_menu": {}, 91 | "number_sections": false, 92 | "sideBar": true, 93 | "skip_h1_title": false, 94 | "title_cell": "Table of Contents", 95 | "title_sidebar": "Contents", 96 | "toc_cell": false, 97 | "toc_position": {}, 98 | "toc_section_display": true, 99 | "toc_window_display": true 100 | } 101 | }, 102 | "nbformat": 4, 103 | "nbformat_minor": 2 104 | } 105 | -------------------------------------------------------------------------------- /2-4-1-sample_data.csv: -------------------------------------------------------------------------------- 1 | col1,col2 2 | 1, A 3 | 2, A 4 | 3, B 5 | 4, B 6 | 5, C 7 | 6, C -------------------------------------------------------------------------------- /3-10-1-click_data.csv: -------------------------------------------------------------------------------- 1 | color,click,freq 2 | blue,click,20 3 | blue,not,230 4 | red,click,10 5 | red,not,40 6 | 7 | 
-------------------------------------------------------------------------------- /3-10-列联表检验.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# 第 10 节 列联表检验\n", 8 | "\n", 9 | "## 第 3 章 使用 Python 进行数据分析|用 Python 动手学统计学" 10 | ] 11 | }, 12 | { 13 | "cell_type": "markdown", 14 | "metadata": {}, 15 | "source": [ 16 | "### 5. 实现:计算 p 值" 17 | ] 18 | }, 19 | { 20 | "cell_type": "code", 21 | "execution_count": 1, 22 | "metadata": { 23 | "collapsed": true 24 | }, 25 | "outputs": [], 26 | "source": [ 27 | "# 用于数值计算的库\n", 28 | "import numpy as np\n", 29 | "import pandas as pd\n", 30 | "import scipy as sp\n", 31 | "from scipy import stats\n", 32 | "\n", 33 | "# 用于绘图的库\n", 34 | "from matplotlib import pyplot as plt\n", 35 | "import seaborn as sns\n", 36 | "sns.set()\n", 37 | "\n", 38 | "# 设置浮点数打印精度\n", 39 | "%precision 3\n", 40 | "# 在 Jupyter Notebook 里显示图形\n", 41 | "%matplotlib inline" 42 | ] 43 | }, 44 | { 45 | "cell_type": "code", 46 | "execution_count": 2, 47 | "metadata": {}, 48 | "outputs": [ 49 | { 50 | "data": { 51 | "text/plain": [ 52 | "0.010" 53 | ] 54 | }, 55 | "execution_count": 2, 56 | "metadata": {}, 57 | "output_type": "execute_result" 58 | } 59 | ], 60 | "source": [ 61 | "# 计算 p 值\n", 62 | "1 - sp.stats.chi2.cdf(x = 6.667, df = 1)" 63 | ] 64 | }, 65 | { 66 | "cell_type": "markdown", 67 | "metadata": {}, 68 | "source": [ 69 | "### 6. 
实现:列联表检验" 70 | ] 71 | }, 72 | { 73 | "cell_type": "code", 74 | "execution_count": 3, 75 | "metadata": { 76 | "scrolled": true 77 | }, 78 | "outputs": [ 79 | { 80 | "name": "stdout", 81 | "output_type": "stream", 82 | "text": [ 83 | " color click freq\n", 84 | "0 blue click 20\n", 85 | "1 blue not 230\n", 86 | "2 red click 10\n", 87 | "3 red not 40\n" 88 | ] 89 | } 90 | ], 91 | "source": [ 92 | "# 读入数据\n", 93 | "click_data = pd.read_csv(\"3-10-1-click_data.csv\")\n", 94 | "print(click_data)" 95 | ] 96 | }, 97 | { 98 | "cell_type": "code", 99 | "execution_count": 4, 100 | "metadata": {}, 101 | "outputs": [ 102 | { 103 | "name": "stdout", 104 | "output_type": "stream", 105 | "text": [ 106 | "click click not\n", 107 | "color \n", 108 | "blue 20 230\n", 109 | "red 10 40\n" 110 | ] 111 | } 112 | ], 113 | "source": [ 114 | "# 转换为列联表\n", 115 | "cross = pd.pivot_table(\n", 116 | " data = click_data,\n", 117 | " values = \"freq\",\n", 118 | " aggfunc = \"sum\",\n", 119 | " index = \"color\",\n", 120 | " columns = \"click\"\n", 121 | ")\n", 122 | "print(cross)" 123 | ] 124 | }, 125 | { 126 | "cell_type": "code", 127 | "execution_count": 5, 128 | "metadata": {}, 129 | "outputs": [ 130 | { 131 | "data": { 132 | "text/plain": [ 133 | "(6.667, 0.010, 1, array([[ 25., 225.],\n", 134 | " [ 5., 45.]]))" 135 | ] 136 | }, 137 | "execution_count": 5, 138 | "metadata": {}, 139 | "output_type": "execute_result" 140 | } 141 | ], 142 | "source": [ 143 | "# 进行检验\n", 144 | "sp.stats.chi2_contingency(cross, correction = False)" 145 | ] 146 | } 147 | ], 148 | "metadata": { 149 | "kernelspec": { 150 | "display_name": "Python 3", 151 | "language": "python", 152 | "name": "python3" 153 | }, 154 | "language_info": { 155 | "codemirror_mode": { 156 | "name": "ipython", 157 | "version": 3 158 | }, 159 | "file_extension": ".py", 160 | "mimetype": "text/x-python", 161 | "name": "python", 162 | "nbconvert_exporter": "python", 163 | "pygments_lexer": "ipython3", 164 | "version": "3.7.6" 165 | }, 166 | 
"toc": { 167 | "base_numbering": 1, 168 | "nav_menu": {}, 169 | "number_sections": false, 170 | "sideBar": true, 171 | "skip_h1_title": false, 172 | "title_cell": "Table of Contents", 173 | "title_sidebar": "Contents", 174 | "toc_cell": false, 175 | "toc_position": {}, 176 | "toc_section_display": true, 177 | "toc_window_display": true 178 | } 179 | }, 180 | "nbformat": 4, 181 | "nbformat_minor": 2 182 | } 183 | -------------------------------------------------------------------------------- /3-2-1-fish_multi.csv: -------------------------------------------------------------------------------- 1 | species,length 2 | A,2 3 | A,3 4 | A,4 5 | B,6 6 | B,8 7 | B,10 8 | -------------------------------------------------------------------------------- /3-2-2-shoes.csv: -------------------------------------------------------------------------------- 1 | store,color,sales 2 | tokyo,blue,10 3 | tokyo,red,15 4 | osaka,blue,13 5 | osaka,red,9 6 | -------------------------------------------------------------------------------- /3-2-3-cov.csv: -------------------------------------------------------------------------------- 1 | x,y 2 | 18.5,34 3 | 18.7,39 4 | 19.1,41 5 | 19.7,38 6 | 21.5,45 7 | 21.7,41 8 | 21.8,52 9 | 22,44 10 | 23.4,44 11 | 23.8,49 12 | -------------------------------------------------------------------------------- /3-3-2-fish_multi_2.csv: -------------------------------------------------------------------------------- 1 | species,length 2 | A,2 3 | A,3 4 | A,3 5 | A,4 6 | A,4 7 | A,4 8 | A,4 9 | A,5 10 | A,5 11 | A,6 12 | B,5 13 | B,6 14 | B,6 15 | B,7 16 | B,7 17 | B,7 18 | B,7 19 | B,8 20 | B,8 21 | B,9 22 | -------------------------------------------------------------------------------- /3-7-1-fish_length.csv: -------------------------------------------------------------------------------- 1 | length 2 | 4.352981989508033500e+00 3 | 3.735303878484729889e+00 4 | 5.944616949606223777e+00 5 | 3.798326296317538375e+00 6 | 4.087687873262546567e+00 7 | 
5.265984893649251042e+00 8 | 3.272614076115006654e+00 9 | 3.526690673655769270e+00 10 | 4.150082580669628207e+00 11 | 3.736104033776512789e+00 12 | -------------------------------------------------------------------------------- /3-8-1-junk-food-weight.csv: -------------------------------------------------------------------------------- 1 | weight 2 | 5.852981989508032967e+01 3 | 5.235303878484729978e+01 4 | 7.444616949606223955e+01 5 | 5.298326296317538464e+01 6 | 5.587687873262546390e+01 7 | 6.765984893649250864e+01 8 | 4.772614076115006299e+01 9 | 5.026690673655769359e+01 10 | 5.650082580669628385e+01 11 | 5.236104033776512523e+01 12 | 4.545788310062555126e+01 13 | 5.336098791529930452e+01 14 | 5.212936842399005855e+01 15 | 5.982777282087596404e+01 16 | 4.168169176422644284e+01 17 | 4.939856769848039164e+01 18 | 6.421112807589736349e+01 19 | 6.985864805785050180e+01 20 | 4.291056353849307214e+01 21 | 6.015878008714222602e+01 22 | -------------------------------------------------------------------------------- /3-9-1-paired-t-test.csv: -------------------------------------------------------------------------------- 1 | person,medicine,body_temperature 2 | A,before,36.2 3 | B,before,36.2 4 | C,before,35.3 5 | D,before,36.1 6 | E,before,36.1 7 | A,after,36.8 8 | B,after,36.1 9 | C,after,36.8 10 | D,after,37.1 11 | E,after,36.9 12 | -------------------------------------------------------------------------------- /5-1-1-beer.csv: -------------------------------------------------------------------------------- 1 | beer,temperature 2 | 45.3,20.5 3 | 59.3,25.0 4 | 40.4,10.0 5 | 38.0,26.9 6 | 37.0,15.8 7 | 40.900000000000006,4.2 8 | 60.2,13.5 9 | 63.3,26.0 10 | 51.099999999999994,23.3 11 | 44.9,8.5 12 | 47.0,26.2 13 | 53.2,19.1 14 | 43.5,24.3 15 | 53.199999999999996,23.3 16 | 37.4,8.4 17 | 59.9,23.5 18 | 41.5,13.9 19 | 75.1,35.5 20 | 55.6,27.2 21 | 57.2,20.5 22 | 46.5,10.2 23 | 35.8,20.5 24 | 51.9,21.6 25 | 38.199999999999996,7.9 26 | 66.0,42.2 27 | 55.3,23.9 28 | 
55.300000000000004,36.9 29 | 43.3,8.9 30 | 70.5,36.4 31 | 38.8,6.4 32 | -------------------------------------------------------------------------------- /5-3-1-lm-model.csv: -------------------------------------------------------------------------------- 1 | humidity,price,sales,temperature,weather 2 | 29.5,290,229.7,17.8,rainy 3 | 38.1,290,206.1,26.1,rainy 4 | 31.5,290,202.5,22.0,rainy 5 | 39.7,290,195.5,23.0,rainy 6 | 24.7,290,214.4,14.5,rainy 7 | 27.2,290,174.4,20.8,rainy 8 | 29.3,290,238.6,23.7,rainy 9 | 26.4,290,190.8,15.2,rainy 10 | 27.9,290,211.2,18.7,rainy 11 | 38.0,290,204.5,20.2,rainy 12 | 26.9,290,221.7,13.1,rainy 13 | 28.9,290,179.0,21.6,rainy 14 | 37.9,290,208.2,24.2,rainy 15 | 27.7,290,197.1,15.7,rainy 16 | 29.4,290,227.2,21.8,rainy 17 | 30.7,290,183.6,13.4,rainy 18 | 33.8,290,205.2,19.8,rainy 19 | 25.8,290,185.1,11.9,rainy 20 | 39.6,290,215.4,25.6,rainy 21 | 33.6,290,219.1,22.0,rainy 22 | 36.8,290,191.4,19.9,rainy 23 | 31.2,290,220.5,16.1,rainy 24 | 40.5,290,229.2,26.4,rainy 25 | 38.2,290,227.3,29.8,rainy 26 | 22.8,290,210.4,10.7,rainy 27 | 43.1,290,205.2,26.2,rainy 28 | 35.2,290,215.9,28.1,rainy 29 | 32.4,290,196.0,21.7,rainy 30 | 26.3,290,196.8,14.0,rainy 31 | 34.0,290,221.1,24.3,rainy 32 | 32.2,295,194.5,19.1,rainy 33 | 30.8,295,196.4,17.0,rainy 34 | 30.3,295,188.3,13.8,rainy 35 | 39.1,295,196.7,22.8,rainy 36 | 34.0,295,200.7,24.0,rainy 37 | 26.4,295,202.1,16.9,rainy 38 | 30.0,295,192.4,22.6,rainy 39 | 26.6,295,202.7,14.3,rainy 40 | 29.4,295,235.7,24.0,rainy 41 | 34.6,295,217.4,20.2,rainy 42 | 30.0,310,196.0,19.1,rainy 43 | 30.8,310,186.1,19.5,rainy 44 | 34.6,310,194.0,24.3,rainy 45 | 30.1,310,229.9,23.8,rainy 46 | 31.0,310,203.9,22.6,rainy 47 | 34.4,315,189.3,20.7,rainy 48 | 34.1,315,201.3,20.4,rainy 49 | 32.5,315,233.2,23.1,rainy 50 | 38.2,315,201.2,21.2,rainy 51 | 30.1,315,211.3,23.4,rainy 52 | 28.0,290,229.6,18.4,sunny 53 | 22.0,290,207.1,7.8,sunny 54 | 24.7,290,216.1,25.2,sunny 55 | 42.4,290,234.8,30.9,sunny 56 | 32.4,290,226.5,22.2,sunny 57 
| 26.6,295,233.2,19.5,sunny 58 | 35.7,295,236.7,19.3,sunny 59 | 31.4,295,238.2,19.4,sunny 60 | 31.3,295,229.4,20.1,sunny 61 | 24.8,295,231.6,14.4,sunny 62 | 28.8,310,204.6,17.4,sunny 63 | 22.4,310,231.8,15.0,sunny 64 | 33.1,310,228.1,21.2,sunny 65 | 28.0,310,212.7,18.5,sunny 66 | 31.9,310,229.3,22.5,sunny 67 | 33.1,310,216.8,19.1,sunny 68 | 33.8,310,241.9,24.9,sunny 69 | 31.4,310,243.1,21.1,sunny 70 | 37.4,310,265.0,31.0,sunny 71 | 22.1,310,186.7,10.5,sunny 72 | 30.6,315,191.5,16.8,sunny 73 | 37.3,315,214.8,24.5,sunny 74 | 39.8,315,234.5,32.6,sunny 75 | 31.9,315,228.7,18.8,sunny 76 | 27.5,315,222.0,20.2,sunny 77 | 26.7,315,185.3,18.9,sunny 78 | 29.7,315,220.4,26.7,sunny 79 | 32.9,315,227.7,18.6,sunny 80 | 31.3,315,224.5,23.4,sunny 81 | 33.2,315,226.5,18.4,sunny 82 | 23.8,315,206.0,13.6,sunny 83 | 29.6,315,215.9,21.6,sunny 84 | 31.8,315,222.8,22.5,sunny 85 | 36.7,315,231.0,26.5,sunny 86 | 29.8,315,219.3,19.4,sunny 87 | 28.8,315,215.1,16.9,sunny 88 | 31.7,315,210.3,22.8,sunny 89 | 31.3,315,224.1,21.2,sunny 90 | 27.5,315,220.5,21.4,sunny 91 | 30.0,315,233.5,19.6,sunny 92 | 32.9,315,241.9,25.8,sunny 93 | 30.9,315,221.9,21.8,sunny 94 | 37.2,315,222.8,29.5,sunny 95 | 31.3,315,214.1,25.6,sunny 96 | 31.7,315,227.3,23.3,sunny 97 | 24.2,315,208.4,11.9,sunny 98 | 33.1,315,215.4,23.0,sunny 99 | 33.6,315,220.2,22.1,sunny 100 | 29.6,315,212.6,24.1,sunny 101 | 34.9,315,233.7,25.2,sunny 102 | -------------------------------------------------------------------------------- /6-3-1-logistic-regression.csv: -------------------------------------------------------------------------------- 1 | hours,result 2 | 0,0 3 | 0,0 4 | 0,0 5 | 0,0 6 | 0,0 7 | 0,0 8 | 0,0 9 | 0,0 10 | 0,0 11 | 0,0 12 | 1,0 13 | 1,0 14 | 1,0 15 | 1,0 16 | 1,0 17 | 1,0 18 | 1,0 19 | 1,0 20 | 1,0 21 | 1,0 22 | 2,0 23 | 2,1 24 | 2,0 25 | 2,0 26 | 2,0 27 | 2,0 28 | 2,0 29 | 2,0 30 | 2,0 31 | 2,0 32 | 3,0 33 | 3,0 34 | 3,1 35 | 3,0 36 | 3,0 37 | 3,0 38 | 3,0 39 | 3,0 40 | 3,0 41 | 3,0 42 | 4,1 43 | 4,1 44 | 4,0 45 | 4,1 
46 | 4,0 47 | 4,0 48 | 4,1 49 | 4,0 50 | 4,0 51 | 4,0 52 | 5,0 53 | 5,1 54 | 5,0 55 | 5,0 56 | 5,0 57 | 5,0 58 | 5,1 59 | 5,0 60 | 5,1 61 | 5,1 62 | 6,1 63 | 6,1 64 | 6,1 65 | 6,1 66 | 6,1 67 | 6,1 68 | 6,1 69 | 6,1 70 | 6,0 71 | 6,1 72 | 7,0 73 | 7,1 74 | 7,1 75 | 7,1 76 | 7,1 77 | 7,1 78 | 7,0 79 | 7,1 80 | 7,1 81 | 7,1 82 | 8,1 83 | 8,1 84 | 8,1 85 | 8,1 86 | 8,1 87 | 8,1 88 | 8,1 89 | 8,0 90 | 8,1 91 | 8,1 92 | 9,1 93 | 9,1 94 | 9,1 95 | 9,1 96 | 9,1 97 | 9,1 98 | 9,1 99 | 9,1 100 | 9,1 101 | 9,1 102 | -------------------------------------------------------------------------------- /6-5-1-poisson-regression.csv: -------------------------------------------------------------------------------- 1 | beer_number,temperature 2 | 6,17.5 3 | 11,26.6 4 | 2,5.0 5 | 4,14.1 6 | 2,9.4 7 | 2,7.8 8 | 3,10.6 9 | 5,15.4 10 | 6,16.9 11 | 7,21.2 12 | 6,17.6 13 | 11,25.6 14 | 4,11.1 15 | 16,31.3 16 | 4,5.8 17 | 13,25.1 18 | 5,17.5 19 | 7,21.8 20 | 3,9.2 21 | 5,10.9 22 | 14,29.0 23 | 22,34.0 24 | 7,14.4 25 | 11,25.8 26 | 18,31.3 27 | 17,31.8 28 | 2,7.6 29 | 2,6.2 30 | 4,10.1 31 | 16,31.3 32 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Python Stats Book 2 | 3 | ![image](https://user-images.githubusercontent.com/543384/142135901-5573f02c-629a-4fc1-af35-1f127bd408be.png) 4 | -------------------------------------------------------------------------------- /_build/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SocratesClub/statsbook/c5a012a41d83a0b48ce414d1e7b418cddf5ee90a/_build/.DS_Store -------------------------------------------------------------------------------- /_build/.doctrees/2-2-认识Jupyter-Notebook.doctree: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/SocratesClub/statsbook/c5a012a41d83a0b48ce414d1e7b418cddf5ee90a/_build/.doctrees/2-2-认识Jupyter-Notebook.doctree -------------------------------------------------------------------------------- /_build/.doctrees/2-3-Python编程基础.doctree: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SocratesClub/statsbook/c5a012a41d83a0b48ce414d1e7b418cddf5ee90a/_build/.doctrees/2-3-Python编程基础.doctree -------------------------------------------------------------------------------- /_build/.doctrees/2-4-认识numpy与pandas.doctree: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SocratesClub/statsbook/c5a012a41d83a0b48ce414d1e7b418cddf5ee90a/_build/.doctrees/2-4-认识numpy与pandas.doctree -------------------------------------------------------------------------------- /_build/.doctrees/3-1-使用Python进行描述统计单变量.doctree: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SocratesClub/statsbook/c5a012a41d83a0b48ce414d1e7b418cddf5ee90a/_build/.doctrees/3-1-使用Python进行描述统计单变量.doctree -------------------------------------------------------------------------------- /_build/.doctrees/3-1-使用Python进行描述统计:单变量.doctree: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SocratesClub/statsbook/c5a012a41d83a0b48ce414d1e7b418cddf5ee90a/_build/.doctrees/3-1-使用Python进行描述统计:单变量.doctree -------------------------------------------------------------------------------- /_build/.doctrees/3-10-列联表检验.doctree: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SocratesClub/statsbook/c5a012a41d83a0b48ce414d1e7b418cddf5ee90a/_build/.doctrees/3-10-列联表检验.doctree -------------------------------------------------------------------------------- 
/_build/.doctrees/3-2-使用Python进行描述统计多变量.doctree: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SocratesClub/statsbook/c5a012a41d83a0b48ce414d1e7b418cddf5ee90a/_build/.doctrees/3-2-使用Python进行描述统计多变量.doctree -------------------------------------------------------------------------------- /_build/.doctrees/3-2-使用Python进行描述统计:多变量.doctree: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SocratesClub/statsbook/c5a012a41d83a0b48ce414d1e7b418cddf5ee90a/_build/.doctrees/3-2-使用Python进行描述统计:多变量.doctree -------------------------------------------------------------------------------- /_build/.doctrees/3-3-基于matplotlib-seaborn的数据可视化.doctree: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SocratesClub/statsbook/c5a012a41d83a0b48ce414d1e7b418cddf5ee90a/_build/.doctrees/3-3-基于matplotlib-seaborn的数据可视化.doctree -------------------------------------------------------------------------------- /_build/.doctrees/3-4-用Python模拟抽样.doctree: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SocratesClub/statsbook/c5a012a41d83a0b48ce414d1e7b418cddf5ee90a/_build/.doctrees/3-4-用Python模拟抽样.doctree -------------------------------------------------------------------------------- /_build/.doctrees/3-5-样本统计量的性质.doctree: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SocratesClub/statsbook/c5a012a41d83a0b48ce414d1e7b418cddf5ee90a/_build/.doctrees/3-5-样本统计量的性质.doctree -------------------------------------------------------------------------------- /_build/.doctrees/3-6-正态分布及其应用.doctree: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/SocratesClub/statsbook/c5a012a41d83a0b48ce414d1e7b418cddf5ee90a/_build/.doctrees/3-6-正态分布及其应用.doctree -------------------------------------------------------------------------------- /_build/.doctrees/3-7-参数估计.doctree: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SocratesClub/statsbook/c5a012a41d83a0b48ce414d1e7b418cddf5ee90a/_build/.doctrees/3-7-参数估计.doctree -------------------------------------------------------------------------------- /_build/.doctrees/3-8-假设检验.doctree: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SocratesClub/statsbook/c5a012a41d83a0b48ce414d1e7b418cddf5ee90a/_build/.doctrees/3-8-假设检验.doctree -------------------------------------------------------------------------------- /_build/.doctrees/3-9-均值差的检验.doctree: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SocratesClub/statsbook/c5a012a41d83a0b48ce414d1e7b418cddf5ee90a/_build/.doctrees/3-9-均值差的检验.doctree -------------------------------------------------------------------------------- /_build/.doctrees/5-1-一元回归.doctree: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SocratesClub/statsbook/c5a012a41d83a0b48ce414d1e7b418cddf5ee90a/_build/.doctrees/5-1-一元回归.doctree -------------------------------------------------------------------------------- /_build/.doctrees/5-2-方差分析.doctree: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SocratesClub/statsbook/c5a012a41d83a0b48ce414d1e7b418cddf5ee90a/_build/.doctrees/5-2-方差分析.doctree -------------------------------------------------------------------------------- /_build/.doctrees/5-3-含有多个解释变量的模型.doctree: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/SocratesClub/statsbook/c5a012a41d83a0b48ce414d1e7b418cddf5ee90a/_build/.doctrees/5-3-含有多个解释变量的模型.doctree -------------------------------------------------------------------------------- /_build/.doctrees/6-1-各种概率分布.doctree: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SocratesClub/statsbook/c5a012a41d83a0b48ce414d1e7b418cddf5ee90a/_build/.doctrees/6-1-各种概率分布.doctree -------------------------------------------------------------------------------- /_build/.doctrees/6-3-logistic回归.doctree: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SocratesClub/statsbook/c5a012a41d83a0b48ce414d1e7b418cddf5ee90a/_build/.doctrees/6-3-logistic回归.doctree -------------------------------------------------------------------------------- /_build/.doctrees/6-4-广义线性模型的评估.doctree: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SocratesClub/statsbook/c5a012a41d83a0b48ce414d1e7b418cddf5ee90a/_build/.doctrees/6-4-广义线性模型的评估.doctree -------------------------------------------------------------------------------- /_build/.doctrees/6-5-泊松回归.doctree: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SocratesClub/statsbook/c5a012a41d83a0b48ce414d1e7b418cddf5ee90a/_build/.doctrees/6-5-泊松回归.doctree -------------------------------------------------------------------------------- /_build/.doctrees/7-3-Python中的Ridge回归与Lasso回归.doctree: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SocratesClub/statsbook/c5a012a41d83a0b48ce414d1e7b418cddf5ee90a/_build/.doctrees/7-3-Python中的Ridge回归与Lasso回归.doctree -------------------------------------------------------------------------------- /_build/.doctrees/7-4-线性模型与神经网络.doctree: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/SocratesClub/statsbook/c5a012a41d83a0b48ce414d1e7b418cddf5ee90a/_build/.doctrees/7-4-线性模型与神经网络.doctree -------------------------------------------------------------------------------- /_build/.doctrees/README.doctree: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SocratesClub/statsbook/c5a012a41d83a0b48ce414d1e7b418cddf5ee90a/_build/.doctrees/README.doctree -------------------------------------------------------------------------------- /_build/.doctrees/environment.pickle: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SocratesClub/statsbook/c5a012a41d83a0b48ce414d1e7b418cddf5ee90a/_build/.doctrees/environment.pickle -------------------------------------------------------------------------------- /_build/.doctrees/glue_cache.json: -------------------------------------------------------------------------------- 1 | {} -------------------------------------------------------------------------------- /_build/html/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SocratesClub/statsbook/c5a012a41d83a0b48ce414d1e7b418cddf5ee90a/_build/html/.DS_Store -------------------------------------------------------------------------------- /_build/html/.buildinfo: -------------------------------------------------------------------------------- 1 | # Sphinx build info version 1 2 | # This file hashes the configuration used when building these files. When it is not found, a full rebuild will be done. 
3 | config: 9d17def4c2be3ed0b33cbdfec7fbe0e1 4 | tags: 645f666f9bcd5a90fca523b33c5a78b7 5 | -------------------------------------------------------------------------------- /_build/html/_images/3-3-基于matplotlib-seaborn的数据可视化_11_2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SocratesClub/statsbook/c5a012a41d83a0b48ce414d1e7b418cddf5ee90a/_build/html/_images/3-3-基于matplotlib-seaborn的数据可视化_11_2.png -------------------------------------------------------------------------------- /_build/html/_images/3-3-基于matplotlib-seaborn的数据可视化_13_1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SocratesClub/statsbook/c5a012a41d83a0b48ce414d1e7b418cddf5ee90a/_build/html/_images/3-3-基于matplotlib-seaborn的数据可视化_13_1.png -------------------------------------------------------------------------------- /_build/html/_images/3-3-基于matplotlib-seaborn的数据可视化_14_1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SocratesClub/statsbook/c5a012a41d83a0b48ce414d1e7b418cddf5ee90a/_build/html/_images/3-3-基于matplotlib-seaborn的数据可视化_14_1.png -------------------------------------------------------------------------------- /_build/html/_images/3-3-基于matplotlib-seaborn的数据可视化_19_2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SocratesClub/statsbook/c5a012a41d83a0b48ce414d1e7b418cddf5ee90a/_build/html/_images/3-3-基于matplotlib-seaborn的数据可视化_19_2.png -------------------------------------------------------------------------------- /_build/html/_images/3-3-基于matplotlib-seaborn的数据可视化_21_1.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/SocratesClub/statsbook/c5a012a41d83a0b48ce414d1e7b418cddf5ee90a/_build/html/_images/3-3-基于matplotlib-seaborn的数据可视化_21_1.png -------------------------------------------------------------------------------- /_build/html/_images/3-3-基于matplotlib-seaborn的数据可视化_24_1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SocratesClub/statsbook/c5a012a41d83a0b48ce414d1e7b418cddf5ee90a/_build/html/_images/3-3-基于matplotlib-seaborn的数据可视化_24_1.png -------------------------------------------------------------------------------- /_build/html/_images/3-3-基于matplotlib-seaborn的数据可视化_26_1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SocratesClub/statsbook/c5a012a41d83a0b48ce414d1e7b418cddf5ee90a/_build/html/_images/3-3-基于matplotlib-seaborn的数据可视化_26_1.png -------------------------------------------------------------------------------- /_build/html/_images/3-3-基于matplotlib-seaborn的数据可视化_29_1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SocratesClub/statsbook/c5a012a41d83a0b48ce414d1e7b418cddf5ee90a/_build/html/_images/3-3-基于matplotlib-seaborn的数据可视化_29_1.png -------------------------------------------------------------------------------- /_build/html/_images/3-3-基于matplotlib-seaborn的数据可视化_33_1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SocratesClub/statsbook/c5a012a41d83a0b48ce414d1e7b418cddf5ee90a/_build/html/_images/3-3-基于matplotlib-seaborn的数据可视化_33_1.png -------------------------------------------------------------------------------- /_build/html/_images/3-3-基于matplotlib-seaborn的数据可视化_5_1.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/SocratesClub/statsbook/c5a012a41d83a0b48ce414d1e7b418cddf5ee90a/_build/html/_images/3-3-基于matplotlib-seaborn的数据可视化_5_1.png -------------------------------------------------------------------------------- /_build/html/_images/3-3-基于matplotlib-seaborn的数据可视化_8_1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SocratesClub/statsbook/c5a012a41d83a0b48ce414d1e7b418cddf5ee90a/_build/html/_images/3-3-基于matplotlib-seaborn的数据可视化_8_1.png -------------------------------------------------------------------------------- /_build/html/_images/3-4-用Python模拟抽样_20_2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SocratesClub/statsbook/c5a012a41d83a0b48ce414d1e7b418cddf5ee90a/_build/html/_images/3-4-用Python模拟抽样_20_2.png -------------------------------------------------------------------------------- /_build/html/_images/3-4-用Python模拟抽样_24_1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SocratesClub/statsbook/c5a012a41d83a0b48ce414d1e7b418cddf5ee90a/_build/html/_images/3-4-用Python模拟抽样_24_1.png -------------------------------------------------------------------------------- /_build/html/_images/3-4-用Python模拟抽样_25_2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SocratesClub/statsbook/c5a012a41d83a0b48ce414d1e7b418cddf5ee90a/_build/html/_images/3-4-用Python模拟抽样_25_2.png -------------------------------------------------------------------------------- /_build/html/_images/3-5-样本统计量的性质_11_2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SocratesClub/statsbook/c5a012a41d83a0b48ce414d1e7b418cddf5ee90a/_build/html/_images/3-5-样本统计量的性质_11_2.png 
-------------------------------------------------------------------------------- /_build/html/_images/3-5-样本统计量的性质_16_1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SocratesClub/statsbook/c5a012a41d83a0b48ce414d1e7b418cddf5ee90a/_build/html/_images/3-5-样本统计量的性质_16_1.png -------------------------------------------------------------------------------- /_build/html/_images/3-5-样本统计量的性质_22_1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SocratesClub/statsbook/c5a012a41d83a0b48ce414d1e7b418cddf5ee90a/_build/html/_images/3-5-样本统计量的性质_22_1.png -------------------------------------------------------------------------------- /_build/html/_images/3-5-样本统计量的性质_27_1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SocratesClub/statsbook/c5a012a41d83a0b48ce414d1e7b418cddf5ee90a/_build/html/_images/3-5-样本统计量的性质_27_1.png -------------------------------------------------------------------------------- /_build/html/_images/3-5-样本统计量的性质_30_1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SocratesClub/statsbook/c5a012a41d83a0b48ce414d1e7b418cddf5ee90a/_build/html/_images/3-5-样本统计量的性质_30_1.png -------------------------------------------------------------------------------- /_build/html/_images/3-5-样本统计量的性质_41_1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SocratesClub/statsbook/c5a012a41d83a0b48ce414d1e7b418cddf5ee90a/_build/html/_images/3-5-样本统计量的性质_41_1.png -------------------------------------------------------------------------------- /_build/html/_images/3-5-样本统计量的性质_43_3.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/SocratesClub/statsbook/c5a012a41d83a0b48ce414d1e7b418cddf5ee90a/_build/html/_images/3-5-样本统计量的性质_43_3.png -------------------------------------------------------------------------------- /_build/html/_images/3-6-正态分布及其应用_23_2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SocratesClub/statsbook/c5a012a41d83a0b48ce414d1e7b418cddf5ee90a/_build/html/_images/3-6-正态分布及其应用_23_2.png -------------------------------------------------------------------------------- /_build/html/_images/3-6-正态分布及其应用_25_1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SocratesClub/statsbook/c5a012a41d83a0b48ce414d1e7b418cddf5ee90a/_build/html/_images/3-6-正态分布及其应用_25_1.png -------------------------------------------------------------------------------- /_build/html/_images/3-6-正态分布及其应用_26_2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SocratesClub/statsbook/c5a012a41d83a0b48ce414d1e7b418cddf5ee90a/_build/html/_images/3-6-正态分布及其应用_26_2.png -------------------------------------------------------------------------------- /_build/html/_images/3-6-正态分布及其应用_9_1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SocratesClub/statsbook/c5a012a41d83a0b48ce414d1e7b418cddf5ee90a/_build/html/_images/3-6-正态分布及其应用_9_1.png -------------------------------------------------------------------------------- /_build/html/_images/5-1-一元回归_18_1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SocratesClub/statsbook/c5a012a41d83a0b48ce414d1e7b418cddf5ee90a/_build/html/_images/5-1-一元回归_18_1.png -------------------------------------------------------------------------------- /_build/html/_images/5-1-一元回归_40_2.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/SocratesClub/statsbook/c5a012a41d83a0b48ce414d1e7b418cddf5ee90a/_build/html/_images/5-1-一元回归_40_2.png -------------------------------------------------------------------------------- /_build/html/_images/5-1-一元回归_41_2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SocratesClub/statsbook/c5a012a41d83a0b48ce414d1e7b418cddf5ee90a/_build/html/_images/5-1-一元回归_41_2.png -------------------------------------------------------------------------------- /_build/html/_images/5-1-一元回归_43_0.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SocratesClub/statsbook/c5a012a41d83a0b48ce414d1e7b418cddf5ee90a/_build/html/_images/5-1-一元回归_43_0.png -------------------------------------------------------------------------------- /_build/html/_images/5-1-一元回归_48_1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SocratesClub/statsbook/c5a012a41d83a0b48ce414d1e7b418cddf5ee90a/_build/html/_images/5-1-一元回归_48_1.png -------------------------------------------------------------------------------- /_build/html/_images/5-1-一元回归_5_1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SocratesClub/statsbook/c5a012a41d83a0b48ce414d1e7b418cddf5ee90a/_build/html/_images/5-1-一元回归_5_1.png -------------------------------------------------------------------------------- /_build/html/_images/5-2-方差分析_5_1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SocratesClub/statsbook/c5a012a41d83a0b48ce414d1e7b418cddf5ee90a/_build/html/_images/5-2-方差分析_5_1.png -------------------------------------------------------------------------------- 
/_build/html/_images/5-3-含有多个解释变量的模型_12_1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SocratesClub/statsbook/c5a012a41d83a0b48ce414d1e7b418cddf5ee90a/_build/html/_images/5-3-含有多个解释变量的模型_12_1.png -------------------------------------------------------------------------------- /_build/html/_images/5-3-含有多个解释变量的模型_5_1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SocratesClub/statsbook/c5a012a41d83a0b48ce414d1e7b418cddf5ee90a/_build/html/_images/5-3-含有多个解释变量的模型_5_1.png -------------------------------------------------------------------------------- /_build/html/_images/5-3-含有多个解释变量的模型_9_1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SocratesClub/statsbook/c5a012a41d83a0b48ce414d1e7b418cddf5ee90a/_build/html/_images/5-3-含有多个解释变量的模型_9_1.png -------------------------------------------------------------------------------- /_build/html/_images/6-1-各种概率分布_10_2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SocratesClub/statsbook/c5a012a41d83a0b48ce414d1e7b418cddf5ee90a/_build/html/_images/6-1-各种概率分布_10_2.png -------------------------------------------------------------------------------- /_build/html/_images/6-1-各种概率分布_11_1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SocratesClub/statsbook/c5a012a41d83a0b48ce414d1e7b418cddf5ee90a/_build/html/_images/6-1-各种概率分布_11_1.png -------------------------------------------------------------------------------- /_build/html/_images/6-1-各种概率分布_6_2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SocratesClub/statsbook/c5a012a41d83a0b48ce414d1e7b418cddf5ee90a/_build/html/_images/6-1-各种概率分布_6_2.png 
-------------------------------------------------------------------------------- /_build/html/_images/6-3-logistic回归_16_1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SocratesClub/statsbook/c5a012a41d83a0b48ce414d1e7b418cddf5ee90a/_build/html/_images/6-3-logistic回归_16_1.png -------------------------------------------------------------------------------- /_build/html/_images/6-3-logistic回归_5_1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SocratesClub/statsbook/c5a012a41d83a0b48ce414d1e7b418cddf5ee90a/_build/html/_images/6-3-logistic回归_5_1.png -------------------------------------------------------------------------------- /_build/html/_images/6-5-泊松回归_10_1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SocratesClub/statsbook/c5a012a41d83a0b48ce414d1e7b418cddf5ee90a/_build/html/_images/6-5-泊松回归_10_1.png -------------------------------------------------------------------------------- /_build/html/_images/7-3-Python中的Ridge回归与Lasso回归_12_1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SocratesClub/statsbook/c5a012a41d83a0b48ce414d1e7b418cddf5ee90a/_build/html/_images/7-3-Python中的Ridge回归与Lasso回归_12_1.png -------------------------------------------------------------------------------- /_build/html/_images/7-3-Python中的Ridge回归与Lasso回归_22_2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SocratesClub/statsbook/c5a012a41d83a0b48ce414d1e7b418cddf5ee90a/_build/html/_images/7-3-Python中的Ridge回归与Lasso回归_22_2.png -------------------------------------------------------------------------------- /_build/html/_images/7-3-Python中的Ridge回归与Lasso回归_23_2.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/SocratesClub/statsbook/c5a012a41d83a0b48ce414d1e7b418cddf5ee90a/_build/html/_images/7-3-Python中的Ridge回归与Lasso回归_23_2.png -------------------------------------------------------------------------------- /_build/html/_images/7-3-Python中的Ridge回归与Lasso回归_30_1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SocratesClub/statsbook/c5a012a41d83a0b48ce414d1e7b418cddf5ee90a/_build/html/_images/7-3-Python中的Ridge回归与Lasso回归_30_1.png -------------------------------------------------------------------------------- /_build/html/_panels_static/panels-main.c949a650a448cc0ae9fd3441c0e17fb0.css: -------------------------------------------------------------------------------- 1 | details.dropdown .summary-title{padding-right:3em !important;-moz-user-select:none;-ms-user-select:none;-webkit-user-select:none;user-select:none}details.dropdown:hover{cursor:pointer}details.dropdown .summary-content{cursor:default}details.dropdown summary{list-style:none;padding:1em}details.dropdown summary .octicon.no-title{vertical-align:middle}details.dropdown[open] summary .octicon.no-title{visibility:hidden}details.dropdown summary::-webkit-details-marker{display:none}details.dropdown summary:focus{outline:none}details.dropdown summary:hover .summary-up svg,details.dropdown summary:hover .summary-down svg{opacity:1}details.dropdown .summary-up svg,details.dropdown .summary-down svg{display:block;opacity:.6}details.dropdown .summary-up,details.dropdown .summary-down{pointer-events:none;position:absolute;right:1em;top:.75em}details.dropdown[open] .summary-down{visibility:hidden}details.dropdown:not([open]) .summary-up{visibility:hidden}details.dropdown.fade-in[open] summary~*{-moz-animation:panels-fade-in .5s ease-in-out;-webkit-animation:panels-fade-in .5s ease-in-out;animation:panels-fade-in .5s 
ease-in-out}details.dropdown.fade-in-slide-down[open] summary~*{-moz-animation:panels-fade-in .5s ease-in-out, panels-slide-down .5s ease-in-out;-webkit-animation:panels-fade-in .5s ease-in-out, panels-slide-down .5s ease-in-out;animation:panels-fade-in .5s ease-in-out, panels-slide-down .5s ease-in-out}@keyframes panels-fade-in{0%{opacity:0}100%{opacity:1}}@keyframes panels-slide-down{0%{transform:translate(0, -10px)}100%{transform:translate(0, 0)}}.octicon{display:inline-block;fill:currentColor;vertical-align:text-top}.tabbed-content{box-shadow:0 -.0625rem var(--tabs-color-overline),0 .0625rem var(--tabs-color-underline);display:none;order:99;padding-bottom:.75rem;padding-top:.75rem;width:100%}.tabbed-content>:first-child{margin-top:0 !important}.tabbed-content>:last-child{margin-bottom:0 !important}.tabbed-content>.tabbed-set{margin:0}.tabbed-set{border-radius:.125rem;display:flex;flex-wrap:wrap;margin:1em 0;position:relative}.tabbed-set>input{opacity:0;position:absolute}.tabbed-set>input:checked+label{border-color:var(--tabs-color-label-active);color:var(--tabs-color-label-active)}.tabbed-set>input:checked+label+.tabbed-content{display:block}.tabbed-set>input:focus+label{outline-style:auto}.tabbed-set>input:not(.focus-visible)+label{outline:none;-webkit-tap-highlight-color:transparent}.tabbed-set>label{border-bottom:.125rem solid transparent;color:var(--tabs-color-label-inactive);cursor:pointer;font-size:var(--tabs-size-label);font-weight:700;padding:1em 1.25em .5em;transition:color 250ms;width:auto;z-index:1}html .tabbed-set>label:hover{color:var(--tabs-color-label-active)} 2 | -------------------------------------------------------------------------------- /_build/html/_panels_static/panels-variables.06eb56fa6e07937060861dad626602ad.css: -------------------------------------------------------------------------------- 1 | :root { 2 | --tabs-color-label-active: hsla(231, 99%, 66%, 1); 3 | --tabs-color-label-inactive: rgba(178, 206, 245, 0.62); 4 | 
--tabs-color-overline: rgb(207, 236, 238); 5 | --tabs-color-underline: rgb(207, 236, 238); 6 | --tabs-size-label: 1rem; 7 | } -------------------------------------------------------------------------------- /_build/html/_sources/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SocratesClub/statsbook/c5a012a41d83a0b48ce414d1e7b418cddf5ee90a/_build/html/_sources/.DS_Store -------------------------------------------------------------------------------- /_build/html/_sources/2-2-认识Jupyter-Notebook.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# 第 2 章 Python 与 Jupyter Notebook 基础\n", 8 | "\n", 9 | "## 第 2 节 认识 Jupyter Notebook" 10 | ] 11 | }, 12 | { 13 | "cell_type": "markdown", 14 | "metadata": {}, 15 | "source": [ 16 | "### 3. 执行代码" 17 | ] 18 | }, 19 | { 20 | "cell_type": "code", 21 | "execution_count": null, 22 | "metadata": { 23 | "collapsed": true 24 | }, 25 | "outputs": [], 26 | "source": [ 27 | "1" 28 | ] 29 | }, 30 | { 31 | "cell_type": "markdown", 32 | "metadata": {}, 33 | "source": [ 34 | "### 5. Markdown 的用法" 35 | ] 36 | }, 37 | { 38 | "cell_type": "markdown", 39 | "metadata": { 40 | "collapsed": true 41 | }, 42 | "source": [ 43 | "```\n", 44 | "# 一级标题\n", 45 | "## 二级标题\n", 46 | "### 三级标题\n", 47 | "#### 四级标题\n", 48 | "\n", 49 | "-----------------\n", 50 | "- 列表条目\n", 51 | "- 列表条目\n", 52 | "\n", 53 | "-----------------\n", 54 | "\n", 55 | "1. 序号条目\n", 56 | "2. 
序号条目\n", 57 | "```" 58 | ] 59 | }, 60 | { 61 | "cell_type": "code", 62 | "execution_count": null, 63 | "metadata": { 64 | "collapsed": true 65 | }, 66 | "outputs": [], 67 | "source": [] 68 | } 69 | ], 70 | "metadata": { 71 | "kernelspec": { 72 | "display_name": "Python 3", 73 | "language": "python", 74 | "name": "python3" 75 | }, 76 | "language_info": { 77 | "codemirror_mode": { 78 | "name": "ipython", 79 | "version": 3 80 | }, 81 | "file_extension": ".py", 82 | "mimetype": "text/x-python", 83 | "name": "python", 84 | "nbconvert_exporter": "python", 85 | "pygments_lexer": "ipython3", 86 | "version": "3.7.6" 87 | }, 88 | "toc": { 89 | "base_numbering": 1, 90 | "nav_menu": {}, 91 | "number_sections": false, 92 | "sideBar": true, 93 | "skip_h1_title": false, 94 | "title_cell": "Table of Contents", 95 | "title_sidebar": "Contents", 96 | "toc_cell": false, 97 | "toc_position": {}, 98 | "toc_section_display": true, 99 | "toc_window_display": true 100 | } 101 | }, 102 | "nbformat": 4, 103 | "nbformat_minor": 2 104 | } 105 | -------------------------------------------------------------------------------- /_build/html/_sources/3-10-列联表检验.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# 第 10 节 列联表检验\n", 8 | "\n", 9 | "## 第 3 章 使用 Python 进行数据分析|用 Python 动手学统计学" 10 | ] 11 | }, 12 | { 13 | "cell_type": "markdown", 14 | "metadata": {}, 15 | "source": [ 16 | "### 5. 
实现:计算 p 值" 17 | ] 18 | }, 19 | { 20 | "cell_type": "code", 21 | "execution_count": 1, 22 | "metadata": { 23 | "collapsed": true 24 | }, 25 | "outputs": [], 26 | "source": [ 27 | "# 用于数值计算的库\n", 28 | "import numpy as np\n", 29 | "import pandas as pd\n", 30 | "import scipy as sp\n", 31 | "from scipy import stats\n", 32 | "\n", 33 | "# 用于绘图的库\n", 34 | "from matplotlib import pyplot as plt\n", 35 | "import seaborn as sns\n", 36 | "sns.set()\n", 37 | "\n", 38 | "# 设置浮点数打印精度\n", 39 | "%precision 3\n", 40 | "# 在 Jupyter Notebook 里显示图形\n", 41 | "%matplotlib inline" 42 | ] 43 | }, 44 | { 45 | "cell_type": "code", 46 | "execution_count": 2, 47 | "metadata": {}, 48 | "outputs": [ 49 | { 50 | "data": { 51 | "text/plain": [ 52 | "0.010" 53 | ] 54 | }, 55 | "execution_count": 2, 56 | "metadata": {}, 57 | "output_type": "execute_result" 58 | } 59 | ], 60 | "source": [ 61 | "# 计算 p 值\n", 62 | "1 - sp.stats.chi2.cdf(x = 6.667, df = 1)" 63 | ] 64 | }, 65 | { 66 | "cell_type": "markdown", 67 | "metadata": {}, 68 | "source": [ 69 | "### 6. 
实现:列联表检验" 70 | ] 71 | }, 72 | { 73 | "cell_type": "code", 74 | "execution_count": 3, 75 | "metadata": { 76 | "scrolled": true 77 | }, 78 | "outputs": [ 79 | { 80 | "name": "stdout", 81 | "output_type": "stream", 82 | "text": [ 83 | " color click freq\n", 84 | "0 blue click 20\n", 85 | "1 blue not 230\n", 86 | "2 red click 10\n", 87 | "3 red not 40\n" 88 | ] 89 | } 90 | ], 91 | "source": [ 92 | "# 读入数据\n", 93 | "click_data = pd.read_csv(\"3-10-1-click_data.csv\")\n", 94 | "print(click_data)" 95 | ] 96 | }, 97 | { 98 | "cell_type": "code", 99 | "execution_count": 4, 100 | "metadata": {}, 101 | "outputs": [ 102 | { 103 | "name": "stdout", 104 | "output_type": "stream", 105 | "text": [ 106 | "click click not\n", 107 | "color \n", 108 | "blue 20 230\n", 109 | "red 10 40\n" 110 | ] 111 | } 112 | ], 113 | "source": [ 114 | "# 转换为列联表\n", 115 | "cross = pd.pivot_table(\n", 116 | " data = click_data,\n", 117 | " values = \"freq\",\n", 118 | " aggfunc = \"sum\",\n", 119 | " index = \"color\",\n", 120 | " columns = \"click\"\n", 121 | ")\n", 122 | "print(cross)" 123 | ] 124 | }, 125 | { 126 | "cell_type": "code", 127 | "execution_count": 5, 128 | "metadata": {}, 129 | "outputs": [ 130 | { 131 | "data": { 132 | "text/plain": [ 133 | "(6.667, 0.010, 1, array([[ 25., 225.],\n", 134 | " [ 5., 45.]]))" 135 | ] 136 | }, 137 | "execution_count": 5, 138 | "metadata": {}, 139 | "output_type": "execute_result" 140 | } 141 | ], 142 | "source": [ 143 | "# 进行检验\n", 144 | "sp.stats.chi2_contingency(cross, correction = False)" 145 | ] 146 | } 147 | ], 148 | "metadata": { 149 | "kernelspec": { 150 | "display_name": "Python 3", 151 | "language": "python", 152 | "name": "python3" 153 | }, 154 | "language_info": { 155 | "codemirror_mode": { 156 | "name": "ipython", 157 | "version": 3 158 | }, 159 | "file_extension": ".py", 160 | "mimetype": "text/x-python", 161 | "name": "python", 162 | "nbconvert_exporter": "python", 163 | "pygments_lexer": "ipython3", 164 | "version": "3.7.6" 165 | }, 166 | 
"toc": { 167 | "base_numbering": 1, 168 | "nav_menu": {}, 169 | "number_sections": false, 170 | "sideBar": true, 171 | "skip_h1_title": false, 172 | "title_cell": "Table of Contents", 173 | "title_sidebar": "Contents", 174 | "toc_cell": false, 175 | "toc_position": {}, 176 | "toc_section_display": true, 177 | "toc_window_display": true 178 | } 179 | }, 180 | "nbformat": 4, 181 | "nbformat_minor": 2 182 | } 183 | -------------------------------------------------------------------------------- /_build/html/_sources/README.md: -------------------------------------------------------------------------------- 1 | # Python Stats Book 2 | 3 | ![image](https://user-images.githubusercontent.com/543384/142135901-5573f02c-629a-4fc1-af35-1f127bd408be.png) 4 | -------------------------------------------------------------------------------- /_build/html/_static/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SocratesClub/statsbook/c5a012a41d83a0b48ce414d1e7b418cddf5ee90a/_build/html/_static/__init__.py -------------------------------------------------------------------------------- /_build/html/_static/__pycache__/__init__.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SocratesClub/statsbook/c5a012a41d83a0b48ce414d1e7b418cddf5ee90a/_build/html/_static/__pycache__/__init__.cpython-37.pyc -------------------------------------------------------------------------------- /_build/html/_static/copy-button.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | -------------------------------------------------------------------------------- /_build/html/_static/copybutton.css: -------------------------------------------------------------------------------- 1 | /* Copy buttons */ 2 | a.copybtn { 3 | position: absolute; 4 | top: .2em; 5 | right: .2em; 6 | width: 1em; 7 | 
height: 1em; 8 | opacity: .3; 9 | transition: opacity 0.5s; 10 | border: none; 11 | user-select: none; 12 | } 13 | 14 | div.highlight { 15 | position: relative; 16 | } 17 | 18 | a.copybtn > img { 19 | vertical-align: top; 20 | margin: 0; 21 | top: 0; 22 | left: 0; 23 | position: absolute; 24 | } 25 | 26 | .highlight:hover .copybtn { 27 | opacity: 1; 28 | } 29 | 30 | /** 31 | * A minimal CSS-only tooltip copied from: 32 | * https://codepen.io/mildrenben/pen/rVBrpK 33 | * 34 | * To use, write HTML like the following: 35 | * 36 | *

<p class="o-tooltip--left" data-tooltip="Hey">Short</p>

37 | */ 38 | .o-tooltip--left { 39 | position: relative; 40 | } 41 | 42 | .o-tooltip--left:after { 43 | opacity: 0; 44 | visibility: hidden; 45 | position: absolute; 46 | content: attr(data-tooltip); 47 | padding: 2px; 48 | top: 0; 49 | left: -.2em; 50 | background: grey; 51 | font-size: 1rem; 52 | color: white; 53 | white-space: nowrap; 54 | z-index: 2; 55 | border-radius: 2px; 56 | transform: translateX(-102%) translateY(0); 57 | transition: opacity 0.2s cubic-bezier(0.64, 0.09, 0.08, 1), transform 0.2s cubic-bezier(0.64, 0.09, 0.08, 1); 58 | } 59 | 60 | .o-tooltip--left:hover:after { 61 | display: block; 62 | opacity: 1; 63 | visibility: visible; 64 | transform: translateX(-100%) translateY(0); 65 | transition: opacity 0.2s cubic-bezier(0.64, 0.09, 0.08, 1), transform 0.2s cubic-bezier(0.64, 0.09, 0.08, 1); 66 | transition-delay: .5s; 67 | } 68 | -------------------------------------------------------------------------------- /_build/html/_static/copybutton.js: -------------------------------------------------------------------------------- 1 | // Localization support 2 | const messages = { 3 | 'en': { 4 | 'copy': 'Copy', 5 | 'copy_to_clipboard': 'Copy to clipboard', 6 | 'copy_success': 'Copied!', 7 | 'copy_failure': 'Failed to copy', 8 | }, 9 | 'es' : { 10 | 'copy': 'Copiar', 11 | 'copy_to_clipboard': 'Copiar al portapapeles', 12 | 'copy_success': '¡Copiado!', 13 | 'copy_failure': 'Error al copiar', 14 | }, 15 | 'de' : { 16 | 'copy': 'Kopieren', 17 | 'copy_to_clipboard': 'In die Zwischenablage kopieren', 18 | 'copy_success': 'Kopiert!', 19 | 'copy_failure': 'Fehler beim Kopieren', 20 | } 21 | } 22 | 23 | let locale = 'en' 24 | if( document.documentElement.lang !== undefined 25 | && messages[document.documentElement.lang] !== undefined ) { 26 | locale = document.documentElement.lang 27 | } 28 | 29 | /** 30 | * Set up copy/paste for code blocks 31 | */ 32 | 33 | const runWhenDOMLoaded = cb => { 34 | if (document.readyState != 'loading') { 35 | cb() 36 | } else if 
(document.addEventListener) { 37 | document.addEventListener('DOMContentLoaded', cb) 38 | } else { 39 | document.attachEvent('onreadystatechange', function() { 40 | if (document.readyState == 'complete') cb() 41 | }) 42 | } 43 | } 44 | 45 | const codeCellId = index => `codecell${index}` 46 | 47 | // Clears selected text since ClipboardJS will select the text when copying 48 | const clearSelection = () => { 49 | if (window.getSelection) { 50 | window.getSelection().removeAllRanges() 51 | } else if (document.selection) { 52 | document.selection.empty() 53 | } 54 | } 55 | 56 | // Changes tooltip text for two seconds, then changes it back 57 | const temporarilyChangeTooltip = (el, newText) => { 58 | const oldText = el.getAttribute('data-tooltip') 59 | el.setAttribute('data-tooltip', newText) 60 | setTimeout(() => el.setAttribute('data-tooltip', oldText), 2000) 61 | } 62 | 63 | const addCopyButtonToCodeCells = () => { 64 | // If ClipboardJS hasn't loaded, wait a bit and try again. This 65 | // happens because we load ClipboardJS asynchronously. 66 | if (window.ClipboardJS === undefined) { 67 | setTimeout(addCopyButtonToCodeCells, 250) 68 | return 69 | } 70 | 71 | // Add copybuttons to all of our code cells 72 | const codeCells = document.querySelectorAll('div.highlight pre') 73 | codeCells.forEach((codeCell, index) => { 74 | const id = codeCellId(index) 75 | codeCell.setAttribute('id', id) 76 | const pre_bg = getComputedStyle(codeCell).backgroundColor; 77 | 78 | const clipboardButton = id => 79 | ` 80 | ${messages[locale]['copy_to_clipboard']} 81 | ` 82 | codeCell.insertAdjacentHTML('afterend', clipboardButton(id)) 83 | }) 84 | 85 | function escapeRegExp(string) { 86 | return string.replace(/[.*+?^${}()|[\]\\]/g, '\\$&'); // $& means the whole matched string 87 | } 88 | 89 | // Callback when a copy button is clicked. 
Will be passed the node that was clicked 90 | // should then grab the text and replace pieces of text that shouldn't be used in output 91 | function formatCopyText(textContent, copybuttonPromptText, isRegexp = false, onlyCopyPromptLines = true, removePrompts = true) { 92 | 93 | var regexp; 94 | var match; 95 | 96 | // create regexp to capture prompt and remaining line 97 | if (isRegexp) { 98 | regexp = new RegExp('^(' + copybuttonPromptText + ')(.*)') 99 | } else { 100 | regexp = new RegExp('^(' + escapeRegExp(copybuttonPromptText) + ')(.*)') 101 | } 102 | 103 | const outputLines = []; 104 | var promptFound = false; 105 | for (const line of textContent.split('\n')) { 106 | match = line.match(regexp) 107 | if (match) { 108 | promptFound = true 109 | if (removePrompts) { 110 | outputLines.push(match[2]) 111 | } else { 112 | outputLines.push(line) 113 | } 114 | } else { 115 | if (!onlyCopyPromptLines) { 116 | outputLines.push(line) 117 | } 118 | } 119 | } 120 | 121 | // If no lines with the prompt were found then just use original lines 122 | if (promptFound) { 123 | textContent = outputLines.join('\n'); 124 | } 125 | 126 | // Remove a trailing newline to avoid auto-running when pasting 127 | if (textContent.endsWith("\n")) { 128 | textContent = textContent.slice(0, -1) 129 | } 130 | return textContent 131 | } 132 | 133 | 134 | var copyTargetText = (trigger) => { 135 | var target = document.querySelector(trigger.attributes['data-clipboard-target'].value); 136 | return formatCopyText(target.innerText, '', false, true, true) 137 | } 138 | 139 | // Initialize with a callback so we can modify the text before copy 140 | const clipboard = new ClipboardJS('.copybtn', {text: copyTargetText}) 141 | 142 | // Update UI with error/success messages 143 | clipboard.on('success', event => { 144 | clearSelection() 145 | temporarilyChangeTooltip(event.trigger, messages[locale]['copy_success']) 146 | }) 147 | 148 | clipboard.on('error', event => { 149 | 
temporarilyChangeTooltip(event.trigger, messages[locale]['copy_failure']) 150 | }) 151 | } 152 | 153 | runWhenDOMLoaded(addCopyButtonToCodeCells) -------------------------------------------------------------------------------- /_build/html/_static/copybutton_funcs.js: -------------------------------------------------------------------------------- 1 | function escapeRegExp(string) { 2 | return string.replace(/[.*+?^${}()|[\]\\]/g, '\\$&'); // $& means the whole matched string 3 | } 4 | 5 | // Callback when a copy button is clicked. Will be passed the node that was clicked 6 | // should then grab the text and replace pieces of text that shouldn't be used in output 7 | export function formatCopyText(textContent, copybuttonPromptText, isRegexp = false, onlyCopyPromptLines = true, removePrompts = true) { 8 | 9 | var regexp; 10 | var match; 11 | 12 | // create regexp to capture prompt and remaining line 13 | if (isRegexp) { 14 | regexp = new RegExp('^(' + copybuttonPromptText + ')(.*)') 15 | } else { 16 | regexp = new RegExp('^(' + escapeRegExp(copybuttonPromptText) + ')(.*)') 17 | } 18 | 19 | const outputLines = []; 20 | var promptFound = false; 21 | for (const line of textContent.split('\n')) { 22 | match = line.match(regexp) 23 | if (match) { 24 | promptFound = true 25 | if (removePrompts) { 26 | outputLines.push(match[2]) 27 | } else { 28 | outputLines.push(line) 29 | } 30 | } else { 31 | if (!onlyCopyPromptLines) { 32 | outputLines.push(line) 33 | } 34 | } 35 | } 36 | 37 | // If no lines with the prompt were found then just use original lines 38 | if (promptFound) { 39 | textContent = outputLines.join('\n'); 40 | } 41 | 42 | // Remove a trailing newline to avoid auto-running when pasting 43 | if (textContent.endsWith("\n")) { 44 | textContent = textContent.slice(0, -1) 45 | } 46 | return textContent 47 | } 48 | -------------------------------------------------------------------------------- /_build/html/_static/css/blank.css: 
-------------------------------------------------------------------------------- 1 | /* This file is intentionally left blank to override the stylesheet of the 2 | parent theme via theme.conf. The parent style we import directly in theme.css */ -------------------------------------------------------------------------------- /_build/html/_static/css/theme.css: -------------------------------------------------------------------------------- 1 | /* Provided by the Sphinx base theme template at build time */ 2 | @import "../basic.css"; 3 | 4 | :root { 5 | /***************************************************************************** 6 | * Theme config 7 | **/ 8 | --pst-header-height: 60px; 9 | 10 | /***************************************************************************** 11 | * Font size 12 | **/ 13 | --pst-font-size-base: 15px; /* base font size - applied at body / html level */ 14 | 15 | /* heading font sizes */ 16 | --pst-font-size-h1: 36px; 17 | --pst-font-size-h2: 32px; 18 | --pst-font-size-h3: 26px; 19 | --pst-font-size-h4: 21px; 20 | --pst-font-size-h5: 18px; 21 | --pst-font-size-h6: 16px; 22 | 23 | /* smaller then heading font sizes*/ 24 | --pst-font-size-milli: 12px; 25 | 26 | --pst-sidebar-font-size: .9em; 27 | --pst-sidebar-caption-font-size: .9em; 28 | 29 | /***************************************************************************** 30 | * Font family 31 | **/ 32 | /* These are adapted from https://systemfontstack.com/ */ 33 | --pst-font-family-base-system: -apple-system, BlinkMacSystemFont, Segoe UI, "Helvetica Neue", 34 | Arial, sans-serif, Apple Color Emoji, Segoe UI Emoji, Segoe UI Symbol; 35 | --pst-font-family-monospace-system: "SFMono-Regular", Menlo, Consolas, Monaco, 36 | Liberation Mono, Lucida Console, monospace; 37 | 38 | --pst-font-family-base: var(--pst-font-family-base-system); 39 | --pst-font-family-heading: var(--pst-font-family-base); 40 | --pst-font-family-monospace: var(--pst-font-family-monospace-system); 41 | 42 | 
/***************************************************************************** 43 | * Color 44 | * 45 | * Colors are defined in rgb string way, "red, green, blue" 46 | **/ 47 | --pst-color-primary: 19, 6, 84; 48 | --pst-color-success: 40, 167, 69; 49 | --pst-color-info: 0, 123, 255; /*23, 162, 184;*/ 50 | --pst-color-warning: 255, 193, 7; 51 | --pst-color-danger: 220, 53, 69; 52 | --pst-color-text-base: 51, 51, 51; 53 | 54 | --pst-color-h1: var(--pst-color-primary); 55 | --pst-color-h2: var(--pst-color-primary); 56 | --pst-color-h3: var(--pst-color-text-base); 57 | --pst-color-h4: var(--pst-color-text-base); 58 | --pst-color-h5: var(--pst-color-text-base); 59 | --pst-color-h6: var(--pst-color-text-base); 60 | --pst-color-paragraph: var(--pst-color-text-base); 61 | --pst-color-link: 0, 91, 129; 62 | --pst-color-link-hover: 227, 46, 0; 63 | --pst-color-headerlink: 198, 15, 15; 64 | --pst-color-headerlink-hover: 255, 255, 255; 65 | --pst-color-preformatted-text: 34, 34, 34; 66 | --pst-color-preformatted-background: 250, 250, 250; 67 | --pst-color-inline-code: 232, 62, 140; 68 | 69 | --pst-color-active-navigation: 19, 6, 84; 70 | --pst-color-navbar-link: 77, 77, 77; 71 | --pst-color-navbar-link-hover: var(--pst-color-active-navigation); 72 | --pst-color-navbar-link-active: var(--pst-color-active-navigation); 73 | --pst-color-sidebar-link: 77, 77, 77; 74 | --pst-color-sidebar-link-hover: var(--pst-color-active-navigation); 75 | --pst-color-sidebar-link-active: var(--pst-color-active-navigation); 76 | --pst-color-sidebar-expander-background-hover: 244, 244, 244; 77 | --pst-color-sidebar-caption: 77, 77, 77; 78 | --pst-color-toc-link: 119, 117, 122; 79 | --pst-color-toc-link-hover: var(--pst-color-active-navigation); 80 | --pst-color-toc-link-active: var(--pst-color-active-navigation); 81 | 82 | /***************************************************************************** 83 | * Icon 84 | **/ 85 | 86 | /* font awesome icons*/ 87 | --pst-icon-check-circle: '\f058'; 88 | 
--pst-icon-info-circle: '\f05a'; 89 | --pst-icon-exclamation-triangle: '\f071'; 90 | --pst-icon-exclamation-circle: '\f06a'; 91 | --pst-icon-times-circle: '\f057'; 92 | --pst-icon-lightbulb: '\f0eb'; 93 | 94 | /***************************************************************************** 95 | * Admonitions 96 | **/ 97 | 98 | --pst-color-admonition-default: var(--pst-color-info); 99 | --pst-color-admonition-note: var(--pst-color-info); 100 | --pst-color-admonition-attention: var(--pst-color-warning); 101 | --pst-color-admonition-caution: var(--pst-color-warning); 102 | --pst-color-admonition-warning: var(--pst-color-warning); 103 | --pst-color-admonition-danger: var(--pst-color-danger); 104 | --pst-color-admonition-error: var(--pst-color-danger); 105 | --pst-color-admonition-hint: var(--pst-color-success); 106 | --pst-color-admonition-tip: var(--pst-color-success); 107 | --pst-color-admonition-important: var(--pst-color-success); 108 | 109 | --pst-icon-admonition-default: var(--pst-icon-info-circle); 110 | --pst-icon-admonition-note: var(--pst-icon-info-circle); 111 | --pst-icon-admonition-attention: var(--pst-icon-exclamation-circle); 112 | --pst-icon-admonition-caution: var(--pst-icon-exclamation-triangle); 113 | --pst-icon-admonition-warning: var(--pst-icon-exclamation-triangle); 114 | --pst-icon-admonition-danger: var(--pst-icon-exclamation-triangle); 115 | --pst-icon-admonition-error: var(--pst-icon-times-circle); 116 | --pst-icon-admonition-hint: var(--pst-icon-lightbulb); 117 | --pst-icon-admonition-tip: var(--pst-icon-lightbulb); 118 | --pst-icon-admonition-important: var(--pst-icon-exclamation-circle); 119 | 120 | } 121 | -------------------------------------------------------------------------------- /_build/html/_static/documentation_options.js: -------------------------------------------------------------------------------- 1 | var DOCUMENTATION_OPTIONS = { 2 | URL_ROOT: document.getElementById("documentation_options").getAttribute('data-url_root'), 3 | 
VERSION: '', 4 | LANGUAGE: 'None', 5 | COLLAPSE_INDEX: false, 6 | BUILDER: 'html', 7 | FILE_SUFFIX: '.html', 8 | LINK_SUFFIX: '.html', 9 | HAS_SOURCE: true, 10 | SOURCELINK_SUFFIX: '', 11 | NAVIGATION_WITH_KEYS: true 12 | }; -------------------------------------------------------------------------------- /_build/html/_static/file.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SocratesClub/statsbook/c5a012a41d83a0b48ce414d1e7b418cddf5ee90a/_build/html/_static/file.png -------------------------------------------------------------------------------- /_build/html/_static/images/logo_binder.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 | 5 | 10 | logo 11 | 12 | 13 | 15 | 16 | 17 | 18 | 19 | 20 | -------------------------------------------------------------------------------- /_build/html/_static/images/logo_colab.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SocratesClub/statsbook/c5a012a41d83a0b48ce414d1e7b418cddf5ee90a/_build/html/_static/images/logo_colab.png -------------------------------------------------------------------------------- /_build/html/_static/images/logo_jupyterhub.svg: -------------------------------------------------------------------------------- 1 | logo_jupyterhubHub 2 | -------------------------------------------------------------------------------- /_build/html/_static/minus.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SocratesClub/statsbook/c5a012a41d83a0b48ce414d1e7b418cddf5ee90a/_build/html/_static/minus.png -------------------------------------------------------------------------------- /_build/html/_static/mystnb.css: -------------------------------------------------------------------------------- 1 | /* Whole cell */ 2 | div.container.cell { 3 | padding-left: 0; 4 | 
margin-bottom: 1em; 5 | } 6 | 7 | /* Removing all background formatting so we can control at the div level */ 8 | .cell_input div.highlight, .cell_output pre, .cell_input pre, .cell_output .output { 9 | border: none; 10 | box-shadow: none; 11 | } 12 | 13 | .cell_output .output pre, .cell_input pre { 14 | margin: 0px; 15 | } 16 | 17 | /* Input cells */ 18 | div.cell div.cell_input { 19 | padding-left: 0em; 20 | padding-right: 0em; 21 | border: 1px #ccc solid; 22 | background-color: #f7f7f7; 23 | border-left-color: green; 24 | border-left-width: medium; 25 | } 26 | 27 | div.cell_input > div, div.cell_output div.output > div.highlight { 28 | margin: 0em !important; 29 | border: none !important; 30 | } 31 | 32 | /* All cell outputs */ 33 | .cell_output { 34 | padding-left: 1em; 35 | padding-right: 0em; 36 | margin-top: 1em; 37 | } 38 | 39 | /* Outputs from jupyter_sphinx overrides to remove extra CSS */ 40 | div.section div.jupyter_container { 41 | padding: .4em; 42 | margin: 0 0 .4em 0; 43 | background-color: none; 44 | border: none; 45 | -moz-box-shadow: none; 46 | -webkit-box-shadow: none; 47 | box-shadow: none; 48 | } 49 | 50 | /* Text outputs from cells */ 51 | .cell_output .output.text_plain, 52 | .cell_output .output.traceback, 53 | .cell_output .output.stream, 54 | .cell_output .output.stderr 55 | { 56 | background: #fcfcfc; 57 | margin-top: 1em; 58 | margin-bottom: 0em; 59 | box-shadow: none; 60 | } 61 | 62 | .cell_output .output.text_plain, 63 | .cell_output .output.stream, 64 | .cell_output .output.stderr { 65 | border: 1px solid #f7f7f7; 66 | } 67 | 68 | .cell_output .output.stderr { 69 | background: #fdd; 70 | } 71 | 72 | .cell_output .output.traceback { 73 | border: 1px solid #ffd6d6; 74 | } 75 | 76 | /* Math align to the left */ 77 | .cell_output .MathJax_Display { 78 | text-align: left !important; 79 | } 80 | 81 | /* Pandas tables. 
Pulled from the Jupyter / nbsphinx CSS */ 82 | div.cell_output table { 83 | border: none; 84 | border-collapse: collapse; 85 | border-spacing: 0; 86 | color: black; 87 | font-size: 1em; 88 | table-layout: fixed; 89 | } 90 | div.cell_output thead { 91 | border-bottom: 1px solid black; 92 | vertical-align: bottom; 93 | } 94 | div.cell_output tr, 95 | div.cell_output th, 96 | div.cell_output td { 97 | text-align: right; 98 | vertical-align: middle; 99 | padding: 0.5em 0.5em; 100 | line-height: normal; 101 | white-space: normal; 102 | max-width: none; 103 | border: none; 104 | } 105 | div.cell_output th { 106 | font-weight: bold; 107 | } 108 | div.cell_output tbody tr:nth-child(odd) { 109 | background: #f5f5f5; 110 | } 111 | div.cell_output tbody tr:hover { 112 | background: rgba(66, 165, 245, 0.2); 113 | } 114 | 115 | 116 | /* Inline text from `paste` operation */ 117 | 118 | span.pasted-text { 119 | font-weight: bold; 120 | } 121 | 122 | span.pasted-inline img { 123 | max-height: 2em; 124 | } 125 | 126 | tbody span.pasted-inline img { 127 | max-height: none; 128 | } 129 | 130 | /* Font colors for translated ANSI escape sequences 131 | Color values are adapted from share/jupyter/nbconvert/templates/classic/static/style.css 132 | */ 133 | div.highlight .-Color-Bold { 134 | font-weight: bold; 135 | } 136 | div.highlight .-Color[class*=-Black] { 137 | color :#3E424D 138 | } 139 | div.highlight .-Color[class*=-Red] { 140 | color: #E75C58 141 | } 142 | div.highlight .-Color[class*=-Green] { 143 | color: #00A250 144 | } 145 | div.highlight .-Color[class*=-Yellow] { 146 | color: yellow 147 | } 148 | div.highlight .-Color[class*=-Blue] { 149 | color: #208FFB 150 | } 151 | div.highlight .-Color[class*=-Magenta] { 152 | color: #D160C4 153 | } 154 | div.highlight .-Color[class*=-Cyan] { 155 | color: #60C6C8 156 | } 157 | div.highlight .-Color[class*=-White] { 158 | color: #C5C1B4 159 | } 160 | div.highlight .-Color[class*=-BGBlack] { 161 | background-color: #3E424D 162 | } 163 | 
div.highlight .-Color[class*=-BGRed] { 164 | background-color: #E75C58 165 | } 166 | div.highlight .-Color[class*=-BGGreen] { 167 | background-color: #00A250 168 | } 169 | div.highlight .-Color[class*=-BGYellow] { 170 | background-color: yellow 171 | } 172 | div.highlight .-Color[class*=-BGBlue] { 173 | background-color: #208FFB 174 | } 175 | div.highlight .-Color[class*=-BGMagenta] { 176 | background-color: #D160C4 177 | } 178 | div.highlight .-Color[class*=-BGCyan] { 179 | background-color: #60C6C8 180 | } 181 | div.highlight .-Color[class*=-BGWhite] { 182 | background-color: #C5C1B4 183 | } 184 | -------------------------------------------------------------------------------- /_build/html/_static/panels-main.c949a650a448cc0ae9fd3441c0e17fb0.css: -------------------------------------------------------------------------------- 1 | details.dropdown .summary-title{padding-right:3em !important;-moz-user-select:none;-ms-user-select:none;-webkit-user-select:none;user-select:none}details.dropdown:hover{cursor:pointer}details.dropdown .summary-content{cursor:default}details.dropdown summary{list-style:none;padding:1em}details.dropdown summary .octicon.no-title{vertical-align:middle}details.dropdown[open] summary .octicon.no-title{visibility:hidden}details.dropdown summary::-webkit-details-marker{display:none}details.dropdown summary:focus{outline:none}details.dropdown summary:hover .summary-up svg,details.dropdown summary:hover .summary-down svg{opacity:1}details.dropdown .summary-up svg,details.dropdown .summary-down svg{display:block;opacity:.6}details.dropdown .summary-up,details.dropdown .summary-down{pointer-events:none;position:absolute;right:1em;top:.75em}details.dropdown[open] .summary-down{visibility:hidden}details.dropdown:not([open]) .summary-up{visibility:hidden}details.dropdown.fade-in[open] summary~*{-moz-animation:panels-fade-in .5s ease-in-out;-webkit-animation:panels-fade-in .5s ease-in-out;animation:panels-fade-in .5s 
ease-in-out}details.dropdown.fade-in-slide-down[open] summary~*{-moz-animation:panels-fade-in .5s ease-in-out, panels-slide-down .5s ease-in-out;-webkit-animation:panels-fade-in .5s ease-in-out, panels-slide-down .5s ease-in-out;animation:panels-fade-in .5s ease-in-out, panels-slide-down .5s ease-in-out}@keyframes panels-fade-in{0%{opacity:0}100%{opacity:1}}@keyframes panels-slide-down{0%{transform:translate(0, -10px)}100%{transform:translate(0, 0)}}.octicon{display:inline-block;fill:currentColor;vertical-align:text-top}.tabbed-content{box-shadow:0 -.0625rem var(--tabs-color-overline),0 .0625rem var(--tabs-color-underline);display:none;order:99;padding-bottom:.75rem;padding-top:.75rem;width:100%}.tabbed-content>:first-child{margin-top:0 !important}.tabbed-content>:last-child{margin-bottom:0 !important}.tabbed-content>.tabbed-set{margin:0}.tabbed-set{border-radius:.125rem;display:flex;flex-wrap:wrap;margin:1em 0;position:relative}.tabbed-set>input{opacity:0;position:absolute}.tabbed-set>input:checked+label{border-color:var(--tabs-color-label-active);color:var(--tabs-color-label-active)}.tabbed-set>input:checked+label+.tabbed-content{display:block}.tabbed-set>input:focus+label{outline-style:auto}.tabbed-set>input:not(.focus-visible)+label{outline:none;-webkit-tap-highlight-color:transparent}.tabbed-set>label{border-bottom:.125rem solid transparent;color:var(--tabs-color-label-inactive);cursor:pointer;font-size:var(--tabs-size-label);font-weight:700;padding:1em 1.25em .5em;transition:color 250ms;width:auto;z-index:1}html .tabbed-set>label:hover{color:var(--tabs-color-label-active)} 2 | -------------------------------------------------------------------------------- /_build/html/_static/panels-variables.06eb56fa6e07937060861dad626602ad.css: -------------------------------------------------------------------------------- 1 | :root { 2 | --tabs-color-label-active: hsla(231, 99%, 66%, 1); 3 | --tabs-color-label-inactive: rgba(178, 206, 245, 0.62); 4 | 
--tabs-color-overline: rgb(207, 236, 238); 5 | --tabs-color-underline: rgb(207, 236, 238); 6 | --tabs-size-label: 1rem; 7 | } -------------------------------------------------------------------------------- /_build/html/_static/plus.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SocratesClub/statsbook/c5a012a41d83a0b48ce414d1e7b418cddf5ee90a/_build/html/_static/plus.png -------------------------------------------------------------------------------- /_build/html/_static/pygments.css: -------------------------------------------------------------------------------- 1 | .highlight .hll { background-color: #ffffcc } 2 | .highlight { background: #eeffcc; } 3 | .highlight .c { color: #408090; font-style: italic } /* Comment */ 4 | .highlight .err { border: 1px solid #FF0000 } /* Error */ 5 | .highlight .k { color: #007020; font-weight: bold } /* Keyword */ 6 | .highlight .o { color: #666666 } /* Operator */ 7 | .highlight .ch { color: #408090; font-style: italic } /* Comment.Hashbang */ 8 | .highlight .cm { color: #408090; font-style: italic } /* Comment.Multiline */ 9 | .highlight .cp { color: #007020 } /* Comment.Preproc */ 10 | .highlight .cpf { color: #408090; font-style: italic } /* Comment.PreprocFile */ 11 | .highlight .c1 { color: #408090; font-style: italic } /* Comment.Single */ 12 | .highlight .cs { color: #408090; background-color: #fff0f0 } /* Comment.Special */ 13 | .highlight .gd { color: #A00000 } /* Generic.Deleted */ 14 | .highlight .ge { font-style: italic } /* Generic.Emph */ 15 | .highlight .gr { color: #FF0000 } /* Generic.Error */ 16 | .highlight .gh { color: #000080; font-weight: bold } /* Generic.Heading */ 17 | .highlight .gi { color: #00A000 } /* Generic.Inserted */ 18 | .highlight .go { color: #333333 } /* Generic.Output */ 19 | .highlight .gp { color: #c65d09; font-weight: bold } /* Generic.Prompt */ 20 | .highlight .gs { font-weight: bold } /* Generic.Strong */ 21 | 
.highlight .gu { color: #800080; font-weight: bold } /* Generic.Subheading */ 22 | .highlight .gt { color: #0044DD } /* Generic.Traceback */ 23 | .highlight .kc { color: #007020; font-weight: bold } /* Keyword.Constant */ 24 | .highlight .kd { color: #007020; font-weight: bold } /* Keyword.Declaration */ 25 | .highlight .kn { color: #007020; font-weight: bold } /* Keyword.Namespace */ 26 | .highlight .kp { color: #007020 } /* Keyword.Pseudo */ 27 | .highlight .kr { color: #007020; font-weight: bold } /* Keyword.Reserved */ 28 | .highlight .kt { color: #902000 } /* Keyword.Type */ 29 | .highlight .m { color: #208050 } /* Literal.Number */ 30 | .highlight .s { color: #4070a0 } /* Literal.String */ 31 | .highlight .na { color: #4070a0 } /* Name.Attribute */ 32 | .highlight .nb { color: #007020 } /* Name.Builtin */ 33 | .highlight .nc { color: #0e84b5; font-weight: bold } /* Name.Class */ 34 | .highlight .no { color: #60add5 } /* Name.Constant */ 35 | .highlight .nd { color: #555555; font-weight: bold } /* Name.Decorator */ 36 | .highlight .ni { color: #d55537; font-weight: bold } /* Name.Entity */ 37 | .highlight .ne { color: #007020 } /* Name.Exception */ 38 | .highlight .nf { color: #06287e } /* Name.Function */ 39 | .highlight .nl { color: #002070; font-weight: bold } /* Name.Label */ 40 | .highlight .nn { color: #0e84b5; font-weight: bold } /* Name.Namespace */ 41 | .highlight .nt { color: #062873; font-weight: bold } /* Name.Tag */ 42 | .highlight .nv { color: #bb60d5 } /* Name.Variable */ 43 | .highlight .ow { color: #007020; font-weight: bold } /* Operator.Word */ 44 | .highlight .w { color: #bbbbbb } /* Text.Whitespace */ 45 | .highlight .mb { color: #208050 } /* Literal.Number.Bin */ 46 | .highlight .mf { color: #208050 } /* Literal.Number.Float */ 47 | .highlight .mh { color: #208050 } /* Literal.Number.Hex */ 48 | .highlight .mi { color: #208050 } /* Literal.Number.Integer */ 49 | .highlight .mo { color: #208050 } /* Literal.Number.Oct */ 50 | .highlight 
.sa { color: #4070a0 } /* Literal.String.Affix */ 51 | .highlight .sb { color: #4070a0 } /* Literal.String.Backtick */ 52 | .highlight .sc { color: #4070a0 } /* Literal.String.Char */ 53 | .highlight .dl { color: #4070a0 } /* Literal.String.Delimiter */ 54 | .highlight .sd { color: #4070a0; font-style: italic } /* Literal.String.Doc */ 55 | .highlight .s2 { color: #4070a0 } /* Literal.String.Double */ 56 | .highlight .se { color: #4070a0; font-weight: bold } /* Literal.String.Escape */ 57 | .highlight .sh { color: #4070a0 } /* Literal.String.Heredoc */ 58 | .highlight .si { color: #70a0d0; font-style: italic } /* Literal.String.Interpol */ 59 | .highlight .sx { color: #c65d09 } /* Literal.String.Other */ 60 | .highlight .sr { color: #235388 } /* Literal.String.Regex */ 61 | .highlight .s1 { color: #4070a0 } /* Literal.String.Single */ 62 | .highlight .ss { color: #517918 } /* Literal.String.Symbol */ 63 | .highlight .bp { color: #007020 } /* Name.Builtin.Pseudo */ 64 | .highlight .fm { color: #06287e } /* Name.Function.Magic */ 65 | .highlight .vc { color: #bb60d5 } /* Name.Variable.Class */ 66 | .highlight .vg { color: #bb60d5 } /* Name.Variable.Global */ 67 | .highlight .vi { color: #bb60d5 } /* Name.Variable.Instance */ 68 | .highlight .vm { color: #bb60d5 } /* Name.Variable.Magic */ 69 | .highlight .il { color: #208050 } /* Literal.Number.Integer.Long */ -------------------------------------------------------------------------------- /_build/html/_static/sphinx-book-theme.12a9622fbb08dcb3a2a40b2c02b83a57.js: -------------------------------------------------------------------------------- 1 | var initTriggerNavBar=()=>{if($(window).width()<768){$("#navbar-toggler").trigger("click")}} 2 | var scrollToActive=()=>{var navbar=document.getElementById('site-navigation') 3 | var active_pages=navbar.querySelectorAll(".active") 4 | var active_page=active_pages[active_pages.length-1] 5 | 
if(active_page!==undefined&&active_page.offsetTop>($(window).height()*.5)){navbar.scrollTop=active_page.offsetTop-($(window).height()*.2)}} 6 | var sbRunWhenDOMLoaded=cb=>{if(document.readyState!='loading'){cb()}else if(document.addEventListener){document.addEventListener('DOMContentLoaded',cb)}else{document.attachEvent('onreadystatechange',function(){if(document.readyState=='complete')cb()})}} 7 | function toggleFullScreen(){var navToggler=$("#navbar-toggler");if(!document.fullscreenElement){document.documentElement.requestFullscreen();if(!navToggler.hasClass("collapsed")){navToggler.click();}}else{if(document.exitFullscreen){document.exitFullscreen();if(navToggler.hasClass("collapsed")){navToggler.click();}}}} 8 | var initTooltips=()=>{$(document).ready(function(){$('[data-toggle="tooltip"]').tooltip();});} 9 | var initTocHide=()=>{var scrollTimeout;var throttle=200;var tocHeight=$("#bd-toc-nav").outerHeight(true)+$(".bd-toc").outerHeight(true);var hideTocAfter=tocHeight+200;var checkTocScroll=function(){var margin_content=$(".margin, .tag_margin, .full-width, .full_width, .tag_full-width, .tag_full_width, .sidebar, .tag_sidebar, .popout, .tag_popout");margin_content.each((index,item)=>{var topOffset=$(item).offset().top-$(window).scrollTop();var bottomOffset=topOffset+$(item).outerHeight(true);var topOverlaps=((topOffset>=0)&&(topOffset=0)&&(bottomOffset20){$("div.bd-toc").removeClass("show") 10 | return false}else{$("div.bd-toc").addClass("show")};})};var manageScrolledClassOnBody=function(){if(window.scrollY>0){document.body.classList.add("scrolled");}else{document.body.classList.remove("scrolled");}} 11 | $(window).on('scroll',function(){if(!scrollTimeout){scrollTimeout=setTimeout(function(){checkTocScroll();manageScrolledClassOnBody();scrollTimeout=null;},throttle);}});} 12 | var initThebeSBT=()=>{var title=$("div.section h1")[0] 13 | if(!$(title).next().hasClass("thebe-launch-button")){$("").insertAfter($(title))} 14 | initThebe();} 15 | 
sbRunWhenDOMLoaded(initTooltips) 16 | sbRunWhenDOMLoaded(initTriggerNavBar) 17 | sbRunWhenDOMLoaded(scrollToActive) 18 | sbRunWhenDOMLoaded(initTocHide) 19 | -------------------------------------------------------------------------------- /_build/html/_static/sphinx-thebe.css: -------------------------------------------------------------------------------- 1 | /* Thebelab Buttons */ 2 | .thebelab-button { 3 | z-index: 999; 4 | display: inline-block; 5 | padding: 0.35em 1.2em; 6 | margin: 0px 1px; 7 | border-radius: 0.12em; 8 | box-sizing: border-box; 9 | text-decoration: none; 10 | font-family: 'Roboto', sans-serif; 11 | font-weight: 300; 12 | text-align: center; 13 | transition: all 0.2s; 14 | background-color: #dddddd; 15 | border: 0.05em solid white; 16 | color: #000000; 17 | } 18 | 19 | .thebelab-button:hover{ 20 | border: 0.05em solid black; 21 | background-color: #fcfcfc; 22 | } 23 | 24 | .thebe-launch-button { 25 | height: 2.2em; 26 | font-size: .8em; 27 | border: 1px black solid; 28 | } 29 | 30 | /* Thebelab Cell */ 31 | .thebelab-cell pre { 32 | background: none; 33 | } 34 | 35 | .thebelab-cell .thebelab-input { 36 | padding-left: 1em; 37 | margin-bottom: .5em; 38 | margin-top: .5em; 39 | } 40 | 41 | .thebelab-cell .jp-OutputArea { 42 | margin-top: .5em; 43 | margin-left: 1em; 44 | } 45 | 46 | button.thebelab-button.thebelab-run-button { 47 | margin-left: 1.5em; 48 | margin-bottom: .5em; 49 | } 50 | 51 | /* Loading button */ 52 | button.thebe-launch-button div.spinner { 53 | float: left; 54 | margin-right: 1em; 55 | } 56 | 57 | /* Remove the spinner when thebelab is ready */ 58 | .thebe-launch-button.thebe-status-ready .spinner { 59 | display: none; 60 | } 61 | 62 | .thebe-launch-button span.status { 63 | font-family: monospace; 64 | font-weight: bold; 65 | } 66 | 67 | .thebe-launch-button.thebe-status-ready span.status { 68 | color: green; 69 | } 70 | 71 | .spinner { 72 | height: 2em; 73 | text-align: center; 74 | font-size: 0.7em; 75 | } 76 | 77 | 
.spinner > div { 78 | background-color: #F37726; 79 | height: 100%; 80 | width: 6px; 81 | display: inline-block; 82 | 83 | -webkit-animation: sk-stretchdelay 1.2s infinite ease-in-out; 84 | animation: sk-stretchdelay 1.2s infinite ease-in-out; 85 | } 86 | 87 | .spinner .rect2 { 88 | -webkit-animation-delay: -1.1s; 89 | animation-delay: -1.1s; 90 | } 91 | 92 | .spinner .rect3 { 93 | -webkit-animation-delay: -1.0s; 94 | animation-delay: -1.0s; 95 | } 96 | 97 | .spinner .rect4 { 98 | -webkit-animation-delay: -0.9s; 99 | animation-delay: -0.9s; 100 | } 101 | 102 | .spinner .rect5 { 103 | -webkit-animation-delay: -0.8s; 104 | animation-delay: -0.8s; 105 | } 106 | 107 | @-webkit-keyframes sk-stretchdelay { 108 | 0%, 40%, 100% { -webkit-transform: scaleY(0.4) } 109 | 20% { -webkit-transform: scaleY(1.0) } 110 | } 111 | 112 | @keyframes sk-stretchdelay { 113 | 0%, 40%, 100% { 114 | transform: scaleY(0.4); 115 | -webkit-transform: scaleY(0.4); 116 | } 20% { 117 | transform: scaleY(1.0); 118 | -webkit-transform: scaleY(1.0); 119 | } 120 | } 121 | -------------------------------------------------------------------------------- /_build/html/_static/sphinx-thebe.js: -------------------------------------------------------------------------------- 1 | /** 2 | * Add attributes to Thebe blocks to initialize thebe properly 3 | */ 4 | 5 | var initThebe = () => { 6 | // If Thebelab hasn't loaded, wait a bit and try again. This 7 | // happens because we load ClipboardJS asynchronously. 
8 | if (window.thebelab === undefined) { 9 | console.log("thebe not loaded, retrying..."); 10 | setTimeout(initThebe, 500) 11 | return 12 | } 13 | 14 | console.log("Adding thebe to code cells..."); 15 | 16 | // Load thebe config in case we want to update it as some point 17 | thebe_config = $('script[type="text/x-thebe-config"]')[0] 18 | 19 | 20 | // If we already detect a Thebe cell, don't re-run 21 | if (document.querySelectorAll('div.thebe-cell').length > 0) { 22 | return; 23 | } 24 | 25 | // Update thebe buttons with loading message 26 | $(".thebe-launch-button").each((ii, button) => { 27 | button.innerHTML = ` 28 |
29 |
30 |
31 |
32 |
33 |
34 | `; 35 | }) 36 | 37 | // Set thebe event hooks 38 | var thebeStatus; 39 | thebelab.on("status", function (evt, data) { 40 | console.log("Status changed:", data.status, data.message); 41 | 42 | $(".thebe-launch-button ") 43 | .removeClass("thebe-status-" + thebeStatus) 44 | .addClass("thebe-status-" + data.status) 45 | .find(".loading-text").html("Launching from mybinder.org: " + data.status + ""); 46 | 47 | // Now update our thebe status 48 | thebeStatus = data.status; 49 | 50 | // Find any cells with an initialization tag and ask thebe to run them when ready 51 | if (data.status === "ready") { 52 | var thebeInitCells = document.querySelectorAll('.thebe-init, .tag_thebe-init'); 53 | thebeInitCells.forEach((cell) => { 54 | console.log("Initializing Thebe with cell: " + cell.id); 55 | cell.querySelector('.thebelab-run-button').click(); 56 | }); 57 | } 58 | }); 59 | 60 | 61 | // Find all code cells, replace with Thebe interactive code cells 62 | const codeCells = document.querySelectorAll(thebe_selector) 63 | codeCells.forEach((codeCell, index) => { 64 | const codeCellId = index => `codecell${index}`; 65 | codeCell.id = codeCellId(index); 66 | codeCellText = codeCell.querySelector(thebe_selector_input); 67 | codeCellOutput = codeCell.querySelector(thebe_selector_output); 68 | 69 | // Clean up the language to make it work w/ CodeMirror and add it to the cell 70 | dataLanguage = detectLanguage(kernelName); 71 | 72 | if (codeCellText) { 73 | codeCellText.setAttribute('data-language', dataLanguage); 74 | codeCellText.setAttribute('data-executable', 'true'); 75 | 76 | // If we had an output, insert it just after the `pre` cell 77 | if (codeCellOutput) { 78 | $(codeCellOutput).attr("data-output", ""); 79 | $(codeCellOutput).insertAfter(codeCellText); 80 | } 81 | } 82 | }); 83 | 84 | // Init thebe 85 | thebelab.bootstrap(); 86 | } 87 | 88 | // Helper function to munge the language name 89 | var detectLanguage = (language) => { 90 | if (language.indexOf('python') > -1) { 
91 | language = "python"; 92 | } else if (language === 'ir') { 93 | language = "r" 94 | } 95 | return language; 96 | } 97 | -------------------------------------------------------------------------------- /_build/html/_static/togglebutton.css: -------------------------------------------------------------------------------- 1 | /* Visibility of the target */ 2 | .toggle, div.admonition.toggle .admonition-title ~ * { 3 | transition: opacity .5s, height .5s; 4 | } 5 | 6 | .toggle-hidden:not(.admonition) { 7 | visibility: hidden; 8 | opacity: 0; 9 | height: 1.5em; 10 | margin: 0px; 11 | padding: 0px; 12 | } 13 | 14 | /* Overrides for admonition toggles */ 15 | 16 | /* Titles should cut off earlier to avoid overlapping w/ button */ 17 | div.admonition.toggle p.admonition-title { 18 | padding-right: 25%; 19 | } 20 | 21 | /* hides all the content of a page until de-toggled */ 22 | div.admonition.toggle-hidden .admonition-title ~ * { 23 | height: 0; 24 | margin: 0; 25 | float: left; /* so they overlap when hidden */ 26 | opacity: 0; 27 | visibility: hidden; 28 | } 29 | 30 | /* Toggle buttons inside admonitions so we see the title */ 31 | .toggle.admonition { 32 | position: relative; 33 | } 34 | 35 | .toggle.admonition.admonition-title:after { 36 | content: "" !important; 37 | } 38 | 39 | /* Note, we'll over-ride this in sphinx-book-theme */ 40 | .toggle.admonition button.toggle-button { 41 | margin-right: 0.5em; 42 | right: 0em; 43 | position: absolute; 44 | top: .2em; 45 | } 46 | 47 | /* General button style */ 48 | button.toggle-button { 49 | background: #999; 50 | border: none; 51 | z-index: 100; 52 | right: -2.5em; 53 | margin-left: -2.5em; /* A hack to keep code blocks from being pushed left */ 54 | position: relative; 55 | float: right; 56 | border-radius: 100%; 57 | width: 1.5em; 58 | height: 1.5em; 59 | padding: 0px; 60 | } 61 | 62 | @media (min-width: 768px) { 63 | button.toggle-button.toggle-button-hidden:before { 64 | content: "Click to show"; 65 | position: 
absolute; 66 | font-size: .8em; 67 | left: -6.5em; 68 | bottom: .4em; 69 | } 70 | } 71 | 72 | 73 | /* Plus / minus toggles */ 74 | .toggle-button .bar { 75 | background-color: white; 76 | position: absolute; 77 | left: 15%; 78 | top: 43%; 79 | width: 16px; 80 | height: 3px; 81 | } 82 | 83 | .toggle-button .vertical { 84 | transition: all 0.25s ease-in-out; 85 | transform-origin: center; 86 | } 87 | 88 | .toggle-button-hidden .vertical { 89 | transform: rotate(-90deg); 90 | } -------------------------------------------------------------------------------- /_build/html/_static/togglebutton.js: -------------------------------------------------------------------------------- 1 | var initToggleItems = () => { 2 | var itemsToToggle = document.querySelectorAll(togglebuttonSelector); 3 | console.log(itemsToToggle, togglebuttonSelector) 4 | // Add the button to each admonition and hook up a callback to toggle visibility 5 | itemsToToggle.forEach((item, index) => { 6 | var toggleID = `toggle-${index}`; 7 | var buttonID = `button-${toggleID}`; 8 | var collapseButton = ` 9 | `; 13 | 14 | item.setAttribute('id', toggleID); 15 | 16 | if (!item.classList.contains("toggle")){ 17 | item.classList.add("toggle"); 18 | } 19 | 20 | // If it's an admonition block, then we'll add the button inside 21 | if (item.classList.contains("admonition")) { 22 | item.insertAdjacentHTML("afterbegin", collapseButton); 23 | } else { 24 | item.insertAdjacentHTML('beforebegin', collapseButton); 25 | } 26 | 27 | thisButton = $(`#${buttonID}`); 28 | thisButton.on('click', toggleClickHandler); 29 | if (!item.classList.contains("toggle-shown")) { 30 | toggleHidden(thisButton[0]); 31 | } 32 | }) 33 | }; 34 | 35 | // This should simply add / remove the collapsed class and change the button text 36 | var toggleHidden = (button) => { 37 | target = button.dataset['target'] 38 | var itemToToggle = document.getElementById(target); 39 | if (itemToToggle.classList.contains("toggle-hidden")) { 40 | 
itemToToggle.classList.remove("toggle-hidden"); 41 | button.classList.remove("toggle-button-hidden"); 42 | } else { 43 | itemToToggle.classList.add("toggle-hidden"); 44 | button.classList.add("toggle-button-hidden"); 45 | } 46 | } 47 | 48 | var toggleClickHandler = (click) => { 49 | button = document.getElementById(click.target.dataset['button']); 50 | toggleHidden(button); 51 | } 52 | 53 | // If we want to blanket-add toggle classes to certain cells 54 | var addToggleToSelector = () => { 55 | const selector = ""; 56 | if (selector.length > 0) { 57 | document.querySelectorAll(selector).forEach((item) => { 58 | item.classList.add("toggle"); 59 | }) 60 | } 61 | } 62 | 63 | // Helper function to run when the DOM is finished 64 | const sphinxToggleRunWhenDOMLoaded = cb => { 65 | if (document.readyState != 'loading') { 66 | cb() 67 | } else if (document.addEventListener) { 68 | document.addEventListener('DOMContentLoaded', cb) 69 | } else { 70 | document.attachEvent('onreadystatechange', function() { 71 | if (document.readyState == 'complete') cb() 72 | }) 73 | } 74 | } 75 | sphinxToggleRunWhenDOMLoaded(addToggleToSelector) 76 | sphinxToggleRunWhenDOMLoaded(initToggleItems) 77 | -------------------------------------------------------------------------------- /_build/html/_static/vendor/fontawesome/5.13.0/LICENSE.txt: -------------------------------------------------------------------------------- 1 | Font Awesome Free License 2 | ------------------------- 3 | 4 | Font Awesome Free is free, open source, and GPL friendly. You can use it for 5 | commercial projects, open source projects, or really almost whatever you want. 6 | Full Font Awesome Free license: https://fontawesome.com/license/free. 7 | 8 | # Icons: CC BY 4.0 License (https://creativecommons.org/licenses/by/4.0/) 9 | In the Font Awesome Free download, the CC BY 4.0 license applies to all icons 10 | packaged as SVG and JS file types. 
11 | 12 | # Fonts: SIL OFL 1.1 License (https://scripts.sil.org/OFL) 13 | In the Font Awesome Free download, the SIL OFL license applies to all icons 14 | packaged as web and desktop font files. 15 | 16 | # Code: MIT License (https://opensource.org/licenses/MIT) 17 | In the Font Awesome Free download, the MIT license applies to all non-font and 18 | non-icon files. 19 | 20 | # Attribution 21 | Attribution is required by MIT, SIL OFL, and CC BY licenses. Downloaded Font 22 | Awesome Free files already contain embedded comments with sufficient 23 | attribution, so you shouldn't need to do anything additional when using these 24 | files normally. 25 | 26 | We've kept attribution comments terse, so we ask that you do not actively work 27 | to remove them from files, especially code. They're a great way for folks to 28 | learn about Font Awesome. 29 | 30 | # Brand Icons 31 | All brand icons are trademarks of their respective owners. The use of these 32 | trademarks does not indicate endorsement of the trademark holder by Font 33 | Awesome, nor vice versa. 
**Please do not use brand logos for any purpose except 34 | to represent the company, product, or service to which they refer.** 35 | -------------------------------------------------------------------------------- /_build/html/_static/vendor/fontawesome/5.13.0/webfonts/fa-brands-400.eot: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SocratesClub/statsbook/c5a012a41d83a0b48ce414d1e7b418cddf5ee90a/_build/html/_static/vendor/fontawesome/5.13.0/webfonts/fa-brands-400.eot -------------------------------------------------------------------------------- /_build/html/_static/vendor/fontawesome/5.13.0/webfonts/fa-brands-400.ttf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SocratesClub/statsbook/c5a012a41d83a0b48ce414d1e7b418cddf5ee90a/_build/html/_static/vendor/fontawesome/5.13.0/webfonts/fa-brands-400.ttf -------------------------------------------------------------------------------- /_build/html/_static/vendor/fontawesome/5.13.0/webfonts/fa-brands-400.woff: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SocratesClub/statsbook/c5a012a41d83a0b48ce414d1e7b418cddf5ee90a/_build/html/_static/vendor/fontawesome/5.13.0/webfonts/fa-brands-400.woff -------------------------------------------------------------------------------- /_build/html/_static/vendor/fontawesome/5.13.0/webfonts/fa-brands-400.woff2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SocratesClub/statsbook/c5a012a41d83a0b48ce414d1e7b418cddf5ee90a/_build/html/_static/vendor/fontawesome/5.13.0/webfonts/fa-brands-400.woff2 -------------------------------------------------------------------------------- /_build/html/_static/vendor/fontawesome/5.13.0/webfonts/fa-regular-400.eot: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/SocratesClub/statsbook/c5a012a41d83a0b48ce414d1e7b418cddf5ee90a/_build/html/_static/vendor/fontawesome/5.13.0/webfonts/fa-regular-400.eot -------------------------------------------------------------------------------- /_build/html/_static/vendor/fontawesome/5.13.0/webfonts/fa-regular-400.ttf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SocratesClub/statsbook/c5a012a41d83a0b48ce414d1e7b418cddf5ee90a/_build/html/_static/vendor/fontawesome/5.13.0/webfonts/fa-regular-400.ttf -------------------------------------------------------------------------------- /_build/html/_static/vendor/fontawesome/5.13.0/webfonts/fa-regular-400.woff: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SocratesClub/statsbook/c5a012a41d83a0b48ce414d1e7b418cddf5ee90a/_build/html/_static/vendor/fontawesome/5.13.0/webfonts/fa-regular-400.woff -------------------------------------------------------------------------------- /_build/html/_static/vendor/fontawesome/5.13.0/webfonts/fa-regular-400.woff2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SocratesClub/statsbook/c5a012a41d83a0b48ce414d1e7b418cddf5ee90a/_build/html/_static/vendor/fontawesome/5.13.0/webfonts/fa-regular-400.woff2 -------------------------------------------------------------------------------- /_build/html/_static/vendor/fontawesome/5.13.0/webfonts/fa-solid-900.eot: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SocratesClub/statsbook/c5a012a41d83a0b48ce414d1e7b418cddf5ee90a/_build/html/_static/vendor/fontawesome/5.13.0/webfonts/fa-solid-900.eot -------------------------------------------------------------------------------- 
/_build/html/_static/vendor/fontawesome/5.13.0/webfonts/fa-solid-900.ttf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SocratesClub/statsbook/c5a012a41d83a0b48ce414d1e7b418cddf5ee90a/_build/html/_static/vendor/fontawesome/5.13.0/webfonts/fa-solid-900.ttf -------------------------------------------------------------------------------- /_build/html/_static/vendor/fontawesome/5.13.0/webfonts/fa-solid-900.woff: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SocratesClub/statsbook/c5a012a41d83a0b48ce414d1e7b418cddf5ee90a/_build/html/_static/vendor/fontawesome/5.13.0/webfonts/fa-solid-900.woff -------------------------------------------------------------------------------- /_build/html/_static/vendor/fontawesome/5.13.0/webfonts/fa-solid-900.woff2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SocratesClub/statsbook/c5a012a41d83a0b48ce414d1e7b418cddf5ee90a/_build/html/_static/vendor/fontawesome/5.13.0/webfonts/fa-solid-900.woff2 -------------------------------------------------------------------------------- /_build/html/_static/webpack-macros.html: -------------------------------------------------------------------------------- 1 | 2 | {% macro head_pre_icons() %} 3 | 5 | 7 | 9 | {% endmacro %} 10 | 11 | {% macro head_pre_fonts() %} 12 | {% endmacro %} 13 | 14 | {% macro head_pre_bootstrap() %} 15 | 16 | 17 | {% endmacro %} 18 | 19 | {% macro head_js_preload() %} 20 | 21 | {% endmacro %} 22 | 23 | {% macro body_post() %} 24 | 25 | {% endmacro %} -------------------------------------------------------------------------------- /_build/html/index.html: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /_build/html/objects.inv: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/SocratesClub/statsbook/c5a012a41d83a0b48ce414d1e7b418cddf5ee90a/_build/html/objects.inv -------------------------------------------------------------------------------- /_build/html/reports/2-3-Python编程基础.log: -------------------------------------------------------------------------------- 1 | Traceback (most recent call last): 2 | File "/opt/anaconda3/lib/python3.7/site-packages/jupyter_cache/executors/utils.py", line 56, in single_nb_execution 3 | record_timing=False, 4 | File "/opt/anaconda3/lib/python3.7/site-packages/nbclient/client.py", line 1087, in execute 5 | return NotebookClient(nb=nb, resources=resources, km=km, **kwargs).execute() 6 | File "/opt/anaconda3/lib/python3.7/site-packages/nbclient/util.py", line 74, in wrapped 7 | return just_run(coro(*args, **kwargs)) 8 | File "/opt/anaconda3/lib/python3.7/site-packages/nbclient/util.py", line 53, in just_run 9 | return loop.run_until_complete(coro) 10 | File "/opt/anaconda3/lib/python3.7/asyncio/base_events.py", line 583, in run_until_complete 11 | return future.result() 12 | File "/opt/anaconda3/lib/python3.7/site-packages/nbclient/client.py", line 541, in async_execute 13 | cell, index, execution_count=self.code_cells_executed + 1 14 | File "/opt/anaconda3/lib/python3.7/site-packages/nbclient/client.py", line 832, in async_execute_cell 15 | self._check_raise_for_error(cell, exec_reply) 16 | File "/opt/anaconda3/lib/python3.7/site-packages/nbclient/client.py", line 740, in _check_raise_for_error 17 | raise CellExecutionError.from_cell_and_msg(cell, exec_reply['content']) 18 | nbclient.exceptions.CellExecutionError: An error occurred while executing the following cell: 19 | ------------------ 20 | "A" + 1 21 | ------------------ 22 | 23 | --------------------------------------------------------------------------- 24 | TypeError Traceback (most recent call last) 25 |  in  26 | ----> 1 "A" + 1 
27 |  28 | TypeError: can only concatenate str (not "int") to str 29 | TypeError: can only concatenate str (not "int") to str 30 | 31 | -------------------------------------------------------------------------------- /_build/html/reports/3-7-参数估计.log: -------------------------------------------------------------------------------- 1 | Traceback (most recent call last): 2 | File "/opt/anaconda3/lib/python3.7/site-packages/jupyter_cache/executors/utils.py", line 56, in single_nb_execution 3 | record_timing=False, 4 | File "/opt/anaconda3/lib/python3.7/site-packages/nbclient/client.py", line 1087, in execute 5 | return NotebookClient(nb=nb, resources=resources, km=km, **kwargs).execute() 6 | File "/opt/anaconda3/lib/python3.7/site-packages/nbclient/util.py", line 74, in wrapped 7 | return just_run(coro(*args, **kwargs)) 8 | File "/opt/anaconda3/lib/python3.7/site-packages/nbclient/util.py", line 53, in just_run 9 | return loop.run_until_complete(coro) 10 | File "/opt/anaconda3/lib/python3.7/asyncio/base_events.py", line 583, in run_until_complete 11 | return future.result() 12 | File "/opt/anaconda3/lib/python3.7/site-packages/nbclient/client.py", line 541, in async_execute 13 | cell, index, execution_count=self.code_cells_executed + 1 14 | File "/opt/anaconda3/lib/python3.7/site-packages/nbclient/client.py", line 832, in async_execute_cell 15 | self._check_raise_for_error(cell, exec_reply) 16 | File "/opt/anaconda3/lib/python3.7/site-packages/nbclient/client.py", line 740, in _check_raise_for_error 17 | raise CellExecutionError.from_cell_and_msg(cell, exec_reply['content']) 18 | nbclient.exceptions.CellExecutionError: An error occurred while executing the following cell: 19 | ------------------ 20 | # 标准误差 21 | se = sigma / sp.sqrt(len(fish)) 22 | se 23 | ------------------ 24 | 25 | --------------------------------------------------------------------------- 26 | NameError Traceback (most recent call last) 27 |  in  28 |  1 # 标准误差 29 | ----> 2 se = sigma / 
sp.sqrt(len(fish)) 30 |  3 se 31 | 32 | NameError: name 'sigma' is not defined 33 | NameError: name 'sigma' is not defined 34 | 35 | -------------------------------------------------------------------------------- /_build/html/reports/7-3-Python中的Ridge回归与Lasso回归.log: -------------------------------------------------------------------------------- 1 | Traceback (most recent call last): 2 | File "/opt/anaconda3/lib/python3.7/site-packages/jupyter_cache/executors/utils.py", line 56, in single_nb_execution 3 | record_timing=False, 4 | File "/opt/anaconda3/lib/python3.7/site-packages/nbclient/client.py", line 1087, in execute 5 | return NotebookClient(nb=nb, resources=resources, km=km, **kwargs).execute() 6 | File "/opt/anaconda3/lib/python3.7/site-packages/nbclient/util.py", line 74, in wrapped 7 | return just_run(coro(*args, **kwargs)) 8 | File "/opt/anaconda3/lib/python3.7/site-packages/nbclient/util.py", line 53, in just_run 9 | return loop.run_until_complete(coro) 10 | File "/opt/anaconda3/lib/python3.7/asyncio/base_events.py", line 583, in run_until_complete 11 | return future.result() 12 | File "/opt/anaconda3/lib/python3.7/site-packages/nbclient/client.py", line 541, in async_execute 13 | cell, index, execution_count=self.code_cells_executed + 1 14 | File "/opt/anaconda3/lib/python3.7/site-packages/nbclient/client.py", line 832, in async_execute_cell 15 | self._check_raise_for_error(cell, exec_reply) 16 | File "/opt/anaconda3/lib/python3.7/site-packages/nbclient/client.py", line 740, in _check_raise_for_error 17 | raise CellExecutionError.from_cell_and_msg(cell, exec_reply['content']) 18 | nbclient.exceptions.CellExecutionError: An error occurred while executing the following cell: 19 | ------------------ 20 | # 对不同的 α 值进行 Lasso 回归 21 | lasso_alphas, lasso_coefs, _ = linear_model.lasso_path( 22 | X, y, fit_intercept = False) 23 | ------------------ 24 | 25 | --------------------------------------------------------------------------- 26 | ValueError Traceback 
(most recent call last) 27 |  in  28 |  1 # 对不同的 α 值进行 Lasso 回归 29 |  2 lasso_alphas, lasso_coefs, _ = linear_model.lasso_path( 30 | ----> 3 X, y, fit_intercept = False) 31 |  32 | /opt/anaconda3/lib/python3.7/site-packages/sklearn/linear_model/_coordinate_descent.py in lasso_path(X, y, eps, n_alphas, alphas, precompute, Xy, copy_X, coef_init, verbose, return_n_iter, positive, **params) 33 |  359 positive=positive, 34 |  360 return_n_iter=return_n_iter, 35 | --> 361 **params, 36 |  362 ) 37 |  363  38 | 39 | /opt/anaconda3/lib/python3.7/site-packages/sklearn/linear_model/_coordinate_descent.py in enet_path(X, y, l1_ratio, eps, n_alphas, alphas, precompute, Xy, copy_X, coef_init, verbose, return_n_iter, positive, check_input, **params) 40 |  505  41 |  506 if len(params) > 0: 42 | --> 507 raise ValueError("Unexpected parameters in params", params.keys()) 43 |  508  44 |  509 # We expect X and y to be already Fortran ordered when bypassing 45 | 46 | ValueError: ('Unexpected parameters in params', dict_keys(['fit_intercept'])) 47 | ValueError: ('Unexpected parameters in params', dict_keys(['fit_intercept'])) 48 | 49 | -------------------------------------------------------------------------------- /_build/jupyter_execute/2-2-认识Jupyter-Notebook.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# 第 2 章 Python 与 Jupyter Notebook 基础\n", 8 | "\n", 9 | "## 第 2 节 认识 Jupyter Notebook" 10 | ] 11 | }, 12 | { 13 | "cell_type": "markdown", 14 | "metadata": {}, 15 | "source": [ 16 | "### 3. 
执行代码" 17 | ] 18 | }, 19 | { 20 | "cell_type": "code", 21 | "execution_count": 1, 22 | "metadata": { 23 | "collapsed": true 24 | }, 25 | "outputs": [ 26 | { 27 | "data": { 28 | "text/plain": [ 29 | "1" 30 | ] 31 | }, 32 | "execution_count": 1, 33 | "metadata": {}, 34 | "output_type": "execute_result" 35 | } 36 | ], 37 | "source": [ 38 | "1" 39 | ] 40 | }, 41 | { 42 | "cell_type": "markdown", 43 | "metadata": {}, 44 | "source": [ 45 | "### 5. Markdown 的用法" 46 | ] 47 | }, 48 | { 49 | "cell_type": "markdown", 50 | "metadata": { 51 | "collapsed": true 52 | }, 53 | "source": [ 54 | "```\n", 55 | "# 一级标题\n", 56 | "## 二级标题\n", 57 | "### 三级标题\n", 58 | "#### 四级标题\n", 59 | "\n", 60 | "-----------------\n", 61 | "- 列表条目\n", 62 | "- 列表条目\n", 63 | "\n", 64 | "-----------------\n", 65 | "\n", 66 | "1. 序号条目\n", 67 | "2. 序号条目\n", 68 | "```" 69 | ] 70 | }, 71 | { 72 | "cell_type": "code", 73 | "execution_count": null, 74 | "metadata": { 75 | "collapsed": true 76 | }, 77 | "outputs": [], 78 | "source": [] 79 | } 80 | ], 81 | "metadata": { 82 | "kernelspec": { 83 | "display_name": "Python 3", 84 | "language": "python", 85 | "name": "python3" 86 | }, 87 | "language_info": { 88 | "codemirror_mode": { 89 | "name": "ipython", 90 | "version": 3 91 | }, 92 | "file_extension": ".py", 93 | "mimetype": "text/x-python", 94 | "name": "python", 95 | "nbconvert_exporter": "python", 96 | "pygments_lexer": "ipython3", 97 | "version": "3.7.6" 98 | }, 99 | "toc": { 100 | "base_numbering": 1, 101 | "nav_menu": {}, 102 | "number_sections": false, 103 | "sideBar": true, 104 | "skip_h1_title": false, 105 | "title_cell": "Table of Contents", 106 | "title_sidebar": "Contents", 107 | "toc_cell": false, 108 | "toc_position": {}, 109 | "toc_section_display": true, 110 | "toc_window_display": true 111 | } 112 | }, 113 | "nbformat": 4, 114 | "nbformat_minor": 2 115 | } -------------------------------------------------------------------------------- /_build/jupyter_execute/2-2-认识Jupyter-Notebook.py: 
-------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # coding: utf-8 3 | 4 | # # 第 2 章 Python 与 Jupyter Notebook 基础 5 | # 6 | # ## 第 2 节 认识 Jupyter Notebook 7 | 8 | # ### 3. 执行代码 9 | 10 | # In[1]: 11 | 12 | 13 | 1 14 | 15 | 16 | # ### 5. Markdown 的用法 17 | 18 | # ``` 19 | # # 一级标题 20 | # ## 二级标题 21 | # ### 三级标题 22 | # #### 四级标题 23 | # 24 | # ----------------- 25 | # - 列表条目 26 | # - 列表条目 27 | # 28 | # ----------------- 29 | # 30 | # 1. 序号条目 31 | # 2. 序号条目 32 | # ``` 33 | 34 | # In[ ]: 35 | 36 | 37 | 38 | 39 | -------------------------------------------------------------------------------- /_build/jupyter_execute/2-3-Python编程基础.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # coding: utf-8 3 | 4 | # # 第 3 节 Python 编程基础 5 | # 6 | # ## 第 2 章 Python 与 Jupyter Notebook 基础|用 Python 动手学统计学 7 | 8 | # ### 1. 实现:四则运算 9 | 10 | # In[1]: 11 | 12 | 13 | 1 + 1 14 | 15 | 16 | # In[2]: 17 | 18 | 19 | 5 - 2 20 | 21 | 22 | # In[3]: 23 | 24 | 25 | 2 * 3 26 | 27 | 28 | # In[4]: 29 | 30 | 31 | 2 ** 3 32 | 33 | 34 | # In[5]: 35 | 36 | 37 | 6 / 3 38 | 39 | 40 | # In[6]: 41 | 42 | 43 | 7 // 3 44 | 45 | 46 | # ### 2. 实现:编写注释 47 | 48 | # In[7]: 49 | 50 | 51 | # 1 + 1 52 | 53 | 54 | # ### 3. 实现:数据类型 55 | 56 | # In[8]: 57 | 58 | 59 | "A" 60 | 61 | 62 | # In[9]: 63 | 64 | 65 | 'A' 66 | 67 | 68 | # In[10]: 69 | 70 | 71 | # 字符串 72 | type("A") 73 | 74 | 75 | # In[11]: 76 | 77 | 78 | type('A') 79 | 80 | 81 | # In[12]: 82 | 83 | 84 | # 整型 85 | type(1) 86 | 87 | 88 | # In[13]: 89 | 90 | 91 | # 浮点型 92 | type(2.4) 93 | 94 | 95 | # In[14]: 96 | 97 | 98 | # 布尔型 99 | type(True) 100 | 101 | 102 | # In[15]: 103 | 104 | 105 | # 布尔型 106 | type(False) 107 | 108 | 109 | # In[16]: 110 | 111 | 112 | "A" + 1 113 | 114 | 115 | # ### 4. 
实现:比较运算符 116 | 117 | # In[17]: 118 | 119 | 120 | 1 > 0.89 121 | 122 | 123 | # In[18]: 124 | 125 | 126 | 3 >= 2 127 | 128 | 129 | # In[19]: 130 | 131 | 132 | 3 < 2 133 | 134 | 135 | # In[20]: 136 | 137 | 138 | 3 <= 2 139 | 140 | 141 | # In[21]: 142 | 143 | 144 | 3 == 2 145 | 146 | 147 | # In[22]: 148 | 149 | 150 | 3 != 2 151 | 152 | 153 | # ### 5. 实现:变量 154 | 155 | # In[23]: 156 | 157 | 158 | x = 2 159 | x + 1 160 | 161 | 162 | # ### 6. 实现:函数 163 | 164 | # In[24]: 165 | 166 | 167 | (x + 2) * 4 168 | 169 | 170 | # In[25]: 171 | 172 | 173 | def sample_function(data): 174 | return((data + 2) * 4) 175 | 176 | 177 | # In[26]: 178 | 179 | 180 | sample_function(x) 181 | 182 | 183 | # In[27]: 184 | 185 | 186 | sample_function(3) 187 | 188 | 189 | # In[28]: 190 | 191 | 192 | sample_function(x) + sample_function(3) 193 | 194 | 195 | # ### 7. 实现:类与实例 196 | 197 | # In[29]: 198 | 199 | 200 | class Sample_Class: 201 | def __init__(self, data1, data2): 202 | self.data1 = data1 203 | self.data2 = data2 204 | 205 | def method2(self): 206 | return(self.data1 + self.data2) 207 | 208 | 209 | # In[30]: 210 | 211 | 212 | sample_instance = Sample_Class(data1 = 2, data2 = 3) 213 | 214 | 215 | # In[31]: 216 | 217 | 218 | sample_instance.data1 219 | 220 | 221 | # In[32]: 222 | 223 | 224 | sample_instance.method2() 225 | 226 | 227 | # ### 8. 实现:基于 if 语句的程序分支 228 | 229 | # In[3]: 230 | 231 | 232 | data = 1 233 | if(data < 2): 234 | print("数字小于 2") 235 | else: 236 | print("数字不小于 2") 237 | 238 | 239 | # In[2]: 240 | 241 | 242 | data = 3 243 | if(data < 2): 244 | print("数字小于 2") 245 | else: 246 | print("数字不小于 2") 247 | 248 | 249 | # ### 9. 
实现:基于 for 语句的循环 250 | 251 | # In[35]: 252 | 253 | 254 | range(0, 3) 255 | 256 | 257 | # In[36]: 258 | 259 | 260 | for i in range(0, 3): 261 | print(i) 262 | 263 | 264 | # In[37]: 265 | 266 | 267 | for i in range(0, 3): 268 | print("hello") 269 | 270 | 271 | # In[ ]: 272 | 273 | 274 | 275 | 276 | -------------------------------------------------------------------------------- /_build/jupyter_execute/2-4-认识numpy与pandas.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # coding: utf-8 3 | 4 | # # 第 4 节 认识 numpy 与 pandas 5 | # 6 | # ## 第 2 章 Python 与 Jupyter Notebook 基础|用 Python 动手学统计学 7 | 8 | # ### 1. 导入用于分析的功能 9 | 10 | # In[1]: 11 | 12 | 13 | import numpy as np 14 | import pandas as pd 15 | 16 | 17 | # ### 3. 实现:列表 18 | 19 | # In[2]: 20 | 21 | 22 | sample_list = [1,2,3,4,5] 23 | sample_list 24 | 25 | 26 | # ### 5. 实现:数组 27 | 28 | # In[3]: 29 | 30 | 31 | sample_array = np.array([1,2,3,4,5]) 32 | sample_array 33 | 34 | 35 | # In[4]: 36 | 37 | 38 | sample_array + 2 39 | 40 | 41 | # In[5]: 42 | 43 | 44 | sample_array * 2 45 | 46 | 47 | # In[6]: 48 | 49 | 50 | np.array([1 ,2, "A"]) 51 | 52 | 53 | # In[7]: 54 | 55 | 56 | # 矩阵 57 | sample_array_2 = np.array( 58 | [[1,2,3,4,5], 59 | [6,7,8,9,10]]) 60 | sample_array_2 61 | 62 | 63 | # In[8]: 64 | 65 | 66 | # 获取行数与列数 67 | sample_array_2.shape 68 | 69 | 70 | # ### 6. 实现:生成等差数列的方法 71 | 72 | # In[9]: 73 | 74 | 75 | np.arange(start = 1, stop = 6, step = 1) 76 | 77 | 78 | # In[10]: 79 | 80 | 81 | np.arange(start = 0.1, stop = 0.8, step = 0.2) 82 | 83 | 84 | # In[11]: 85 | 86 | 87 | np.arange(0.1, 0.8, 0.2) 88 | 89 | 90 | # ### 7. 
# Implementation: various ways to create arrays
# Each bare expression below is the final line of a notebook cell and is
# evaluated only so the notebook can display its value.

# In[12]:
# Array whose elements are all the same: "A" repeated 5 times
np.tile("A", 5)

# In[13]:
# Array holding four 0s
np.tile(0, 4)

# In[14]:
# Array containing only zeros (length 4)
np.zeros(4)

# In[15]:
# Two-dimensional array of zeros; the shape is passed as [2,3]
np.zeros([2,3])

# In[16]:
# Array containing only ones (length 3)
np.ones(3)

# ### 8. Implementation: slicing

# In[17]:
# One-dimensional array
d1_array = np.array([1,2,3,4,5])
d1_array

# In[18]:
# Get the first element (index 0)
d1_array[0]

# In[19]:
# Get the elements at indices 1 and 2 (the stop index 3 is excluded)
d1_array[1:3]

# In[20]:
# Two-dimensional array (2 rows of 5 values)
d2_array = np.array(
    [[1,2,3,4,5],
    [6,7,8,9,10]])
d2_array

# In[21]:
# Single element: row index 0, column index 3
d2_array[0, 3]

# In[22]:
# Row index 1, column indices 2 and 3
d2_array[1, 2:4]

# ### 9. Implementation: data frames

# In[23]:
# Build a DataFrame from a dict of column name -> column values.
# `sample_array` is the array created in cell In[3] earlier in this script.
sample_df = pd.DataFrame({
    'col1' : sample_array,
    'col2' : sample_array * 2,
    'col3' : ["A", "B", "C", "D", "E"]
})
print(sample_df)

# In[24]:
# Same frame shown through the notebook's rich display instead of print()
sample_df

# ### 10. Implementation: reading data from a file

# In[25]:
# The CSV path is relative to the current working directory.
file_data = pd.read_csv("2-4-1-sample_data.csv")
print(file_data)

# In[26]:
type(file_data)

# ### 11. Implementation: concatenating data frames

# In[27]:
# Two frames with identical column names, used by both concat examples below.
df_1 = pd.DataFrame({
    'col1' : np.array([1, 2, 3]),
    'col2' : np.array(["A", "B", "C"])
})
df_2 = pd.DataFrame({
    'col1' : np.array([4, 5, 6]),
    'col2' : np.array(["D", "E", "F"])
})

# In[28]:
# Concatenate vertically (rows of df_2 appended below df_1)
print(pd.concat([df_1, df_2]))

# In[29]:
# Concatenate horizontally (axis = 1 places the frames side by side)
print(pd.concat([df_1, df_2], axis = 1))

# ### 12.
实现:数据帧的列操作 237 | 238 | # In[30]: 239 | 240 | 241 | # 对象数据 242 | print(sample_df) 243 | 244 | 245 | # In[31]: 246 | 247 | 248 | # 按列名获取数据 249 | print(sample_df.col2) 250 | 251 | 252 | # In[32]: 253 | 254 | 255 | print(sample_df["col2"]) 256 | 257 | 258 | # In[33]: 259 | 260 | 261 | print(sample_df[["col2", "col3"]]) 262 | 263 | 264 | # In[34]: 265 | 266 | 267 | # 删除指定的列 268 | print(sample_df.drop("col1", axis = 1)) 269 | 270 | 271 | # ### 13. 实现:数据帧的行操作 272 | 273 | # In[35]: 274 | 275 | 276 | # 获取前 3 行 277 | print(sample_df.head(n = 3)) 278 | 279 | 280 | # In[36]: 281 | 282 | 283 | # 获取第 1 行 284 | print(sample_df.query('index == 0')) 285 | 286 | 287 | # In[37]: 288 | 289 | 290 | # 通过多种条件获取数据 291 | print(sample_df.query('col3 == "A"')) 292 | 293 | 294 | # In[38]: 295 | 296 | 297 | # 按 OR 条件获取数据 298 | print(sample_df.query('col3 == "A" | col3 == "D"')) 299 | 300 | 301 | # In[39]: 302 | 303 | 304 | # 按 AND 条件获取数据 305 | print(sample_df.query('col3 == "A" & col1 == 3')) 306 | 307 | 308 | # In[40]: 309 | 310 | 311 | # 同时指定行和列的条件 312 | print(sample_df.query('col3 == "A"')[["col2", "col3"]]) 313 | 314 | 315 | # ### 14. 补充:序列 316 | 317 | # In[41]: 318 | 319 | 320 | type(sample_df) 321 | 322 | 323 | # In[42]: 324 | 325 | 326 | type(sample_df.col1) 327 | 328 | 329 | # In[43]: 330 | 331 | 332 | # 转换为数组 333 | type(np.array(sample_df.col1)) 334 | 335 | 336 | # In[44]: 337 | 338 | 339 | type(sample_df.col1.values) 340 | 341 | 342 | # ### 15. 补充:函数文档 343 | 344 | # In[45]: 345 | 346 | 347 | help(sample_df.query) 348 | 349 | 350 | # In[ ]: 351 | 352 | 353 | 354 | 355 | -------------------------------------------------------------------------------- /_build/jupyter_execute/3-1-使用Python进行描述统计单变量.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # coding: utf-8 3 | 4 | # 5 | # # 第 3 章 使用 Pyhton 进行数据分析 6 | # 7 | # ## 第 1 节 使用 Python 进行描述统计:单变量 8 | # 9 | # 10 | 11 | # ### 1. 
# Statistical analysis and scipy
#
# NOTE: the original cells called sp.sum / sp.mean / sp.var / sp.std / sp.sqrt.
# Those top-level SciPy names were thin aliases of the NumPy functions and have
# been deprecated since SciPy 1.4, so the NumPy originals are called directly.
# The computed values are the same.

# In[1]:
# Libraries for numerical computation
import numpy as np
import scipy as sp  # kept: later cells of this script still reference `sp`

# Set the number of digits shown for floats (IPython `%precision 3` magic;
# `get_ipython` is only defined when running under IPython/Jupyter).
get_ipython().run_line_magic('precision', '3')

# ### 2. Working with single-variable data

# In[2]:
fish_data = np.array([2,3,3,4,4,4,4,5,5,6])
fish_data

# ### 3. Implementation: sum and sample size

# In[3]:
# Sum
np.sum(fish_data)

# In[4]:
# For reference: function form
np.sum(fish_data)

# In[5]:
# For reference: ndarray method form
fish_data.sum()

# In[6]:
# For reference: Python built-in sum
sum(fish_data)

# In[7]:
# Sample size
len(fish_data)

# ### 4. Implementation: mean (expected value)

# In[8]:
# Compute the mean by hand: total divided by sample size
N = len(fish_data)
sum_value = np.sum(fish_data)
mu = sum_value / N
mu

# In[9]:
# Library function for the mean
np.mean(fish_data)

# ### 5. Implementation: sample variance

# In[10]:
# Sample variance: mean squared deviation, divided by N
sigma_2_sample = np.sum((fish_data - mu) ** 2) / N
sigma_2_sample

# In[11]:
# Step-by-step view of the calculation above: the raw data ...
fish_data

# In[12]:
# ... the deviations from the mean ...
fish_data - mu

# In[13]:
# ... the squared deviations ...
(fish_data - mu) ** 2

# In[14]:
# ... and their sum.
np.sum((fish_data - mu) ** 2)

# In[15]:
# Library function for the sample variance (ddof = 0 divides by N)
np.var(fish_data, ddof = 0)

# ### 6. Implementation: unbiased variance

# In[16]:
# Unbiased variance: divide by N - 1 instead of N
sigma_2 = np.sum((fish_data - mu) ** 2) / (N - 1)
sigma_2

# In[17]:
# Unbiased variance via the library function (ddof = 1 divides by N - 1)
np.var(fish_data, ddof = 1)

# ### 7. Implementation: standard deviation

# In[18]:
# Standard deviation: square root of the unbiased variance
sigma = np.sqrt(sigma_2)
sigma

# In[19]:
# Library function for the standard deviation
np.std(fish_data, ddof = 1)

# ### 8.
补充:标准化 165 | 166 | # In[20]: 167 | 168 | 169 | fish_data - mu 170 | 171 | 172 | # In[21]: 173 | 174 | 175 | sp.mean(fish_data - mu) 176 | 177 | 178 | # In[22]: 179 | 180 | 181 | fish_data / sigma 182 | 183 | 184 | # In[23]: 185 | 186 | 187 | sp.std(fish_data / sigma, ddof = 1) 188 | 189 | 190 | # In[24]: 191 | 192 | 193 | standard = (fish_data - mu) / sigma 194 | standard 195 | 196 | 197 | # In[25]: 198 | 199 | 200 | sp.mean(standard) 201 | 202 | 203 | # In[26]: 204 | 205 | 206 | sp.std(standard, ddof = 1) 207 | 208 | 209 | # ### 9. 补充:其他统计量 210 | 211 | # In[27]: 212 | 213 | 214 | # 最大值 215 | sp.amax(fish_data) 216 | 217 | 218 | # In[28]: 219 | 220 | 221 | # 最小值 222 | sp.amin(fish_data) 223 | 224 | 225 | # In[29]: 226 | 227 | 228 | # 中位数 229 | sp.median(fish_data) 230 | 231 | 232 | # In[30]: 233 | 234 | 235 | fish_data_2 = np.array([2,3,3,4,4,4,4,5,5,100]) 236 | 237 | 238 | # In[31]: 239 | 240 | 241 | sp.mean(fish_data_2) 242 | 243 | 244 | # In[32]: 245 | 246 | 247 | sp.median(fish_data_2) 248 | 249 | 250 | # ### 10. 实现:scipy.stats 与四分位数 251 | 252 | # In[33]: 253 | 254 | 255 | from scipy import stats 256 | 257 | 258 | # In[34]: 259 | 260 | 261 | fish_data_3 = np.array([1,2,3,4,5,6,7,8,9]) 262 | stats.scoreatpercentile(fish_data_3, 25) 263 | 264 | 265 | # In[35]: 266 | 267 | 268 | stats.scoreatpercentile(fish_data_3, 75) 269 | 270 | -------------------------------------------------------------------------------- /_build/jupyter_execute/3-1-使用Python进行描述统计:单变量.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # coding: utf-8 3 | 4 | # # 第 1 节 使用 Python 进行描述统计:单变量 5 | # 6 | # ## 第 3 章 使用 Pyhton 进行数据分析|用 Python 动手学统计学 7 | 8 | # ### 1. 统计分析与 scipy 9 | 10 | # In[1]: 11 | 12 | 13 | # 用于数值计算的库 14 | import numpy as np 15 | import scipy as sp 16 | 17 | # 设置浮点数打印精度 18 | get_ipython().run_line_magic('precision', '3') 19 | 20 | 21 | # ### 2. 
# Working with single-variable data
#
# NOTE: the original cells called sp.sum / sp.mean / sp.var / sp.std / sp.sqrt.
# Those top-level SciPy names were thin aliases of the NumPy functions and have
# been deprecated since SciPy 1.4, so the NumPy originals (`np` is imported in
# cell In[1] of this script) are called directly. The values are the same.

# In[2]:
fish_data = np.array([2,3,3,4,4,4,4,5,5,6])
fish_data

# ### 3. Implementation: sum and sample size

# In[3]:
# Sum
np.sum(fish_data)

# In[4]:
# For reference: function form
np.sum(fish_data)

# In[5]:
# For reference: ndarray method form
fish_data.sum()

# In[6]:
# For reference: Python built-in sum
sum(fish_data)

# In[7]:
# Sample size
len(fish_data)

# ### 4. Implementation: mean (expected value)

# In[8]:
# Compute the mean by hand: total divided by sample size
N = len(fish_data)
sum_value = np.sum(fish_data)
mu = sum_value / N
mu

# In[9]:
# Library function for the mean
np.mean(fish_data)

# ### 5. Implementation: sample variance

# In[10]:
# Sample variance: mean squared deviation, divided by N
sigma_2_sample = np.sum((fish_data - mu) ** 2) / N
sigma_2_sample

# In[11]:
# Step-by-step view of the calculation above: the raw data ...
fish_data

# In[12]:
# ... the deviations from the mean ...
fish_data - mu

# In[13]:
# ... the squared deviations ...
(fish_data - mu) ** 2

# In[14]:
# ... and their sum.
np.sum((fish_data - mu) ** 2)

# In[15]:
# Library function for the sample variance (ddof = 0 divides by N)
np.var(fish_data, ddof = 0)

# ### 6. Implementation: unbiased variance

# In[16]:
# Unbiased variance: divide by N - 1 instead of N
sigma_2 = np.sum((fish_data - mu) ** 2) / (N - 1)
sigma_2

# In[17]:
# Unbiased variance via the library function (ddof = 1 divides by N - 1)
np.var(fish_data, ddof = 1)

# ### 7. Implementation: standard deviation

# In[18]:
# Standard deviation: square root of the unbiased variance
sigma = np.sqrt(sigma_2)
sigma

# In[19]:
# Library function for the standard deviation
np.std(fish_data, ddof = 1)

# ### 8.
补充:标准化 162 | 163 | # In[20]: 164 | 165 | 166 | fish_data - mu 167 | 168 | 169 | # In[21]: 170 | 171 | 172 | sp.mean(fish_data - mu) 173 | 174 | 175 | # In[22]: 176 | 177 | 178 | fish_data / sigma 179 | 180 | 181 | # In[23]: 182 | 183 | 184 | sp.std(fish_data / sigma, ddof = 1) 185 | 186 | 187 | # In[24]: 188 | 189 | 190 | standard = (fish_data - mu) / sigma 191 | standard 192 | 193 | 194 | # In[25]: 195 | 196 | 197 | sp.mean(standard) 198 | 199 | 200 | # In[26]: 201 | 202 | 203 | sp.std(standard, ddof = 1) 204 | 205 | 206 | # ### 9. 补充:其他统计量 207 | 208 | # In[27]: 209 | 210 | 211 | # 最大值 212 | sp.amax(fish_data) 213 | 214 | 215 | # In[28]: 216 | 217 | 218 | # 最小值 219 | sp.amin(fish_data) 220 | 221 | 222 | # In[29]: 223 | 224 | 225 | # 中位数 226 | sp.median(fish_data) 227 | 228 | 229 | # In[30]: 230 | 231 | 232 | fish_data_2 = np.array([2,3,3,4,4,4,4,5,5,100]) 233 | 234 | 235 | # In[31]: 236 | 237 | 238 | sp.mean(fish_data_2) 239 | 240 | 241 | # In[32]: 242 | 243 | 244 | sp.median(fish_data_2) 245 | 246 | 247 | # ### 10. 实现:scipy.stats 与四分位数 248 | 249 | # In[33]: 250 | 251 | 252 | from scipy import stats 253 | 254 | 255 | # In[34]: 256 | 257 | 258 | fish_data_3 = np.array([1,2,3,4,5,6,7,8,9]) 259 | stats.scoreatpercentile(fish_data_3, 25) 260 | 261 | 262 | # In[35]: 263 | 264 | 265 | stats.scoreatpercentile(fish_data_3, 75) 266 | 267 | -------------------------------------------------------------------------------- /_build/jupyter_execute/3-10-列联表检验.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# 第 10 节 列联表检验\n", 8 | "\n", 9 | "## 第 3 章 使用 Pyhton 进行数据分析|用 Python 动手学统计学" 10 | ] 11 | }, 12 | { 13 | "cell_type": "markdown", 14 | "metadata": {}, 15 | "source": [ 16 | "### 5. 
实现:计算 p 值" 17 | ] 18 | }, 19 | { 20 | "cell_type": "code", 21 | "execution_count": 1, 22 | "metadata": { 23 | "collapsed": true 24 | }, 25 | "outputs": [], 26 | "source": [ 27 | "# 用于数值计算的库\n", 28 | "import numpy as np\n", 29 | "import pandas as pd\n", 30 | "import scipy as sp\n", 31 | "from scipy import stats\n", 32 | "\n", 33 | "# 用于绘图的库\n", 34 | "from matplotlib import pyplot as plt\n", 35 | "import seaborn as sns\n", 36 | "sns.set()\n", 37 | "\n", 38 | "# 设置浮点数打印精度\n", 39 | "%precision 3\n", 40 | "# 在 Jupyter Notebook 里显示图形\n", 41 | "%matplotlib inline" 42 | ] 43 | }, 44 | { 45 | "cell_type": "code", 46 | "execution_count": 2, 47 | "metadata": {}, 48 | "outputs": [ 49 | { 50 | "data": { 51 | "text/plain": [ 52 | "0.009821437357809604" 53 | ] 54 | }, 55 | "execution_count": 2, 56 | "metadata": {}, 57 | "output_type": "execute_result" 58 | } 59 | ], 60 | "source": [ 61 | "# 计算 p 值\n", 62 | "1 - sp.stats.chi2.cdf(x = 6.667, df = 1)" 63 | ] 64 | }, 65 | { 66 | "cell_type": "markdown", 67 | "metadata": {}, 68 | "source": [ 69 | "### 6. 
实现:列联表检验" 70 | ] 71 | }, 72 | { 73 | "cell_type": "code", 74 | "execution_count": 3, 75 | "metadata": { 76 | "scrolled": true 77 | }, 78 | "outputs": [ 79 | { 80 | "name": "stdout", 81 | "output_type": "stream", 82 | "text": [ 83 | " color click freq\n", 84 | "0 blue click 20\n", 85 | "1 blue not 230\n", 86 | "2 red click 10\n", 87 | "3 red not 40\n" 88 | ] 89 | } 90 | ], 91 | "source": [ 92 | "# 读入数据\n", 93 | "click_data = pd.read_csv(\"3-10-1-click_data.csv\")\n", 94 | "print(click_data)" 95 | ] 96 | }, 97 | { 98 | "cell_type": "code", 99 | "execution_count": 4, 100 | "metadata": {}, 101 | "outputs": [ 102 | { 103 | "name": "stdout", 104 | "output_type": "stream", 105 | "text": [ 106 | "click click not\n", 107 | "color \n", 108 | "blue 20 230\n", 109 | "red 10 40\n" 110 | ] 111 | } 112 | ], 113 | "source": [ 114 | "# 转换为列联表\n", 115 | "cross = pd.pivot_table(\n", 116 | " data = click_data,\n", 117 | " values = \"freq\",\n", 118 | " aggfunc = \"sum\",\n", 119 | " index = \"color\",\n", 120 | " columns = \"click\"\n", 121 | ")\n", 122 | "print(cross)" 123 | ] 124 | }, 125 | { 126 | "cell_type": "code", 127 | "execution_count": 5, 128 | "metadata": {}, 129 | "outputs": [ 130 | { 131 | "data": { 132 | "text/plain": [ 133 | "(6.666666666666666,\n", 134 | " 0.009823274507519247,\n", 135 | " 1,\n", 136 | " array([[ 25., 225.],\n", 137 | " [ 5., 45.]]))" 138 | ] 139 | }, 140 | "execution_count": 5, 141 | "metadata": {}, 142 | "output_type": "execute_result" 143 | } 144 | ], 145 | "source": [ 146 | "# 进行检验\n", 147 | "sp.stats.chi2_contingency(cross, correction = False)" 148 | ] 149 | } 150 | ], 151 | "metadata": { 152 | "kernelspec": { 153 | "display_name": "Python 3", 154 | "language": "python", 155 | "name": "python3" 156 | }, 157 | "language_info": { 158 | "codemirror_mode": { 159 | "name": "ipython", 160 | "version": 3 161 | }, 162 | "file_extension": ".py", 163 | "mimetype": "text/x-python", 164 | "name": "python", 165 | "nbconvert_exporter": "python", 166 | 
"pygments_lexer": "ipython3", 167 | "version": "3.7.6" 168 | }, 169 | "toc": { 170 | "base_numbering": 1, 171 | "nav_menu": {}, 172 | "number_sections": false, 173 | "sideBar": true, 174 | "skip_h1_title": false, 175 | "title_cell": "Table of Contents", 176 | "title_sidebar": "Contents", 177 | "toc_cell": false, 178 | "toc_position": {}, 179 | "toc_section_display": true, 180 | "toc_window_display": true 181 | } 182 | }, 183 | "nbformat": 4, 184 | "nbformat_minor": 2 185 | } -------------------------------------------------------------------------------- /_build/jupyter_execute/3-10-列联表检验.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # coding: utf-8 3 | 4 | # # 第 10 节 列联表检验 5 | # 6 | # ## 第 3 章 使用 Pyhton 进行数据分析|用 Python 动手学统计学 7 | 8 | # ### 5. 实现:计算 p 值 9 | 10 | # In[1]: 11 | 12 | 13 | # 用于数值计算的库 14 | import numpy as np 15 | import pandas as pd 16 | import scipy as sp 17 | from scipy import stats 18 | 19 | # 用于绘图的库 20 | from matplotlib import pyplot as plt 21 | import seaborn as sns 22 | sns.set() 23 | 24 | # 设置浮点数打印精度 25 | get_ipython().run_line_magic('precision', '3') 26 | # 在 Jupyter Notebook 里显示图形 27 | get_ipython().run_line_magic('matplotlib', 'inline') 28 | 29 | 30 | # In[2]: 31 | 32 | 33 | # 计算 p 值 34 | 1 - sp.stats.chi2.cdf(x = 6.667, df = 1) 35 | 36 | 37 | # ### 6. 
实现:列联表检验 38 | 39 | # In[3]: 40 | 41 | 42 | # 读入数据 43 | click_data = pd.read_csv("3-10-1-click_data.csv") 44 | print(click_data) 45 | 46 | 47 | # In[4]: 48 | 49 | 50 | # 转换为列联表 51 | cross = pd.pivot_table( 52 | data = click_data, 53 | values = "freq", 54 | aggfunc = "sum", 55 | index = "color", 56 | columns = "click" 57 | ) 58 | print(cross) 59 | 60 | 61 | # In[5]: 62 | 63 | 64 | # 进行检验 65 | sp.stats.chi2_contingency(cross, correction = False) 66 | 67 | -------------------------------------------------------------------------------- /_build/jupyter_execute/3-2-使用Python进行描述统计多变量.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # coding: utf-8 3 | 4 | # # 第 2 节 使用 Python 进行描述统计:多变量 5 | # 6 | # ## 第 3 章 使用 Pyhton 进行数据分析|用 Python 动手学统计学 7 | # 8 | # 9 | 10 | # ### 4. 多变量数据的管理 11 | 12 | # In[1]: 13 | 14 | 15 | # 用于数值计算的库 16 | import pandas as pd 17 | import scipy as sp 18 | 19 | # 设置浮点数打印精度 20 | get_ipython().run_line_magic('precision', '3') 21 | 22 | 23 | # ### 5. 实现:求各分组的统计量 24 | 25 | # In[2]: 26 | 27 | 28 | fish_multi = pd.read_csv("3-2-1-fish_multi.csv") 29 | print(fish_multi) 30 | 31 | 32 | # In[3]: 33 | 34 | 35 | # 按鱼的种类计算 36 | group = fish_multi.groupby("species") 37 | print(group.mean()) 38 | 39 | 40 | # In[4]: 41 | 42 | 43 | print(group.std(ddof = 1)) 44 | 45 | 46 | # In[5]: 47 | 48 | 49 | group.describe() 50 | 51 | 52 | # ### 6. 实现:列联表 53 | 54 | # In[6]: 55 | 56 | 57 | shoes = pd.read_csv("3-2-2-shoes.csv") 58 | print(shoes) 59 | 60 | 61 | # In[7]: 62 | 63 | 64 | cross = pd.pivot_table( 65 | data = shoes, 66 | values = "sales", 67 | aggfunc = "sum", 68 | index = "store", 69 | columns = "color" 70 | ) 71 | print(cross) 72 | 73 | 74 | # ### 9. 
实现:协方差 75 | 76 | # In[8]: 77 | 78 | 79 | cov_data = pd.read_csv("3-2-3-cov.csv") 80 | print(cov_data) 81 | 82 | 83 | # In[9]: 84 | 85 | 86 | # 读取数据的列 87 | x = cov_data["x"] 88 | y = cov_data["y"] 89 | 90 | # 求样本容量 91 | N = len(cov_data) 92 | 93 | # 求各变量均值 94 | mu_x = sp.mean(x) 95 | mu_y = sp.mean(y) 96 | 97 | 98 | # In[10]: 99 | 100 | 101 | # 样本协方差 102 | cov_sample = sum((x - mu_x) * (y - mu_y)) / N 103 | cov_sample 104 | 105 | 106 | # In[11]: 107 | 108 | 109 | # 协方差 110 | cov = sum((x - mu_x) * (y - mu_y)) / (N - 1) 111 | cov 112 | 113 | 114 | # ### 10. 实现:协方差矩阵 115 | 116 | # In[12]: 117 | 118 | 119 | # 样本协方差 120 | sp.cov(x, y, ddof = 0) 121 | 122 | 123 | # In[13]: 124 | 125 | 126 | # 无偏协方差 127 | sp.cov(x, y, ddof = 1) 128 | 129 | 130 | # ### 13. 实现:皮尔逊积矩相关系数 131 | 132 | # In[14]: 133 | 134 | 135 | # 计算两个变量的方差 136 | sigma_2_x = sp.var(x, ddof = 1) 137 | sigma_2_y = sp.var(y, ddof = 1) 138 | 139 | # 计算相关系数 140 | rho = cov / sp.sqrt(sigma_2_x * sigma_2_y) 141 | rho 142 | 143 | 144 | # In[15]: 145 | 146 | 147 | # 计算两个变量的方差 148 | sigma_2_x_sample = sp.var(x, ddof = 0) 149 | sigma_2_y_sample = sp.var(y, ddof = 0) 150 | 151 | # 计算相关系数 152 | cov_sample / sp.sqrt(sigma_2_x_sample * sigma_2_y_sample) 153 | 154 | 155 | # In[16]: 156 | 157 | 158 | # 相关矩阵 159 | sp.corrcoef(x, y) 160 | 161 | 162 | # In[ ]: 163 | 164 | 165 | 166 | 167 | -------------------------------------------------------------------------------- /_build/jupyter_execute/3-2-使用Python进行描述统计:多变量.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # coding: utf-8 3 | 4 | # 5 | # 6 | # # 第 2 节 使用 Python 进行描述统计:多变量 7 | # 8 | # ## 第 3 章 使用 Pyhton 进行数据分析|用 Python 动手学统计学 9 | 10 | # ### 4. 多变量数据的管理 11 | 12 | # In[1]: 13 | 14 | 15 | # 用于数值计算的库 16 | import pandas as pd 17 | import scipy as sp 18 | 19 | # 设置浮点数打印精度 20 | get_ipython().run_line_magic('precision', '3') 21 | 22 | 23 | # ### 5. 
# Implementation: statistics for each group
#
# NOTE: the original cells called sp.mean and sp.cov. Those top-level SciPy
# names were thin aliases of the NumPy functions and have been deprecated since
# SciPy 1.4, so the NumPy originals are called directly. This script's first
# cell imports only pandas and scipy, hence the local numpy import here.
import numpy as np

# In[2]:
# The CSV path is relative to the current working directory.
fish_multi = pd.read_csv("3-2-1-fish_multi.csv")
print(fish_multi)

# In[3]:
# Aggregate per fish species: group means
group = fish_multi.groupby("species")
print(group.mean())

# In[4]:
# Per-group standard deviation (ddof = 1 divides by n - 1)
print(group.std(ddof = 1))

# In[5]:
# Summary statistics for every group at once
group.describe()

# ### 6. Implementation: contingency table

# In[6]:
shoes = pd.read_csv("3-2-2-shoes.csv")
print(shoes)

# In[7]:
# Cross-tabulate: rows = store, columns = color, cells = summed sales
cross = pd.pivot_table(
    data = shoes,
    values = "sales",
    aggfunc = "sum",
    index = "store",
    columns = "color"
)
print(cross)

# ### 9. Implementation: covariance

# In[8]:
cov_data = pd.read_csv("3-2-3-cov.csv")
print(cov_data)

# In[9]:
# Extract the two columns
x = cov_data["x"]
y = cov_data["y"]

# Sample size
N = len(cov_data)

# Mean of each variable
mu_x = np.mean(x)
mu_y = np.mean(y)

# In[10]:
# Sample covariance: sum of deviation products divided by N
cov_sample = sum((x - mu_x) * (y - mu_y)) / N
cov_sample

# In[11]:
# Covariance with the N - 1 denominator
cov = sum((x - mu_x) * (y - mu_y)) / (N - 1)
cov

# ### 10. Implementation: covariance matrix

# In[12]:
# Sample covariance matrix (ddof = 0 divides by N)
np.cov(x, y, ddof = 0)

# In[13]:
# Unbiased covariance matrix (ddof = 1 divides by N - 1)
np.cov(x, y, ddof = 1)

# ### 13.
实现:皮尔逊积矩相关系数 131 | 132 | # In[14]: 133 | 134 | 135 | # 计算两个变量的方差 136 | sigma_2_x = sp.var(x, ddof = 1) 137 | sigma_2_y = sp.var(y, ddof = 1) 138 | 139 | # 计算相关系数 140 | rho = cov / sp.sqrt(sigma_2_x * sigma_2_y) 141 | rho 142 | 143 | 144 | # In[15]: 145 | 146 | 147 | # 计算两个变量的方差 148 | sigma_2_x_sample = sp.var(x, ddof = 0) 149 | sigma_2_y_sample = sp.var(y, ddof = 0) 150 | 151 | # 计算相关系数 152 | cov_sample / sp.sqrt(sigma_2_x_sample * sigma_2_y_sample) 153 | 154 | 155 | # In[16]: 156 | 157 | 158 | # 相关矩阵 159 | sp.corrcoef(x, y) 160 | 161 | 162 | # In[ ]: 163 | 164 | 165 | 166 | 167 | -------------------------------------------------------------------------------- /_build/jupyter_execute/3-3-基于matplotlib-seaborn的数据可视化.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # coding: utf-8 3 | 4 | # # 第 3 节 基于 matplotlib、seaborn 的数据可视化 5 | # 6 | # ## 第 3 章 使用 Pyhton 进行数据分析|用 Python 动手学统计学 7 | 8 | # ### 2. 实现:数据可视化的环境准备 9 | 10 | # In[1]: 11 | 12 | 13 | # 用于数值计算的库 14 | import numpy as np 15 | import pandas as pd 16 | 17 | # 设置浮点数打印精度 18 | get_ipython().run_line_magic('precision', '3') 19 | 20 | # 用于绘图的库 21 | from matplotlib import pyplot as plt 22 | 23 | # 在 Jupyter Notebook 里显示图形 24 | get_ipython().run_line_magic('matplotlib', 'inline') 25 | 26 | 27 | # ### 3. 实现:用 pyplot 绘制折线图 28 | 29 | # In[2]: 30 | 31 | 32 | x = np.array([0,1,2,3,4,5,6,7,8,9]) 33 | y = np.array([2,3,4,3,5,4,6,7,4,8]) 34 | 35 | 36 | # In[3]: 37 | 38 | 39 | plt.plot(x, y, color = 'black') 40 | plt.title("lineplot matplotlib") 41 | plt.xlabel("x") 42 | plt.ylabel("y") 43 | 44 | 45 | # ### 4. 实现:用 seaborn 和 pyplot 绘制折线图 46 | 47 | # In[4]: 48 | 49 | 50 | import seaborn as sns 51 | sns.set() 52 | 53 | 54 | # In[5]: 55 | 56 | 57 | plt.plot(x, y, color = 'black') 58 | plt.title("lineplot seaborn") 59 | plt.xlabel("x") 60 | plt.ylabel("y") 61 | 62 | 63 | # ### 5. 
# Implementation: drawing a histogram with seaborn
#
# NOTE(review): `sns.distplot` is deprecated in newer seaborn releases in
# favour of `histplot` / `displot`; confirm the seaborn version this script is
# run against before upgrading the calls below.

# In[6]:
fish_data = np.array([2,3,3,4,4,4,4,5,5,6])
fish_data

# In[7]:
# Histogram with 5 bins; kde = False turns the density curve off
sns.distplot(fish_data, bins = 5,
             color = 'black', kde = False)

# ### 6. Implementation: smoothing the histogram with kernel density estimation

# In[8]:
# Same data with a single bin, for comparison with the 5-bin histogram above
sns.distplot(fish_data, bins = 1,
             color = 'black', kde = False)

# In[9]:
# `kde` is not passed here, so the function's default applies
sns.distplot(fish_data, color = 'black')

# ### 7. Implementation: histograms of two variables

# In[10]:
# The CSV path is relative to the current working directory.
fish_multi = pd.read_csv("3-3-2-fish_multi_2.csv")
print(fish_multi)

# In[11]:
# Summary statistics per species
print(fish_multi.groupby("species").describe())

# In[12]:
# Split the length column by fish species
length_a = fish_multi.query('species == "A"')["length"]
length_b = fish_multi.query('species == "B"')["length"]

# In[13]:
# Draw the two histograms, one call per species
sns.distplot(length_a, bins = 5,
             color = 'black', kde = False)
sns.distplot(length_b, bins = 5,
             color = 'gray', kde = False)

# ### 9. Implementation: box plot

# In[14]:
# Box plot of length for each species
sns.boxplot(x = "species", y = "length",
            data = fish_multi, color = 'gray')

# In[15]:
# Numeric summary of the same grouped data
fish_multi.groupby("species").describe()

# ### 10. Implementation: violin plot

# In[16]:
sns.violinplot(x = "species", y = "length",
               data = fish_multi, color = 'gray')

# ### 11. Implementation: bar plot

# In[17]:
sns.barplot(x = "species", y = "length",
            data = fish_multi, color = 'gray')

# ### 12. Implementation: scatter plot

# In[18]:
cov_data = pd.read_csv("3-2-3-cov.csv")
print(cov_data)

# In[19]:
# Joint plot of the x and y columns
sns.jointplot(x = "x", y = "y",
              data = cov_data, color = 'black')

# ### 13.
实现:散点图矩阵 178 | 179 | # In[20]: 180 | 181 | 182 | # 导入 seaborn 内置的鸢尾花数据 183 | iris = sns.load_dataset("iris") 184 | iris.head(n = 3) 185 | 186 | 187 | # In[21]: 188 | 189 | 190 | # 每种类鸢尾花各个规格的均值 191 | iris.groupby("species").mean() 192 | 193 | 194 | # In[22]: 195 | 196 | 197 | # 散点图矩阵 198 | sns.pairplot(iris, hue="species", palette='gray') 199 | 200 | 201 | # In[ ]: 202 | 203 | 204 | 205 | 206 | -------------------------------------------------------------------------------- /_build/jupyter_execute/3-3-基于matplotlib-seaborn的数据可视化_11_2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SocratesClub/statsbook/c5a012a41d83a0b48ce414d1e7b418cddf5ee90a/_build/jupyter_execute/3-3-基于matplotlib-seaborn的数据可视化_11_2.png -------------------------------------------------------------------------------- /_build/jupyter_execute/3-3-基于matplotlib-seaborn的数据可视化_13_1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SocratesClub/statsbook/c5a012a41d83a0b48ce414d1e7b418cddf5ee90a/_build/jupyter_execute/3-3-基于matplotlib-seaborn的数据可视化_13_1.png -------------------------------------------------------------------------------- /_build/jupyter_execute/3-3-基于matplotlib-seaborn的数据可视化_14_1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SocratesClub/statsbook/c5a012a41d83a0b48ce414d1e7b418cddf5ee90a/_build/jupyter_execute/3-3-基于matplotlib-seaborn的数据可视化_14_1.png -------------------------------------------------------------------------------- /_build/jupyter_execute/3-3-基于matplotlib-seaborn的数据可视化_19_2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SocratesClub/statsbook/c5a012a41d83a0b48ce414d1e7b418cddf5ee90a/_build/jupyter_execute/3-3-基于matplotlib-seaborn的数据可视化_19_2.png 
-------------------------------------------------------------------------------- /_build/jupyter_execute/3-3-基于matplotlib-seaborn的数据可视化_21_1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SocratesClub/statsbook/c5a012a41d83a0b48ce414d1e7b418cddf5ee90a/_build/jupyter_execute/3-3-基于matplotlib-seaborn的数据可视化_21_1.png -------------------------------------------------------------------------------- /_build/jupyter_execute/3-3-基于matplotlib-seaborn的数据可视化_24_1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SocratesClub/statsbook/c5a012a41d83a0b48ce414d1e7b418cddf5ee90a/_build/jupyter_execute/3-3-基于matplotlib-seaborn的数据可视化_24_1.png -------------------------------------------------------------------------------- /_build/jupyter_execute/3-3-基于matplotlib-seaborn的数据可视化_26_1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SocratesClub/statsbook/c5a012a41d83a0b48ce414d1e7b418cddf5ee90a/_build/jupyter_execute/3-3-基于matplotlib-seaborn的数据可视化_26_1.png -------------------------------------------------------------------------------- /_build/jupyter_execute/3-3-基于matplotlib-seaborn的数据可视化_29_1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SocratesClub/statsbook/c5a012a41d83a0b48ce414d1e7b418cddf5ee90a/_build/jupyter_execute/3-3-基于matplotlib-seaborn的数据可视化_29_1.png -------------------------------------------------------------------------------- /_build/jupyter_execute/3-3-基于matplotlib-seaborn的数据可视化_33_1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SocratesClub/statsbook/c5a012a41d83a0b48ce414d1e7b418cddf5ee90a/_build/jupyter_execute/3-3-基于matplotlib-seaborn的数据可视化_33_1.png 
-------------------------------------------------------------------------------- /_build/jupyter_execute/3-3-基于matplotlib-seaborn的数据可视化_5_1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SocratesClub/statsbook/c5a012a41d83a0b48ce414d1e7b418cddf5ee90a/_build/jupyter_execute/3-3-基于matplotlib-seaborn的数据可视化_5_1.png -------------------------------------------------------------------------------- /_build/jupyter_execute/3-3-基于matplotlib-seaborn的数据可视化_8_1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SocratesClub/statsbook/c5a012a41d83a0b48ce414d1e7b418cddf5ee90a/_build/jupyter_execute/3-3-基于matplotlib-seaborn的数据可视化_8_1.png -------------------------------------------------------------------------------- /_build/jupyter_execute/3-4-用Python模拟抽样.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # coding: utf-8 3 | 4 | # # 第 4 节 用 Python 模拟抽样 5 | # 6 | # ## 第 3 章 使用 Python 进行数据分析|用 Python 动手学统计学 7 | # 8 | # 9 | 10 | # ### 1. 环境准备 11 | 12 | # In[1]: 13 | 14 | 15 | # 用于数值计算的库 16 | import numpy as np 17 | import pandas as pd 18 | import scipy as sp 19 | from scipy import stats 20 | 21 | # 用于绘图的库 22 | from matplotlib import pyplot as plt 23 | import seaborn as sns 24 | sns.set() 25 | 26 | # 设置浮点数打印精度 27 | get_ipython().run_line_magic('precision', '3') 28 | # 在 Jupyter Notebook 里显示图形 29 | get_ipython().run_line_magic('matplotlib', 'inline') 30 | 31 | 32 | # ### 3. 
# Sampling from a lake that holds only 5 fish
#
# NOTE: the original cells called sp.mean / sp.std / sp.var. Those top-level
# SciPy names were thin aliases of the NumPy functions and have been deprecated
# since SciPy 1.4, so the NumPy originals (`np` is imported in cell In[1] of
# this script) are called directly. The computed values are the same.

# In[2]:
fish_5 = np.array([2,3,4,5,6])
fish_5

# In[3]:
# Draw one fish at random from the population (without replacement)
np.random.choice(fish_5, size = 1, replace = False)

# In[4]:
# Draw three fish at random from the population (without replacement)
np.random.choice(fish_5, size = 3, replace = False)

# In[5]:
# Repeat the same call to draw another random sample
np.random.choice(fish_5, size = 3, replace = False)

# In[6]:
# Fix the random seed so the same sample is obtained each time
np.random.seed(1)
np.random.choice(fish_5, size = 3, replace = False)

# In[7]:
# Same seed, same call as In[6]
np.random.seed(1)
np.random.choice(fish_5, size = 3, replace = False)

# In[8]:
# Sample mean of the seeded draw
np.random.seed(1)
np.mean(
    np.random.choice(fish_5, size = 3, replace = False)
)

# ### 6. Sampling from a lake with many fish

# In[9]:
# Larger population: the "length" column of the CSV (path relative to the
# current working directory)
fish_100000 = pd.read_csv(
    "3-4-1-fish_length_100000.csv")["length"]
fish_100000.head()

# In[10]:
len(fish_100000)

# In[11]:
# Sampling simulation: 10 values without replacement
sampling_result = np.random.choice(
    fish_100000, size = 10, replace = False)
sampling_result

# In[12]:
# Sample mean
np.mean(sampling_result)

# ### 7. Population distribution

# In[13]:
# Population mean
np.mean(fish_100000)

# In[14]:
# Population standard deviation (ddof = 0 divides by N)
np.std(fish_100000, ddof = 0)

# In[15]:
# Population variance (ddof = 0 divides by N)
np.var(fish_100000, ddof = 0)

# In[16]:
# Histogram of the whole population.
# NOTE(review): `sns.distplot` is deprecated in newer seaborn releases; left
# unchanged because the targeted seaborn version is not visible here.
sns.distplot(fish_100000, kde = False, color = 'black')

# ### 8.
对比总体分布和正态分布的概率密度函数 146 | 147 | # In[17]: 148 | 149 | 150 | x = np.arange(start = 1, stop = 7.1, step = 0.1) 151 | x 152 | 153 | 154 | # In[18]: 155 | 156 | 157 | stats.norm.pdf(x = x, loc = 4, scale = 0.8) 158 | 159 | 160 | # In[19]: 161 | 162 | 163 | plt.plot(x, 164 | stats.norm.pdf(x = x, loc = 4, scale = 0.8), 165 | color = 'black') 166 | 167 | 168 | # In[20]: 169 | 170 | 171 | # 把正态分布的概率密度和总体的直方图重合 172 | sns.distplot(fish_100000, kde = False, 173 | norm_hist = True, color = 'black') 174 | plt.plot(x, 175 | stats.norm.pdf(x = x, loc = 4, scale = 0.8), 176 | color = 'black') 177 | 178 | 179 | # ### 9. 抽样过程的抽象描述 180 | 181 | # In[21]: 182 | 183 | 184 | sampling_norm = stats.norm.rvs( 185 | loc = 4, scale = 0.8, size = 10) 186 | sampling_norm 187 | 188 | 189 | # In[22]: 190 | 191 | 192 | # 样本均值 193 | sp.mean(sampling_norm) 194 | 195 | 196 | # In[ ]: 197 | 198 | 199 | 200 | 201 | -------------------------------------------------------------------------------- /_build/jupyter_execute/3-4-用Python模拟抽样_20_2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SocratesClub/statsbook/c5a012a41d83a0b48ce414d1e7b418cddf5ee90a/_build/jupyter_execute/3-4-用Python模拟抽样_20_2.png -------------------------------------------------------------------------------- /_build/jupyter_execute/3-4-用Python模拟抽样_24_1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SocratesClub/statsbook/c5a012a41d83a0b48ce414d1e7b418cddf5ee90a/_build/jupyter_execute/3-4-用Python模拟抽样_24_1.png -------------------------------------------------------------------------------- /_build/jupyter_execute/3-4-用Python模拟抽样_25_2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SocratesClub/statsbook/c5a012a41d83a0b48ce414d1e7b418cddf5ee90a/_build/jupyter_execute/3-4-用Python模拟抽样_25_2.png 
-------------------------------------------------------------------------------- /_build/jupyter_execute/3-5-样本统计量的性质_11_2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SocratesClub/statsbook/c5a012a41d83a0b48ce414d1e7b418cddf5ee90a/_build/jupyter_execute/3-5-样本统计量的性质_11_2.png -------------------------------------------------------------------------------- /_build/jupyter_execute/3-5-样本统计量的性质_16_1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SocratesClub/statsbook/c5a012a41d83a0b48ce414d1e7b418cddf5ee90a/_build/jupyter_execute/3-5-样本统计量的性质_16_1.png -------------------------------------------------------------------------------- /_build/jupyter_execute/3-5-样本统计量的性质_22_1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SocratesClub/statsbook/c5a012a41d83a0b48ce414d1e7b418cddf5ee90a/_build/jupyter_execute/3-5-样本统计量的性质_22_1.png -------------------------------------------------------------------------------- /_build/jupyter_execute/3-5-样本统计量的性质_27_1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SocratesClub/statsbook/c5a012a41d83a0b48ce414d1e7b418cddf5ee90a/_build/jupyter_execute/3-5-样本统计量的性质_27_1.png -------------------------------------------------------------------------------- /_build/jupyter_execute/3-5-样本统计量的性质_30_1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SocratesClub/statsbook/c5a012a41d83a0b48ce414d1e7b418cddf5ee90a/_build/jupyter_execute/3-5-样本统计量的性质_30_1.png -------------------------------------------------------------------------------- /_build/jupyter_execute/3-5-样本统计量的性质_41_1.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/SocratesClub/statsbook/c5a012a41d83a0b48ce414d1e7b418cddf5ee90a/_build/jupyter_execute/3-5-样本统计量的性质_41_1.png -------------------------------------------------------------------------------- /_build/jupyter_execute/3-5-样本统计量的性质_43_3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SocratesClub/statsbook/c5a012a41d83a0b48ce414d1e7b418cddf5ee90a/_build/jupyter_execute/3-5-样本统计量的性质_43_3.png -------------------------------------------------------------------------------- /_build/jupyter_execute/3-6-正态分布及其应用.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # coding: utf-8 3 | 4 | # # 第 6 节 正态分布及其应用 5 | # 6 | # ## 第 3 章 使用 Python 进行数据分析|用 Python 动手学统计学 7 | # 8 | # 9 | 10 | # ### 1. 导入函数库 11 | 12 | # In[1]: 13 | 14 | 15 | # 用于数值计算的库 16 | import numpy as np 17 | import pandas as pd 18 | import scipy as sp 19 | from scipy import stats 20 | 21 | # 用于绘图的库 22 | from matplotlib import pyplot as plt 23 | import seaborn as sns 24 | sns.set() 25 | 26 | # 设置浮点数打印精度 27 | get_ipython().run_line_magic('precision', '3') 28 | # 在 Jupyter Notebook 里显示图形 29 | get_ipython().run_line_magic('matplotlib', 'inline') 30 | 31 | 32 | # ### 2. 
实现:概率密度 33 | 34 | # In[2]: 35 | 36 | 37 | # 圆周率 38 | sp.pi 39 | 40 | 41 | # In[3]: 42 | 43 | 44 | # 指数函数 45 | sp.exp(1) 46 | 47 | 48 | # In[4]: 49 | 50 | 51 | # 均值为 4 标准差为 0.8 的正态分布在随机变量为 3 时的概率密度 52 | x = 3 53 | mu = 4 54 | sigma = 0.8 55 | 56 | 1 / (sp.sqrt(2 * sp.pi * sigma**2)) * sp.exp(- ((x - mu)**2) / (2 * sigma**2)) 57 | 58 | 59 | # In[5]: 60 | 61 | 62 | stats.norm.pdf(loc = 4, scale = 0.8, x = 3) 63 | 64 | 65 | # In[6]: 66 | 67 | 68 | norm_dist = stats.norm(loc = 4, scale = 0.8) 69 | norm_dist.pdf(x = 3) 70 | 71 | 72 | # In[7]: 73 | 74 | 75 | x_plot = np.arange(start = 1, stop = 7.1, step = 0.1) 76 | plt.plot( 77 | x_plot, 78 | stats.norm.pdf(x = x_plot, loc = 4, scale = 0.8), 79 | color = 'black' 80 | ) 81 | 82 | 83 | # ### 3. 样本小于等于某值的比例 84 | 85 | # In[8]: 86 | 87 | 88 | np.random.seed(1) 89 | simulated_sample = stats.norm.rvs( 90 | loc = 4, scale = 0.8, size = 100000) 91 | simulated_sample 92 | 93 | 94 | # In[9]: 95 | 96 | 97 | sp.sum(simulated_sample <= 3) 98 | 99 | 100 | # In[10]: 101 | 102 | 103 | sp.sum(simulated_sample <= 3) / len(simulated_sample) 104 | 105 | 106 | # ### 5. 实现:累积分布函数 107 | 108 | # In[11]: 109 | 110 | 111 | stats.norm.cdf(loc = 4, scale = 0.8, x = 3) 112 | 113 | 114 | # In[12]: 115 | 116 | 117 | stats.norm.cdf(loc = 4, scale = 0.8, x = 4) 118 | 119 | 120 | # ### 7. 实现:百分位数 121 | 122 | # In[13]: 123 | 124 | 125 | stats.norm.ppf(loc = 4, scale = 0.8, q = 0.025) 126 | 127 | 128 | # In[14]: 129 | 130 | 131 | left = stats.norm.cdf(loc = 4, scale = 0.8, x = 3) 132 | stats.norm.ppf(loc = 4, scale = 0.8, q = left) 133 | 134 | 135 | # In[15]: 136 | 137 | 138 | stats.norm.ppf(loc = 4, scale = 0.8, q = 0.5) 139 | 140 | 141 | # ### 10. 
t 值的样本分布 142 | 143 | # In[16]: 144 | 145 | 146 | # 随机数种子 147 | np.random.seed(1) 148 | # 存放 t 值的空间 149 | t_value_array = np.zeros(10000) 150 | # 实例化一个正态分布 151 | norm_dist = stats.norm(loc = 4, scale = 0.8) 152 | # 开始实验 153 | for i in range(0, 10000): 154 | sample = norm_dist.rvs(size = 10) 155 | sample_mean = sp.mean(sample) 156 | sample_std = sp.std(sample, ddof = 1) 157 | sample_se = sample_std / sp.sqrt(len(sample)) 158 | t_value_array[i] = (sample_mean - 4) / sample_se 159 | 160 | 161 | # In[17]: 162 | 163 | 164 | # t 值的直方图 165 | sns.distplot(t_value_array, color = 'black') 166 | 167 | # 标准正态分布的概率祺 168 | x = np.arange(start = -8, stop = 8.1, step = 0.1) 169 | plt.plot(x, stats.norm.pdf(x = x), 170 | color = 'black', linestyle = 'dotted') 171 | 172 | 173 | # ### 12. 实现:t 分布 174 | 175 | # In[18]: 176 | 177 | 178 | plt.plot(x, stats.norm.pdf(x = x), 179 | color = 'black', linestyle = 'dotted') 180 | plt.plot(x, stats.t.pdf(x = x, df = 9), 181 | color = 'black') 182 | 183 | 184 | # In[19]: 185 | 186 | 187 | sns.distplot(t_value_array, 188 | color = 'black', norm_hist = True) 189 | plt.plot(x, stats.t.pdf(x = x, df = 9), 190 | color = 'black', linestyle = 'dotted') 191 | 192 | 193 | # In[ ]: 194 | 195 | 196 | 197 | 198 | -------------------------------------------------------------------------------- /_build/jupyter_execute/3-6-正态分布及其应用_23_2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SocratesClub/statsbook/c5a012a41d83a0b48ce414d1e7b418cddf5ee90a/_build/jupyter_execute/3-6-正态分布及其应用_23_2.png -------------------------------------------------------------------------------- /_build/jupyter_execute/3-6-正态分布及其应用_25_1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SocratesClub/statsbook/c5a012a41d83a0b48ce414d1e7b418cddf5ee90a/_build/jupyter_execute/3-6-正态分布及其应用_25_1.png 
-------------------------------------------------------------------------------- /_build/jupyter_execute/3-6-正态分布及其应用_26_2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SocratesClub/statsbook/c5a012a41d83a0b48ce414d1e7b418cddf5ee90a/_build/jupyter_execute/3-6-正态分布及其应用_26_2.png -------------------------------------------------------------------------------- /_build/jupyter_execute/3-6-正态分布及其应用_9_1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SocratesClub/statsbook/c5a012a41d83a0b48ce414d1e7b418cddf5ee90a/_build/jupyter_execute/3-6-正态分布及其应用_9_1.png -------------------------------------------------------------------------------- /_build/jupyter_execute/3-7-参数估计.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # coding: utf-8 3 | 4 | # # 第 7 节 参数估计 5 | # ## 第 3 章 使用 Pyhton 进行数据分析|用 Python 动手学统计学 6 | # 7 | # 8 | 9 | # ### 2. 环境准备 10 | 11 | # In[1]: 12 | 13 | 14 | # 用于数值计算的库 15 | import numpy as np 16 | import pandas as pd 17 | import scipy as sp 18 | from scipy import stats 19 | 20 | # 用于绘图的库 21 | from matplotlib import pyplot as plt 22 | import seaborn as sns 23 | sns.set() 24 | 25 | # 设置浮点数打印精度 26 | get_ipython().run_line_magic('precision', '3') 27 | # 在 Jupyter Notebook 里显示图形 28 | get_ipython().run_line_magic('matplotlib', 'inline') 29 | 30 | 31 | # In[2]: 32 | 33 | 34 | # 读入数据 35 | fish = pd.read_csv("3-7-1-fish_length.csv")["length"] 36 | fish 37 | 38 | 39 | # ### 4. 实现:点估计 40 | 41 | # In[3]: 42 | 43 | 44 | # 总体均值的点估计 45 | mu = sp.mean(fish) 46 | mu 47 | 48 | 49 | # In[4]: 50 | 51 | 52 | # 总体方差的点估计 53 | sigma_2 = sp.var(fish, ddof = 1) 54 | sigma_2 55 | 56 | 57 | # ### 9. 
实现:区间估计 58 | 59 | # In[5]: 60 | 61 | 62 | # 自由度 63 | df = len(fish) - 1 64 | df 65 | 66 | 67 | # In[6]: 68 | 69 | 70 | # 标准误差 71 | se = sigma / sp.sqrt(len(fish)) 72 | se 73 | 74 | 75 | # In[8]: 76 | 77 | 78 | # 区间估计 79 | interval = stats.t.interval( 80 | alpha = 0.95, df = df, loc = mu, scale = se) 81 | interval 82 | 83 | 84 | # ### 10. 补充:置信区间的求解细节 85 | 86 | # In[9]: 87 | 88 | 89 | # 97.5% 分位数 90 | t_975 = stats.t.ppf(q = 0.975, df = df) 91 | t_975 92 | 93 | 94 | # In[10]: 95 | 96 | 97 | # 下置信界限 98 | lower = mu - t_975 * se 99 | lower 100 | 101 | 102 | # In[11]: 103 | 104 | 105 | # 上置信界限 106 | upper = mu + t_975 * se 107 | upper 108 | 109 | 110 | # ### 11. 决定置信区间大小的因素 111 | 112 | # In[12]: 113 | 114 | 115 | # 样本方差越大, 置信区间越大 116 | se2 = (sigma*10) / sp.sqrt(len(fish)) 117 | stats.t.interval( 118 | alpha = 0.95, df = df, loc = mu, scale = se2) 119 | 120 | 121 | # In[13]: 122 | 123 | 124 | # 样本容量越大, 置信区间越小 125 | df2 = (len(fish)*10) - 1 126 | se3 = sigma / sp.sqrt(len(fish)*10) 127 | stats.t.interval( 128 | alpha = 0.95, df = df2, loc = mu, scale = se3) 129 | 130 | 131 | # In[14]: 132 | 133 | 134 | # 99% 置信区间 135 | stats.t.interval( 136 | alpha = 0.99, df = df, loc = mu, scale = se) 137 | 138 | 139 | # ### 12. 
区间估计结果的解读 140 | 141 | # In[19]: 142 | 143 | 144 | # 如果置信区间包含总体均值 (4) 就取 True 145 | be_included_array = np.zeros(20000, dtype = "bool") 146 | be_included_array 147 | 148 | 149 | # In[20]: 150 | 151 | 152 | # 执行 20,000 次求 95% 置信区间的操作 153 | # 如果置信区间包含总体均值 (4) 就取 True 154 | np.random.seed(1) 155 | norm_dist = stats.norm(loc = 4, scale = 0.8) 156 | for i in range(0, 20000): 157 | sample = norm_dist.rvs(size = 10) 158 | df = len(sample) - 1 159 | mu = sp.mean(sample) 160 | std = sp.std(sample, ddof = 1) 161 | se = std / sp.sqrt(len(sample)) 162 | interval = stats.t.interval(0.95, df, mu, se) 163 | if(interval[0] <= 4 and interval[1] >= 4): 164 | be_included_array[i] = True 165 | 166 | 167 | # In[21]: 168 | 169 | 170 | sum(be_included_array) / len(be_included_array) 171 | 172 | 173 | # In[ ]: 174 | 175 | 176 | 177 | 178 | -------------------------------------------------------------------------------- /_build/jupyter_execute/3-8-假设检验.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # coding: utf-8 3 | 4 | # # 第 8 节 假设检验 5 | # ## 第 3 章 使用 Pyhton 进行数据分析|用 Python 动手学统计学 6 | # 7 | # 8 | 9 | # ### 13. t 检验的实现:环境准备 10 | 11 | # In[1]: 12 | 13 | 14 | # 用于数值计算的库 15 | import numpy as np 16 | import pandas as pd 17 | import scipy as sp 18 | from scipy import stats 19 | 20 | # 用于绘图的库 21 | from matplotlib import pyplot as plt 22 | import seaborn as sns 23 | sns.set() 24 | 25 | # 设置浮点数打印精度 26 | get_ipython().run_line_magic('precision', '3') 27 | # 在 Jupyter Notebook 里显示图形 28 | get_ipython().run_line_magic('matplotlib', 'inline') 29 | 30 | 31 | # In[2]: 32 | 33 | 34 | # 读入数据 35 | junk_food = pd.read_csv( 36 | "3-8-1-junk-food-weight.csv")["weight"] 37 | junk_food.head() 38 | 39 | 40 | # ### 14. 
t 检验的实现:计算 t 值 41 | 42 | # In[3]: 43 | 44 | 45 | # 样本均值 46 | mu = sp.mean(junk_food) 47 | mu 48 | 49 | 50 | # In[4]: 51 | 52 | 53 | # 自由度 54 | df = len(junk_food) - 1 55 | df 56 | 57 | 58 | # In[5]: 59 | 60 | 61 | # 标准误差 62 | sigma = sp.std(junk_food, ddof = 1) 63 | se = sigma / sp.sqrt(len(junk_food)) 64 | se 65 | 66 | 67 | # In[6]: 68 | 69 | 70 | # t 值 71 | t_value = (mu - 50) / se 72 | t_value 73 | 74 | 75 | # ### 15. t 检验的实现:计算 p 值 76 | 77 | # In[7]: 78 | 79 | 80 | # p 值 81 | alpha = stats.t.cdf(t_value, df = df) 82 | (1 - alpha) * 2 83 | 84 | 85 | # In[8]: 86 | 87 | 88 | # t 检验 89 | stats.ttest_1samp(junk_food, 50) 90 | 91 | 92 | # ### 16. 通过模拟实验计算 p 值 93 | 94 | # In[9]: 95 | 96 | 97 | # 样本的相关信息 (一部分) 98 | size = len(junk_food) 99 | sigma = sp.std(junk_food, ddof = 1) 100 | 101 | 102 | # In[10]: 103 | 104 | 105 | # 存放 t 值的窗口 106 | t_value_array = np.zeros(50000) 107 | 108 | 109 | # In[11]: 110 | 111 | 112 | # 总体均值为 50, 以接受零假设为前提进行 50,000 次抽样并计算 t 值的实验 113 | np.random.seed(1) 114 | norm_dist = stats.norm(loc = 50, scale = sigma) 115 | for i in range(0, 50000): 116 | sample = norm_dist.rvs(size = size) 117 | sample_mean = sp.mean(sample) 118 | sample_std = sp.std(sample, ddof = 1) 119 | sample_se = sample_std / sp.sqrt(size) 120 | t_value_array[i] = (sample_mean - 50) / sample_se 121 | 122 | 123 | # In[12]: 124 | 125 | 126 | (sum(t_value_array > t_value) / 50000) * 2 127 | 128 | 129 | # In[ ]: 130 | 131 | 132 | 133 | 134 | -------------------------------------------------------------------------------- /_build/jupyter_execute/3-9-均值差的检验.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # coding: utf-8 3 | 4 | # # 第 9 节 均值差的检验 5 | # ## 第 3 章 使用 Pyhton 进行数据分析|用 Python 动手学统计学 6 | # 7 | # 8 | 9 | # ### 3. 
实现:实验准备 10 | 11 | # In[1]: 12 | 13 | 14 | # 用于数值计算的库 15 | import numpy as np 16 | import pandas as pd 17 | import scipy as sp 18 | from scipy import stats 19 | 20 | # 用于绘图的库 21 | from matplotlib import pyplot as plt 22 | import seaborn as sns 23 | sns.set() 24 | 25 | # 设置浮点数打印精度 26 | get_ipython().run_line_magic('precision', '3') 27 | # 在 Jupyter Notebook 里显示图形 28 | get_ipython().run_line_magic('matplotlib', 'inline') 29 | 30 | 31 | # In[2]: 32 | 33 | 34 | # 读入数据 35 | paired_test_data = pd.read_csv( 36 | "3-9-1-paired-t-test.csv") 37 | print(paired_test_data) 38 | 39 | 40 | # ### 4. 实现:配对样本 t 检验 41 | 42 | # In[3]: 43 | 44 | 45 | # 服药前后的样本均值 46 | before = paired_test_data.query( 47 | 'medicine == "before"')["body_temperature"] 48 | after = paired_test_data.query( 49 | 'medicine == "after"')["body_temperature"] 50 | # 转为数组类型 51 | before = np.array(before) 52 | after = np.array(after) 53 | # 计算差值 54 | diff = after - before 55 | diff 56 | 57 | 58 | # In[4]: 59 | 60 | 61 | # 检验均值是否与 0 存在差异 62 | stats.ttest_1samp(diff, 0) 63 | 64 | 65 | # In[5]: 66 | 67 | 68 | # 配对样本 t 检验 69 | stats.ttest_rel(after, before) 70 | 71 | 72 | # ### 6. 实现:独立样本 t 检验 73 | 74 | # In[6]: 75 | 76 | 77 | # 均值 78 | mean_bef = sp.mean(before) 79 | mean_aft = sp.mean(after) 80 | 81 | # 方差 82 | sigma_bef = sp.var(before, ddof = 1) 83 | sigma_aft = sp.var(after, ddof = 1) 84 | 85 | # 样本容量 86 | m = len(before) 87 | n = len(after) 88 | 89 | # t 值 90 | t_value = (mean_aft - mean_bef) / sp.sqrt((sigma_bef/m + sigma_aft/n)) 91 | t_value 92 | 93 | 94 | # In[7]: 95 | 96 | 97 | stats.ttest_ind(after, before, equal_var = False) 98 | 99 | 100 | # In[ ]: 101 | 102 | 103 | 104 | 105 | -------------------------------------------------------------------------------- /_build/jupyter_execute/5-1-一元回归.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # coding: utf-8 3 | 4 | # 5 | # # 第 5 章 正态线性模型 6 | # 7 | # ## 第 1 节 含有单个连续型解释变量的模型(一元回归) 8 | 9 | # ### 1. 
环境准备 10 | 11 | # In[1]: 12 | 13 | 14 | # 用于数值计算的库 15 | import numpy as np 16 | import pandas as pd 17 | import scipy as sp 18 | from scipy import stats 19 | 20 | # 用于绘图的库 21 | from matplotlib import pyplot as plt 22 | import seaborn as sns 23 | sns.set() 24 | 25 | # 用于估计统计模型的库 (部分版本会报出警告信息) 26 | import statsmodels.formula.api as smf 27 | import statsmodels.api as sm 28 | 29 | # 设置浮点数打印精度 30 | get_ipython().run_line_magic('precision', '3') 31 | # 在 Jupyter Notebook 里显示图形 32 | get_ipython().run_line_magic('matplotlib', 'inline') 33 | 34 | 35 | # ### 2. 实现:读入数据并绘制其图形 36 | 37 | # In[2]: 38 | 39 | 40 | # 读入数据 41 | beer = pd.read_csv("5-1-1-beer.csv") 42 | print(beer.head()) 43 | 44 | 45 | # In[3]: 46 | 47 | 48 | # 绘制图像 49 | sns.jointplot(x = "temperature", y = "beer", 50 | data = beer, color = 'black') 51 | 52 | 53 | # ### 4. 实现:使用 statsmodels 实现模型化 54 | 55 | # In[4]: 56 | 57 | 58 | # 建模 59 | lm_model = smf.ols(formula = "beer ~ temperature", 60 | data = beer).fit() 61 | 62 | 63 | # ### 5. 实现:打印估计结果并检验系数 64 | 65 | # In[5]: 66 | 67 | 68 | # 打印估计的结果 69 | lm_model.summary() 70 | 71 | 72 | # ### 7. 实现:使用 AIC 进行模型选择 73 | 74 | # In[6]: 75 | 76 | 77 | # 空模型 78 | null_model = smf.ols("beer ~ 1", data = beer).fit() 79 | 80 | 81 | # In[7]: 82 | 83 | 84 | # 空模型的 AIC 85 | null_model.aic 86 | 87 | 88 | # In[8]: 89 | 90 | 91 | # 含有解释变量的模型的 AIC 92 | lm_model.aic 93 | 94 | 95 | # In[9]: 96 | 97 | 98 | # 对数似然度 99 | lm_model.llf 100 | 101 | 102 | # In[10]: 103 | 104 | 105 | # 解释变量的个数 106 | lm_model.df_model 107 | 108 | 109 | # In[11]: 110 | 111 | 112 | # AIC 113 | -2*(lm_model.llf - (lm_model.df_model + 1)) 114 | 115 | 116 | # ### 9. 实现:用 seaborn 绘制回归直线 117 | 118 | # In[12]: 119 | 120 | 121 | sns.lmplot(x = "temperature", y = "beer", data = beer, 122 | scatter_kws = {"color": "black"}, 123 | line_kws = {"color": "black"}) 124 | 125 | 126 | # ### 10. 
实现:使用模型进行预测 127 | 128 | # In[13]: 129 | 130 | 131 | # 拟合值 132 | lm_model.predict() 133 | 134 | 135 | # In[14]: 136 | 137 | 138 | # 预测 139 | lm_model.predict(pd.DataFrame({"temperature":[0]})) 140 | 141 | 142 | # In[15]: 143 | 144 | 145 | # 气温为 0 度时的预测值等于截距 146 | lm_model.params 147 | 148 | 149 | # In[16]: 150 | 151 | 152 | # 预测 153 | lm_model.predict(pd.DataFrame({"temperature":[20]})) 154 | 155 | 156 | # In[17]: 157 | 158 | 159 | # 不使用 predict 函数进行预测 160 | beta0 = lm_model.params[0] 161 | beta1 = lm_model.params[1] 162 | temperature = 20 163 | 164 | beta0 + beta1 * temperature 165 | 166 | 167 | # ### 11. 实现:获取残差 168 | 169 | # In[18]: 170 | 171 | 172 | # 获得残差 173 | resid = lm_model.resid 174 | resid.head(3) 175 | 176 | 177 | # In[19]: 178 | 179 | 180 | # 计算拟合值 181 | y_hat = beta0 + beta1 * beer.temperature 182 | y_hat.head(3) 183 | 184 | 185 | # In[20]: 186 | 187 | 188 | # 获得拟合值 189 | lm_model.fittedvalues.head(3) 190 | 191 | 192 | # In[21]: 193 | 194 | 195 | # 手动计算残差 196 | (beer.beer - y_hat).head(3) 197 | 198 | 199 | # ### 13. 实现:决定系数 200 | 201 | # In[22]: 202 | 203 | 204 | # 决定系数 205 | mu = sp.mean(beer.beer) 206 | y = beer.beer 207 | yhat = lm_model.predict() 208 | 209 | sp.sum((yhat - mu)**2) / sp.sum((y - mu)**2) 210 | 211 | 212 | # In[23]: 213 | 214 | 215 | lm_model.rsquared 216 | 217 | 218 | # In[24]: 219 | 220 | 221 | sp.sum((yhat - mu)**2) + sum(resid**2) 222 | 223 | 224 | # In[25]: 225 | 226 | 227 | sp.sum((y - mu)**2) 228 | 229 | 230 | # In[26]: 231 | 232 | 233 | 1 - sp.sum(resid**2) / sp.sum((y - mu)**2) 234 | 235 | 236 | # ### 15. 实现:修正决定系数 237 | 238 | # In[27]: 239 | 240 | 241 | n = len(beer.beer) 242 | s = 1 243 | 1 - ((sp.sum(resid**2) / (n - s - 1)) / 244 | (sp.sum((y - mu)**2) / (n - 1))) 245 | 246 | 247 | # In[28]: 248 | 249 | 250 | lm_model.rsquared_adj 251 | 252 | 253 | # ### 16. 
实现:残差的直方图和散点图 254 | 255 | # In[29]: 256 | 257 | 258 | # 残差的直方图 259 | sns.distplot(resid, color = 'black') 260 | 261 | 262 | # In[30]: 263 | 264 | 265 | # 残差的散点图 266 | sns.jointplot(lm_model.fittedvalues, resid, 267 | joint_kws={"color": "black"}, 268 | marginal_kws={"color": "black"}) 269 | 270 | 271 | # ### 18. 实现:分位图 272 | 273 | # In[31]: 274 | 275 | 276 | # 分位图 277 | fig = sm.qqplot(resid, line = "s") 278 | 279 | 280 | # In[32]: 281 | 282 | 283 | # 递增排列 284 | resid_sort = resid.sort_values() 285 | resid_sort.head() 286 | 287 | 288 | # In[33]: 289 | 290 | 291 | # 最小的数据所在位置 292 | 1 / 31 293 | 294 | 295 | # In[34]: 296 | 297 | 298 | # 按样本容量变换为 0 到 1 的范围, 得到理论累积概率 299 | # 300 | nobs = len(resid_sort) 301 | cdf = np.arange(1, nobs + 1) / (nobs + 1) 302 | cdf 303 | 304 | 305 | # In[35]: 306 | 307 | 308 | # 累积概率对应的百分位数 309 | ppf = stats.norm.ppf(cdf) 310 | ppf 311 | 312 | 313 | # In[36]: 314 | 315 | 316 | # 参考: 横轴为理论分位数, 纵轴为已排序的实际数据, 绘出的散点图就是分位图 317 | fig = sm.qqplot(resid, line = "s") 318 | 319 | plt.plot(stats.norm.ppf(cdf), resid_sort, "o", color = "black") 320 | 321 | 322 | # ### 19. 
根据 summary 函数的输出分析残差 323 | 324 | # In[37]: 325 | 326 | 327 | # 打印估计的结果 328 | lm_model.summary() 329 | 330 | -------------------------------------------------------------------------------- /_build/jupyter_execute/5-1-一元回归_18_1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SocratesClub/statsbook/c5a012a41d83a0b48ce414d1e7b418cddf5ee90a/_build/jupyter_execute/5-1-一元回归_18_1.png -------------------------------------------------------------------------------- /_build/jupyter_execute/5-1-一元回归_40_2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SocratesClub/statsbook/c5a012a41d83a0b48ce414d1e7b418cddf5ee90a/_build/jupyter_execute/5-1-一元回归_40_2.png -------------------------------------------------------------------------------- /_build/jupyter_execute/5-1-一元回归_41_2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SocratesClub/statsbook/c5a012a41d83a0b48ce414d1e7b418cddf5ee90a/_build/jupyter_execute/5-1-一元回归_41_2.png -------------------------------------------------------------------------------- /_build/jupyter_execute/5-1-一元回归_43_0.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SocratesClub/statsbook/c5a012a41d83a0b48ce414d1e7b418cddf5ee90a/_build/jupyter_execute/5-1-一元回归_43_0.png -------------------------------------------------------------------------------- /_build/jupyter_execute/5-1-一元回归_48_1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SocratesClub/statsbook/c5a012a41d83a0b48ce414d1e7b418cddf5ee90a/_build/jupyter_execute/5-1-一元回归_48_1.png -------------------------------------------------------------------------------- /_build/jupyter_execute/5-1-一元回归_5_1.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/SocratesClub/statsbook/c5a012a41d83a0b48ce414d1e7b418cddf5ee90a/_build/jupyter_execute/5-1-一元回归_5_1.png -------------------------------------------------------------------------------- /_build/jupyter_execute/5-2-方差分析.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # coding: utf-8 3 | 4 | # # 第 2 节 方差分析 5 | # 6 | # ## 第 5 章 正态线性模型|用 Python 动手学统计学 7 | # 8 | # 9 | 10 | # ### 8. 环境准备 11 | 12 | # In[1]: 13 | 14 | 15 | # 用于数值计算的库 16 | import numpy as np 17 | import pandas as pd 18 | import scipy as sp 19 | from scipy import stats 20 | 21 | # 用于绘图的库 22 | from matplotlib import pyplot as plt 23 | import seaborn as sns 24 | sns.set() 25 | 26 | # 用于估计统计模型的库 (部分版本会报出警告信息) 27 | import statsmodels.formula.api as smf 28 | import statsmodels.api as sm 29 | 30 | # 设置浮点数打印精度 31 | get_ipython().run_line_magic('precision', '3') 32 | # 在 Jupyter Notebook 里显示图形 33 | get_ipython().run_line_magic('matplotlib', 'inline') 34 | 35 | 36 | # ### 9. 生成数据并可视化 37 | 38 | # In[2]: 39 | 40 | 41 | # 定义一组示例数据 42 | weather = [ 43 | "cloudy","cloudy", 44 | "rainy","rainy", 45 | "sunny","sunny" 46 | ] 47 | beer = [6,8,2,4,10,12] 48 | 49 | # 转换成数据帧 50 | weather_beer = pd.DataFrame({ 51 | "beer" : beer, 52 | "weather": weather 53 | }) 54 | print(weather_beer) 55 | 56 | 57 | # In[3]: 58 | 59 | 60 | # 绘出箱形图 61 | sns.boxplot(x = "weather",y = "beer", 62 | data = weather_beer, color='gray') 63 | 64 | 65 | # In[4]: 66 | 67 | 68 | # 每种天气下销售额的均值 69 | print(weather_beer.groupby("weather").mean()) 70 | 71 | 72 | # ### 10. 
实现:方差分析①:计算组间平方和与组内平方和 73 | 74 | # In[5]: 75 | 76 | 77 | # 天气的影响 (对应 weather 变量) 78 | effect = [7,7,3,3,11,11] 79 | 80 | 81 | # In[6]: 82 | 83 | 84 | # 组间偏差平方和 85 | mu_effect = sp.mean(effect) 86 | squares_model = sp.sum((effect - mu_effect) ** 2 ) 87 | squares_model 88 | 89 | 90 | # In[7]: 91 | 92 | 93 | # 无法用天气来解释的部分, 即误差 94 | resid = weather_beer.beer - effect 95 | resid 96 | 97 | 98 | # In[8]: 99 | 100 | 101 | # 组内偏差平方和 102 | squares_resid = sp.sum(resid ** 2) 103 | squares_resid 104 | 105 | 106 | # ### 11. 实现:方差分析②:计算组间方差与组内方差 107 | 108 | # In[9]: 109 | 110 | 111 | df_model = 2 # 组间差异的自由度 112 | df_resid = 3 # 组内差异的自由度 113 | 114 | 115 | # In[10]: 116 | 117 | 118 | # 组间均方 (方差) 119 | variance_model = squares_model / df_model 120 | variance_model 121 | 122 | 123 | # In[11]: 124 | 125 | 126 | # 组内均方 (方差) 127 | variance_resid = squares_resid / df_resid 128 | variance_resid 129 | 130 | 131 | # ### 12. 实现:方差分析③:计算 p 值 132 | 133 | # In[12]: 134 | 135 | 136 | # F 比 137 | f_ratio = variance_model / variance_resid 138 | f_ratio 139 | 140 | 141 | # In[13]: 142 | 143 | 144 | # p 值 145 | 1 - sp.stats.f.cdf(x=f_ratio,dfn=df_model,dfd=df_resid) 146 | 147 | 148 | # ### 15. 实现:statsmodels 中的方差分析 149 | 150 | # In[14]: 151 | 152 | 153 | # 建立正态线性模型 154 | anova_model = smf.ols("beer ~ weather", 155 | data = weather_beer).fit() 156 | 157 | 158 | # In[15]: 159 | 160 | 161 | # 方差分析的结果 162 | print(sm.stats.anova_lm(anova_model, typ=2)) 163 | 164 | 165 | # ### 17. 模型系数的含义 166 | 167 | # In[16]: 168 | 169 | 170 | anova_model.params 171 | 172 | 173 | # ### 18. 使用模型分离效应和误差 174 | 175 | # In[17]: 176 | 177 | 178 | # 拟合值 179 | fitted = anova_model.fittedvalues 180 | fitted 181 | 182 | 183 | # In[18]: 184 | 185 | 186 | # 残差 187 | anova_model.resid 188 | 189 | 190 | # ### 19. 
回归模型中的方差分析 191 | 192 | # In[19]: 193 | 194 | 195 | # 读取数据 196 | beer = pd.read_csv("5-1-1-beer.csv") 197 | 198 | # 估计模型 199 | lm_model = smf.ols(formula = "beer ~ temperature", 200 | data = beer).fit() 201 | 202 | 203 | # In[20]: 204 | 205 | 206 | df_lm_model = 1 # 模型自由度 207 | df_lm_resid = 28 # 残差自由度 208 | 209 | 210 | # In[21]: 211 | 212 | 213 | # 拟合值 214 | lm_effect = lm_model.fittedvalues 215 | # 残差 216 | lm_resid = lm_model.resid 217 | # 气温的影响力度 218 | mu = sp.mean(lm_effect) 219 | squares_lm_model = sp.sum((lm_effect - mu) ** 2) 220 | variance_lm_model = squares_lm_model / df_lm_model 221 | # 残差的方差 222 | squares_lm_resid = sp.sum((lm_resid) ** 2) 223 | variance_lm_resid = squares_lm_resid / df_lm_resid 224 | # F 比 225 | f_value_lm = variance_lm_model / variance_lm_resid 226 | f_value_lm 227 | 228 | 229 | # In[22]: 230 | 231 | 232 | # 参考: p 值 (结果在截断后约等于 0) 233 | 1 - sp.stats.f.cdf( 234 | x=f_value_lm,dfn=df_lm_model,dfd=df_lm_resid) 235 | 236 | 237 | # In[23]: 238 | 239 | 240 | # 方差分析表 241 | print(sm.stats.anova_lm(lm_model, typ=2)) 242 | 243 | 244 | # In[24]: 245 | 246 | 247 | # 模型的 sumamry 248 | lm_model.summary() 249 | 250 | 251 | # In[ ]: 252 | 253 | 254 | 255 | 256 | -------------------------------------------------------------------------------- /_build/jupyter_execute/5-2-方差分析_5_1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SocratesClub/statsbook/c5a012a41d83a0b48ce414d1e7b418cddf5ee90a/_build/jupyter_execute/5-2-方差分析_5_1.png -------------------------------------------------------------------------------- /_build/jupyter_execute/5-3-含有多个解释变量的模型.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # coding: utf-8 3 | 4 | # # 第 3 节 含有多个解释变量的模型 5 | # 6 | # ## 第 5 章 正态线性模型|用 Python 动手学统计学 7 | # 8 | # 9 | 10 | # ### 1. 
环境准备 11 | 12 | # In[1]: 13 | 14 | 15 | # 用于数值计算的库 16 | import numpy as np 17 | import pandas as pd 18 | import scipy as sp 19 | from scipy import stats 20 | 21 | # 用于绘图的库 22 | from matplotlib import pyplot as plt 23 | import seaborn as sns 24 | sns.set() 25 | 26 | # 用于估计统计模型的库 (部分版本会报出警告信息) 27 | import statsmodels.formula.api as smf 28 | import statsmodels.api as sm 29 | 30 | # 设置浮点数打印精度 31 | get_ipython().run_line_magic('precision', '3') 32 | # 在Jupyter Notebook里显示图形 33 | get_ipython().run_line_magic('matplotlib', 'inline') 34 | 35 | 36 | # In[2]: 37 | 38 | 39 | # 读入数据 40 | sales = pd.read_csv("5-3-1-lm-model.csv") 41 | print(sales.head(3)) 42 | 43 | 44 | # ### 2. 实现:数据可视化 45 | 46 | # In[3]: 47 | 48 | 49 | # 数据可视化 50 | sns.pairplot(data = sales, hue = "weather", 51 | palette="gray") 52 | 53 | 54 | # ### 3. 错误的分析:建立只有 1 个变量的模型 55 | 56 | # In[4]: 57 | 58 | 59 | # 只使用价格这 1 种解释变量进行建模 60 | lm_dame = smf.ols("sales ~ price", sales).fit() 61 | lm_dame.params 62 | 63 | 64 | # In[5]: 65 | 66 | 67 | # 价格的系数与 0 存在显著性差异 68 | print(sm.stats.anova_lm(lm_dame, typ=2)) 69 | 70 | 71 | # In[6]: 72 | 73 | 74 | # 价格与销售额的关系 75 | sns.lmplot(x = "price", y = "sales", data = sales, 76 | scatter_kws = {"color": "black"}, 77 | line_kws = {"color": "black"}) 78 | 79 | 80 | # ### 4. 分析解释变量之间的关系 81 | 82 | # In[7]: 83 | 84 | 85 | # 计算各天气下的均值 86 | print(sales.groupby("weather").mean()) 87 | 88 | 89 | # In[8]: 90 | 91 | 92 | # 不同天气中销售额—价格回归直线 93 | sns.lmplot(x = "price", y = "sales", data = sales, 94 | hue="weather", palette='gray') 95 | 96 | 97 | # ### 5. 实现:多解释变量的模型 98 | 99 | # In[9]: 100 | 101 | 102 | # 估计多解释变量的模型 103 | lm_sales = smf.ols( 104 | "sales ~ weather + humidity + temperature + price", 105 | data=sales).fit() 106 | # 估计的结果 107 | lm_sales.params 108 | 109 | 110 | # ### 6. 
错误的分析:使用普通方差分析 111 | 112 | # In[10]: 113 | 114 | 115 | # 普通方差分析 116 | print(sm.stats.anova_lm(lm_sales, typ=1).round(3)) 117 | 118 | 119 | # In[11]: 120 | 121 | 122 | # 改变解释变量的顺序 123 | lm_sales_2 = smf.ols( 124 | "sales ~ weather + temperature + humidity + price", 125 | data=sales).fit() 126 | # 检验结果 127 | print(sm.stats.anova_lm(lm_sales_2, typ=1).round(3)) 128 | 129 | 130 | # ### 7. 实现:回归系数的 t 检验 131 | 132 | # In[12]: 133 | 134 | 135 | # 模型 1 的回归系数的 t 检验 136 | lm_sales.summary().tables[1] 137 | 138 | 139 | # In[13]: 140 | 141 | 142 | # 模型 2 的回归系数的 t 检验 143 | lm_sales_2.summary().tables[1] 144 | 145 | 146 | # ### 9. 模型选择与方差分析 147 | 148 | # In[14]: 149 | 150 | 151 | # 空模型的残差平方和 152 | mod_null = smf.ols("sales ~ 1", sales).fit() 153 | resid_sq_null = sp.sum(mod_null.resid ** 2) 154 | resid_sq_null 155 | 156 | 157 | # In[15]: 158 | 159 | 160 | # 天气模型的残差平方和 161 | mod_1 = smf.ols("sales ~ weather", sales).fit() 162 | resid_sq_1 = sp.sum(mod_1.resid ** 2) 163 | resid_sq_1 164 | 165 | 166 | # In[16]: 167 | 168 | 169 | # 残差平方和的差 170 | resid_sq_null - resid_sq_1 171 | 172 | 173 | # In[17]: 174 | 175 | 176 | print(sm.stats.anova_lm(mod_1).round(3)) 177 | 178 | 179 | # In[18]: 180 | 181 | 182 | # "天气 + 湿度" 模型的残差平方和 183 | mod_2 = smf.ols( 184 | "sales ~ weather + humidity", sales).fit() 185 | resid_sq_2 = sp.sum(mod_2.resid ** 2) 186 | resid_sq_2 187 | 188 | 189 | # In[19]: 190 | 191 | 192 | # 残差平方和的差 193 | resid_sq_1 - resid_sq_2 194 | 195 | 196 | # In[20]: 197 | 198 | 199 | print(sm.stats.anova_lm(mod_2).round(3)) 200 | 201 | 202 | # In[21]: 203 | 204 | 205 | # "天气 + 气温" 模型的残差平方和 206 | mod_2_2 = smf.ols( 207 | "sales ~ weather + temperature", sales).fit() 208 | resid_sq_2_2 = sp.sum(mod_2_2.resid ** 2) 209 | resid_sq_2_2 210 | 211 | 212 | # In[22]: 213 | 214 | 215 | # "天气 + 气温 + 湿度" 模型的残差平方和 216 | mod_3_2 = smf.ols( 217 | "sales ~ weather + temperature + humidity", 218 | sales).fit() 219 | resid_sq_3_2 = sp.sum(mod_3_2.resid ** 2) 220 | resid_sq_3_2 221 | 222 | 223 | # 
In[23]: 224 | 225 | 226 | resid_sq_2_2 - resid_sq_3_2 227 | 228 | 229 | # In[24]: 230 | 231 | 232 | print(sm.stats.anova_lm(mod_3_2).round(3)) 233 | 234 | 235 | # ### 11. 实现:Type II ANOVA 236 | 237 | # In[25]: 238 | 239 | 240 | # 包含所有解释变量的模型的残差平方和 241 | mod_full = smf.ols( 242 | "sales ~ weather + humidity + temperature + price", 243 | sales).fit() 244 | resid_sq_full = sp.sum(mod_full.resid ** 2) 245 | resid_sq_full 246 | 247 | 248 | # In[26]: 249 | 250 | 251 | # 不含湿度的模型的残差平方和 252 | mod_non_humi = smf.ols( 253 | "sales ~ weather + temperature + price", 254 | sales).fit() 255 | resid_sq_non_humi = sp.sum(mod_non_humi.resid ** 2) 256 | resid_sq_non_humi 257 | 258 | 259 | # In[27]: 260 | 261 | 262 | # 调整平方和 263 | resid_sq_non_humi - resid_sq_full 264 | 265 | 266 | # In[28]: 267 | 268 | 269 | # Type II ANOVA 270 | print(sm.stats.anova_lm(mod_full, typ=2).round(3)) 271 | 272 | 273 | # In[29]: 274 | 275 | 276 | # 对比这两个模型 277 | mod_full.compare_f_test(mod_non_humi) 278 | 279 | 280 | # ### 13. 实现:变量选择与模型选择 281 | 282 | # In[30]: 283 | 284 | 285 | print(sm.stats.anova_lm(mod_non_humi, typ=2).round(3)) 286 | 287 | 288 | # In[31]: 289 | 290 | 291 | mod_non_humi.params 292 | 293 | 294 | # ### 14. 
实现:用 AIC 进行变量选择 295 | 296 | # In[32]: 297 | 298 | 299 | print("包含所有变量的模型:", mod_full.aic.round(3)) 300 | print("不含湿度的模型  :", mod_non_humi.aic.round(3)) 301 | 302 | 303 | # In[ ]: 304 | 305 | 306 | 307 | 308 | -------------------------------------------------------------------------------- /_build/jupyter_execute/5-3-含有多个解释变量的模型_12_1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SocratesClub/statsbook/c5a012a41d83a0b48ce414d1e7b418cddf5ee90a/_build/jupyter_execute/5-3-含有多个解释变量的模型_12_1.png -------------------------------------------------------------------------------- /_build/jupyter_execute/5-3-含有多个解释变量的模型_5_1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SocratesClub/statsbook/c5a012a41d83a0b48ce414d1e7b418cddf5ee90a/_build/jupyter_execute/5-3-含有多个解释变量的模型_5_1.png -------------------------------------------------------------------------------- /_build/jupyter_execute/5-3-含有多个解释变量的模型_9_1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SocratesClub/statsbook/c5a012a41d83a0b48ce414d1e7b418cddf5ee90a/_build/jupyter_execute/5-3-含有多个解释变量的模型_9_1.png -------------------------------------------------------------------------------- /_build/jupyter_execute/6-1-各种概率分布.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # coding: utf-8 3 | 4 | # # 第 6 章 广义线性模型 5 | # 6 | # ## 第 1 节 各种概率分布 7 | # 8 | # 9 | 10 | # ### 8. 
Environment setup 11 | 12 | # In[1]: 13 | 14 | 15 | # Libraries for numerical computation 16 | import numpy as np 17 | import pandas as pd 18 | import scipy as sp 19 | from scipy import stats 20 | 21 | # Libraries for plotting 22 | from matplotlib import pyplot as plt 23 | import seaborn as sns 24 | sns.set() 25 | 26 | # Set the printed precision of floating-point numbers 27 | get_ipython().run_line_magic('precision', '3') 28 | # Display figures inside the Jupyter Notebook 29 | get_ipython().run_line_magic('matplotlib', 'inline') 30 | 31 | 32 | # ### 9. Implementation: binomial distribution 33 | 34 | # In[2]: 35 | 36 | 37 | # Probability mass function of the binomial distribution 38 | sp.stats.binom.pmf(k = 1, n = 2, p = 0.5) 39 | 40 | 41 | # In[3]: 42 | 43 | 44 | # Generate random numbers from the binomial distribution with N = 10, p = 0.2 45 | np.random.seed(1) 46 | sp.stats.binom.rvs(n = 10, p = 0.2, size = 5) 47 | 48 | 49 | # In[4]: 50 | 51 | 52 | # Binomial distribution with N = 10, p = 0.2 53 | binomial = sp.stats.binom(n = 10, p = 0.2) 54 | 55 | # Generate random numbers 56 | np.random.seed(1) 57 | rvs_binomial = binomial.rvs(size = 10000) 58 | 59 | # Probability mass function 60 | m = np.arange(0,10,1) 61 | pmf_binomial = binomial.pmf(k = m) 62 | 63 | # Draw the sample histogram together with the probability mass function 64 | sns.distplot(rvs_binomial, bins = m, kde = False, 65 | norm_hist = True, color = 'gray') 66 | plt.plot(m, pmf_binomial, color = 'black') 67 | 68 | 69 | # ### 14. 
实现:泊松分布 70 | 71 | # In[5]: 72 | 73 | 74 | # 泊松分布的概率质量函数 75 | sp.stats.poisson.pmf(k = 2, mu = 5) 76 | 77 | 78 | # In[6]: 79 | 80 | 81 | # 从 λ = 2 的泊松分布中生成随机数 82 | np.random.seed(1) 83 | sp.stats.poisson.rvs(mu = 2, size = 5) 84 | 85 | 86 | # In[7]: 87 | 88 | 89 | # λ = 2 的泊松分布 90 | poisson = sp.stats.poisson(mu = 2) 91 | 92 | # 生成随机数 93 | np.random.seed(1) 94 | rvs_poisson = poisson.rvs(size = 10000) 95 | 96 | # 概率质量函数 97 | pmf_poisson = poisson.pmf(k = m) 98 | 99 | # 绘制样本直方图与概率质量函数 100 | sns.distplot(rvs_poisson, bins = m, kde = False, 101 | norm_hist = True, color = 'gray') 102 | plt.plot(m, pmf_poisson, color = 'black') 103 | 104 | 105 | # In[8]: 106 | 107 | 108 | # N 非常大但 p 非常小的二项分布 109 | N = 100000000 110 | p = 0.00000002 111 | binomial_2 = sp.stats.binom(n = N, p = p) 112 | 113 | # 概率质量函数 114 | pmf_binomial_2 = binomial_2.pmf(k = m) 115 | 116 | # 绘制概率质量函数 117 | plt.plot(m, pmf_poisson, color = 'gray') 118 | plt.plot(m, pmf_binomial_2, color = 'black', 119 | linestyle = 'dotted') 120 | 121 | 122 | # In[ ]: 123 | 124 | 125 | 126 | 127 | -------------------------------------------------------------------------------- /_build/jupyter_execute/6-1-各种概率分布_10_2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SocratesClub/statsbook/c5a012a41d83a0b48ce414d1e7b418cddf5ee90a/_build/jupyter_execute/6-1-各种概率分布_10_2.png -------------------------------------------------------------------------------- /_build/jupyter_execute/6-1-各种概率分布_11_1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SocratesClub/statsbook/c5a012a41d83a0b48ce414d1e7b418cddf5ee90a/_build/jupyter_execute/6-1-各种概率分布_11_1.png -------------------------------------------------------------------------------- /_build/jupyter_execute/6-1-各种概率分布_6_2.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/SocratesClub/statsbook/c5a012a41d83a0b48ce414d1e7b418cddf5ee90a/_build/jupyter_execute/6-1-各种概率分布_6_2.png -------------------------------------------------------------------------------- /_build/jupyter_execute/6-3-logistic回归.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # coding: utf-8 3 | 4 | # # 第 3 节 logistic 回归 5 | # ## 第 6 章 广义线性模型|用 Python 动手学统计学 6 | # 7 | # 8 | 9 | # ### 10. 环境准备 10 | 11 | # In[1]: 12 | 13 | 14 | # 用于数值计算的库 15 | import numpy as np 16 | import pandas as pd 17 | import scipy as sp 18 | from scipy import stats 19 | 20 | # 用于绘图的库 21 | from matplotlib import pyplot as plt 22 | import seaborn as sns 23 | sns.set() 24 | 25 | # 用于估计统计模型的库 (部分版本会报出警告信息) 26 | import statsmodels.formula.api as smf 27 | import statsmodels.api as sm 28 | 29 | # 设置浮点数打印精度 30 | get_ipython().run_line_magic('precision', '3') 31 | # 在 Jupyter Notebook 里显示图形 32 | get_ipython().run_line_magic('matplotlib', 'inline') 33 | 34 | 35 | # ### 11. 实现:读取数据并可视化 36 | 37 | # In[2]: 38 | 39 | 40 | # 读取数据 41 | test_result = pd.read_csv("6-3-1-logistic-regression.csv") 42 | print(test_result.head(3)) 43 | 44 | 45 | # In[3]: 46 | 47 | 48 | # 可视化 49 | sns.barplot(x = "hours",y = "result", 50 | data = test_result, palette='gray_r') 51 | 52 | 53 | # In[4]: 54 | 55 | 56 | # 学习时间与合格率的关系 57 | print(test_result.groupby("hours").mean()) 58 | 59 | 60 | # ### 12. 实现:logistic 回归 61 | 62 | # In[5]: 63 | 64 | 65 | # 建模 66 | mod_glm = smf.glm(formula = "result ~ hours", 67 | data = test_result, 68 | family=sm.families.Binomial()).fit() 69 | 70 | 71 | # In[6]: 72 | 73 | 74 | # 参考: 指定联系函数 75 | logistic_reg = smf.glm(formula = "result ~ hours", 76 | data = test_result, 77 | family=sm.families.Binomial(link=sm.families.links.logit)).fit() 78 | 79 | 80 | # ### 13. 实现:logistic 回归的结果 81 | 82 | # In[7]: 83 | 84 | 85 | # 打印估计的结果 86 | mod_glm.summary() 87 | 88 | 89 | # ### 14. 
实现:模型选择 90 | 91 | # In[8]: 92 | 93 | 94 | # 空模型 95 | mod_glm_null = smf.glm( 96 | "result ~ 1", data = test_result, 97 | family=sm.families.Binomial()).fit() 98 | 99 | 100 | # In[9]: 101 | 102 | 103 | # 对比 AIC 104 | print("空模型   :", mod_glm_null.aic.round(3)) 105 | print("学习时间模型:", mod_glm.aic.round(3)) 106 | 107 | 108 | # ### 15. 实现:回归曲线 109 | 110 | # In[10]: 111 | 112 | 113 | # 用 lmplot 绘制 logistic 回归曲线 114 | sns.lmplot(x = "hours", y = "result", 115 | data = test_result, 116 | logistic = True, 117 | scatter_kws = {"color": "black"}, 118 | line_kws = {"color": "black"}, 119 | x_jitter = 0.1, y_jitter = 0.02) 120 | 121 | 122 | # ### 16. 实现:预测成功概率 123 | 124 | # In[11]: 125 | 126 | 127 | # 0~9 上公差为 1 的等差数列 128 | exp_val = pd.DataFrame({ 129 | "hours": np.arange(0, 10, 1) 130 | }) 131 | # 成功概率的预测值 132 | pred = mod_glm.predict(exp_val) 133 | pred 134 | 135 | 136 | # ### 19. logistic 回归的系数与优势比的关系 137 | 138 | # In[12]: 139 | 140 | 141 | # 学习时间为 1 小时的合格率 142 | exp_val_1 = pd.DataFrame({"hours": [1]}) 143 | pred_1 = mod_glm.predict(exp_val_1) 144 | 145 | # 学习时间为 2 小时的合格率 146 | exp_val_2 = pd.DataFrame({"hours": [2]}) 147 | pred_2 = mod_glm.predict(exp_val_2) 148 | 149 | 150 | # In[13]: 151 | 152 | 153 | # 优势 154 | odds_1 = pred_1 / (1 - pred_1) 155 | odds_2 = pred_2 / (1 - pred_2) 156 | 157 | # 对数优势比 158 | sp.log(odds_2 / odds_1) 159 | 160 | 161 | # In[14]: 162 | 163 | 164 | # 系数 165 | mod_glm.params["hours"] 166 | 167 | 168 | # In[15]: 169 | 170 | 171 | # 补充: 系数为 e 的指数时,其结果就是优势比 172 | sp.exp(mod_glm.params["hours"]) 173 | 174 | 175 | # In[ ]: 176 | 177 | 178 | 179 | 180 | -------------------------------------------------------------------------------- /_build/jupyter_execute/6-3-logistic回归_16_1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SocratesClub/statsbook/c5a012a41d83a0b48ce414d1e7b418cddf5ee90a/_build/jupyter_execute/6-3-logistic回归_16_1.png 
-------------------------------------------------------------------------------- /_build/jupyter_execute/6-3-logistic回归_5_1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SocratesClub/statsbook/c5a012a41d83a0b48ce414d1e7b418cddf5ee90a/_build/jupyter_execute/6-3-logistic回归_5_1.png -------------------------------------------------------------------------------- /_build/jupyter_execute/6-4-广义线性模型的评估.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # coding: utf-8 3 | 4 | # # 第 4 节 广义线性模型的评估 5 | # 6 | # ## 第 6 章 广义线性模型|用 Python 动手学统计学 7 | # 8 | # 9 | 10 | # ### 1. 环境准备 11 | 12 | # In[1]: 13 | 14 | 15 | # 用于数值计算的库 16 | import numpy as np 17 | import pandas as pd 18 | import scipy as sp 19 | from scipy import stats 20 | 21 | # 用于绘图的库 22 | from matplotlib import pyplot as plt 23 | import seaborn as sns 24 | sns.set() 25 | 26 | # 用于估计统计模型的库 (部分版本会报出警告信息) 27 | import statsmodels.formula.api as smf 28 | import statsmodels.api as sm 29 | 30 | # 设置浮点数打印精度 31 | get_ipython().run_line_magic('precision', '3') 32 | # 在 Jupyter Notebook 里显示图形 33 | get_ipython().run_line_magic('matplotlib', 'inline') 34 | 35 | 36 | # In[2]: 37 | 38 | 39 | # 读取数据 40 | test_result = pd.read_csv("6-3-1-logistic-regression.csv") 41 | 42 | # 模型化 43 | mod_glm = smf.glm("result ~ hours", data = test_result, 44 | family=sm.families.Binomial()).fit() 45 | 46 | 47 | # ### 4. 
皮尔逊残差 48 | 49 | # In[3]: 50 | 51 | 52 | # 计算皮尔逊残差 53 | 54 | # 预测的成功概率 55 | pred = mod_glm.predict() 56 | # 响应变量 (合格情况) 57 | y = test_result.result 58 | 59 | # 皮尔逊残差 60 | peason_resid = (y - pred) / sp.sqrt(pred * (1 - pred)) 61 | peason_resid.head(3) 62 | 63 | 64 | # In[4]: 65 | 66 | 67 | # 获取皮尔逊残差 68 | mod_glm.resid_pearson.head(3) 69 | 70 | 71 | # In[5]: 72 | 73 | 74 | # 皮尔逊残差的平方和 75 | sp.sum(mod_glm.resid_pearson**2) 76 | 77 | 78 | # In[6]: 79 | 80 | 81 | # 同样出现在 summary 函数的结果中 82 | mod_glm.pearson_chi2 83 | 84 | 85 | # ### 9. 偏差残差 86 | 87 | # In[7]: 88 | 89 | 90 | # 计算偏差残差 91 | 92 | # 预测的成功概率 93 | pred = mod_glm.predict() 94 | # 响应变量 (合格情况) 95 | y = test_result.result 96 | 97 | # 与完美预测了合格情况时的对数似然度的差值 98 | resid_tmp = 0 - sp.log( 99 | sp.stats.binom.pmf(k = y, n = 1, p = pred)) 100 | # 偏差残差 101 | deviance_resid = sp.sqrt( 102 | 2 * resid_tmp 103 | ) * np.sign(y - pred) 104 | # 打印结果 105 | deviance_resid.head(3) 106 | 107 | 108 | # In[8]: 109 | 110 | 111 | mod_glm.resid_deviance.head(3) 112 | 113 | 114 | # In[9]: 115 | 116 | 117 | # deviance 118 | sp.sum(mod_glm.resid_deviance ** 2) 119 | 120 | -------------------------------------------------------------------------------- /_build/jupyter_execute/6-5-泊松回归.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # coding: utf-8 3 | 4 | # # 第 5 节 泊松回归 5 | # 6 | # ## 第 6 章 广义线性模型|用 Python 动手学统计学 7 | # 8 | # 9 | 10 | # ### 4. 
环境准备 11 | 12 | # In[1]: 13 | 14 | 15 | # 用于数值计算的库 16 | import numpy as np 17 | import pandas as pd 18 | import scipy as sp 19 | from scipy import stats 20 | 21 | # 用于绘图的库 22 | from matplotlib import pyplot as plt 23 | import seaborn as sns 24 | sns.set() 25 | 26 | # 用于估计统计模型的库 (部分版本会报出警告信息) 27 | import statsmodels.formula.api as smf 28 | import statsmodels.api as sm 29 | 30 | # 设置浮点数打印精度 31 | get_ipython().run_line_magic('precision', '3') 32 | # 在 Jupyter Notebook 里显示图形 33 | get_ipython().run_line_magic('matplotlib', 'inline') 34 | 35 | 36 | # In[2]: 37 | 38 | 39 | # 读取数据 40 | beer = pd.read_csv("6-5-1-poisson-regression.csv") 41 | print(beer.head(3)) 42 | 43 | 44 | # ### 5. 实现:泊松回归 45 | 46 | # In[3]: 47 | 48 | 49 | # 建模 50 | mod_pois = smf.glm("beer_number ~ temperature", beer, 51 | family=sm.families.Poisson()).fit() 52 | mod_pois.summary() 53 | 54 | 55 | # ### 6. 实现:模型选择 56 | 57 | # In[4]: 58 | 59 | 60 | # 空模型 61 | mod_pois_null = smf.glm( 62 | "beer_number ~ 1", data = beer, 63 | family=sm.families.Poisson()).fit() 64 | 65 | 66 | # In[5]: 67 | 68 | 69 | # 对比 AIC 70 | print("空模型 :", mod_pois_null.aic.round(3)) 71 | print("气温模型:", mod_pois.aic.round(3)) 72 | 73 | 74 | # ### 7. 实现:回归曲线 75 | 76 | # In[6]: 77 | 78 | 79 | # 绘制回归曲线 80 | 81 | # 计算预测值 82 | x_plot = np.arange(0, 37) 83 | pred = mod_pois.predict( 84 | pd.DataFrame({"temperature": x_plot})) 85 | 86 | # 不含默认回归直线的 lmplot 87 | sns.lmplot(y="beer_number", x = "temperature", 88 | data = beer, fit_reg = False, 89 | scatter_kws = {"color":"black"}) 90 | # 绘出回归曲线 91 | plt.plot(x_plot, pred, color="black") 92 | 93 | 94 | # ### 8. 
回归系数的含义 95 | 96 | # In[7]: 97 | 98 | 99 | # 气温为 1 度时销售数量的期望 100 | exp_val_1 = pd.DataFrame({"temperature": [1]}) 101 | pred_1 = mod_pois.predict(exp_val_1) 102 | 103 | # 气温为 2 度时销售数量的期望 104 | exp_val_2 = pd.DataFrame({"temperature": [2]}) 105 | pred_2 = mod_pois.predict(exp_val_2) 106 | 107 | # 气温每升高 1 度, 销量变为多少倍 108 | pred_2 / pred_1 109 | 110 | 111 | # In[8]: 112 | 113 | 114 | # e 的指数为回归系数 115 | sp.exp(mod_pois.params["temperature"]) 116 | 117 | -------------------------------------------------------------------------------- /_build/jupyter_execute/6-5-泊松回归_10_1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SocratesClub/statsbook/c5a012a41d83a0b48ce414d1e7b418cddf5ee90a/_build/jupyter_execute/6-5-泊松回归_10_1.png -------------------------------------------------------------------------------- /_build/jupyter_execute/7-3-Python中的Ridge回归与Lasso回归.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # coding: utf-8 3 | 4 | # # 第 7 章 统计学与机器学习 5 | # 6 | # ## 第 3 节 Python 中的 Ridge 回归与 Lasso 回归 7 | # 8 | # 9 | # 10 | 11 | # ### 2. 环境准备 12 | 13 | # In[1]: 14 | 15 | 16 | # 用于数值计算的库 17 | import numpy as np 18 | import pandas as pd 19 | import scipy as sp 20 | from scipy import stats 21 | 22 | # 用于绘图的库 23 | from matplotlib import pyplot as plt 24 | import seaborn as sns 25 | sns.set() 26 | 27 | # 用于估计统计模型的库 (部分版本会报出警告信息) 28 | import statsmodels.formula.api as smf 29 | import statsmodels.api as sm 30 | 31 | # 用于机器学习的库 32 | from sklearn import linear_model 33 | 34 | # 设置浮点数打印精度 35 | get_ipython().run_line_magic('precision', '3') 36 | # 在 Jupyter Notebook 里显示图形 37 | get_ipython().run_line_magic('matplotlib', 'inline') 38 | 39 | 40 | # In[2]: 41 | 42 | 43 | # 读入示例数据 44 | X = pd.read_csv("7-3-1-large-data.csv") 45 | X.head(3) 46 | 47 | 48 | # ### 3. 
实现:标准化 49 | 50 | # In[3]: 51 | 52 | 53 | # X_1 的均值 54 | sp.mean(X.X_1) 55 | 56 | 57 | # In[4]: 58 | 59 | 60 | # 所有解释变量的均值 61 | sp.mean(X, axis = 0).head(3) 62 | 63 | 64 | # In[5]: 65 | 66 | 67 | # 标准化 68 | X -= sp.mean(X, axis = 0) 69 | X /= sp.std(X, ddof = 1, axis = 0) 70 | 71 | 72 | # In[6]: 73 | 74 | 75 | # 检验 76 | sp.mean(X, axis = 0).head(3).round(3) 77 | 78 | 79 | # In[7]: 80 | 81 | 82 | # 检验 83 | sp.std(X, ddof = 1, axis = 0).head(3) 84 | 85 | 86 | # ### 4. 定义响应变量 87 | 88 | # In[8]: 89 | 90 | 91 | # 定义响应变量 92 | 93 | # 服从正态分布的噪声 94 | np.random.seed(1) 95 | noise = sp.stats.norm.rvs(loc = 0, scale = 1, size = X.shape[0]) 96 | 97 | # 设正确的系数为 5, 定义响应变量 98 | y = X.X_1 * 5 + noise 99 | 100 | 101 | # In[9]: 102 | 103 | 104 | # 把响应变量和解释变量放在一起 105 | large_data = pd.concat([pd.DataFrame({"y":y}), X], axis = 1) 106 | # 绘制散点图 107 | sns.jointplot(y = "y", x = "X_1", data = large_data, 108 | color = 'black') 109 | 110 | 111 | # ### 5. 实现:普通最小二乘法 112 | 113 | # In[10]: 114 | 115 | 116 | lm_statsmodels = sm.OLS(endog = y, exog = X).fit() 117 | lm_statsmodels.params.head(3) 118 | 119 | 120 | # ### 6. 实现:使用 sklearn 实现线性回归 121 | 122 | # In[11]: 123 | 124 | 125 | # 指定模型的结构 126 | lm_sklearn = linear_model.LinearRegression() 127 | # 指定数据来源并估计模型 128 | lm_sklearn.fit(X, y) 129 | # 所估计的参数 (数组型) 130 | lm_sklearn.coef_ 131 | 132 | 133 | # ### 7. 
实现:Ridge 回归:惩罚项的影响 134 | 135 | # In[12]: 136 | 137 | 138 | # 生成 50 个 α 139 | n_alphas = 50 140 | ridge_alphas = np.logspace(-2, 0.7, n_alphas) 141 | 142 | 143 | # In[13]: 144 | 145 | 146 | # 参考 147 | sp.log10(ridge_alphas) 148 | 149 | 150 | # In[14]: 151 | 152 | 153 | # 对不同的 α 值进行 Ridge 回归 154 | 155 | # 存放已估计的回归系数的列表 156 | ridge_coefs = [] 157 | # 使用 for 循环多次估计 Ridge 回归 158 | for a in ridge_alphas: 159 | ridge = linear_model.Ridge(alpha = a, fit_intercept = False) 160 | ridge.fit(X, y) 161 | ridge_coefs.append(ridge.coef_) 162 | 163 | 164 | # In[15]: 165 | 166 | 167 | # 转换为数组 168 | ridge_coefs = np.array(ridge_coefs) 169 | ridge_coefs.shape 170 | 171 | 172 | # In[16]: 173 | 174 | 175 | # 参考 176 | log_alphas = -sp.log10(ridge_alphas) 177 | plt.plot(log_alphas, ridge_coefs[::,0], color = 'black') 178 | plt.plot(log_alphas, ridge_coefs[::,1], color = 'black') 179 | 180 | plt.xlim([min(log_alphas)-0.1, max(log_alphas) + 0.3]) 181 | plt.ylim([-8, 10.5]) 182 | 183 | 184 | # In[17]: 185 | 186 | 187 | # 横轴为 -log10(α), 纵轴为系数的折线图 188 | # 无需重复 100 次即可自动得到 100 条线 189 | 190 | # 对 α 取对数 191 | log_alphas = -sp.log10(ridge_alphas) 192 | # 绘制曲线, 横轴为 -log10(α), 纵轴为系数 193 | plt.plot(log_alphas, ridge_coefs, color = 'black') 194 | # 标出解释变量 X_1 的系数 195 | plt.text(max(log_alphas) + 0.1, np.array(ridge_coefs)[0,0], "X_1") 196 | # X 轴的范围 197 | plt.xlim([min(log_alphas) - 0.1, max(log_alphas) + 0.3]) 198 | # 轴标签 199 | plt.title("Ridge") 200 | plt.xlabel("- log10(alpha)") 201 | plt.ylabel("Coefficients") 202 | 203 | 204 | # ### 8. 实现:Ridge 回归:确定最佳正则化强度 205 | 206 | # In[18]: 207 | 208 | 209 | # 通过交叉验证法求最佳 α 210 | ridge_best = linear_model.RidgeCV( 211 | cv = 10, alphas = ridge_alphas, fit_intercept = False) 212 | ridge_best.fit(X, y) 213 | 214 | # 最佳的 -log10(α) 215 | -sp.log10(ridge_best.alpha_) 216 | 217 | 218 | # In[19]: 219 | 220 | 221 | # 最佳 α 222 | ridge_best.alpha_ 223 | 224 | 225 | # In[20]: 226 | 227 | 228 | # 取最佳 α 时的回归系数 229 | ridge_best.coef_ 230 | 231 | 232 | # ### 9. 
实现:Lasso 回归:惩罚指标的影响 233 | 234 | # In[21]: 235 | 236 | 237 | # 对不同的 α 值进行 Lasso 回归 238 | lasso_alphas, lasso_coefs, _ = linear_model.lasso_path( 239 | X, y, fit_intercept = False) 240 | 241 | 242 | # In[23]: 243 | 244 | 245 | # Lasso 回归的解路径图 246 | 247 | # 对 α 取对数 248 | log_alphas = -sp.log10(lasso_alphas) 249 | # 绘制曲线, 横轴为 -log10(α), 纵轴为系数 250 | plt.plot(log_alphas, lasso_coefs.T, color = 'black') 251 | # 标出解释变量 X_1 的系数 252 | plt.text(max(log_alphas) + 0.1, lasso_coefs[0, -1], "X_1") 253 | # X 轴的范围 254 | plt.xlim([min(log_alphas)-0.1, max(log_alphas) + 0.3]) 255 | # 轴标签 256 | plt.title("Lasso") 257 | plt.xlabel("- log10(alpha)") 258 | plt.ylabel("Coefficients") 259 | 260 | 261 | # ### 10. 实现:Lasso 回归:确定最佳正则化强度 262 | 263 | # In[39]: 264 | 265 | 266 | # 通过交叉验证法求最佳的 α 267 | lasso_best = linear_model.LassoCV( 268 | cv = 10, alphas = lasso_alphas, fit_intercept = False) 269 | lasso_best.fit(X, y) 270 | 271 | # 最佳的 -log(α) 272 | -sp.log10(lasso_best.alpha_) 273 | 274 | 275 | # In[40]: 276 | 277 | 278 | # 最佳的 α 279 | lasso_best.alpha_ 280 | 281 | 282 | # In[41]: 283 | 284 | 285 | # 取最佳的 α 时的回归系数 286 | lasso_best.coef_ 287 | 288 | 289 | # In[ ]: 290 | 291 | 292 | 293 | 294 | -------------------------------------------------------------------------------- /_build/jupyter_execute/7-3-Python中的Ridge回归与Lasso回归_12_1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SocratesClub/statsbook/c5a012a41d83a0b48ce414d1e7b418cddf5ee90a/_build/jupyter_execute/7-3-Python中的Ridge回归与Lasso回归_12_1.png -------------------------------------------------------------------------------- /_build/jupyter_execute/7-3-Python中的Ridge回归与Lasso回归_22_2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SocratesClub/statsbook/c5a012a41d83a0b48ce414d1e7b418cddf5ee90a/_build/jupyter_execute/7-3-Python中的Ridge回归与Lasso回归_22_2.png 
-------------------------------------------------------------------------------- /_build/jupyter_execute/7-3-Python中的Ridge回归与Lasso回归_23_2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SocratesClub/statsbook/c5a012a41d83a0b48ce414d1e7b418cddf5ee90a/_build/jupyter_execute/7-3-Python中的Ridge回归与Lasso回归_23_2.png -------------------------------------------------------------------------------- /_build/jupyter_execute/7-3-Python中的Ridge回归与Lasso回归_30_1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SocratesClub/statsbook/c5a012a41d83a0b48ce414d1e7b418cddf5ee90a/_build/jupyter_execute/7-3-Python中的Ridge回归与Lasso回归_30_1.png -------------------------------------------------------------------------------- /_build/jupyter_execute/7-4-线性模型与神经网络.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # coding: utf-8 3 | 4 | # # 第 4 节 线性模型与神经网络 5 | # 6 | # ## 第 7 章 统计学与机器学习|用 Python 动手学统计学 7 | # 8 | # 9 | 10 | # ### 环境准备 11 | 12 | # In[1]: 13 | 14 | 15 | # 用于数值计算的库 16 | import numpy as np 17 | import pandas as pd 18 | import scipy as sp 19 | 20 | # 用于估计统计模型的库 (部分版本会有警告信息) 21 | import statsmodels.formula.api as smf 22 | import statsmodels.api as sm 23 | 24 | # 用于多层感知器的库 25 | from sklearn.neural_network import MLPClassifier 26 | 27 | # 导入示例数据 28 | from sklearn.datasets import load_iris 29 | 30 | # 区分训练集与测试集 31 | from sklearn.model_selection import train_test_split 32 | 33 | # 标准化数据 34 | from sklearn.preprocessing import StandardScaler 35 | 36 | # 设置浮点数打印精度 37 | get_ipython().run_line_magic('precision', '3') 38 | 39 | 40 | # ### 读入数据并整形 41 | 42 | # In[2]: 43 | 44 | 45 | # 导入示例数据 46 | iris = load_iris() 47 | 48 | 49 | # In[3]: 50 | 51 | 52 | # 解释变量的名称 53 | iris.feature_names 54 | 55 | 56 | # In[4]: 57 | 58 | 59 | # 响应变量的名称 60 | iris.target_names 61 | 62 | 63 | # In[5]: 64 | 65 | 66 | # 
解释变量仅为萼片 (sepal) 67 | X = iris.data[50:150, 0:2] 68 | # 只取2种鸢尾花 69 | y = iris.target[50:150] 70 | 71 | print("解释变量行数与列数:", X.shape) 72 | print("响应变量行数与列数:", y.shape) 73 | 74 | 75 | # In[6]: 76 | 77 | 78 | # 把数据分为训练集与测试集 79 | X_train, X_test, y_train, y_test = train_test_split( 80 | X, y, random_state = 2) 81 | 82 | print("解释变量行数与列数:", X_train.shape) 83 | print("响应变量行数与列数:", y_train.shape) 84 | 85 | 86 | # ### 实现:logistic 回归 87 | 88 | # In[7]: 89 | 90 | 91 | # 打印响应变量 92 | y_train[0:10] 93 | 94 | 95 | # In[8]: 96 | 97 | 98 | # 数据整形 99 | # 解释变量的数据帧 100 | X_train_df = pd.DataFrame( 101 | X_train, columns = ["sepal_len", "sepal_wid"]) 102 | # 响应变量的数据帧 103 | y_train_df = pd.DataFrame({"species": y_train - 1}) 104 | # 连接数据帧 105 | iris_train_df = pd.concat( 106 | [y_train_df, X_train_df], axis=1) 107 | # 打印结果 108 | print(iris_train_df.head(3)) 109 | 110 | 111 | # In[9]: 112 | 113 | 114 | # 模型化 115 | # 长度与宽度模型 116 | logi_mod_full = smf.glm( 117 | "species ~ sepal_len + sepal_wid", data = iris_train_df, 118 | family=sm.families.Binomial()).fit() 119 | 120 | # 长度模型 121 | logi_mod_len = smf.glm( 122 | "species ~ sepal_len", data = iris_train_df, 123 | family=sm.families.Binomial()).fit() 124 | 125 | # 宽度模型 126 | logi_mod_wid = smf.glm( 127 | "species ~ sepal_wid", data = iris_train_df, 128 | family=sm.families.Binomial()).fit() 129 | 130 | # 空模型 131 | logi_mod_null = smf.glm( 132 | "species ~ 1", data = iris_train_df, 133 | family=sm.families.Binomial()).fit() 134 | 135 | # 对比 AIC 136 | print("full", logi_mod_full.aic.round(3)) 137 | print("len ", logi_mod_len.aic.round(3)) 138 | print("wid ", logi_mod_wid.aic.round(3)) 139 | print("null", logi_mod_null.aic.round(3)) 140 | 141 | 142 | # In[10]: 143 | 144 | 145 | # 查看估计的系数等指标 146 | logi_mod_len.summary().tables[1] 147 | 148 | 149 | # In[11]: 150 | 151 | 152 | # 预测精度 153 | # 数据整形 154 | X_test_df = pd.DataFrame( 155 | X_test, columns = ["sepal_len", "sepal_wid"]) 156 | 157 | # 拟合与预测 158 | logi_fit = 
logi_mod_len.fittedvalues.round(0) 159 | logi_pred = logi_mod_len.predict(X_test_df).round(0) 160 | 161 | # 正确数 162 | true_train = sp.sum(logi_fit == (y_train - 1)) 163 | true_test = sp.sum(logi_pred == (y_test - 1)) 164 | 165 | # 命中率 166 | result_train = true_train / len(y_train) 167 | result_test = true_test / len(y_test) 168 | 169 | # 打印结果 170 | print("训练集的命中率:", result_train) 171 | print("测试集的命中率:", result_test) 172 | 173 | 174 | # ### 实现:标准化 175 | 176 | # In[12]: 177 | 178 | 179 | # 准备标准化 180 | scaler = StandardScaler() 181 | scaler.fit(X_train) 182 | # 标准化 183 | X_train_scaled = scaler.transform(X_train) 184 | X_test_scaled = scaler.transform(X_test) 185 | 186 | 187 | # In[13]: 188 | 189 | 190 | sp.std(X_train_scaled, axis=0) 191 | 192 | 193 | # In[14]: 194 | 195 | 196 | sp.std(X_test_scaled, axis=0) 197 | 198 | 199 | # ### 实现:神经网络 200 | 201 | # In[15]: 202 | 203 | 204 | nnet = MLPClassifier( 205 | hidden_layer_sizes = (100,100), 206 | alpha = 0.07, 207 | max_iter = 10000, 208 | random_state = 0) 209 | nnet.fit(X_train_scaled, y_train) 210 | 211 | # 正确数 212 | print("训练集的命中率:", nnet.score(X_train_scaled, y_train)) 213 | print("测试集的命中率:", nnet.score(X_test_scaled, y_test)) 214 | 215 | 216 | # In[ ]: 217 | 218 | 219 | 220 | 221 | -------------------------------------------------------------------------------- /_config.yml: -------------------------------------------------------------------------------- 1 | ####################################################################################### 2 | # A default configuration that will be loaded for all jupyter books 3 | # Users are expected to override these values in their own `_config.yml` file. 4 | # This is also the "master list" of all allowed keys and values. 5 | 6 | ####################################################################################### 7 | # Book settings 8 | title : Stats Book # The title of the book. Will be placed in the left navbar. 
9 | author : The Jupyter Book community # The author of the book 10 | copyright : "2021" # Copyright year to be placed in the footer 11 | logo : "" # A path to the book logo 12 | # Patterns to skip when building the book. Can be glob-style (e.g. "*skip.ipynb") 13 | exclude_patterns : [_build, Thumbs.db, .DS_Store, "**.ipynb_checkpoints"] 14 | # Auto-exclude files not in the toc 15 | only_build_toc_files : false 16 | 17 | ####################################################################################### 18 | # Execution settings 19 | execute: 20 | execute_notebooks : auto # Whether to execute notebooks at build time. Must be one of ("auto", "force", "cache", "off") 21 | cache : "" # A path to the jupyter cache that will be used to store execution artifacts. Defaults to `_build/.jupyter_cache/` 22 | exclude_patterns : [] # A list of patterns to *skip* in execution (e.g. a notebook that takes a really long time) 23 | timeout : 30 # The maximum time (in seconds) each notebook cell is allowed to run. 24 | run_in_temp : false # If `True`, then a temporary directory will be created and used as the command working directory (cwd), 25 | # otherwise the notebook's parent directory will be the cwd. 26 | allow_errors : false # If `False`, when a code cell raises an error the execution is stopped, otherwise all cells are always run. 27 | stderr_output : show # One of 'show', 'remove', 'remove-warn', 'warn', 'error', 'severe' 28 | 29 | ####################################################################################### 30 | # Parse and render settings 31 | parse: 32 | myst_enable_extensions: # default extensions to enable in the myst parser. 
See https://myst-parser.readthedocs.io/en/latest/using/syntax-optional.html 33 | # - amsmath 34 | - colon_fence 35 | # - deflist 36 | - dollarmath 37 | # - html_admonition 38 | # - html_image 39 | - linkify 40 | # - replacements 41 | # - smartquotes 42 | - substitution 43 | - tasklist 44 | myst_url_schemes: [mailto, http, https] # URI schemes that will be recognised as external URLs in Markdown links 45 | myst_dmath_double_inline: true # Allow display math ($$) within an inline context 46 | 47 | ####################################################################################### 48 | # HTML-specific settings 49 | html: 50 | favicon : "" # A path to a favicon image 51 | use_edit_page_button : false # Whether to add an "edit this page" button to pages. If `true`, repository information in repository: must be filled in 52 | use_repository_button : false # Whether to add a link to your repository button 53 | use_issues_button : false # Whether to add an "open an issue" button 54 | use_multitoc_numbering : true # Continuous numbering across parts/chapters 55 | extra_navbar : Powered by Jupyter Book # Will be displayed underneath the left navbar. 56 | extra_footer : "" # Will be displayed underneath the footer. 57 | google_analytics_id : "" # A GA id that can be used to track book views. 58 | home_page_in_navbar : true # Whether to include your home page in the left Navigation Bar 59 | baseurl : "" # The base URL where your book will be hosted. Used for creating image previews and social links. 
e.g.: https://mypage.com/mybook/ 60 | comments: 61 | hypothesis : false 62 | utterances : false 63 | 64 | ####################################################################################### 65 | # LaTeX-specific settings 66 | latex: 67 | latex_engine : pdflatex # one of 'pdflatex', 'xelatex' (recommended for unicode), 'luatex', 'platex', 'uplatex' 68 | use_jupyterbook_latex : true # use sphinx-jupyterbook-latex for pdf builds as default 69 | 70 | ####################################################################################### 71 | # Launch button settings 72 | launch_buttons: 73 | notebook_interface : classic # The interface interactive links will activate ["classic", "jupyterlab"] 74 | binderhub_url : https://mybinder.org # The URL of the BinderHub (e.g., https://mybinder.org) 75 | jupyterhub_url : "" # The URL of the JupyterHub (e.g., https://datahub.berkeley.edu) 76 | thebe : false # Add a thebe button to pages (requires the repository to run on Binder) 77 | colab_url : "" # The URL of Google Colab (https://colab.research.google.com) 78 | 79 | repository: 80 | url : https://github.com/executablebooks/jupyter-book # The URL to your book's repository 81 | path_to_book : "" # A path to your book's folder, relative to the repository root. 82 | branch : master # Which branch of the repository should be used when creating links 83 | 84 | ####################################################################################### 85 | # Advanced and power-user settings 86 | sphinx: 87 | extra_extensions : # A list of extra extensions to load by Sphinx (added to those already used by JB). 
88 | local_extensions : # A list of local extensions to load by sphinx specified by "name: path" items 89 | config : # key-value pairs to directly over-ride the Sphinx configuration 90 | -------------------------------------------------------------------------------- /_toc.yml: -------------------------------------------------------------------------------- 1 | format: jb-book 2 | root: README 3 | 4 | chapters: 5 | 6 | - file: 2-2-认识Jupyter-Notebook 7 | sections: 8 | - file: 2-3-Python编程基础 9 | - file: 2-4-认识numpy与pandas 10 | 11 | - file: 3-1-使用Python进行描述统计单变量 12 | sections: 13 | - file: 3-2-使用Python进行描述统计多变量 14 | - file: 3-3-基于matplotlib-seaborn的数据可视化 15 | - file: 3-4-用Python模拟抽样 16 | - file: 3-5-样本统计量的性质 17 | - file: 3-6-正态分布及其应用 18 | - file: 3-7-参数估计 19 | - file: 3-8-假设检验 20 | - file: 3-9-均值差的检验 21 | - file: 3-10-列联表检验 22 | 23 | - file: 5-1-一元回归 24 | sections: 25 | - file: 5-2-方差分析 26 | - file: 5-3-含有多个解释变量的模型 27 | 28 | - file: 6-1-各种概率分布 29 | sections: 30 | - file: 6-3-logistic回归 31 | - file: 6-4-广义线性模型的评估 32 | - file: 6-5-泊松回归 33 | 34 | - file: 7-3-Python中的Ridge回归与Lasso回归 35 | sections: 36 | - file: 7-4-线性模型与神经网络 37 | -------------------------------------------------------------------------------- /deploy.sh: -------------------------------------------------------------------------------- 1 | # ssh-add ~/.ssh/id_rsa 2 | # git remote set-url origin git@github.com:socratesacademy/statsbook.git 3 | # git pull origin main 4 | # git add . 5 | # git commit -m 'this is a message' 6 | # git push origin main 7 | # open atom master branch 8 | jupyter-book build ../statsbook/ 9 | # Publish your book's HTML manually to GitHub pages 10 | # publish the _site folder of the main branch to the gh-pages branch 11 | ghp-import -n -p -f _build/html 12 | --------------------------------------------------------------------------------