├── .DS_Store
├── .ipynb_checkpoints
├── 2-2-认识Jupyter-Notebook-checkpoint.ipynb
├── 3-1-使用Python进行描述统计单变量-checkpoint.ipynb
├── 3-2-使用Python进行描述统计多变量-checkpoint.ipynb
├── 5-1-一元回归-checkpoint.ipynb
├── 6-1-各种概率分布-checkpoint.ipynb
└── 7-3-Python中的Ridge回归与Lasso回归-checkpoint.ipynb
├── 2-2-认识Jupyter-Notebook.ipynb
├── 2-3-Python编程基础.ipynb
├── 2-4-1-sample_data.csv
├── 2-4-认识numpy与pandas.ipynb
├── 3-1-使用Python进行描述统计单变量.ipynb
├── 3-10-1-click_data.csv
├── 3-10-列联表检验.ipynb
├── 3-2-1-fish_multi.csv
├── 3-2-2-shoes.csv
├── 3-2-3-cov.csv
├── 3-2-使用Python进行描述统计多变量.ipynb
├── 3-3-2-fish_multi_2.csv
├── 3-3-基于matplotlib-seaborn的数据可视化.ipynb
├── 3-4-1-fish_length_100000.csv
├── 3-4-用Python模拟抽样.ipynb
├── 3-5-样本统计量的性质.ipynb
├── 3-6-正态分布及其应用.ipynb
├── 3-7-1-fish_length.csv
├── 3-7-参数估计.ipynb
├── 3-8-1-junk-food-weight.csv
├── 3-8-假设检验.ipynb
├── 3-9-1-paired-t-test.csv
├── 3-9-均值差的检验.ipynb
├── 5-1-1-beer.csv
├── 5-1-一元回归.ipynb
├── 5-2-方差分析.ipynb
├── 5-3-1-lm-model.csv
├── 5-3-含有多个解释变量的模型.ipynb
├── 6-1-各种概率分布.ipynb
├── 6-3-1-logistic-regression.csv
├── 6-3-logistic回归.ipynb
├── 6-4-广义线性模型的评估.ipynb
├── 6-5-1-poisson-regression.csv
├── 6-5-泊松回归.ipynb
├── 7-3-1-large-data.csv
├── 7-3-Python中的Ridge回归与Lasso回归.ipynb
├── 7-4-线性模型与神经网络.ipynb
├── README.md
├── _build
├── .DS_Store
├── .doctrees
│ ├── 2-2-认识Jupyter-Notebook.doctree
│ ├── 2-3-Python编程基础.doctree
│ ├── 2-4-认识numpy与pandas.doctree
│ ├── 3-1-使用Python进行描述统计单变量.doctree
│ ├── 3-1-使用Python进行描述统计:单变量.doctree
│ ├── 3-10-列联表检验.doctree
│ ├── 3-2-使用Python进行描述统计多变量.doctree
│ ├── 3-2-使用Python进行描述统计:多变量.doctree
│ ├── 3-3-基于matplotlib-seaborn的数据可视化.doctree
│ ├── 3-4-用Python模拟抽样.doctree
│ ├── 3-5-样本统计量的性质.doctree
│ ├── 3-6-正态分布及其应用.doctree
│ ├── 3-7-参数估计.doctree
│ ├── 3-8-假设检验.doctree
│ ├── 3-9-均值差的检验.doctree
│ ├── 5-1-一元回归.doctree
│ ├── 5-2-方差分析.doctree
│ ├── 5-3-含有多个解释变量的模型.doctree
│ ├── 6-1-各种概率分布.doctree
│ ├── 6-3-logistic回归.doctree
│ ├── 6-4-广义线性模型的评估.doctree
│ ├── 6-5-泊松回归.doctree
│ ├── 7-3-Python中的Ridge回归与Lasso回归.doctree
│ ├── 7-4-线性模型与神经网络.doctree
│ ├── README.doctree
│ ├── environment.pickle
│ └── glue_cache.json
├── html
│ ├── .DS_Store
│ ├── .buildinfo
│ ├── 2-2-认识Jupyter-Notebook.html
│ ├── 2-3-Python编程基础.html
│ ├── 2-4-认识numpy与pandas.html
│ ├── 3-1-使用Python进行描述统计单变量.html
│ ├── 3-10-列联表检验.html
│ ├── 3-2-使用Python进行描述统计多变量.html
│ ├── 3-3-基于matplotlib-seaborn的数据可视化.html
│ ├── 3-4-用Python模拟抽样.html
│ ├── 3-5-样本统计量的性质.html
│ ├── 3-6-正态分布及其应用.html
│ ├── 3-7-参数估计.html
│ ├── 3-8-假设检验.html
│ ├── 3-9-均值差的检验.html
│ ├── 5-1-一元回归.html
│ ├── 5-2-方差分析.html
│ ├── 5-3-含有多个解释变量的模型.html
│ ├── 6-1-各种概率分布.html
│ ├── 6-3-logistic回归.html
│ ├── 6-4-广义线性模型的评估.html
│ ├── 6-5-泊松回归.html
│ ├── 7-3-Python中的Ridge回归与Lasso回归.html
│ ├── 7-4-线性模型与神经网络.html
│ ├── README.html
│ ├── _images
│ │ ├── 3-3-基于matplotlib-seaborn的数据可视化_11_2.png
│ │ ├── 3-3-基于matplotlib-seaborn的数据可视化_13_1.png
│ │ ├── 3-3-基于matplotlib-seaborn的数据可视化_14_1.png
│ │ ├── 3-3-基于matplotlib-seaborn的数据可视化_19_2.png
│ │ ├── 3-3-基于matplotlib-seaborn的数据可视化_21_1.png
│ │ ├── 3-3-基于matplotlib-seaborn的数据可视化_24_1.png
│ │ ├── 3-3-基于matplotlib-seaborn的数据可视化_26_1.png
│ │ ├── 3-3-基于matplotlib-seaborn的数据可视化_29_1.png
│ │ ├── 3-3-基于matplotlib-seaborn的数据可视化_33_1.png
│ │ ├── 3-3-基于matplotlib-seaborn的数据可视化_5_1.png
│ │ ├── 3-3-基于matplotlib-seaborn的数据可视化_8_1.png
│ │ ├── 3-4-用Python模拟抽样_20_2.png
│ │ ├── 3-4-用Python模拟抽样_24_1.png
│ │ ├── 3-4-用Python模拟抽样_25_2.png
│ │ ├── 3-5-样本统计量的性质_11_2.png
│ │ ├── 3-5-样本统计量的性质_16_1.png
│ │ ├── 3-5-样本统计量的性质_22_1.png
│ │ ├── 3-5-样本统计量的性质_27_1.png
│ │ ├── 3-5-样本统计量的性质_30_1.png
│ │ ├── 3-5-样本统计量的性质_41_1.png
│ │ ├── 3-5-样本统计量的性质_43_3.png
│ │ ├── 3-6-正态分布及其应用_23_2.png
│ │ ├── 3-6-正态分布及其应用_25_1.png
│ │ ├── 3-6-正态分布及其应用_26_2.png
│ │ ├── 3-6-正态分布及其应用_9_1.png
│ │ ├── 5-1-一元回归_18_1.png
│ │ ├── 5-1-一元回归_40_2.png
│ │ ├── 5-1-一元回归_41_2.png
│ │ ├── 5-1-一元回归_43_0.png
│ │ ├── 5-1-一元回归_48_1.png
│ │ ├── 5-1-一元回归_5_1.png
│ │ ├── 5-2-方差分析_5_1.png
│ │ ├── 5-3-含有多个解释变量的模型_12_1.png
│ │ ├── 5-3-含有多个解释变量的模型_5_1.png
│ │ ├── 5-3-含有多个解释变量的模型_9_1.png
│ │ ├── 6-1-各种概率分布_10_2.png
│ │ ├── 6-1-各种概率分布_11_1.png
│ │ ├── 6-1-各种概率分布_6_2.png
│ │ ├── 6-3-logistic回归_16_1.png
│ │ ├── 6-3-logistic回归_5_1.png
│ │ ├── 6-5-泊松回归_10_1.png
│ │ ├── 7-3-Python中的Ridge回归与Lasso回归_12_1.png
│ │ ├── 7-3-Python中的Ridge回归与Lasso回归_22_2.png
│ │ ├── 7-3-Python中的Ridge回归与Lasso回归_23_2.png
│ │ └── 7-3-Python中的Ridge回归与Lasso回归_30_1.png
│ ├── _panels_static
│ │ ├── panels-main.c949a650a448cc0ae9fd3441c0e17fb0.css
│ │ └── panels-variables.06eb56fa6e07937060861dad626602ad.css
│ ├── _sources
│ │ ├── .DS_Store
│ │ ├── 2-2-认识Jupyter-Notebook.ipynb
│ │ ├── 2-3-Python编程基础.ipynb
│ │ ├── 2-4-认识numpy与pandas.ipynb
│ │ ├── 3-1-使用Python进行描述统计单变量.ipynb
│ │ ├── 3-10-列联表检验.ipynb
│ │ ├── 3-2-使用Python进行描述统计多变量.ipynb
│ │ ├── 3-3-基于matplotlib-seaborn的数据可视化.ipynb
│ │ ├── 3-4-用Python模拟抽样.ipynb
│ │ ├── 3-5-样本统计量的性质.ipynb
│ │ ├── 3-6-正态分布及其应用.ipynb
│ │ ├── 3-7-参数估计.ipynb
│ │ ├── 3-8-假设检验.ipynb
│ │ ├── 3-9-均值差的检验.ipynb
│ │ ├── 5-1-一元回归.ipynb
│ │ ├── 5-2-方差分析.ipynb
│ │ ├── 5-3-含有多个解释变量的模型.ipynb
│ │ ├── 6-1-各种概率分布.ipynb
│ │ ├── 6-3-logistic回归.ipynb
│ │ ├── 6-4-广义线性模型的评估.ipynb
│ │ ├── 6-5-泊松回归.ipynb
│ │ ├── 7-3-Python中的Ridge回归与Lasso回归.ipynb
│ │ ├── 7-4-线性模型与神经网络.ipynb
│ │ └── README.md
│ ├── _static
│ │ ├── __init__.py
│ │ ├── __pycache__
│ │ │ └── __init__.cpython-37.pyc
│ │ ├── basic.css
│ │ ├── clipboard.min.js
│ │ ├── copy-button.svg
│ │ ├── copybutton.css
│ │ ├── copybutton.js
│ │ ├── copybutton_funcs.js
│ │ ├── css
│ │ │ ├── blank.css
│ │ │ ├── index.ff1ffe594081f20da1ef19478df9384b.css
│ │ │ └── theme.css
│ │ ├── doctools.js
│ │ ├── documentation_options.js
│ │ ├── file.png
│ │ ├── images
│ │ │ ├── logo_binder.svg
│ │ │ ├── logo_colab.png
│ │ │ └── logo_jupyterhub.svg
│ │ ├── jquery-3.5.1.js
│ │ ├── jquery.js
│ │ ├── js
│ │ │ └── index.be7d3bbb2ef33a8344ce.js
│ │ ├── language_data.js
│ │ ├── minus.png
│ │ ├── mystnb.css
│ │ ├── panels-main.c949a650a448cc0ae9fd3441c0e17fb0.css
│ │ ├── panels-variables.06eb56fa6e07937060861dad626602ad.css
│ │ ├── plus.png
│ │ ├── pygments.css
│ │ ├── searchtools.js
│ │ ├── sphinx-book-theme.12a9622fbb08dcb3a2a40b2c02b83a57.js
│ │ ├── sphinx-book-theme.css
│ │ ├── sphinx-book-theme.e2363ea40746bee74734a24ffefccd78.css
│ │ ├── sphinx-thebe.css
│ │ ├── sphinx-thebe.js
│ │ ├── togglebutton.css
│ │ ├── togglebutton.js
│ │ ├── underscore-1.13.1.js
│ │ ├── underscore.js
│ │ ├── vendor
│ │ │ └── fontawesome
│ │ │ │ └── 5.13.0
│ │ │ │ ├── LICENSE.txt
│ │ │ │ ├── css
│ │ │ │ └── all.min.css
│ │ │ │ └── webfonts
│ │ │ │ ├── fa-brands-400.eot
│ │ │ │ ├── fa-brands-400.svg
│ │ │ │ ├── fa-brands-400.ttf
│ │ │ │ ├── fa-brands-400.woff
│ │ │ │ ├── fa-brands-400.woff2
│ │ │ │ ├── fa-regular-400.eot
│ │ │ │ ├── fa-regular-400.svg
│ │ │ │ ├── fa-regular-400.ttf
│ │ │ │ ├── fa-regular-400.woff
│ │ │ │ ├── fa-regular-400.woff2
│ │ │ │ ├── fa-solid-900.eot
│ │ │ │ ├── fa-solid-900.svg
│ │ │ │ ├── fa-solid-900.ttf
│ │ │ │ ├── fa-solid-900.woff
│ │ │ │ └── fa-solid-900.woff2
│ │ └── webpack-macros.html
│ ├── genindex.html
│ ├── index.html
│ ├── objects.inv
│ ├── reports
│ │ ├── 2-3-Python编程基础.log
│ │ ├── 3-7-参数估计.log
│ │ └── 7-3-Python中的Ridge回归与Lasso回归.log
│ ├── search.html
│ └── searchindex.js
└── jupyter_execute
│ ├── 2-2-认识Jupyter-Notebook.ipynb
│ ├── 2-2-认识Jupyter-Notebook.py
│ ├── 2-3-Python编程基础.ipynb
│ ├── 2-3-Python编程基础.py
│ ├── 2-4-认识numpy与pandas.ipynb
│ ├── 2-4-认识numpy与pandas.py
│ ├── 3-1-使用Python进行描述统计单变量.ipynb
│ ├── 3-1-使用Python进行描述统计单变量.py
│ ├── 3-1-使用Python进行描述统计:单变量.ipynb
│ ├── 3-1-使用Python进行描述统计:单变量.py
│ ├── 3-10-列联表检验.ipynb
│ ├── 3-10-列联表检验.py
│ ├── 3-2-使用Python进行描述统计多变量.ipynb
│ ├── 3-2-使用Python进行描述统计多变量.py
│ ├── 3-2-使用Python进行描述统计:多变量.ipynb
│ ├── 3-2-使用Python进行描述统计:多变量.py
│ ├── 3-3-基于matplotlib-seaborn的数据可视化.ipynb
│ ├── 3-3-基于matplotlib-seaborn的数据可视化.py
│ ├── 3-3-基于matplotlib-seaborn的数据可视化_11_2.png
│ ├── 3-3-基于matplotlib-seaborn的数据可视化_13_1.png
│ ├── 3-3-基于matplotlib-seaborn的数据可视化_14_1.png
│ ├── 3-3-基于matplotlib-seaborn的数据可视化_19_2.png
│ ├── 3-3-基于matplotlib-seaborn的数据可视化_21_1.png
│ ├── 3-3-基于matplotlib-seaborn的数据可视化_24_1.png
│ ├── 3-3-基于matplotlib-seaborn的数据可视化_26_1.png
│ ├── 3-3-基于matplotlib-seaborn的数据可视化_29_1.png
│ ├── 3-3-基于matplotlib-seaborn的数据可视化_33_1.png
│ ├── 3-3-基于matplotlib-seaborn的数据可视化_5_1.png
│ ├── 3-3-基于matplotlib-seaborn的数据可视化_8_1.png
│ ├── 3-4-用Python模拟抽样.ipynb
│ ├── 3-4-用Python模拟抽样.py
│ ├── 3-4-用Python模拟抽样_20_2.png
│ ├── 3-4-用Python模拟抽样_24_1.png
│ ├── 3-4-用Python模拟抽样_25_2.png
│ ├── 3-5-样本统计量的性质.ipynb
│ ├── 3-5-样本统计量的性质.py
│ ├── 3-5-样本统计量的性质_11_2.png
│ ├── 3-5-样本统计量的性质_16_1.png
│ ├── 3-5-样本统计量的性质_22_1.png
│ ├── 3-5-样本统计量的性质_27_1.png
│ ├── 3-5-样本统计量的性质_30_1.png
│ ├── 3-5-样本统计量的性质_41_1.png
│ ├── 3-5-样本统计量的性质_43_3.png
│ ├── 3-6-正态分布及其应用.ipynb
│ ├── 3-6-正态分布及其应用.py
│ ├── 3-6-正态分布及其应用_23_2.png
│ ├── 3-6-正态分布及其应用_25_1.png
│ ├── 3-6-正态分布及其应用_26_2.png
│ ├── 3-6-正态分布及其应用_9_1.png
│ ├── 3-7-参数估计.ipynb
│ ├── 3-7-参数估计.py
│ ├── 3-8-假设检验.ipynb
│ ├── 3-8-假设检验.py
│ ├── 3-9-均值差的检验.ipynb
│ ├── 3-9-均值差的检验.py
│ ├── 5-1-一元回归.ipynb
│ ├── 5-1-一元回归.py
│ ├── 5-1-一元回归_18_1.png
│ ├── 5-1-一元回归_40_2.png
│ ├── 5-1-一元回归_41_2.png
│ ├── 5-1-一元回归_43_0.png
│ ├── 5-1-一元回归_48_1.png
│ ├── 5-1-一元回归_5_1.png
│ ├── 5-2-方差分析.ipynb
│ ├── 5-2-方差分析.py
│ ├── 5-2-方差分析_5_1.png
│ ├── 5-3-含有多个解释变量的模型.ipynb
│ ├── 5-3-含有多个解释变量的模型.py
│ ├── 5-3-含有多个解释变量的模型_12_1.png
│ ├── 5-3-含有多个解释变量的模型_5_1.png
│ ├── 5-3-含有多个解释变量的模型_9_1.png
│ ├── 6-1-各种概率分布.ipynb
│ ├── 6-1-各种概率分布.py
│ ├── 6-1-各种概率分布_10_2.png
│ ├── 6-1-各种概率分布_11_1.png
│ ├── 6-1-各种概率分布_6_2.png
│ ├── 6-3-logistic回归.ipynb
│ ├── 6-3-logistic回归.py
│ ├── 6-3-logistic回归_16_1.png
│ ├── 6-3-logistic回归_5_1.png
│ ├── 6-4-广义线性模型的评估.ipynb
│ ├── 6-4-广义线性模型的评估.py
│ ├── 6-5-泊松回归.ipynb
│ ├── 6-5-泊松回归.py
│ ├── 6-5-泊松回归_10_1.png
│ ├── 7-3-Python中的Ridge回归与Lasso回归.ipynb
│ ├── 7-3-Python中的Ridge回归与Lasso回归.py
│ ├── 7-3-Python中的Ridge回归与Lasso回归_12_1.png
│ ├── 7-3-Python中的Ridge回归与Lasso回归_22_2.png
│ ├── 7-3-Python中的Ridge回归与Lasso回归_23_2.png
│ ├── 7-3-Python中的Ridge回归与Lasso回归_30_1.png
│ ├── 7-4-线性模型与神经网络.ipynb
│ └── 7-4-线性模型与神经网络.py
├── _config.yml
├── _toc.yml
└── deploy.sh
/.DS_Store:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/SocratesClub/statsbook/c5a012a41d83a0b48ce414d1e7b418cddf5ee90a/.DS_Store
--------------------------------------------------------------------------------
/.ipynb_checkpoints/2-2-认识Jupyter-Notebook-checkpoint.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "# 第 2 章 Python 与 Jupyter Notebook 基础\n",
8 | "\n",
9 | "## 第 2 节 认识 Jupyter Notebook"
10 | ]
11 | },
12 | {
13 | "cell_type": "markdown",
14 | "metadata": {},
15 | "source": [
16 | "### 3. 执行代码"
17 | ]
18 | },
19 | {
20 | "cell_type": "code",
21 | "execution_count": null,
22 | "metadata": {
23 | "collapsed": true
24 | },
25 | "outputs": [],
26 | "source": [
27 | "1"
28 | ]
29 | },
30 | {
31 | "cell_type": "markdown",
32 | "metadata": {},
33 | "source": [
34 | "### 5. Markdown 的用法"
35 | ]
36 | },
37 | {
38 | "cell_type": "markdown",
39 | "metadata": {
40 | "collapsed": true
41 | },
42 | "source": [
43 | "```\n",
44 | "# 一级标题\n",
45 | "## 二级标题\n",
46 | "### 三级标题\n",
47 | "#### 四级标题\n",
48 | "\n",
49 | "-----------------\n",
50 | "- 列表条目\n",
51 | "- 列表条目\n",
52 | "\n",
53 | "-----------------\n",
54 | "\n",
55 | "1. 序号条目\n",
56 | "2. 序号条目\n",
57 | "```"
58 | ]
59 | },
60 | {
61 | "cell_type": "code",
62 | "execution_count": null,
63 | "metadata": {
64 | "collapsed": true
65 | },
66 | "outputs": [],
67 | "source": []
68 | }
69 | ],
70 | "metadata": {
71 | "kernelspec": {
72 | "display_name": "Python 3",
73 | "language": "python",
74 | "name": "python3"
75 | },
76 | "language_info": {
77 | "codemirror_mode": {
78 | "name": "ipython",
79 | "version": 3
80 | },
81 | "file_extension": ".py",
82 | "mimetype": "text/x-python",
83 | "name": "python",
84 | "nbconvert_exporter": "python",
85 | "pygments_lexer": "ipython3",
86 | "version": "3.7.6"
87 | },
88 | "toc": {
89 | "base_numbering": 1,
90 | "nav_menu": {},
91 | "number_sections": false,
92 | "sideBar": true,
93 | "skip_h1_title": false,
94 | "title_cell": "Table of Contents",
95 | "title_sidebar": "Contents",
96 | "toc_cell": false,
97 | "toc_position": {},
98 | "toc_section_display": true,
99 | "toc_window_display": true
100 | }
101 | },
102 | "nbformat": 4,
103 | "nbformat_minor": 2
104 | }
105 |
--------------------------------------------------------------------------------
/2-2-认识Jupyter-Notebook.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "# 第 2 章 Python 与 Jupyter Notebook 基础\n",
8 | "\n",
9 | "## 第 2 节 认识 Jupyter Notebook"
10 | ]
11 | },
12 | {
13 | "cell_type": "markdown",
14 | "metadata": {},
15 | "source": [
16 | "### 3. 执行代码"
17 | ]
18 | },
19 | {
20 | "cell_type": "code",
21 | "execution_count": null,
22 | "metadata": {
23 | "collapsed": true
24 | },
25 | "outputs": [],
26 | "source": [
27 | "1"
28 | ]
29 | },
30 | {
31 | "cell_type": "markdown",
32 | "metadata": {},
33 | "source": [
34 | "### 5. Markdown 的用法"
35 | ]
36 | },
37 | {
38 | "cell_type": "markdown",
39 | "metadata": {
40 | "collapsed": true
41 | },
42 | "source": [
43 | "```\n",
44 | "# 一级标题\n",
45 | "## 二级标题\n",
46 | "### 三级标题\n",
47 | "#### 四级标题\n",
48 | "\n",
49 | "-----------------\n",
50 | "- 列表条目\n",
51 | "- 列表条目\n",
52 | "\n",
53 | "-----------------\n",
54 | "\n",
55 | "1. 序号条目\n",
56 | "2. 序号条目\n",
57 | "```"
58 | ]
59 | },
60 | {
61 | "cell_type": "code",
62 | "execution_count": null,
63 | "metadata": {
64 | "collapsed": true
65 | },
66 | "outputs": [],
67 | "source": []
68 | }
69 | ],
70 | "metadata": {
71 | "kernelspec": {
72 | "display_name": "Python 3",
73 | "language": "python",
74 | "name": "python3"
75 | },
76 | "language_info": {
77 | "codemirror_mode": {
78 | "name": "ipython",
79 | "version": 3
80 | },
81 | "file_extension": ".py",
82 | "mimetype": "text/x-python",
83 | "name": "python",
84 | "nbconvert_exporter": "python",
85 | "pygments_lexer": "ipython3",
86 | "version": "3.7.6"
87 | },
88 | "toc": {
89 | "base_numbering": 1,
90 | "nav_menu": {},
91 | "number_sections": false,
92 | "sideBar": true,
93 | "skip_h1_title": false,
94 | "title_cell": "Table of Contents",
95 | "title_sidebar": "Contents",
96 | "toc_cell": false,
97 | "toc_position": {},
98 | "toc_section_display": true,
99 | "toc_window_display": true
100 | }
101 | },
102 | "nbformat": 4,
103 | "nbformat_minor": 2
104 | }
105 |
--------------------------------------------------------------------------------
/2-4-1-sample_data.csv:
--------------------------------------------------------------------------------
1 | col1,col2
2 | 1, A
3 | 2, A
4 | 3, B
5 | 4, B
6 | 5, C
7 | 6, C
--------------------------------------------------------------------------------
/3-10-1-click_data.csv:
--------------------------------------------------------------------------------
1 | color,click,freq
2 | blue,click,20
3 | blue,not,230
4 | red,click,10
5 | red,not,40
6 |
7 |
--------------------------------------------------------------------------------
/3-10-列联表检验.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "# 第 10 节 列联表检验\n",
8 | "\n",
9 | "## 第 3 章 使用 Python 进行数据分析|用 Python 动手学统计学"
10 | ]
11 | },
12 | {
13 | "cell_type": "markdown",
14 | "metadata": {},
15 | "source": [
16 | "### 5. 实现:计算 p 值"
17 | ]
18 | },
19 | {
20 | "cell_type": "code",
21 | "execution_count": 1,
22 | "metadata": {
23 | "collapsed": true
24 | },
25 | "outputs": [],
26 | "source": [
27 | "# 用于数值计算的库\n",
28 | "import numpy as np\n",
29 | "import pandas as pd\n",
30 | "import scipy as sp\n",
31 | "from scipy import stats\n",
32 | "\n",
33 | "# 用于绘图的库\n",
34 | "from matplotlib import pyplot as plt\n",
35 | "import seaborn as sns\n",
36 | "sns.set()\n",
37 | "\n",
38 | "# 设置浮点数打印精度\n",
39 | "%precision 3\n",
40 | "# 在 Jupyter Notebook 里显示图形\n",
41 | "%matplotlib inline"
42 | ]
43 | },
44 | {
45 | "cell_type": "code",
46 | "execution_count": 2,
47 | "metadata": {},
48 | "outputs": [
49 | {
50 | "data": {
51 | "text/plain": [
52 | "0.010"
53 | ]
54 | },
55 | "execution_count": 2,
56 | "metadata": {},
57 | "output_type": "execute_result"
58 | }
59 | ],
60 | "source": [
61 | "# 计算 p 值\n",
62 | "1 - sp.stats.chi2.cdf(x = 6.667, df = 1)"
63 | ]
64 | },
65 | {
66 | "cell_type": "markdown",
67 | "metadata": {},
68 | "source": [
69 | "### 6. 实现:列联表检验"
70 | ]
71 | },
72 | {
73 | "cell_type": "code",
74 | "execution_count": 3,
75 | "metadata": {
76 | "scrolled": true
77 | },
78 | "outputs": [
79 | {
80 | "name": "stdout",
81 | "output_type": "stream",
82 | "text": [
83 | " color click freq\n",
84 | "0 blue click 20\n",
85 | "1 blue not 230\n",
86 | "2 red click 10\n",
87 | "3 red not 40\n"
88 | ]
89 | }
90 | ],
91 | "source": [
92 | "# 读入数据\n",
93 | "click_data = pd.read_csv(\"3-10-1-click_data.csv\")\n",
94 | "print(click_data)"
95 | ]
96 | },
97 | {
98 | "cell_type": "code",
99 | "execution_count": 4,
100 | "metadata": {},
101 | "outputs": [
102 | {
103 | "name": "stdout",
104 | "output_type": "stream",
105 | "text": [
106 | "click click not\n",
107 | "color \n",
108 | "blue 20 230\n",
109 | "red 10 40\n"
110 | ]
111 | }
112 | ],
113 | "source": [
114 | "# 转换为列联表\n",
115 | "cross = pd.pivot_table(\n",
116 | " data = click_data,\n",
117 | " values = \"freq\",\n",
118 | " aggfunc = \"sum\",\n",
119 | " index = \"color\",\n",
120 | " columns = \"click\"\n",
121 | ")\n",
122 | "print(cross)"
123 | ]
124 | },
125 | {
126 | "cell_type": "code",
127 | "execution_count": 5,
128 | "metadata": {},
129 | "outputs": [
130 | {
131 | "data": {
132 | "text/plain": [
133 | "(6.667, 0.010, 1, array([[ 25., 225.],\n",
134 | " [ 5., 45.]]))"
135 | ]
136 | },
137 | "execution_count": 5,
138 | "metadata": {},
139 | "output_type": "execute_result"
140 | }
141 | ],
142 | "source": [
143 | "# 进行检验\n",
144 | "sp.stats.chi2_contingency(cross, correction = False)"
145 | ]
146 | }
147 | ],
148 | "metadata": {
149 | "kernelspec": {
150 | "display_name": "Python 3",
151 | "language": "python",
152 | "name": "python3"
153 | },
154 | "language_info": {
155 | "codemirror_mode": {
156 | "name": "ipython",
157 | "version": 3
158 | },
159 | "file_extension": ".py",
160 | "mimetype": "text/x-python",
161 | "name": "python",
162 | "nbconvert_exporter": "python",
163 | "pygments_lexer": "ipython3",
164 | "version": "3.7.6"
165 | },
166 | "toc": {
167 | "base_numbering": 1,
168 | "nav_menu": {},
169 | "number_sections": false,
170 | "sideBar": true,
171 | "skip_h1_title": false,
172 | "title_cell": "Table of Contents",
173 | "title_sidebar": "Contents",
174 | "toc_cell": false,
175 | "toc_position": {},
176 | "toc_section_display": true,
177 | "toc_window_display": true
178 | }
179 | },
180 | "nbformat": 4,
181 | "nbformat_minor": 2
182 | }
183 |
--------------------------------------------------------------------------------
/3-2-1-fish_multi.csv:
--------------------------------------------------------------------------------
1 | species,length
2 | A,2
3 | A,3
4 | A,4
5 | B,6
6 | B,8
7 | B,10
8 |
--------------------------------------------------------------------------------
/3-2-2-shoes.csv:
--------------------------------------------------------------------------------
1 | store,color,sales
2 | tokyo,blue,10
3 | tokyo,red,15
4 | osaka,blue,13
5 | osaka,red,9
6 |
--------------------------------------------------------------------------------
/3-2-3-cov.csv:
--------------------------------------------------------------------------------
1 | x,y
2 | 18.5,34
3 | 18.7,39
4 | 19.1,41
5 | 19.7,38
6 | 21.5,45
7 | 21.7,41
8 | 21.8,52
9 | 22,44
10 | 23.4,44
11 | 23.8,49
12 |
--------------------------------------------------------------------------------
/3-3-2-fish_multi_2.csv:
--------------------------------------------------------------------------------
1 | species,length
2 | A,2
3 | A,3
4 | A,3
5 | A,4
6 | A,4
7 | A,4
8 | A,4
9 | A,5
10 | A,5
11 | A,6
12 | B,5
13 | B,6
14 | B,6
15 | B,7
16 | B,7
17 | B,7
18 | B,7
19 | B,8
20 | B,8
21 | B,9
22 |
--------------------------------------------------------------------------------
/3-7-1-fish_length.csv:
--------------------------------------------------------------------------------
1 | length
2 | 4.352981989508033500e+00
3 | 3.735303878484729889e+00
4 | 5.944616949606223777e+00
5 | 3.798326296317538375e+00
6 | 4.087687873262546567e+00
7 | 5.265984893649251042e+00
8 | 3.272614076115006654e+00
9 | 3.526690673655769270e+00
10 | 4.150082580669628207e+00
11 | 3.736104033776512789e+00
12 |
--------------------------------------------------------------------------------
/3-8-1-junk-food-weight.csv:
--------------------------------------------------------------------------------
1 | weight
2 | 5.852981989508032967e+01
3 | 5.235303878484729978e+01
4 | 7.444616949606223955e+01
5 | 5.298326296317538464e+01
6 | 5.587687873262546390e+01
7 | 6.765984893649250864e+01
8 | 4.772614076115006299e+01
9 | 5.026690673655769359e+01
10 | 5.650082580669628385e+01
11 | 5.236104033776512523e+01
12 | 4.545788310062555126e+01
13 | 5.336098791529930452e+01
14 | 5.212936842399005855e+01
15 | 5.982777282087596404e+01
16 | 4.168169176422644284e+01
17 | 4.939856769848039164e+01
18 | 6.421112807589736349e+01
19 | 6.985864805785050180e+01
20 | 4.291056353849307214e+01
21 | 6.015878008714222602e+01
22 |
--------------------------------------------------------------------------------
/3-9-1-paired-t-test.csv:
--------------------------------------------------------------------------------
1 | person,medicine,body_temperature
2 | A,before,36.2
3 | B,before,36.2
4 | C,before,35.3
5 | D,before,36.1
6 | E,before,36.1
7 | A,after,36.8
8 | B,after,36.1
9 | C,after,36.8
10 | D,after,37.1
11 | E,after,36.9
12 |
--------------------------------------------------------------------------------
/5-1-1-beer.csv:
--------------------------------------------------------------------------------
1 | beer,temperature
2 | 45.3,20.5
3 | 59.3,25.0
4 | 40.4,10.0
5 | 38.0,26.9
6 | 37.0,15.8
7 | 40.900000000000006,4.2
8 | 60.2,13.5
9 | 63.3,26.0
10 | 51.099999999999994,23.3
11 | 44.9,8.5
12 | 47.0,26.2
13 | 53.2,19.1
14 | 43.5,24.3
15 | 53.199999999999996,23.3
16 | 37.4,8.4
17 | 59.9,23.5
18 | 41.5,13.9
19 | 75.1,35.5
20 | 55.6,27.2
21 | 57.2,20.5
22 | 46.5,10.2
23 | 35.8,20.5
24 | 51.9,21.6
25 | 38.199999999999996,7.9
26 | 66.0,42.2
27 | 55.3,23.9
28 | 55.300000000000004,36.9
29 | 43.3,8.9
30 | 70.5,36.4
31 | 38.8,6.4
32 |
--------------------------------------------------------------------------------
/5-3-1-lm-model.csv:
--------------------------------------------------------------------------------
1 | humidity,price,sales,temperature,weather
2 | 29.5,290,229.7,17.8,rainy
3 | 38.1,290,206.1,26.1,rainy
4 | 31.5,290,202.5,22.0,rainy
5 | 39.7,290,195.5,23.0,rainy
6 | 24.7,290,214.4,14.5,rainy
7 | 27.2,290,174.4,20.8,rainy
8 | 29.3,290,238.6,23.7,rainy
9 | 26.4,290,190.8,15.2,rainy
10 | 27.9,290,211.2,18.7,rainy
11 | 38.0,290,204.5,20.2,rainy
12 | 26.9,290,221.7,13.1,rainy
13 | 28.9,290,179.0,21.6,rainy
14 | 37.9,290,208.2,24.2,rainy
15 | 27.7,290,197.1,15.7,rainy
16 | 29.4,290,227.2,21.8,rainy
17 | 30.7,290,183.6,13.4,rainy
18 | 33.8,290,205.2,19.8,rainy
19 | 25.8,290,185.1,11.9,rainy
20 | 39.6,290,215.4,25.6,rainy
21 | 33.6,290,219.1,22.0,rainy
22 | 36.8,290,191.4,19.9,rainy
23 | 31.2,290,220.5,16.1,rainy
24 | 40.5,290,229.2,26.4,rainy
25 | 38.2,290,227.3,29.8,rainy
26 | 22.8,290,210.4,10.7,rainy
27 | 43.1,290,205.2,26.2,rainy
28 | 35.2,290,215.9,28.1,rainy
29 | 32.4,290,196.0,21.7,rainy
30 | 26.3,290,196.8,14.0,rainy
31 | 34.0,290,221.1,24.3,rainy
32 | 32.2,295,194.5,19.1,rainy
33 | 30.8,295,196.4,17.0,rainy
34 | 30.3,295,188.3,13.8,rainy
35 | 39.1,295,196.7,22.8,rainy
36 | 34.0,295,200.7,24.0,rainy
37 | 26.4,295,202.1,16.9,rainy
38 | 30.0,295,192.4,22.6,rainy
39 | 26.6,295,202.7,14.3,rainy
40 | 29.4,295,235.7,24.0,rainy
41 | 34.6,295,217.4,20.2,rainy
42 | 30.0,310,196.0,19.1,rainy
43 | 30.8,310,186.1,19.5,rainy
44 | 34.6,310,194.0,24.3,rainy
45 | 30.1,310,229.9,23.8,rainy
46 | 31.0,310,203.9,22.6,rainy
47 | 34.4,315,189.3,20.7,rainy
48 | 34.1,315,201.3,20.4,rainy
49 | 32.5,315,233.2,23.1,rainy
50 | 38.2,315,201.2,21.2,rainy
51 | 30.1,315,211.3,23.4,rainy
52 | 28.0,290,229.6,18.4,sunny
53 | 22.0,290,207.1,7.8,sunny
54 | 24.7,290,216.1,25.2,sunny
55 | 42.4,290,234.8,30.9,sunny
56 | 32.4,290,226.5,22.2,sunny
57 | 26.6,295,233.2,19.5,sunny
58 | 35.7,295,236.7,19.3,sunny
59 | 31.4,295,238.2,19.4,sunny
60 | 31.3,295,229.4,20.1,sunny
61 | 24.8,295,231.6,14.4,sunny
62 | 28.8,310,204.6,17.4,sunny
63 | 22.4,310,231.8,15.0,sunny
64 | 33.1,310,228.1,21.2,sunny
65 | 28.0,310,212.7,18.5,sunny
66 | 31.9,310,229.3,22.5,sunny
67 | 33.1,310,216.8,19.1,sunny
68 | 33.8,310,241.9,24.9,sunny
69 | 31.4,310,243.1,21.1,sunny
70 | 37.4,310,265.0,31.0,sunny
71 | 22.1,310,186.7,10.5,sunny
72 | 30.6,315,191.5,16.8,sunny
73 | 37.3,315,214.8,24.5,sunny
74 | 39.8,315,234.5,32.6,sunny
75 | 31.9,315,228.7,18.8,sunny
76 | 27.5,315,222.0,20.2,sunny
77 | 26.7,315,185.3,18.9,sunny
78 | 29.7,315,220.4,26.7,sunny
79 | 32.9,315,227.7,18.6,sunny
80 | 31.3,315,224.5,23.4,sunny
81 | 33.2,315,226.5,18.4,sunny
82 | 23.8,315,206.0,13.6,sunny
83 | 29.6,315,215.9,21.6,sunny
84 | 31.8,315,222.8,22.5,sunny
85 | 36.7,315,231.0,26.5,sunny
86 | 29.8,315,219.3,19.4,sunny
87 | 28.8,315,215.1,16.9,sunny
88 | 31.7,315,210.3,22.8,sunny
89 | 31.3,315,224.1,21.2,sunny
90 | 27.5,315,220.5,21.4,sunny
91 | 30.0,315,233.5,19.6,sunny
92 | 32.9,315,241.9,25.8,sunny
93 | 30.9,315,221.9,21.8,sunny
94 | 37.2,315,222.8,29.5,sunny
95 | 31.3,315,214.1,25.6,sunny
96 | 31.7,315,227.3,23.3,sunny
97 | 24.2,315,208.4,11.9,sunny
98 | 33.1,315,215.4,23.0,sunny
99 | 33.6,315,220.2,22.1,sunny
100 | 29.6,315,212.6,24.1,sunny
101 | 34.9,315,233.7,25.2,sunny
102 |
--------------------------------------------------------------------------------
/6-3-1-logistic-regression.csv:
--------------------------------------------------------------------------------
1 | hours,result
2 | 0,0
3 | 0,0
4 | 0,0
5 | 0,0
6 | 0,0
7 | 0,0
8 | 0,0
9 | 0,0
10 | 0,0
11 | 0,0
12 | 1,0
13 | 1,0
14 | 1,0
15 | 1,0
16 | 1,0
17 | 1,0
18 | 1,0
19 | 1,0
20 | 1,0
21 | 1,0
22 | 2,0
23 | 2,1
24 | 2,0
25 | 2,0
26 | 2,0
27 | 2,0
28 | 2,0
29 | 2,0
30 | 2,0
31 | 2,0
32 | 3,0
33 | 3,0
34 | 3,1
35 | 3,0
36 | 3,0
37 | 3,0
38 | 3,0
39 | 3,0
40 | 3,0
41 | 3,0
42 | 4,1
43 | 4,1
44 | 4,0
45 | 4,1
46 | 4,0
47 | 4,0
48 | 4,1
49 | 4,0
50 | 4,0
51 | 4,0
52 | 5,0
53 | 5,1
54 | 5,0
55 | 5,0
56 | 5,0
57 | 5,0
58 | 5,1
59 | 5,0
60 | 5,1
61 | 5,1
62 | 6,1
63 | 6,1
64 | 6,1
65 | 6,1
66 | 6,1
67 | 6,1
68 | 6,1
69 | 6,1
70 | 6,0
71 | 6,1
72 | 7,0
73 | 7,1
74 | 7,1
75 | 7,1
76 | 7,1
77 | 7,1
78 | 7,0
79 | 7,1
80 | 7,1
81 | 7,1
82 | 8,1
83 | 8,1
84 | 8,1
85 | 8,1
86 | 8,1
87 | 8,1
88 | 8,1
89 | 8,0
90 | 8,1
91 | 8,1
92 | 9,1
93 | 9,1
94 | 9,1
95 | 9,1
96 | 9,1
97 | 9,1
98 | 9,1
99 | 9,1
100 | 9,1
101 | 9,1
102 |
--------------------------------------------------------------------------------
/6-5-1-poisson-regression.csv:
--------------------------------------------------------------------------------
1 | beer_number,temperature
2 | 6,17.5
3 | 11,26.6
4 | 2,5.0
5 | 4,14.1
6 | 2,9.4
7 | 2,7.8
8 | 3,10.6
9 | 5,15.4
10 | 6,16.9
11 | 7,21.2
12 | 6,17.6
13 | 11,25.6
14 | 4,11.1
15 | 16,31.3
16 | 4,5.8
17 | 13,25.1
18 | 5,17.5
19 | 7,21.8
20 | 3,9.2
21 | 5,10.9
22 | 14,29.0
23 | 22,34.0
24 | 7,14.4
25 | 11,25.8
26 | 18,31.3
27 | 17,31.8
28 | 2,7.6
29 | 2,6.2
30 | 4,10.1
31 | 16,31.3
32 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Python Stats Book
2 |
3 | 
4 |
--------------------------------------------------------------------------------
/_build/.DS_Store:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/SocratesClub/statsbook/c5a012a41d83a0b48ce414d1e7b418cddf5ee90a/_build/.DS_Store
--------------------------------------------------------------------------------
/_build/.doctrees/2-2-认识Jupyter-Notebook.doctree:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/SocratesClub/statsbook/c5a012a41d83a0b48ce414d1e7b418cddf5ee90a/_build/.doctrees/2-2-认识Jupyter-Notebook.doctree
--------------------------------------------------------------------------------
/_build/.doctrees/2-3-Python编程基础.doctree:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/SocratesClub/statsbook/c5a012a41d83a0b48ce414d1e7b418cddf5ee90a/_build/.doctrees/2-3-Python编程基础.doctree
--------------------------------------------------------------------------------
/_build/.doctrees/2-4-认识numpy与pandas.doctree:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/SocratesClub/statsbook/c5a012a41d83a0b48ce414d1e7b418cddf5ee90a/_build/.doctrees/2-4-认识numpy与pandas.doctree
--------------------------------------------------------------------------------
/_build/.doctrees/3-1-使用Python进行描述统计单变量.doctree:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/SocratesClub/statsbook/c5a012a41d83a0b48ce414d1e7b418cddf5ee90a/_build/.doctrees/3-1-使用Python进行描述统计单变量.doctree
--------------------------------------------------------------------------------
/_build/.doctrees/3-1-使用Python进行描述统计:单变量.doctree:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/SocratesClub/statsbook/c5a012a41d83a0b48ce414d1e7b418cddf5ee90a/_build/.doctrees/3-1-使用Python进行描述统计:单变量.doctree
--------------------------------------------------------------------------------
/_build/.doctrees/3-10-列联表检验.doctree:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/SocratesClub/statsbook/c5a012a41d83a0b48ce414d1e7b418cddf5ee90a/_build/.doctrees/3-10-列联表检验.doctree
--------------------------------------------------------------------------------
/_build/.doctrees/3-2-使用Python进行描述统计多变量.doctree:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/SocratesClub/statsbook/c5a012a41d83a0b48ce414d1e7b418cddf5ee90a/_build/.doctrees/3-2-使用Python进行描述统计多变量.doctree
--------------------------------------------------------------------------------
/_build/.doctrees/3-2-使用Python进行描述统计:多变量.doctree:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/SocratesClub/statsbook/c5a012a41d83a0b48ce414d1e7b418cddf5ee90a/_build/.doctrees/3-2-使用Python进行描述统计:多变量.doctree
--------------------------------------------------------------------------------
/_build/.doctrees/3-3-基于matplotlib-seaborn的数据可视化.doctree:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/SocratesClub/statsbook/c5a012a41d83a0b48ce414d1e7b418cddf5ee90a/_build/.doctrees/3-3-基于matplotlib-seaborn的数据可视化.doctree
--------------------------------------------------------------------------------
/_build/.doctrees/3-4-用Python模拟抽样.doctree:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/SocratesClub/statsbook/c5a012a41d83a0b48ce414d1e7b418cddf5ee90a/_build/.doctrees/3-4-用Python模拟抽样.doctree
--------------------------------------------------------------------------------
/_build/.doctrees/3-5-样本统计量的性质.doctree:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/SocratesClub/statsbook/c5a012a41d83a0b48ce414d1e7b418cddf5ee90a/_build/.doctrees/3-5-样本统计量的性质.doctree
--------------------------------------------------------------------------------
/_build/.doctrees/3-6-正态分布及其应用.doctree:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/SocratesClub/statsbook/c5a012a41d83a0b48ce414d1e7b418cddf5ee90a/_build/.doctrees/3-6-正态分布及其应用.doctree
--------------------------------------------------------------------------------
/_build/.doctrees/3-7-参数估计.doctree:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/SocratesClub/statsbook/c5a012a41d83a0b48ce414d1e7b418cddf5ee90a/_build/.doctrees/3-7-参数估计.doctree
--------------------------------------------------------------------------------
/_build/.doctrees/3-8-假设检验.doctree:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/SocratesClub/statsbook/c5a012a41d83a0b48ce414d1e7b418cddf5ee90a/_build/.doctrees/3-8-假设检验.doctree
--------------------------------------------------------------------------------
/_build/.doctrees/3-9-均值差的检验.doctree:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/SocratesClub/statsbook/c5a012a41d83a0b48ce414d1e7b418cddf5ee90a/_build/.doctrees/3-9-均值差的检验.doctree
--------------------------------------------------------------------------------
/_build/.doctrees/5-1-一元回归.doctree:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/SocratesClub/statsbook/c5a012a41d83a0b48ce414d1e7b418cddf5ee90a/_build/.doctrees/5-1-一元回归.doctree
--------------------------------------------------------------------------------
/_build/.doctrees/5-2-方差分析.doctree:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/SocratesClub/statsbook/c5a012a41d83a0b48ce414d1e7b418cddf5ee90a/_build/.doctrees/5-2-方差分析.doctree
--------------------------------------------------------------------------------
/_build/.doctrees/5-3-含有多个解释变量的模型.doctree:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/SocratesClub/statsbook/c5a012a41d83a0b48ce414d1e7b418cddf5ee90a/_build/.doctrees/5-3-含有多个解释变量的模型.doctree
--------------------------------------------------------------------------------
/_build/.doctrees/6-1-各种概率分布.doctree:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/SocratesClub/statsbook/c5a012a41d83a0b48ce414d1e7b418cddf5ee90a/_build/.doctrees/6-1-各种概率分布.doctree
--------------------------------------------------------------------------------
/_build/.doctrees/6-3-logistic回归.doctree:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/SocratesClub/statsbook/c5a012a41d83a0b48ce414d1e7b418cddf5ee90a/_build/.doctrees/6-3-logistic回归.doctree
--------------------------------------------------------------------------------
/_build/.doctrees/6-4-广义线性模型的评估.doctree:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/SocratesClub/statsbook/c5a012a41d83a0b48ce414d1e7b418cddf5ee90a/_build/.doctrees/6-4-广义线性模型的评估.doctree
--------------------------------------------------------------------------------
/_build/.doctrees/6-5-泊松回归.doctree:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/SocratesClub/statsbook/c5a012a41d83a0b48ce414d1e7b418cddf5ee90a/_build/.doctrees/6-5-泊松回归.doctree
--------------------------------------------------------------------------------
/_build/.doctrees/7-3-Python中的Ridge回归与Lasso回归.doctree:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/SocratesClub/statsbook/c5a012a41d83a0b48ce414d1e7b418cddf5ee90a/_build/.doctrees/7-3-Python中的Ridge回归与Lasso回归.doctree
--------------------------------------------------------------------------------
/_build/.doctrees/7-4-线性模型与神经网络.doctree:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/SocratesClub/statsbook/c5a012a41d83a0b48ce414d1e7b418cddf5ee90a/_build/.doctrees/7-4-线性模型与神经网络.doctree
--------------------------------------------------------------------------------
/_build/.doctrees/README.doctree:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/SocratesClub/statsbook/c5a012a41d83a0b48ce414d1e7b418cddf5ee90a/_build/.doctrees/README.doctree
--------------------------------------------------------------------------------
/_build/.doctrees/environment.pickle:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/SocratesClub/statsbook/c5a012a41d83a0b48ce414d1e7b418cddf5ee90a/_build/.doctrees/environment.pickle
--------------------------------------------------------------------------------
/_build/.doctrees/glue_cache.json:
--------------------------------------------------------------------------------
1 | {}
--------------------------------------------------------------------------------
/_build/html/.DS_Store:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/SocratesClub/statsbook/c5a012a41d83a0b48ce414d1e7b418cddf5ee90a/_build/html/.DS_Store
--------------------------------------------------------------------------------
/_build/html/.buildinfo:
--------------------------------------------------------------------------------
1 | # Sphinx build info version 1
2 | # This file hashes the configuration used when building these files. When it is not found, a full rebuild will be done.
3 | config: 9d17def4c2be3ed0b33cbdfec7fbe0e1
4 | tags: 645f666f9bcd5a90fca523b33c5a78b7
5 |
--------------------------------------------------------------------------------
/_build/html/_images/3-3-基于matplotlib-seaborn的数据可视化_11_2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/SocratesClub/statsbook/c5a012a41d83a0b48ce414d1e7b418cddf5ee90a/_build/html/_images/3-3-基于matplotlib-seaborn的数据可视化_11_2.png
--------------------------------------------------------------------------------
/_build/html/_images/3-3-基于matplotlib-seaborn的数据可视化_13_1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/SocratesClub/statsbook/c5a012a41d83a0b48ce414d1e7b418cddf5ee90a/_build/html/_images/3-3-基于matplotlib-seaborn的数据可视化_13_1.png
--------------------------------------------------------------------------------
/_build/html/_images/3-3-基于matplotlib-seaborn的数据可视化_14_1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/SocratesClub/statsbook/c5a012a41d83a0b48ce414d1e7b418cddf5ee90a/_build/html/_images/3-3-基于matplotlib-seaborn的数据可视化_14_1.png
--------------------------------------------------------------------------------
/_build/html/_images/3-3-基于matplotlib-seaborn的数据可视化_19_2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/SocratesClub/statsbook/c5a012a41d83a0b48ce414d1e7b418cddf5ee90a/_build/html/_images/3-3-基于matplotlib-seaborn的数据可视化_19_2.png
--------------------------------------------------------------------------------
/_build/html/_images/3-3-基于matplotlib-seaborn的数据可视化_21_1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/SocratesClub/statsbook/c5a012a41d83a0b48ce414d1e7b418cddf5ee90a/_build/html/_images/3-3-基于matplotlib-seaborn的数据可视化_21_1.png
--------------------------------------------------------------------------------
/_build/html/_images/3-3-基于matplotlib-seaborn的数据可视化_24_1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/SocratesClub/statsbook/c5a012a41d83a0b48ce414d1e7b418cddf5ee90a/_build/html/_images/3-3-基于matplotlib-seaborn的数据可视化_24_1.png
--------------------------------------------------------------------------------
/_build/html/_images/3-3-基于matplotlib-seaborn的数据可视化_26_1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/SocratesClub/statsbook/c5a012a41d83a0b48ce414d1e7b418cddf5ee90a/_build/html/_images/3-3-基于matplotlib-seaborn的数据可视化_26_1.png
--------------------------------------------------------------------------------
/_build/html/_images/3-3-基于matplotlib-seaborn的数据可视化_29_1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/SocratesClub/statsbook/c5a012a41d83a0b48ce414d1e7b418cddf5ee90a/_build/html/_images/3-3-基于matplotlib-seaborn的数据可视化_29_1.png
--------------------------------------------------------------------------------
/_build/html/_images/3-3-基于matplotlib-seaborn的数据可视化_33_1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/SocratesClub/statsbook/c5a012a41d83a0b48ce414d1e7b418cddf5ee90a/_build/html/_images/3-3-基于matplotlib-seaborn的数据可视化_33_1.png
--------------------------------------------------------------------------------
/_build/html/_images/3-3-基于matplotlib-seaborn的数据可视化_5_1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/SocratesClub/statsbook/c5a012a41d83a0b48ce414d1e7b418cddf5ee90a/_build/html/_images/3-3-基于matplotlib-seaborn的数据可视化_5_1.png
--------------------------------------------------------------------------------
/_build/html/_images/3-3-基于matplotlib-seaborn的数据可视化_8_1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/SocratesClub/statsbook/c5a012a41d83a0b48ce414d1e7b418cddf5ee90a/_build/html/_images/3-3-基于matplotlib-seaborn的数据可视化_8_1.png
--------------------------------------------------------------------------------
/_build/html/_images/3-4-用Python模拟抽样_20_2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/SocratesClub/statsbook/c5a012a41d83a0b48ce414d1e7b418cddf5ee90a/_build/html/_images/3-4-用Python模拟抽样_20_2.png
--------------------------------------------------------------------------------
/_build/html/_images/3-4-用Python模拟抽样_24_1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/SocratesClub/statsbook/c5a012a41d83a0b48ce414d1e7b418cddf5ee90a/_build/html/_images/3-4-用Python模拟抽样_24_1.png
--------------------------------------------------------------------------------
/_build/html/_images/3-4-用Python模拟抽样_25_2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/SocratesClub/statsbook/c5a012a41d83a0b48ce414d1e7b418cddf5ee90a/_build/html/_images/3-4-用Python模拟抽样_25_2.png
--------------------------------------------------------------------------------
/_build/html/_images/3-5-样本统计量的性质_11_2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/SocratesClub/statsbook/c5a012a41d83a0b48ce414d1e7b418cddf5ee90a/_build/html/_images/3-5-样本统计量的性质_11_2.png
--------------------------------------------------------------------------------
/_build/html/_images/3-5-样本统计量的性质_16_1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/SocratesClub/statsbook/c5a012a41d83a0b48ce414d1e7b418cddf5ee90a/_build/html/_images/3-5-样本统计量的性质_16_1.png
--------------------------------------------------------------------------------
/_build/html/_images/3-5-样本统计量的性质_22_1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/SocratesClub/statsbook/c5a012a41d83a0b48ce414d1e7b418cddf5ee90a/_build/html/_images/3-5-样本统计量的性质_22_1.png
--------------------------------------------------------------------------------
/_build/html/_images/3-5-样本统计量的性质_27_1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/SocratesClub/statsbook/c5a012a41d83a0b48ce414d1e7b418cddf5ee90a/_build/html/_images/3-5-样本统计量的性质_27_1.png
--------------------------------------------------------------------------------
/_build/html/_images/3-5-样本统计量的性质_30_1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/SocratesClub/statsbook/c5a012a41d83a0b48ce414d1e7b418cddf5ee90a/_build/html/_images/3-5-样本统计量的性质_30_1.png
--------------------------------------------------------------------------------
/_build/html/_images/3-5-样本统计量的性质_41_1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/SocratesClub/statsbook/c5a012a41d83a0b48ce414d1e7b418cddf5ee90a/_build/html/_images/3-5-样本统计量的性质_41_1.png
--------------------------------------------------------------------------------
/_build/html/_images/3-5-样本统计量的性质_43_3.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/SocratesClub/statsbook/c5a012a41d83a0b48ce414d1e7b418cddf5ee90a/_build/html/_images/3-5-样本统计量的性质_43_3.png
--------------------------------------------------------------------------------
/_build/html/_images/3-6-正态分布及其应用_23_2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/SocratesClub/statsbook/c5a012a41d83a0b48ce414d1e7b418cddf5ee90a/_build/html/_images/3-6-正态分布及其应用_23_2.png
--------------------------------------------------------------------------------
/_build/html/_images/3-6-正态分布及其应用_25_1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/SocratesClub/statsbook/c5a012a41d83a0b48ce414d1e7b418cddf5ee90a/_build/html/_images/3-6-正态分布及其应用_25_1.png
--------------------------------------------------------------------------------
/_build/html/_images/3-6-正态分布及其应用_26_2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/SocratesClub/statsbook/c5a012a41d83a0b48ce414d1e7b418cddf5ee90a/_build/html/_images/3-6-正态分布及其应用_26_2.png
--------------------------------------------------------------------------------
/_build/html/_images/3-6-正态分布及其应用_9_1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/SocratesClub/statsbook/c5a012a41d83a0b48ce414d1e7b418cddf5ee90a/_build/html/_images/3-6-正态分布及其应用_9_1.png
--------------------------------------------------------------------------------
/_build/html/_images/5-1-一元回归_18_1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/SocratesClub/statsbook/c5a012a41d83a0b48ce414d1e7b418cddf5ee90a/_build/html/_images/5-1-一元回归_18_1.png
--------------------------------------------------------------------------------
/_build/html/_images/5-1-一元回归_40_2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/SocratesClub/statsbook/c5a012a41d83a0b48ce414d1e7b418cddf5ee90a/_build/html/_images/5-1-一元回归_40_2.png
--------------------------------------------------------------------------------
/_build/html/_images/5-1-一元回归_41_2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/SocratesClub/statsbook/c5a012a41d83a0b48ce414d1e7b418cddf5ee90a/_build/html/_images/5-1-一元回归_41_2.png
--------------------------------------------------------------------------------
/_build/html/_images/5-1-一元回归_43_0.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/SocratesClub/statsbook/c5a012a41d83a0b48ce414d1e7b418cddf5ee90a/_build/html/_images/5-1-一元回归_43_0.png
--------------------------------------------------------------------------------
/_build/html/_images/5-1-一元回归_48_1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/SocratesClub/statsbook/c5a012a41d83a0b48ce414d1e7b418cddf5ee90a/_build/html/_images/5-1-一元回归_48_1.png
--------------------------------------------------------------------------------
/_build/html/_images/5-1-一元回归_5_1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/SocratesClub/statsbook/c5a012a41d83a0b48ce414d1e7b418cddf5ee90a/_build/html/_images/5-1-一元回归_5_1.png
--------------------------------------------------------------------------------
/_build/html/_images/5-2-方差分析_5_1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/SocratesClub/statsbook/c5a012a41d83a0b48ce414d1e7b418cddf5ee90a/_build/html/_images/5-2-方差分析_5_1.png
--------------------------------------------------------------------------------
/_build/html/_images/5-3-含有多个解释变量的模型_12_1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/SocratesClub/statsbook/c5a012a41d83a0b48ce414d1e7b418cddf5ee90a/_build/html/_images/5-3-含有多个解释变量的模型_12_1.png
--------------------------------------------------------------------------------
/_build/html/_images/5-3-含有多个解释变量的模型_5_1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/SocratesClub/statsbook/c5a012a41d83a0b48ce414d1e7b418cddf5ee90a/_build/html/_images/5-3-含有多个解释变量的模型_5_1.png
--------------------------------------------------------------------------------
/_build/html/_images/5-3-含有多个解释变量的模型_9_1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/SocratesClub/statsbook/c5a012a41d83a0b48ce414d1e7b418cddf5ee90a/_build/html/_images/5-3-含有多个解释变量的模型_9_1.png
--------------------------------------------------------------------------------
/_build/html/_images/6-1-各种概率分布_10_2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/SocratesClub/statsbook/c5a012a41d83a0b48ce414d1e7b418cddf5ee90a/_build/html/_images/6-1-各种概率分布_10_2.png
--------------------------------------------------------------------------------
/_build/html/_images/6-1-各种概率分布_11_1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/SocratesClub/statsbook/c5a012a41d83a0b48ce414d1e7b418cddf5ee90a/_build/html/_images/6-1-各种概率分布_11_1.png
--------------------------------------------------------------------------------
/_build/html/_images/6-1-各种概率分布_6_2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/SocratesClub/statsbook/c5a012a41d83a0b48ce414d1e7b418cddf5ee90a/_build/html/_images/6-1-各种概率分布_6_2.png
--------------------------------------------------------------------------------
/_build/html/_images/6-3-logistic回归_16_1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/SocratesClub/statsbook/c5a012a41d83a0b48ce414d1e7b418cddf5ee90a/_build/html/_images/6-3-logistic回归_16_1.png
--------------------------------------------------------------------------------
/_build/html/_images/6-3-logistic回归_5_1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/SocratesClub/statsbook/c5a012a41d83a0b48ce414d1e7b418cddf5ee90a/_build/html/_images/6-3-logistic回归_5_1.png
--------------------------------------------------------------------------------
/_build/html/_images/6-5-泊松回归_10_1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/SocratesClub/statsbook/c5a012a41d83a0b48ce414d1e7b418cddf5ee90a/_build/html/_images/6-5-泊松回归_10_1.png
--------------------------------------------------------------------------------
/_build/html/_images/7-3-Python中的Ridge回归与Lasso回归_12_1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/SocratesClub/statsbook/c5a012a41d83a0b48ce414d1e7b418cddf5ee90a/_build/html/_images/7-3-Python中的Ridge回归与Lasso回归_12_1.png
--------------------------------------------------------------------------------
/_build/html/_images/7-3-Python中的Ridge回归与Lasso回归_22_2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/SocratesClub/statsbook/c5a012a41d83a0b48ce414d1e7b418cddf5ee90a/_build/html/_images/7-3-Python中的Ridge回归与Lasso回归_22_2.png
--------------------------------------------------------------------------------
/_build/html/_images/7-3-Python中的Ridge回归与Lasso回归_23_2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/SocratesClub/statsbook/c5a012a41d83a0b48ce414d1e7b418cddf5ee90a/_build/html/_images/7-3-Python中的Ridge回归与Lasso回归_23_2.png
--------------------------------------------------------------------------------
/_build/html/_images/7-3-Python中的Ridge回归与Lasso回归_30_1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/SocratesClub/statsbook/c5a012a41d83a0b48ce414d1e7b418cddf5ee90a/_build/html/_images/7-3-Python中的Ridge回归与Lasso回归_30_1.png
--------------------------------------------------------------------------------
/_build/html/_panels_static/panels-main.c949a650a448cc0ae9fd3441c0e17fb0.css:
--------------------------------------------------------------------------------
1 | details.dropdown .summary-title{padding-right:3em !important;-moz-user-select:none;-ms-user-select:none;-webkit-user-select:none;user-select:none}details.dropdown:hover{cursor:pointer}details.dropdown .summary-content{cursor:default}details.dropdown summary{list-style:none;padding:1em}details.dropdown summary .octicon.no-title{vertical-align:middle}details.dropdown[open] summary .octicon.no-title{visibility:hidden}details.dropdown summary::-webkit-details-marker{display:none}details.dropdown summary:focus{outline:none}details.dropdown summary:hover .summary-up svg,details.dropdown summary:hover .summary-down svg{opacity:1}details.dropdown .summary-up svg,details.dropdown .summary-down svg{display:block;opacity:.6}details.dropdown .summary-up,details.dropdown .summary-down{pointer-events:none;position:absolute;right:1em;top:.75em}details.dropdown[open] .summary-down{visibility:hidden}details.dropdown:not([open]) .summary-up{visibility:hidden}details.dropdown.fade-in[open] summary~*{-moz-animation:panels-fade-in .5s ease-in-out;-webkit-animation:panels-fade-in .5s ease-in-out;animation:panels-fade-in .5s ease-in-out}details.dropdown.fade-in-slide-down[open] summary~*{-moz-animation:panels-fade-in .5s ease-in-out, panels-slide-down .5s ease-in-out;-webkit-animation:panels-fade-in .5s ease-in-out, panels-slide-down .5s ease-in-out;animation:panels-fade-in .5s ease-in-out, panels-slide-down .5s ease-in-out}@keyframes panels-fade-in{0%{opacity:0}100%{opacity:1}}@keyframes panels-slide-down{0%{transform:translate(0, -10px)}100%{transform:translate(0, 0)}}.octicon{display:inline-block;fill:currentColor;vertical-align:text-top}.tabbed-content{box-shadow:0 -.0625rem var(--tabs-color-overline),0 .0625rem var(--tabs-color-underline);display:none;order:99;padding-bottom:.75rem;padding-top:.75rem;width:100%}.tabbed-content>:first-child{margin-top:0 !important}.tabbed-content>:last-child{margin-bottom:0 
!important}.tabbed-content>.tabbed-set{margin:0}.tabbed-set{border-radius:.125rem;display:flex;flex-wrap:wrap;margin:1em 0;position:relative}.tabbed-set>input{opacity:0;position:absolute}.tabbed-set>input:checked+label{border-color:var(--tabs-color-label-active);color:var(--tabs-color-label-active)}.tabbed-set>input:checked+label+.tabbed-content{display:block}.tabbed-set>input:focus+label{outline-style:auto}.tabbed-set>input:not(.focus-visible)+label{outline:none;-webkit-tap-highlight-color:transparent}.tabbed-set>label{border-bottom:.125rem solid transparent;color:var(--tabs-color-label-inactive);cursor:pointer;font-size:var(--tabs-size-label);font-weight:700;padding:1em 1.25em .5em;transition:color 250ms;width:auto;z-index:1}html .tabbed-set>label:hover{color:var(--tabs-color-label-active)}
2 |
--------------------------------------------------------------------------------
/_build/html/_panels_static/panels-variables.06eb56fa6e07937060861dad626602ad.css:
--------------------------------------------------------------------------------
1 | :root {
2 | --tabs-color-label-active: hsla(231, 99%, 66%, 1);
3 | --tabs-color-label-inactive: rgba(178, 206, 245, 0.62);
4 | --tabs-color-overline: rgb(207, 236, 238);
5 | --tabs-color-underline: rgb(207, 236, 238);
6 | --tabs-size-label: 1rem;
7 | }
--------------------------------------------------------------------------------
/_build/html/_sources/.DS_Store:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/SocratesClub/statsbook/c5a012a41d83a0b48ce414d1e7b418cddf5ee90a/_build/html/_sources/.DS_Store
--------------------------------------------------------------------------------
/_build/html/_sources/2-2-认识Jupyter-Notebook.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "# 第 2 章 Python 与 Jupyter Notebook 基础\n",
8 | "\n",
9 | "## 第 2 节 认识 Jupyter Notebook"
10 | ]
11 | },
12 | {
13 | "cell_type": "markdown",
14 | "metadata": {},
15 | "source": [
16 | "### 3. 执行代码"
17 | ]
18 | },
19 | {
20 | "cell_type": "code",
21 | "execution_count": null,
22 | "metadata": {
23 | "collapsed": true
24 | },
25 | "outputs": [],
26 | "source": [
27 | "1"
28 | ]
29 | },
30 | {
31 | "cell_type": "markdown",
32 | "metadata": {},
33 | "source": [
34 | "### 5. Markdown 的用法"
35 | ]
36 | },
37 | {
38 | "cell_type": "markdown",
39 | "metadata": {
40 | "collapsed": true
41 | },
42 | "source": [
43 | "```\n",
44 | "# 一级标题\n",
45 | "## 二级标题\n",
46 | "### 三级标题\n",
47 | "#### 四级标题\n",
48 | "\n",
49 | "-----------------\n",
50 | "- 列表条目\n",
51 | "- 列表条目\n",
52 | "\n",
53 | "-----------------\n",
54 | "\n",
55 | "1. 序号条目\n",
56 | "2. 序号条目\n",
57 | "```"
58 | ]
59 | },
60 | {
61 | "cell_type": "code",
62 | "execution_count": null,
63 | "metadata": {
64 | "collapsed": true
65 | },
66 | "outputs": [],
67 | "source": []
68 | }
69 | ],
70 | "metadata": {
71 | "kernelspec": {
72 | "display_name": "Python 3",
73 | "language": "python",
74 | "name": "python3"
75 | },
76 | "language_info": {
77 | "codemirror_mode": {
78 | "name": "ipython",
79 | "version": 3
80 | },
81 | "file_extension": ".py",
82 | "mimetype": "text/x-python",
83 | "name": "python",
84 | "nbconvert_exporter": "python",
85 | "pygments_lexer": "ipython3",
86 | "version": "3.7.6"
87 | },
88 | "toc": {
89 | "base_numbering": 1,
90 | "nav_menu": {},
91 | "number_sections": false,
92 | "sideBar": true,
93 | "skip_h1_title": false,
94 | "title_cell": "Table of Contents",
95 | "title_sidebar": "Contents",
96 | "toc_cell": false,
97 | "toc_position": {},
98 | "toc_section_display": true,
99 | "toc_window_display": true
100 | }
101 | },
102 | "nbformat": 4,
103 | "nbformat_minor": 2
104 | }
105 |
--------------------------------------------------------------------------------
/_build/html/_sources/3-10-列联表检验.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "# 第 10 节 列联表检验\n",
8 | "\n",
9 | "## 第 3 章 使用 Python 进行数据分析|用 Python 动手学统计学"
10 | ]
11 | },
12 | {
13 | "cell_type": "markdown",
14 | "metadata": {},
15 | "source": [
16 | "### 5. 实现:计算 p 值"
17 | ]
18 | },
19 | {
20 | "cell_type": "code",
21 | "execution_count": 1,
22 | "metadata": {
23 | "collapsed": true
24 | },
25 | "outputs": [],
26 | "source": [
27 | "# 用于数值计算的库\n",
28 | "import numpy as np\n",
29 | "import pandas as pd\n",
30 | "import scipy as sp\n",
31 | "from scipy import stats\n",
32 | "\n",
33 | "# 用于绘图的库\n",
34 | "from matplotlib import pyplot as plt\n",
35 | "import seaborn as sns\n",
36 | "sns.set()\n",
37 | "\n",
38 | "# 设置浮点数打印精度\n",
39 | "%precision 3\n",
40 | "# 在 Jupyter Notebook 里显示图形\n",
41 | "%matplotlib inline"
42 | ]
43 | },
44 | {
45 | "cell_type": "code",
46 | "execution_count": 2,
47 | "metadata": {},
48 | "outputs": [
49 | {
50 | "data": {
51 | "text/plain": [
52 | "0.010"
53 | ]
54 | },
55 | "execution_count": 2,
56 | "metadata": {},
57 | "output_type": "execute_result"
58 | }
59 | ],
60 | "source": [
61 | "# 计算 p 值\n",
62 | "1 - sp.stats.chi2.cdf(x = 6.667, df = 1)"
63 | ]
64 | },
65 | {
66 | "cell_type": "markdown",
67 | "metadata": {},
68 | "source": [
69 | "### 6. 实现:列联表检验"
70 | ]
71 | },
72 | {
73 | "cell_type": "code",
74 | "execution_count": 3,
75 | "metadata": {
76 | "scrolled": true
77 | },
78 | "outputs": [
79 | {
80 | "name": "stdout",
81 | "output_type": "stream",
82 | "text": [
83 | " color click freq\n",
84 | "0 blue click 20\n",
85 | "1 blue not 230\n",
86 | "2 red click 10\n",
87 | "3 red not 40\n"
88 | ]
89 | }
90 | ],
91 | "source": [
92 | "# 读入数据\n",
93 | "click_data = pd.read_csv(\"3-10-1-click_data.csv\")\n",
94 | "print(click_data)"
95 | ]
96 | },
97 | {
98 | "cell_type": "code",
99 | "execution_count": 4,
100 | "metadata": {},
101 | "outputs": [
102 | {
103 | "name": "stdout",
104 | "output_type": "stream",
105 | "text": [
106 | "click click not\n",
107 | "color \n",
108 | "blue 20 230\n",
109 | "red 10 40\n"
110 | ]
111 | }
112 | ],
113 | "source": [
114 | "# 转换为列联表\n",
115 | "cross = pd.pivot_table(\n",
116 | " data = click_data,\n",
117 | " values = \"freq\",\n",
118 | " aggfunc = \"sum\",\n",
119 | " index = \"color\",\n",
120 | " columns = \"click\"\n",
121 | ")\n",
122 | "print(cross)"
123 | ]
124 | },
125 | {
126 | "cell_type": "code",
127 | "execution_count": 5,
128 | "metadata": {},
129 | "outputs": [
130 | {
131 | "data": {
132 | "text/plain": [
133 | "(6.667, 0.010, 1, array([[ 25., 225.],\n",
134 | " [ 5., 45.]]))"
135 | ]
136 | },
137 | "execution_count": 5,
138 | "metadata": {},
139 | "output_type": "execute_result"
140 | }
141 | ],
142 | "source": [
143 | "# 进行检验\n",
144 | "sp.stats.chi2_contingency(cross, correction = False)"
145 | ]
146 | }
147 | ],
148 | "metadata": {
149 | "kernelspec": {
150 | "display_name": "Python 3",
151 | "language": "python",
152 | "name": "python3"
153 | },
154 | "language_info": {
155 | "codemirror_mode": {
156 | "name": "ipython",
157 | "version": 3
158 | },
159 | "file_extension": ".py",
160 | "mimetype": "text/x-python",
161 | "name": "python",
162 | "nbconvert_exporter": "python",
163 | "pygments_lexer": "ipython3",
164 | "version": "3.7.6"
165 | },
166 | "toc": {
167 | "base_numbering": 1,
168 | "nav_menu": {},
169 | "number_sections": false,
170 | "sideBar": true,
171 | "skip_h1_title": false,
172 | "title_cell": "Table of Contents",
173 | "title_sidebar": "Contents",
174 | "toc_cell": false,
175 | "toc_position": {},
176 | "toc_section_display": true,
177 | "toc_window_display": true
178 | }
179 | },
180 | "nbformat": 4,
181 | "nbformat_minor": 2
182 | }
183 |
--------------------------------------------------------------------------------
/_build/html/_sources/README.md:
--------------------------------------------------------------------------------
1 | # Python Stats Book
2 |
3 | 
4 |
--------------------------------------------------------------------------------
/_build/html/_static/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/SocratesClub/statsbook/c5a012a41d83a0b48ce414d1e7b418cddf5ee90a/_build/html/_static/__init__.py
--------------------------------------------------------------------------------
/_build/html/_static/__pycache__/__init__.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/SocratesClub/statsbook/c5a012a41d83a0b48ce414d1e7b418cddf5ee90a/_build/html/_static/__pycache__/__init__.cpython-37.pyc
--------------------------------------------------------------------------------
/_build/html/_static/copy-button.svg:
--------------------------------------------------------------------------------
1 |
6 |
--------------------------------------------------------------------------------
/_build/html/_static/copybutton.css:
--------------------------------------------------------------------------------
1 | /* Copy buttons */
2 | a.copybtn {
3 | position: absolute;
4 | top: .2em;
5 | right: .2em;
6 | width: 1em;
7 | height: 1em;
8 | opacity: .3;
9 | transition: opacity 0.5s;
10 | border: none;
11 | user-select: none;
12 | }
13 |
14 | div.highlight {
15 | position: relative;
16 | }
17 |
18 | a.copybtn > img {
19 | vertical-align: top;
20 | margin: 0;
21 | top: 0;
22 | left: 0;
23 | position: absolute;
24 | }
25 |
26 | .highlight:hover .copybtn {
27 | opacity: 1;
28 | }
29 |
30 | /**
31 | * A minimal CSS-only tooltip copied from:
32 | * https://codepen.io/mildrenben/pen/rVBrpK
33 | *
34 | * To use, write HTML like the following:
35 | *
36 | * <p class="o-tooltip--left" data-tooltip="Hey">Short</p>
37 | */
38 | .o-tooltip--left {
39 | position: relative;
40 | }
41 |
42 | .o-tooltip--left:after {
43 | opacity: 0;
44 | visibility: hidden;
45 | position: absolute;
46 | content: attr(data-tooltip);
47 | padding: 2px;
48 | top: 0;
49 | left: -.2em;
50 | background: grey;
51 | font-size: 1rem;
52 | color: white;
53 | white-space: nowrap;
54 | z-index: 2;
55 | border-radius: 2px;
56 | transform: translateX(-102%) translateY(0);
57 | transition: opacity 0.2s cubic-bezier(0.64, 0.09, 0.08, 1), transform 0.2s cubic-bezier(0.64, 0.09, 0.08, 1);
58 | }
59 |
60 | .o-tooltip--left:hover:after {
61 | display: block;
62 | opacity: 1;
63 | visibility: visible;
64 | transform: translateX(-100%) translateY(0);
65 | transition: opacity 0.2s cubic-bezier(0.64, 0.09, 0.08, 1), transform 0.2s cubic-bezier(0.64, 0.09, 0.08, 1);
66 | transition-delay: .5s;
67 | }
68 |
--------------------------------------------------------------------------------
/_build/html/_static/copybutton.js:
--------------------------------------------------------------------------------
// Localization support: tooltip strings keyed by language code.
const messages = {
  'en': {
    'copy': 'Copy',
    'copy_to_clipboard': 'Copy to clipboard',
    'copy_success': 'Copied!',
    'copy_failure': 'Failed to copy',
  },
  'es' : {
    'copy': 'Copiar',
    'copy_to_clipboard': 'Copiar al portapapeles',
    'copy_success': '¡Copiado!',
    'copy_failure': 'Error al copiar',
  },
  'de' : {
    'copy': 'Kopieren',
    'copy_to_clipboard': 'In die Zwischenablage kopieren',
    'copy_success': 'Kopiert!',
    'copy_failure': 'Fehler beim Kopieren',
  }
}

/**
 * Choose the key of `messages` that best matches a `lang` attribute value.
 *
 * An exact match wins. Otherwise the primary subtag is tried, so a
 * region-qualified value such as "de-AT" or "es_MX" still selects the
 * "de" / "es" strings instead of silently falling back to English.
 *
 * @param {*} lang value of the document's `lang` attribute (may be empty or missing)
 * @returns {string} a key that is guaranteed to be an own property of `messages`
 */
const resolveLocale = lang => {
  // Own-property check: a plain `messages[lang] !== undefined` test would
  // accept inherited names such as "toString" or "constructor".
  const isKnown = key => Object.prototype.hasOwnProperty.call(messages, key)

  if (typeof lang !== 'string' || lang === '') {
    return 'en'
  }
  if (isKnown(lang)) {
    return lang
  }
  const primarySubtag = lang.split(/[-_]/)[0].toLowerCase()
  return isKnown(primarySubtag) ? primarySubtag : 'en'
}

// Guard on `document` so the script can also be loaded outside a browser.
let locale = resolveLocale(
  typeof document !== 'undefined' ? document.documentElement.lang : undefined
)
28 |
29 | /**
30 | * Set up copy/paste for code blocks
31 | */
32 |
/**
 * Run `cb` as soon as the document is past the 'loading' state.
 *
 * If the document is already parsed the callback fires immediately.
 * Otherwise it is registered for 'DOMContentLoaded', or, when
 * `addEventListener` is unavailable, for `onreadystatechange` via
 * `attachEvent`, firing once `readyState` reaches 'complete'.
 */
const runWhenDOMLoaded = cb => {
  if (document.readyState != 'loading') {
    cb()
    return
  }

  if (document.addEventListener) {
    document.addEventListener('DOMContentLoaded', cb)
    return
  }

  // Fallback path for documents that only expose attachEvent.
  document.attachEvent('onreadystatechange', function() {
    if (document.readyState == 'complete') cb()
  })
}
44 |
// Build the DOM id assigned to the index-th code cell ("codecell0", "codecell1", ...).
const codeCellId = function (index) {
  return 'codecell'.concat(index)
}
46 |
// ClipboardJS selects the text it copies; drop that selection afterwards.
// Uses window.getSelection() when present, otherwise document.selection.
const clearSelection = () => {
  if (window.getSelection) {
    window.getSelection().removeAllRanges()
    return
  }
  if (document.selection) {
    document.selection.empty()
  }
}
55 |
/**
 * Show `newText` in the element's `data-tooltip` attribute for two seconds,
 * then restore the text that was there before the first change.
 *
 * Repeated calls while a restore is still pending (for example a quick
 * double click on the copy button) reuse the originally saved text and
 * restart the two second window. Without this, the second call would read
 * the temporary text back as the "old" value and leave it there for good.
 *
 * @param {Element} el element carrying the `data-tooltip` attribute
 * @param {string} newText text to display temporarily
 */
const temporarilyChangeTooltip = (() => {
  // el -> { timer, restoreText } for elements whose tooltip is currently swapped
  const pending = new WeakMap()

  return (el, newText) => {
    const inFlight = pending.get(el)
    let restoreText
    if (inFlight !== undefined) {
      clearTimeout(inFlight.timer)
      restoreText = inFlight.restoreText
    } else {
      restoreText = el.getAttribute('data-tooltip')
    }

    el.setAttribute('data-tooltip', newText)
    const timer = setTimeout(() => {
      pending.delete(el)
      el.setAttribute('data-tooltip', restoreText)
    }, 2000)
    pending.set(el, { timer, restoreText })
  }
})()
62 |
/**
 * Insert a copy button after every `div.highlight pre` element and hook the
 * buttons up to ClipboardJS.
 *
 * Each code cell receives an id from `codeCellId(index)`; the button that
 * follows it points back at that id through `data-clipboard-target`, which
 * is what `copyTargetText` reads when the button is clicked. Success and
 * failure are reported by temporarily swapping the button's tooltip text.
 */
const addCopyButtonToCodeCells = () => {
  // If ClipboardJS hasn't loaded, wait a bit and try again. This
  // happens because we load ClipboardJS asynchronously.
  if (window.ClipboardJS === undefined) {
    setTimeout(addCopyButtonToCodeCells, 250)
    return
  }

  // Add copybuttons to all of our code cells
  const codeCells = document.querySelectorAll('div.highlight pre')
  codeCells.forEach((codeCell, index) => {
    const id = codeCellId(index)
    codeCell.setAttribute('id', id)
    const pre_bg = getComputedStyle(codeCell).backgroundColor;

    // The anchor carries the class ClipboardJS is bound to below (`.copybtn`),
    // the `data-tooltip` text swapped by temporarilyChangeTooltip, and the
    // `data-clipboard-target` selector read by copyTargetText.
    // NOTE(review): this markup was empty in the file and has been rebuilt
    // from those usages; the icon path assumes the `copy-button.svg` asset
    // shipped next to this script under `_static/` — confirm against the
    // installed sphinx-copybutton version.
    const clipboardButton = id =>
    `<a class="copybtn o-tooltip--left" style="background-color: ${pre_bg}" data-tooltip="${messages[locale]['copy']}" data-clipboard-target="#${id}">
      <img src="${DOCUMENTATION_OPTIONS.URL_ROOT}_static/copy-button.svg" alt="${messages[locale]['copy_to_clipboard']}">
    </a>`
    codeCell.insertAdjacentHTML('afterend', clipboardButton(id))
  })

  // Escape every regular-expression metacharacter in `string` so it can be
  // embedded in a pattern and matched literally.
  function escapeRegExp(string) {
    return string.replace(/[.*+?^${}()|[\]\\]/g, '\\$&'); // $& means the whole matched string
  }

  /**
   * Prepare the text of a code cell for the clipboard.
   *
   * @param {string} textContent raw text of the code cell
   * @param {string} copybuttonPromptText prompt to look for at the start of each line
   * @param {boolean} isRegexp treat the prompt as a regular expression instead of a literal
   * @param {boolean} onlyCopyPromptLines drop lines that do not start with the prompt
   * @param {boolean} removePrompts strip the prompt from the lines that have it
   * @returns {string} the text to copy, without a single trailing newline
   */
  function formatCopyText(textContent, copybuttonPromptText, isRegexp = false, onlyCopyPromptLines = true, removePrompts = true) {

    var regexp;
    var match;

    // create regexp to capture prompt and remaining line
    if (isRegexp) {
      regexp = new RegExp('^(' + copybuttonPromptText + ')(.*)')
    } else {
      regexp = new RegExp('^(' + escapeRegExp(copybuttonPromptText) + ')(.*)')
    }

    const outputLines = [];
    var promptFound = false;
    for (const line of textContent.split('\n')) {
      match = line.match(regexp)
      if (match) {
        promptFound = true
        if (removePrompts) {
          outputLines.push(match[2])
        } else {
          outputLines.push(line)
        }
      } else {
        if (!onlyCopyPromptLines) {
          outputLines.push(line)
        }
      }
    }

    // If no lines with the prompt were found then just use original lines
    if (promptFound) {
      textContent = outputLines.join('\n');
    }

    // Remove a trailing newline to avoid auto-running when pasting
    if (textContent.endsWith("\n")) {
      textContent = textContent.slice(0, -1)
    }
    return textContent
  }


  // Resolve the element named by the clicked button's data-clipboard-target
  // and return its formatted text. The prompt is the empty string here, so
  // every line matches and is copied unchanged.
  var copyTargetText = (trigger) => {
    var target = document.querySelector(trigger.attributes['data-clipboard-target'].value);
    return formatCopyText(target.innerText, '', false, true, true)
  }

  // Initialize with a callback so we can modify the text before copy
  const clipboard = new ClipboardJS('.copybtn', {text: copyTargetText})

  // Update UI with error/success messages
  clipboard.on('success', event => {
    clearSelection()
    temporarilyChangeTooltip(event.trigger, messages[locale]['copy_success'])
  })

  clipboard.on('error', event => {
    temporarilyChangeTooltip(event.trigger, messages[locale]['copy_failure'])
  })
}
152 |
153 | runWhenDOMLoaded(addCopyButtonToCodeCells)
--------------------------------------------------------------------------------
/_build/html/_static/copybutton_funcs.js:
--------------------------------------------------------------------------------
// Backslash-escape every regular-expression metacharacter in `string`.
function escapeRegExp(string) {
  return string.replace(/[.*+?^${}()|[\]\\]/g, '\\$&'); // $& means the whole matched string
}

/**
 * Turn the raw text of a code block into what should land on the clipboard.
 *
 * Lines are matched against the prompt at their start. When at least one
 * line carries the prompt, the result is rebuilt from the matching lines
 * (with or without the prompt, per `removePrompts`) plus, unless
 * `onlyCopyPromptLines` is set, the non-matching lines. When no line
 * carries the prompt the text is used as-is. A single trailing newline is
 * removed so that pasting does not auto-run the code.
 *
 * @param {string} textContent raw text of the code block
 * @param {string} copybuttonPromptText prompt literal, or pattern when `isRegexp` is true
 * @param {boolean} isRegexp interpret the prompt as a regular expression
 * @param {boolean} onlyCopyPromptLines keep only the lines that start with the prompt
 * @param {boolean} removePrompts strip the prompt from the kept lines
 * @returns {string} text to copy
 */
export function formatCopyText(textContent, copybuttonPromptText, isRegexp = false, onlyCopyPromptLines = true, removePrompts = true) {
  // Group 1 captures the prompt, group 2 the remainder of the line.
  const promptSource = isRegexp ? copybuttonPromptText : escapeRegExp(copybuttonPromptText)
  const promptPattern = new RegExp('^(' + promptSource + ')(.*)')

  let sawPrompt = false
  const kept = textContent.split('\n').reduce((acc, rawLine) => {
    const hit = rawLine.match(promptPattern)
    if (hit) {
      sawPrompt = true
      acc.push(removePrompts ? hit[2] : rawLine)
    } else if (!onlyCopyPromptLines) {
      acc.push(rawLine)
    }
    return acc
  }, [])

  // Without any prompt line the original text is copied untouched.
  const result = sawPrompt ? kept.join('\n') : textContent

  return result.endsWith("\n") ? result.slice(0, -1) : result
}
48 |
--------------------------------------------------------------------------------
/_build/html/_static/css/blank.css:
--------------------------------------------------------------------------------
1 | /* This file is intentionally left blank to override the stylesheet of the
2 | parent theme via theme.conf. The parent style we import directly in theme.css */
--------------------------------------------------------------------------------
/_build/html/_static/css/theme.css:
--------------------------------------------------------------------------------
1 | /* Provided by the Sphinx base theme template at build time */
2 | @import "../basic.css";
3 |
4 | :root {
5 | /*****************************************************************************
6 | * Theme config
7 | **/
8 | --pst-header-height: 60px;
9 |
10 | /*****************************************************************************
11 | * Font size
12 | **/
13 | --pst-font-size-base: 15px; /* base font size - applied at body / html level */
14 |
15 | /* heading font sizes */
16 | --pst-font-size-h1: 36px;
17 | --pst-font-size-h2: 32px;
18 | --pst-font-size-h3: 26px;
19 | --pst-font-size-h4: 21px;
20 | --pst-font-size-h5: 18px;
21 | --pst-font-size-h6: 16px;
22 |
23 | /* smaller than heading font sizes */
24 | --pst-font-size-milli: 12px;
25 |
26 | --pst-sidebar-font-size: .9em;
27 | --pst-sidebar-caption-font-size: .9em;
28 |
29 | /*****************************************************************************
30 | * Font family
31 | **/
32 | /* These are adapted from https://systemfontstack.com/ */
33 | --pst-font-family-base-system: -apple-system, BlinkMacSystemFont, Segoe UI, "Helvetica Neue",
34 | Arial, sans-serif, Apple Color Emoji, Segoe UI Emoji, Segoe UI Symbol;
35 | --pst-font-family-monospace-system: "SFMono-Regular", Menlo, Consolas, Monaco,
36 | Liberation Mono, Lucida Console, monospace;
37 |
38 | --pst-font-family-base: var(--pst-font-family-base-system);
39 | --pst-font-family-heading: var(--pst-font-family-base);
40 | --pst-font-family-monospace: var(--pst-font-family-monospace-system);
41 |
42 | /*****************************************************************************
43 | * Color
44 | *
44 | * Colors are defined as RGB component strings: "red, green, blue"
46 | **/
47 | --pst-color-primary: 19, 6, 84;
48 | --pst-color-success: 40, 167, 69;
49 | --pst-color-info: 0, 123, 255; /*23, 162, 184;*/
50 | --pst-color-warning: 255, 193, 7;
51 | --pst-color-danger: 220, 53, 69;
52 | --pst-color-text-base: 51, 51, 51;
53 |
54 | --pst-color-h1: var(--pst-color-primary);
55 | --pst-color-h2: var(--pst-color-primary);
56 | --pst-color-h3: var(--pst-color-text-base);
57 | --pst-color-h4: var(--pst-color-text-base);
58 | --pst-color-h5: var(--pst-color-text-base);
59 | --pst-color-h6: var(--pst-color-text-base);
60 | --pst-color-paragraph: var(--pst-color-text-base);
61 | --pst-color-link: 0, 91, 129;
62 | --pst-color-link-hover: 227, 46, 0;
63 | --pst-color-headerlink: 198, 15, 15;
64 | --pst-color-headerlink-hover: 255, 255, 255;
65 | --pst-color-preformatted-text: 34, 34, 34;
66 | --pst-color-preformatted-background: 250, 250, 250;
67 | --pst-color-inline-code: 232, 62, 140;
68 |
69 | --pst-color-active-navigation: 19, 6, 84;
70 | --pst-color-navbar-link: 77, 77, 77;
71 | --pst-color-navbar-link-hover: var(--pst-color-active-navigation);
72 | --pst-color-navbar-link-active: var(--pst-color-active-navigation);
73 | --pst-color-sidebar-link: 77, 77, 77;
74 | --pst-color-sidebar-link-hover: var(--pst-color-active-navigation);
75 | --pst-color-sidebar-link-active: var(--pst-color-active-navigation);
76 | --pst-color-sidebar-expander-background-hover: 244, 244, 244;
77 | --pst-color-sidebar-caption: 77, 77, 77;
78 | --pst-color-toc-link: 119, 117, 122;
79 | --pst-color-toc-link-hover: var(--pst-color-active-navigation);
80 | --pst-color-toc-link-active: var(--pst-color-active-navigation);
81 |
82 | /*****************************************************************************
83 | * Icon
84 | **/
85 |
86 | /* font awesome icons*/
87 | --pst-icon-check-circle: '\f058';
88 | --pst-icon-info-circle: '\f05a';
89 | --pst-icon-exclamation-triangle: '\f071';
90 | --pst-icon-exclamation-circle: '\f06a';
91 | --pst-icon-times-circle: '\f057';
92 | --pst-icon-lightbulb: '\f0eb';
93 |
94 | /*****************************************************************************
95 | * Admonitions
96 | **/
97 |
98 | --pst-color-admonition-default: var(--pst-color-info);
99 | --pst-color-admonition-note: var(--pst-color-info);
100 | --pst-color-admonition-attention: var(--pst-color-warning);
101 | --pst-color-admonition-caution: var(--pst-color-warning);
102 | --pst-color-admonition-warning: var(--pst-color-warning);
103 | --pst-color-admonition-danger: var(--pst-color-danger);
104 | --pst-color-admonition-error: var(--pst-color-danger);
105 | --pst-color-admonition-hint: var(--pst-color-success);
106 | --pst-color-admonition-tip: var(--pst-color-success);
107 | --pst-color-admonition-important: var(--pst-color-success);
108 |
109 | --pst-icon-admonition-default: var(--pst-icon-info-circle);
110 | --pst-icon-admonition-note: var(--pst-icon-info-circle);
111 | --pst-icon-admonition-attention: var(--pst-icon-exclamation-circle);
112 | --pst-icon-admonition-caution: var(--pst-icon-exclamation-triangle);
113 | --pst-icon-admonition-warning: var(--pst-icon-exclamation-triangle);
114 | --pst-icon-admonition-danger: var(--pst-icon-exclamation-triangle);
115 | --pst-icon-admonition-error: var(--pst-icon-times-circle);
116 | --pst-icon-admonition-hint: var(--pst-icon-lightbulb);
117 | --pst-icon-admonition-tip: var(--pst-icon-lightbulb);
118 | --pst-icon-admonition-important: var(--pst-icon-exclamation-circle);
119 |
120 | }
121 |
--------------------------------------------------------------------------------
/_build/html/_static/documentation_options.js:
--------------------------------------------------------------------------------
// Global options object for the client-side scripts of this HTML build.
// NOTE(review): this file appears to be generated by the documentation build
// rather than written by hand — confirm before editing it directly.
var DOCUMENTATION_OPTIONS = {
    // Read when this script loads from the `data-url_root` attribute of the
    // element whose id is "documentation_options".
    URL_ROOT: document.getElementById("documentation_options").getAttribute('data-url_root'),
    VERSION: '',
    // NOTE(review): the literal string 'None' looks like an unset language
    // value serialized by the build — confirm.
    LANGUAGE: 'None',
    COLLAPSE_INDEX: false,
    BUILDER: 'html',
    FILE_SUFFIX: '.html',
    LINK_SUFFIX: '.html',
    HAS_SOURCE: true,
    SOURCELINK_SUFFIX: '',
    NAVIGATION_WITH_KEYS: true
};
--------------------------------------------------------------------------------
/_build/html/_static/file.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/SocratesClub/statsbook/c5a012a41d83a0b48ce414d1e7b418cddf5ee90a/_build/html/_static/file.png
--------------------------------------------------------------------------------
/_build/html/_static/images/logo_binder.svg:
--------------------------------------------------------------------------------
1 |
2 |
3 |
20 |
--------------------------------------------------------------------------------
/_build/html/_static/images/logo_colab.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/SocratesClub/statsbook/c5a012a41d83a0b48ce414d1e7b418cddf5ee90a/_build/html/_static/images/logo_colab.png
--------------------------------------------------------------------------------
/_build/html/_static/images/logo_jupyterhub.svg:
--------------------------------------------------------------------------------
1 |
2 |
--------------------------------------------------------------------------------
/_build/html/_static/minus.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/SocratesClub/statsbook/c5a012a41d83a0b48ce414d1e7b418cddf5ee90a/_build/html/_static/minus.png
--------------------------------------------------------------------------------
/_build/html/_static/mystnb.css:
--------------------------------------------------------------------------------
1 | /* Whole cell */
2 | div.container.cell {
3 | padding-left: 0;
4 | margin-bottom: 1em;
5 | }
6 |
7 | /* Removing all background formatting so we can control at the div level */
8 | .cell_input div.highlight, .cell_output pre, .cell_input pre, .cell_output .output {
9 | border: none;
10 | box-shadow: none;
11 | }
12 |
13 | .cell_output .output pre, .cell_input pre {
14 | margin: 0px;
15 | }
16 |
17 | /* Input cells */
18 | div.cell div.cell_input {
19 | padding-left: 0em;
20 | padding-right: 0em;
21 | border: 1px #ccc solid;
22 | background-color: #f7f7f7;
23 | border-left-color: green;
24 | border-left-width: medium;
25 | }
26 |
27 | div.cell_input > div, div.cell_output div.output > div.highlight {
28 | margin: 0em !important;
29 | border: none !important;
30 | }
31 |
32 | /* All cell outputs */
33 | .cell_output {
34 | padding-left: 1em;
35 | padding-right: 0em;
36 | margin-top: 1em;
37 | }
38 |
/* Reset the extra container styling that jupyter_sphinx applies to outputs */
div.section div.jupyter_container {
  padding: .4em;
  margin: 0 0 .4em 0;
  /* `none` is not a valid colour value, so the previous declaration was
     discarded by browsers and the inherited background stayed in place. */
  background-color: transparent;
  border: none;
  -moz-box-shadow: none;
  -webkit-box-shadow: none;
  box-shadow: none;
}
49 |
50 | /* Text outputs from cells */
51 | .cell_output .output.text_plain,
52 | .cell_output .output.traceback,
53 | .cell_output .output.stream,
54 | .cell_output .output.stderr
55 | {
56 | background: #fcfcfc;
57 | margin-top: 1em;
58 | margin-bottom: 0em;
59 | box-shadow: none;
60 | }
61 |
62 | .cell_output .output.text_plain,
63 | .cell_output .output.stream,
64 | .cell_output .output.stderr {
65 | border: 1px solid #f7f7f7;
66 | }
67 |
68 | .cell_output .output.stderr {
69 | background: #fdd;
70 | }
71 |
72 | .cell_output .output.traceback {
73 | border: 1px solid #ffd6d6;
74 | }
75 |
76 | /* Math align to the left */
77 | .cell_output .MathJax_Display {
78 | text-align: left !important;
79 | }
80 |
81 | /* Pandas tables. Pulled from the Jupyter / nbsphinx CSS */
82 | div.cell_output table {
83 | border: none;
84 | border-collapse: collapse;
85 | border-spacing: 0;
86 | color: black;
87 | font-size: 1em;
88 | table-layout: fixed;
89 | }
90 | div.cell_output thead {
91 | border-bottom: 1px solid black;
92 | vertical-align: bottom;
93 | }
94 | div.cell_output tr,
95 | div.cell_output th,
96 | div.cell_output td {
97 | text-align: right;
98 | vertical-align: middle;
99 | padding: 0.5em 0.5em;
100 | line-height: normal;
101 | white-space: normal;
102 | max-width: none;
103 | border: none;
104 | }
105 | div.cell_output th {
106 | font-weight: bold;
107 | }
108 | div.cell_output tbody tr:nth-child(odd) {
109 | background: #f5f5f5;
110 | }
111 | div.cell_output tbody tr:hover {
112 | background: rgba(66, 165, 245, 0.2);
113 | }
114 |
115 |
116 | /* Inline text from `paste` operation */
117 |
118 | span.pasted-text {
119 | font-weight: bold;
120 | }
121 |
122 | span.pasted-inline img {
123 | max-height: 2em;
124 | }
125 |
126 | tbody span.pasted-inline img {
127 | max-height: none;
128 | }
129 |
130 | /* Font colors for translated ANSI escape sequences
131 | Color values are adapted from share/jupyter/nbconvert/templates/classic/static/style.css
132 | */
133 | div.highlight .-Color-Bold {
134 | font-weight: bold;
135 | }
136 | div.highlight .-Color[class*=-Black] {
137 | color :#3E424D
138 | }
139 | div.highlight .-Color[class*=-Red] {
140 | color: #E75C58
141 | }
142 | div.highlight .-Color[class*=-Green] {
143 | color: #00A250
144 | }
145 | div.highlight .-Color[class*=-Yellow] {
146 | color: yellow
147 | }
148 | div.highlight .-Color[class*=-Blue] {
149 | color: #208FFB
150 | }
151 | div.highlight .-Color[class*=-Magenta] {
152 | color: #D160C4
153 | }
154 | div.highlight .-Color[class*=-Cyan] {
155 | color: #60C6C8
156 | }
157 | div.highlight .-Color[class*=-White] {
158 | color: #C5C1B4
159 | }
160 | div.highlight .-Color[class*=-BGBlack] {
161 | background-color: #3E424D
162 | }
163 | div.highlight .-Color[class*=-BGRed] {
164 | background-color: #E75C58
165 | }
166 | div.highlight .-Color[class*=-BGGreen] {
167 | background-color: #00A250
168 | }
169 | div.highlight .-Color[class*=-BGYellow] {
170 | background-color: yellow
171 | }
172 | div.highlight .-Color[class*=-BGBlue] {
173 | background-color: #208FFB
174 | }
175 | div.highlight .-Color[class*=-BGMagenta] {
176 | background-color: #D160C4
177 | }
178 | div.highlight .-Color[class*=-BGCyan] {
179 | background-color: #60C6C8
180 | }
181 | div.highlight .-Color[class*=-BGWhite] {
182 | background-color: #C5C1B4
183 | }
184 |
--------------------------------------------------------------------------------
/_build/html/_static/panels-main.c949a650a448cc0ae9fd3441c0e17fb0.css:
--------------------------------------------------------------------------------
1 | details.dropdown .summary-title{padding-right:3em !important;-moz-user-select:none;-ms-user-select:none;-webkit-user-select:none;user-select:none}details.dropdown:hover{cursor:pointer}details.dropdown .summary-content{cursor:default}details.dropdown summary{list-style:none;padding:1em}details.dropdown summary .octicon.no-title{vertical-align:middle}details.dropdown[open] summary .octicon.no-title{visibility:hidden}details.dropdown summary::-webkit-details-marker{display:none}details.dropdown summary:focus{outline:none}details.dropdown summary:hover .summary-up svg,details.dropdown summary:hover .summary-down svg{opacity:1}details.dropdown .summary-up svg,details.dropdown .summary-down svg{display:block;opacity:.6}details.dropdown .summary-up,details.dropdown .summary-down{pointer-events:none;position:absolute;right:1em;top:.75em}details.dropdown[open] .summary-down{visibility:hidden}details.dropdown:not([open]) .summary-up{visibility:hidden}details.dropdown.fade-in[open] summary~*{-moz-animation:panels-fade-in .5s ease-in-out;-webkit-animation:panels-fade-in .5s ease-in-out;animation:panels-fade-in .5s ease-in-out}details.dropdown.fade-in-slide-down[open] summary~*{-moz-animation:panels-fade-in .5s ease-in-out, panels-slide-down .5s ease-in-out;-webkit-animation:panels-fade-in .5s ease-in-out, panels-slide-down .5s ease-in-out;animation:panels-fade-in .5s ease-in-out, panels-slide-down .5s ease-in-out}@keyframes panels-fade-in{0%{opacity:0}100%{opacity:1}}@keyframes panels-slide-down{0%{transform:translate(0, -10px)}100%{transform:translate(0, 0)}}.octicon{display:inline-block;fill:currentColor;vertical-align:text-top}.tabbed-content{box-shadow:0 -.0625rem var(--tabs-color-overline),0 .0625rem var(--tabs-color-underline);display:none;order:99;padding-bottom:.75rem;padding-top:.75rem;width:100%}.tabbed-content>:first-child{margin-top:0 !important}.tabbed-content>:last-child{margin-bottom:0 
!important}.tabbed-content>.tabbed-set{margin:0}.tabbed-set{border-radius:.125rem;display:flex;flex-wrap:wrap;margin:1em 0;position:relative}.tabbed-set>input{opacity:0;position:absolute}.tabbed-set>input:checked+label{border-color:var(--tabs-color-label-active);color:var(--tabs-color-label-active)}.tabbed-set>input:checked+label+.tabbed-content{display:block}.tabbed-set>input:focus+label{outline-style:auto}.tabbed-set>input:not(.focus-visible)+label{outline:none;-webkit-tap-highlight-color:transparent}.tabbed-set>label{border-bottom:.125rem solid transparent;color:var(--tabs-color-label-inactive);cursor:pointer;font-size:var(--tabs-size-label);font-weight:700;padding:1em 1.25em .5em;transition:color 250ms;width:auto;z-index:1}html .tabbed-set>label:hover{color:var(--tabs-color-label-active)}
2 |
--------------------------------------------------------------------------------
/_build/html/_static/panels-variables.06eb56fa6e07937060861dad626602ad.css:
--------------------------------------------------------------------------------
1 | :root {
2 | --tabs-color-label-active: hsla(231, 99%, 66%, 1);
3 | --tabs-color-label-inactive: rgba(178, 206, 245, 0.62);
4 | --tabs-color-overline: rgb(207, 236, 238);
5 | --tabs-color-underline: rgb(207, 236, 238);
6 | --tabs-size-label: 1rem;
7 | }
--------------------------------------------------------------------------------
/_build/html/_static/plus.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/SocratesClub/statsbook/c5a012a41d83a0b48ce414d1e7b418cddf5ee90a/_build/html/_static/plus.png
--------------------------------------------------------------------------------
/_build/html/_static/pygments.css:
--------------------------------------------------------------------------------
1 | .highlight .hll { background-color: #ffffcc }
2 | .highlight { background: #eeffcc; }
3 | .highlight .c { color: #408090; font-style: italic } /* Comment */
4 | .highlight .err { border: 1px solid #FF0000 } /* Error */
5 | .highlight .k { color: #007020; font-weight: bold } /* Keyword */
6 | .highlight .o { color: #666666 } /* Operator */
7 | .highlight .ch { color: #408090; font-style: italic } /* Comment.Hashbang */
8 | .highlight .cm { color: #408090; font-style: italic } /* Comment.Multiline */
9 | .highlight .cp { color: #007020 } /* Comment.Preproc */
10 | .highlight .cpf { color: #408090; font-style: italic } /* Comment.PreprocFile */
11 | .highlight .c1 { color: #408090; font-style: italic } /* Comment.Single */
12 | .highlight .cs { color: #408090; background-color: #fff0f0 } /* Comment.Special */
13 | .highlight .gd { color: #A00000 } /* Generic.Deleted */
14 | .highlight .ge { font-style: italic } /* Generic.Emph */
15 | .highlight .gr { color: #FF0000 } /* Generic.Error */
16 | .highlight .gh { color: #000080; font-weight: bold } /* Generic.Heading */
17 | .highlight .gi { color: #00A000 } /* Generic.Inserted */
18 | .highlight .go { color: #333333 } /* Generic.Output */
19 | .highlight .gp { color: #c65d09; font-weight: bold } /* Generic.Prompt */
20 | .highlight .gs { font-weight: bold } /* Generic.Strong */
21 | .highlight .gu { color: #800080; font-weight: bold } /* Generic.Subheading */
22 | .highlight .gt { color: #0044DD } /* Generic.Traceback */
23 | .highlight .kc { color: #007020; font-weight: bold } /* Keyword.Constant */
24 | .highlight .kd { color: #007020; font-weight: bold } /* Keyword.Declaration */
25 | .highlight .kn { color: #007020; font-weight: bold } /* Keyword.Namespace */
26 | .highlight .kp { color: #007020 } /* Keyword.Pseudo */
27 | .highlight .kr { color: #007020; font-weight: bold } /* Keyword.Reserved */
28 | .highlight .kt { color: #902000 } /* Keyword.Type */
29 | .highlight .m { color: #208050 } /* Literal.Number */
30 | .highlight .s { color: #4070a0 } /* Literal.String */
31 | .highlight .na { color: #4070a0 } /* Name.Attribute */
32 | .highlight .nb { color: #007020 } /* Name.Builtin */
33 | .highlight .nc { color: #0e84b5; font-weight: bold } /* Name.Class */
34 | .highlight .no { color: #60add5 } /* Name.Constant */
35 | .highlight .nd { color: #555555; font-weight: bold } /* Name.Decorator */
36 | .highlight .ni { color: #d55537; font-weight: bold } /* Name.Entity */
37 | .highlight .ne { color: #007020 } /* Name.Exception */
38 | .highlight .nf { color: #06287e } /* Name.Function */
39 | .highlight .nl { color: #002070; font-weight: bold } /* Name.Label */
40 | .highlight .nn { color: #0e84b5; font-weight: bold } /* Name.Namespace */
41 | .highlight .nt { color: #062873; font-weight: bold } /* Name.Tag */
42 | .highlight .nv { color: #bb60d5 } /* Name.Variable */
43 | .highlight .ow { color: #007020; font-weight: bold } /* Operator.Word */
44 | .highlight .w { color: #bbbbbb } /* Text.Whitespace */
45 | .highlight .mb { color: #208050 } /* Literal.Number.Bin */
46 | .highlight .mf { color: #208050 } /* Literal.Number.Float */
47 | .highlight .mh { color: #208050 } /* Literal.Number.Hex */
48 | .highlight .mi { color: #208050 } /* Literal.Number.Integer */
49 | .highlight .mo { color: #208050 } /* Literal.Number.Oct */
50 | .highlight .sa { color: #4070a0 } /* Literal.String.Affix */
51 | .highlight .sb { color: #4070a0 } /* Literal.String.Backtick */
52 | .highlight .sc { color: #4070a0 } /* Literal.String.Char */
53 | .highlight .dl { color: #4070a0 } /* Literal.String.Delimiter */
54 | .highlight .sd { color: #4070a0; font-style: italic } /* Literal.String.Doc */
55 | .highlight .s2 { color: #4070a0 } /* Literal.String.Double */
56 | .highlight .se { color: #4070a0; font-weight: bold } /* Literal.String.Escape */
57 | .highlight .sh { color: #4070a0 } /* Literal.String.Heredoc */
58 | .highlight .si { color: #70a0d0; font-style: italic } /* Literal.String.Interpol */
59 | .highlight .sx { color: #c65d09 } /* Literal.String.Other */
60 | .highlight .sr { color: #235388 } /* Literal.String.Regex */
61 | .highlight .s1 { color: #4070a0 } /* Literal.String.Single */
62 | .highlight .ss { color: #517918 } /* Literal.String.Symbol */
63 | .highlight .bp { color: #007020 } /* Name.Builtin.Pseudo */
64 | .highlight .fm { color: #06287e } /* Name.Function.Magic */
65 | .highlight .vc { color: #bb60d5 } /* Name.Variable.Class */
66 | .highlight .vg { color: #bb60d5 } /* Name.Variable.Global */
67 | .highlight .vi { color: #bb60d5 } /* Name.Variable.Instance */
68 | .highlight .vm { color: #bb60d5 } /* Name.Variable.Magic */
69 | .highlight .il { color: #208050 } /* Literal.Number.Integer.Long */
--------------------------------------------------------------------------------
/_build/html/_static/sphinx-book-theme.12a9622fbb08dcb3a2a40b2c02b83a57.js:
--------------------------------------------------------------------------------
// On viewports narrower than 768px, fire a click on the navbar toggler.
var initTriggerNavBar = () => {
  const viewportWidth = $(window).width()
  if (viewportWidth < 768) {
    $("#navbar-toggler").trigger("click")
  }
}
// Scroll the #site-navigation element so that the last ".active" entry is
// visible: when that entry sits below half the window height, position it
// 20% of the window height from the top of the navigation.
var scrollToActive = () => {
  const sidebar = document.getElementById('site-navigation')
  const activeEntries = sidebar.querySelectorAll(".active")
  const lastActive = activeEntries[activeEntries.length - 1]

  if (lastActive === undefined) {
    return
  }
  if (lastActive.offsetTop > ($(window).height() * .5)) {
    sidebar.scrollTop = lastActive.offsetTop - ($(window).height() * .2)
  }
}
// Call `cb` right away when the document is no longer loading; otherwise
// defer it to DOMContentLoaded, or to onreadystatechange (readyState
// 'complete') when only attachEvent is available.
var sbRunWhenDOMLoaded = cb => {
  if (document.readyState != 'loading') {
    cb()
    return
  }
  if (document.addEventListener) {
    document.addEventListener('DOMContentLoaded', cb)
    return
  }
  document.attachEvent('onreadystatechange', function () {
    if (document.readyState == 'complete') cb()
  })
}
// Toggle browser fullscreen for the page. Entering fullscreen clicks the
// navbar toggler unless it already has the "collapsed" class; leaving
// fullscreen clicks it when it does have that class.
function toggleFullScreen() {
  var toggler = $("#navbar-toggler");

  if (document.fullscreenElement) {
    // Already fullscreen: leave it, provided the API is available.
    if (document.exitFullscreen) {
      document.exitFullscreen();
      if (toggler.hasClass("collapsed")) {
        toggler.click();
      }
    }
    return;
  }

  document.documentElement.requestFullscreen();
  if (!toggler.hasClass("collapsed")) {
    toggler.click();
  }
}
// Once the document is ready, call .tooltip() on every element that has
// data-toggle="tooltip".
var initTooltips = () => {
  const enableTooltips = function () {
    $('[data-toggle="tooltip"]').tooltip();
  }
  $(document).ready(enableTooltips);
}
// Install a throttled scroll handler that
//   * removes the "show" class from div.bd-toc while margin/sidebar/full-width
//     content overlaps the area the table of contents occupies, and adds it
//     back otherwise;
//   * keeps a "scrolled" class on <body> whenever the page is scrolled down.
//
// NOTE(review): the comparisons inside checkTocScroll were missing from the
// file (everything between a "<" and the next ">" had been dropped, leaving
// invalid syntax). They are rebuilt here from the surviving fragments and the
// otherwise unused `hideTocAfter`; the names `bottomOverlaps` / `removeToc`
// and the `window.pageYOffset > 20` test are a reconstruction — confirm
// against the upstream sphinx-book-theme source for this build.
var initTocHide = () => {
  var scrollTimeout;
  var throttle = 200; // minimum milliseconds between two handler runs
  var tocHeight = $("#bd-toc-nav").outerHeight(true) + $(".bd-toc").outerHeight(true);
  var hideTocAfter = tocHeight + 200; // TOC height plus some padding

  var checkTocScroll = function () {
    var margin_content = $(".margin, .tag_margin, .full-width, .full_width, .tag_full-width, .tag_full_width, .sidebar, .tag_sidebar, .popout, .tag_popout");
    margin_content.each((index, item) => {
      // Position of the item relative to the top of the viewport
      var topOffset = $(item).offset().top - $(window).scrollTop();
      var bottomOffset = topOffset + $(item).outerHeight(true);

      // The item overlaps the TOC when either edge falls inside its band
      var topOverlaps = ((topOffset >= 0) && (topOffset < hideTocAfter));
      var bottomOverlaps = ((bottomOffset >= 0) && (bottomOffset < hideTocAfter));
      var removeToc = (topOverlaps || bottomOverlaps);

      if (removeToc && window.pageYOffset > 20) {
        $("div.bd-toc").removeClass("show")
        // Returning false from the callback stops the .each() iteration
        return false
      } else {
        $("div.bd-toc").addClass("show")
      };
    })
  };

  var manageScrolledClassOnBody = function () {
    if (window.scrollY > 0) {
      document.body.classList.add("scrolled");
    } else {
      document.body.classList.remove("scrolled");
    }
  }

  $(window).on('scroll', function () {
    // Throttle: ignore scroll events while a run is already scheduled
    if (!scrollTimeout) {
      scrollTimeout = setTimeout(function () {
        checkTocScroll();
        manageScrolledClassOnBody();
        scrollTimeout = null;
      }, throttle);
    }
  });
}
/**
 * Make sure a Thebe launch button follows the first `div.section h1`
 * title, then start Thebe through `initThebe()`.
 *
 * The button is only inserted when the element after the title does not
 * already carry the "thebe-launch-button" class, so calling this twice
 * does not add a second button.
 */
var initThebeSBT = () => {
  var title = $("div.section h1")[0];
  if (!$(title).next().hasClass("thebe-launch-button")) {
    // NOTE(review): the markup string was empty in the damaged copy of this
    // file, which makes the insert a no-op. Restored as a bare button with
    // the class tested above; confirm against upstream sphinx-book-theme.
    $("<button class='thebe-launch-button'></button>").insertAfter($(title));
  }
  initThebe();
};
// Schedule every page initializer, in this order, for when the DOM is ready.
for (const pageInitializer of [initTooltips, initTriggerNavBar, scrollToActive, initTocHide]) {
  sbRunWhenDOMLoaded(pageInitializer);
}
19 |
--------------------------------------------------------------------------------
/_build/html/_static/sphinx-thebe.css:
--------------------------------------------------------------------------------
1 | /* Thebelab Buttons */
2 | .thebelab-button {
3 | z-index: 999;
4 | display: inline-block;
5 | padding: 0.35em 1.2em;
6 | margin: 0px 1px;
7 | border-radius: 0.12em;
8 | box-sizing: border-box;
9 | text-decoration: none;
10 | font-family: 'Roboto', sans-serif;
11 | font-weight: 300;
12 | text-align: center;
13 | transition: all 0.2s;
14 | background-color: #dddddd;
15 | border: 0.05em solid white;
16 | color: #000000;
17 | }
18 |
19 | .thebelab-button:hover{
20 | border: 0.05em solid black;
21 | background-color: #fcfcfc;
22 | }
23 |
24 | .thebe-launch-button {
25 | height: 2.2em;
26 | font-size: .8em;
27 | border: 1px black solid;
28 | }
29 |
30 | /* Thebelab Cell */
31 | .thebelab-cell pre {
32 | background: none;
33 | }
34 |
35 | .thebelab-cell .thebelab-input {
36 | padding-left: 1em;
37 | margin-bottom: .5em;
38 | margin-top: .5em;
39 | }
40 |
41 | .thebelab-cell .jp-OutputArea {
42 | margin-top: .5em;
43 | margin-left: 1em;
44 | }
45 |
46 | button.thebelab-button.thebelab-run-button {
47 | margin-left: 1.5em;
48 | margin-bottom: .5em;
49 | }
50 |
51 | /* Loading button */
52 | button.thebe-launch-button div.spinner {
53 | float: left;
54 | margin-right: 1em;
55 | }
56 |
57 | /* Remove the spinner when thebelab is ready */
58 | .thebe-launch-button.thebe-status-ready .spinner {
59 | display: none;
60 | }
61 |
62 | .thebe-launch-button span.status {
63 | font-family: monospace;
64 | font-weight: bold;
65 | }
66 |
67 | .thebe-launch-button.thebe-status-ready span.status {
68 | color: green;
69 | }
70 |
71 | .spinner {
72 | height: 2em;
73 | text-align: center;
74 | font-size: 0.7em;
75 | }
76 |
77 | .spinner > div {
78 | background-color: #F37726;
79 | height: 100%;
80 | width: 6px;
81 | display: inline-block;
82 |
83 | -webkit-animation: sk-stretchdelay 1.2s infinite ease-in-out;
84 | animation: sk-stretchdelay 1.2s infinite ease-in-out;
85 | }
86 |
87 | .spinner .rect2 {
88 | -webkit-animation-delay: -1.1s;
89 | animation-delay: -1.1s;
90 | }
91 |
92 | .spinner .rect3 {
93 | -webkit-animation-delay: -1.0s;
94 | animation-delay: -1.0s;
95 | }
96 |
97 | .spinner .rect4 {
98 | -webkit-animation-delay: -0.9s;
99 | animation-delay: -0.9s;
100 | }
101 |
102 | .spinner .rect5 {
103 | -webkit-animation-delay: -0.8s;
104 | animation-delay: -0.8s;
105 | }
106 |
107 | @-webkit-keyframes sk-stretchdelay {
108 | 0%, 40%, 100% { -webkit-transform: scaleY(0.4) }
109 | 20% { -webkit-transform: scaleY(1.0) }
110 | }
111 |
112 | @keyframes sk-stretchdelay {
113 | 0%, 40%, 100% {
114 | transform: scaleY(0.4);
115 | -webkit-transform: scaleY(0.4);
116 | } 20% {
117 | transform: scaleY(1.0);
118 | -webkit-transform: scaleY(1.0);
119 | }
120 | }
121 |
--------------------------------------------------------------------------------
/_build/html/_static/sphinx-thebe.js:
--------------------------------------------------------------------------------
/**
 * Add attributes to Thebe blocks to initialize thebe properly.
 *
 * Steps, in order:
 *   1. If `window.thebelab` is not available yet, retry in 500 ms.
 *   2. Stop if a `div.thebe-cell` already exists (Thebe already ran).
 *   3. Put a spinner and a loading-text placeholder in every
 *      `.thebe-launch-button`.
 *   4. Subscribe to thebelab "status" events to update the button classes
 *      and text, and to click the run button of cells tagged
 *      `thebe-init` once the status is "ready".
 *   5. Mark the input of every cell matching `thebe_selector` as
 *      executable, move its output right after it, and bootstrap thebelab.
 *
 * Relies on globals defined outside this block: `thebe_selector`,
 * `thebe_selector_input`, `thebe_selector_output`, `kernelName`,
 * `detectLanguage` and jQuery (`$`).
 */
var initThebe = () => {
  // If Thebelab hasn't loaded yet, wait a bit and try again.
  if (window.thebelab === undefined) {
    console.log("thebe not loaded, retrying...");
    setTimeout(initThebe, 500)
    return
  }

  console.log("Adding thebe to code cells...");

  // Load thebe config in case we want to update it at some point.
  // Kept as a global (the original assigned it without declaring it), but
  // set explicitly so the code also works in strict mode.
  window.thebe_config = $('script[type="text/x-thebe-config"]')[0]

  // If we already detect a Thebe cell, don't re-run
  if (document.querySelectorAll('div.thebe-cell').length > 0) {
    return;
  }

  // Update thebe buttons with loading message.
  // NOTE(review): this markup was stripped in the damaged copy of this file;
  // it is rebuilt from the `.spinner`, `.rect*` and `.loading-text` hooks
  // used by the stylesheet and by the status handler below. Confirm
  // against upstream sphinx-thebe.
  $(".thebe-launch-button").each((ii, button) => {
    button.innerHTML = `
    <div class="spinner">
        <div class="rect1"></div>
        <div class="rect2"></div>
        <div class="rect3"></div>
        <div class="rect4"></div>
    </div>
    <span class="loading-text"></span>`;
  })

  // Set thebe event hooks
  var thebeStatus;
  thebelab.on("status", function (evt, data) {
    console.log("Status changed:", data.status, data.message);

    // Swap the previous status class for the new one and show the status.
    $(".thebe-launch-button")
      .removeClass("thebe-status-" + thebeStatus)
      .addClass("thebe-status-" + data.status)
      .find(".loading-text").html("Launching from mybinder.org: <span class='status'>" + data.status + "</span>");

    // Now update our thebe status
    thebeStatus = data.status;

    // Find any cells with an initialization tag and ask thebe to run them when ready
    if (data.status === "ready") {
      var thebeInitCells = document.querySelectorAll('.thebe-init, .tag_thebe-init');
      thebeInitCells.forEach((cell) => {
        console.log("Initializing Thebe with cell: " + cell.id);
        // A tagged cell may have no run button (e.g. it has no code input);
        // skip it instead of throwing and aborting the remaining cells.
        const runButton = cell.querySelector('.thebelab-run-button');
        if (runButton) {
          runButton.click();
        } else {
          console.warn("No thebelab run button found in cell: " + cell.id);
        }
      });
    }
  });

  // Find all code cells, replace with Thebe interactive code cells
  const codeCellId = index => `codecell${index}`;
  const codeCells = document.querySelectorAll(thebe_selector)
  codeCells.forEach((codeCell, index) => {
    codeCell.id = codeCellId(index);
    // Declared locally: the original leaked these three names as globals.
    const codeCellText = codeCell.querySelector(thebe_selector_input);
    const codeCellOutput = codeCell.querySelector(thebe_selector_output);

    // Clean up the language to make it work w/ CodeMirror and add it to the cell
    const dataLanguage = detectLanguage(kernelName);

    if (codeCellText) {
      codeCellText.setAttribute('data-language', dataLanguage);
      codeCellText.setAttribute('data-executable', 'true');

      // If we had an output, insert it just after the `pre` cell
      if (codeCellOutput) {
        $(codeCellOutput).attr("data-output", "");
        $(codeCellOutput).insertAfter(codeCellText);
      }
    }
  });

  // Init thebe
  thebelab.bootstrap();
}
87 |
/**
 * Map a kernel/language name to the name stored in `data-language`.
 *
 * Any name containing "python" becomes "python", the exact name "ir"
 * becomes "r", and every other name is returned unchanged.
 */
var detectLanguage = (language) => {
  const mentionsPython = language.indexOf('python') > -1;
  if (mentionsPython) {
    return "python";
  }
  return language === 'ir' ? "r" : language;
}
97 |
--------------------------------------------------------------------------------
/_build/html/_static/togglebutton.css:
--------------------------------------------------------------------------------
1 | /* Visibility of the target */
2 | .toggle, div.admonition.toggle .admonition-title ~ * {
3 | transition: opacity .5s, height .5s;
4 | }
5 |
6 | .toggle-hidden:not(.admonition) {
7 | visibility: hidden;
8 | opacity: 0;
9 | height: 1.5em;
10 | margin: 0px;
11 | padding: 0px;
12 | }
13 |
14 | /* Overrides for admonition toggles */
15 |
16 | /* Titles should cut off earlier to avoid overlapping w/ button */
17 | div.admonition.toggle p.admonition-title {
18 | padding-right: 25%;
19 | }
20 |
21 | /* hides all the content of a page until de-toggled */
22 | div.admonition.toggle-hidden .admonition-title ~ * {
23 | height: 0;
24 | margin: 0;
25 | float: left; /* so they overlap when hidden */
26 | opacity: 0;
27 | visibility: hidden;
28 | }
29 |
30 | /* Toggle buttons inside admonitions so we see the title */
31 | .toggle.admonition {
32 | position: relative;
33 | }
34 |
/* The title is a descendant of the admonition (see the other
   `.admonition-title` rules in this file), so it must be selected with a
   descendant combinator; the compound selector never matched anything. */
.toggle.admonition .admonition-title:after {
  content: "" !important;
}
38 |
39 | /* Note, we'll over-ride this in sphinx-book-theme */
40 | .toggle.admonition button.toggle-button {
41 | margin-right: 0.5em;
42 | right: 0em;
43 | position: absolute;
44 | top: .2em;
45 | }
46 |
47 | /* General button style */
48 | button.toggle-button {
49 | background: #999;
50 | border: none;
51 | z-index: 100;
52 | right: -2.5em;
53 | margin-left: -2.5em; /* A hack to keep code blocks from being pushed left */
54 | position: relative;
55 | float: right;
56 | border-radius: 100%;
57 | width: 1.5em;
58 | height: 1.5em;
59 | padding: 0px;
60 | }
61 |
62 | @media (min-width: 768px) {
63 | button.toggle-button.toggle-button-hidden:before {
64 | content: "Click to show";
65 | position: absolute;
66 | font-size: .8em;
67 | left: -6.5em;
68 | bottom: .4em;
69 | }
70 | }
71 |
72 |
73 | /* Plus / minus toggles */
74 | .toggle-button .bar {
75 | background-color: white;
76 | position: absolute;
77 | left: 15%;
78 | top: 43%;
79 | width: 16px;
80 | height: 3px;
81 | }
82 |
83 | .toggle-button .vertical {
84 | transition: all 0.25s ease-in-out;
85 | transform-origin: center;
86 | }
87 |
88 | .toggle-button-hidden .vertical {
89 | transform: rotate(-90deg);
90 | }
--------------------------------------------------------------------------------
/_build/html/_static/togglebutton.js:
--------------------------------------------------------------------------------
/**
 * Attach a collapse button to every element matching
 * `togglebuttonSelector`.
 *
 * Each matched element gets the id `toggle-<index>` and the "toggle"
 * class. Its button (id `button-toggle-<index>`) is inserted inside the
 * element for admonitions and right before it otherwise, is wired to
 * `toggleClickHandler`, and the element starts hidden unless it carries
 * the "toggle-shown" class.
 */
var initToggleItems = () => {
  var itemsToToggle = document.querySelectorAll(togglebuttonSelector);
  // Add the button to each admonition and hook up a callback to toggle visibility
  itemsToToggle.forEach((item, index) => {
    var toggleID = `toggle-${index}`;
    var buttonID = `button-${toggleID}`;
    // NOTE(review): this markup was stripped in the damaged copy of this
    // file. It is rebuilt from what the rest of the code reads: the button
    // id looked up below, `data-target` (toggleHidden), `data-button`
    // (toggleClickHandler, also on the bars since they can be the click
    // target) and the `.toggle-button .bar/.vertical` styles. Confirm
    // against upstream sphinx-togglebutton.
    var collapseButton = `
      <button id="${buttonID}" class="toggle-button" data-target="${toggleID}" data-button="${buttonID}">
          <div class="bar horizontal" data-button="${buttonID}"></div>
          <div class="bar vertical" data-button="${buttonID}"></div>
      </button>`;

    item.setAttribute('id', toggleID);

    // classList.add is a no-op when the class is already present.
    item.classList.add("toggle");

    // If it's an admonition block, then we'll add the button inside
    if (item.classList.contains("admonition")) {
      item.insertAdjacentHTML("afterbegin", collapseButton);
    } else {
      item.insertAdjacentHTML('beforebegin', collapseButton);
    }

    // Declared locally: the original leaked `thisButton` as a global.
    var thisButton = $(`#${buttonID}`);
    thisButton.on('click', toggleClickHandler);
    if (!item.classList.contains("toggle-shown")) {
      toggleHidden(thisButton[0]);
    }
  })
};
34 |
/**
 * Flip the hidden state of the element controlled by `button`.
 *
 * The controlled element is looked up by the id stored in the button's
 * `data-target` attribute. The "toggle-hidden" class on the element and
 * the "toggle-button-hidden" class on the button are added or removed
 * together.
 */
var toggleHidden = (button) => {
  // Declared locally: the original leaked `target` as a global.
  var target = button.dataset['target'];
  var itemToToggle = document.getElementById(target);
  if (itemToToggle.classList.contains("toggle-hidden")) {
    itemToToggle.classList.remove("toggle-hidden");
    button.classList.remove("toggle-button-hidden");
  } else {
    itemToToggle.classList.add("toggle-hidden");
    button.classList.add("toggle-button-hidden");
  }
}
47 |
/**
 * Click callback for toggle buttons.
 *
 * The clicked element names its button through `data-button`; that button
 * is looked up by id and handed to `toggleHidden`.
 */
var toggleClickHandler = (click) => {
  // Declared locally: the original leaked `button` as a global.
  var button = document.getElementById(click.target.dataset['button']);
  toggleHidden(button);
}
52 |
/**
 * Add the "toggle" class to every element matching a fixed selector.
 *
 * The selector is the empty string here, in which case nothing is queried
 * and nothing is changed.
 */
var addToggleToSelector = () => {
  const selector = "";
  if (selector.length === 0) {
    return;
  }
  const markAsToggle = (node) => {
    node.classList.add("toggle");
  };
  document.querySelectorAll(selector).forEach(markAsToggle);
}
62 |
/**
 * Run `cb` once the document has finished loading.
 *
 * Runs immediately when the document is past the 'loading' state,
 * otherwise waits for 'DOMContentLoaded', falling back to `attachEvent`
 * and a `readyState == 'complete'` check when `addEventListener` is
 * missing.
 */
const sphinxToggleRunWhenDOMLoaded = cb => {
  if (document.readyState != 'loading') {
    cb();
    return;
  }

  if (document.addEventListener) {
    document.addEventListener('DOMContentLoaded', cb);
    return;
  }

  document.attachEvent('onreadystatechange', function() {
    if (document.readyState == 'complete') cb();
  });
}
// Tag the extra elements first, then build the toggle buttons.
for (const toggleInitializer of [addToggleToSelector, initToggleItems]) {
  sphinxToggleRunWhenDOMLoaded(toggleInitializer);
}
77 |
--------------------------------------------------------------------------------
/_build/html/_static/vendor/fontawesome/5.13.0/LICENSE.txt:
--------------------------------------------------------------------------------
1 | Font Awesome Free License
2 | -------------------------
3 |
4 | Font Awesome Free is free, open source, and GPL friendly. You can use it for
5 | commercial projects, open source projects, or really almost whatever you want.
6 | Full Font Awesome Free license: https://fontawesome.com/license/free.
7 |
8 | # Icons: CC BY 4.0 License (https://creativecommons.org/licenses/by/4.0/)
9 | In the Font Awesome Free download, the CC BY 4.0 license applies to all icons
10 | packaged as SVG and JS file types.
11 |
12 | # Fonts: SIL OFL 1.1 License (https://scripts.sil.org/OFL)
13 | In the Font Awesome Free download, the SIL OFL license applies to all icons
14 | packaged as web and desktop font files.
15 |
16 | # Code: MIT License (https://opensource.org/licenses/MIT)
17 | In the Font Awesome Free download, the MIT license applies to all non-font and
18 | non-icon files.
19 |
20 | # Attribution
21 | Attribution is required by MIT, SIL OFL, and CC BY licenses. Downloaded Font
22 | Awesome Free files already contain embedded comments with sufficient
23 | attribution, so you shouldn't need to do anything additional when using these
24 | files normally.
25 |
26 | We've kept attribution comments terse, so we ask that you do not actively work
27 | to remove them from files, especially code. They're a great way for folks to
28 | learn about Font Awesome.
29 |
30 | # Brand Icons
31 | All brand icons are trademarks of their respective owners. The use of these
32 | trademarks does not indicate endorsement of the trademark holder by Font
33 | Awesome, nor vice versa. **Please do not use brand logos for any purpose except
34 | to represent the company, product, or service to which they refer.**
35 |
--------------------------------------------------------------------------------
/_build/html/_static/vendor/fontawesome/5.13.0/webfonts/fa-brands-400.eot:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/SocratesClub/statsbook/c5a012a41d83a0b48ce414d1e7b418cddf5ee90a/_build/html/_static/vendor/fontawesome/5.13.0/webfonts/fa-brands-400.eot
--------------------------------------------------------------------------------
/_build/html/_static/vendor/fontawesome/5.13.0/webfonts/fa-brands-400.ttf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/SocratesClub/statsbook/c5a012a41d83a0b48ce414d1e7b418cddf5ee90a/_build/html/_static/vendor/fontawesome/5.13.0/webfonts/fa-brands-400.ttf
--------------------------------------------------------------------------------
/_build/html/_static/vendor/fontawesome/5.13.0/webfonts/fa-brands-400.woff:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/SocratesClub/statsbook/c5a012a41d83a0b48ce414d1e7b418cddf5ee90a/_build/html/_static/vendor/fontawesome/5.13.0/webfonts/fa-brands-400.woff
--------------------------------------------------------------------------------
/_build/html/_static/vendor/fontawesome/5.13.0/webfonts/fa-brands-400.woff2:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/SocratesClub/statsbook/c5a012a41d83a0b48ce414d1e7b418cddf5ee90a/_build/html/_static/vendor/fontawesome/5.13.0/webfonts/fa-brands-400.woff2
--------------------------------------------------------------------------------
/_build/html/_static/vendor/fontawesome/5.13.0/webfonts/fa-regular-400.eot:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/SocratesClub/statsbook/c5a012a41d83a0b48ce414d1e7b418cddf5ee90a/_build/html/_static/vendor/fontawesome/5.13.0/webfonts/fa-regular-400.eot
--------------------------------------------------------------------------------
/_build/html/_static/vendor/fontawesome/5.13.0/webfonts/fa-regular-400.ttf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/SocratesClub/statsbook/c5a012a41d83a0b48ce414d1e7b418cddf5ee90a/_build/html/_static/vendor/fontawesome/5.13.0/webfonts/fa-regular-400.ttf
--------------------------------------------------------------------------------
/_build/html/_static/vendor/fontawesome/5.13.0/webfonts/fa-regular-400.woff:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/SocratesClub/statsbook/c5a012a41d83a0b48ce414d1e7b418cddf5ee90a/_build/html/_static/vendor/fontawesome/5.13.0/webfonts/fa-regular-400.woff
--------------------------------------------------------------------------------
/_build/html/_static/vendor/fontawesome/5.13.0/webfonts/fa-regular-400.woff2:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/SocratesClub/statsbook/c5a012a41d83a0b48ce414d1e7b418cddf5ee90a/_build/html/_static/vendor/fontawesome/5.13.0/webfonts/fa-regular-400.woff2
--------------------------------------------------------------------------------
/_build/html/_static/vendor/fontawesome/5.13.0/webfonts/fa-solid-900.eot:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/SocratesClub/statsbook/c5a012a41d83a0b48ce414d1e7b418cddf5ee90a/_build/html/_static/vendor/fontawesome/5.13.0/webfonts/fa-solid-900.eot
--------------------------------------------------------------------------------
/_build/html/_static/vendor/fontawesome/5.13.0/webfonts/fa-solid-900.ttf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/SocratesClub/statsbook/c5a012a41d83a0b48ce414d1e7b418cddf5ee90a/_build/html/_static/vendor/fontawesome/5.13.0/webfonts/fa-solid-900.ttf
--------------------------------------------------------------------------------
/_build/html/_static/vendor/fontawesome/5.13.0/webfonts/fa-solid-900.woff:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/SocratesClub/statsbook/c5a012a41d83a0b48ce414d1e7b418cddf5ee90a/_build/html/_static/vendor/fontawesome/5.13.0/webfonts/fa-solid-900.woff
--------------------------------------------------------------------------------
/_build/html/_static/vendor/fontawesome/5.13.0/webfonts/fa-solid-900.woff2:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/SocratesClub/statsbook/c5a012a41d83a0b48ce414d1e7b418cddf5ee90a/_build/html/_static/vendor/fontawesome/5.13.0/webfonts/fa-solid-900.woff2
--------------------------------------------------------------------------------
/_build/html/_static/webpack-macros.html:
--------------------------------------------------------------------------------
1 |
2 | {% macro head_pre_icons() %}
3 |
5 |
7 |
9 | {% endmacro %}
10 |
11 | {% macro head_pre_fonts() %}
12 | {% endmacro %}
13 |
14 | {% macro head_pre_bootstrap() %}
15 |
16 |
17 | {% endmacro %}
18 |
19 | {% macro head_js_preload() %}
20 |
21 | {% endmacro %}
22 |
23 | {% macro body_post() %}
24 |
25 | {% endmacro %}
--------------------------------------------------------------------------------
/_build/html/index.html:
--------------------------------------------------------------------------------
1 |
2 |
--------------------------------------------------------------------------------
/_build/html/objects.inv:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/SocratesClub/statsbook/c5a012a41d83a0b48ce414d1e7b418cddf5ee90a/_build/html/objects.inv
--------------------------------------------------------------------------------
/_build/html/reports/2-3-Python编程基础.log:
--------------------------------------------------------------------------------
1 | Traceback (most recent call last):
2 | File "/opt/anaconda3/lib/python3.7/site-packages/jupyter_cache/executors/utils.py", line 56, in single_nb_execution
3 | record_timing=False,
4 | File "/opt/anaconda3/lib/python3.7/site-packages/nbclient/client.py", line 1087, in execute
5 | return NotebookClient(nb=nb, resources=resources, km=km, **kwargs).execute()
6 | File "/opt/anaconda3/lib/python3.7/site-packages/nbclient/util.py", line 74, in wrapped
7 | return just_run(coro(*args, **kwargs))
8 | File "/opt/anaconda3/lib/python3.7/site-packages/nbclient/util.py", line 53, in just_run
9 | return loop.run_until_complete(coro)
10 | File "/opt/anaconda3/lib/python3.7/asyncio/base_events.py", line 583, in run_until_complete
11 | return future.result()
12 | File "/opt/anaconda3/lib/python3.7/site-packages/nbclient/client.py", line 541, in async_execute
13 | cell, index, execution_count=self.code_cells_executed + 1
14 | File "/opt/anaconda3/lib/python3.7/site-packages/nbclient/client.py", line 832, in async_execute_cell
15 | self._check_raise_for_error(cell, exec_reply)
16 | File "/opt/anaconda3/lib/python3.7/site-packages/nbclient/client.py", line 740, in _check_raise_for_error
17 | raise CellExecutionError.from_cell_and_msg(cell, exec_reply['content'])
18 | nbclient.exceptions.CellExecutionError: An error occurred while executing the following cell:
19 | ------------------
20 | "A" + 1
21 | ------------------
22 |
23 | [0;31m---------------------------------------------------------------------------[0m
24 | [0;31mTypeError[0m Traceback (most recent call last)
25 | [0;32m[0m in [0;36m[0;34m[0m
26 | [0;32m----> 1[0;31m [0;34m"A"[0m [0;34m+[0m [0;36m1[0m[0;34m[0m[0;34m[0m[0m
27 | [0m
28 | [0;31mTypeError[0m: can only concatenate str (not "int") to str
29 | TypeError: can only concatenate str (not "int") to str
30 |
31 |
--------------------------------------------------------------------------------
/_build/html/reports/3-7-参数估计.log:
--------------------------------------------------------------------------------
1 | Traceback (most recent call last):
2 | File "/opt/anaconda3/lib/python3.7/site-packages/jupyter_cache/executors/utils.py", line 56, in single_nb_execution
3 | record_timing=False,
4 | File "/opt/anaconda3/lib/python3.7/site-packages/nbclient/client.py", line 1087, in execute
5 | return NotebookClient(nb=nb, resources=resources, km=km, **kwargs).execute()
6 | File "/opt/anaconda3/lib/python3.7/site-packages/nbclient/util.py", line 74, in wrapped
7 | return just_run(coro(*args, **kwargs))
8 | File "/opt/anaconda3/lib/python3.7/site-packages/nbclient/util.py", line 53, in just_run
9 | return loop.run_until_complete(coro)
10 | File "/opt/anaconda3/lib/python3.7/asyncio/base_events.py", line 583, in run_until_complete
11 | return future.result()
12 | File "/opt/anaconda3/lib/python3.7/site-packages/nbclient/client.py", line 541, in async_execute
13 | cell, index, execution_count=self.code_cells_executed + 1
14 | File "/opt/anaconda3/lib/python3.7/site-packages/nbclient/client.py", line 832, in async_execute_cell
15 | self._check_raise_for_error(cell, exec_reply)
16 | File "/opt/anaconda3/lib/python3.7/site-packages/nbclient/client.py", line 740, in _check_raise_for_error
17 | raise CellExecutionError.from_cell_and_msg(cell, exec_reply['content'])
18 | nbclient.exceptions.CellExecutionError: An error occurred while executing the following cell:
19 | ------------------
20 | # 标准误差
21 | se = sigma / sp.sqrt(len(fish))
22 | se
23 | ------------------
24 |
25 | [0;31m---------------------------------------------------------------------------[0m
26 | [0;31mNameError[0m Traceback (most recent call last)
27 | [0;32m[0m in [0;36m[0;34m[0m
28 | [1;32m 1[0m [0;31m# 标准误差[0m[0;34m[0m[0;34m[0m[0;34m[0m[0m
29 | [0;32m----> 2[0;31m [0mse[0m [0;34m=[0m [0msigma[0m [0;34m/[0m [0msp[0m[0;34m.[0m[0msqrt[0m[0;34m([0m[0mlen[0m[0;34m([0m[0mfish[0m[0;34m)[0m[0;34m)[0m[0;34m[0m[0;34m[0m[0m
30 | [0m[1;32m 3[0m [0mse[0m[0;34m[0m[0;34m[0m[0m
31 |
32 | [0;31mNameError[0m: name 'sigma' is not defined
33 | NameError: name 'sigma' is not defined
34 |
35 |
--------------------------------------------------------------------------------
/_build/html/reports/7-3-Python中的Ridge回归与Lasso回归.log:
--------------------------------------------------------------------------------
1 | Traceback (most recent call last):
2 | File "/opt/anaconda3/lib/python3.7/site-packages/jupyter_cache/executors/utils.py", line 56, in single_nb_execution
3 | record_timing=False,
4 | File "/opt/anaconda3/lib/python3.7/site-packages/nbclient/client.py", line 1087, in execute
5 | return NotebookClient(nb=nb, resources=resources, km=km, **kwargs).execute()
6 | File "/opt/anaconda3/lib/python3.7/site-packages/nbclient/util.py", line 74, in wrapped
7 | return just_run(coro(*args, **kwargs))
8 | File "/opt/anaconda3/lib/python3.7/site-packages/nbclient/util.py", line 53, in just_run
9 | return loop.run_until_complete(coro)
10 | File "/opt/anaconda3/lib/python3.7/asyncio/base_events.py", line 583, in run_until_complete
11 | return future.result()
12 | File "/opt/anaconda3/lib/python3.7/site-packages/nbclient/client.py", line 541, in async_execute
13 | cell, index, execution_count=self.code_cells_executed + 1
14 | File "/opt/anaconda3/lib/python3.7/site-packages/nbclient/client.py", line 832, in async_execute_cell
15 | self._check_raise_for_error(cell, exec_reply)
16 | File "/opt/anaconda3/lib/python3.7/site-packages/nbclient/client.py", line 740, in _check_raise_for_error
17 | raise CellExecutionError.from_cell_and_msg(cell, exec_reply['content'])
18 | nbclient.exceptions.CellExecutionError: An error occurred while executing the following cell:
19 | ------------------
20 | # 对不同的 α 值进行 Lasso 回归
21 | lasso_alphas, lasso_coefs, _ = linear_model.lasso_path(
22 | X, y, fit_intercept = False)
23 | ------------------
24 |
25 | [0;31m---------------------------------------------------------------------------[0m
26 | [0;31mValueError[0m Traceback (most recent call last)
27 | [0;32m[0m in [0;36m[0;34m[0m
28 | [1;32m 1[0m [0;31m# 对不同的 α 值进行 Lasso 回归[0m[0;34m[0m[0;34m[0m[0;34m[0m[0m
29 | [1;32m 2[0m lasso_alphas, lasso_coefs, _ = linear_model.lasso_path(
30 | [0;32m----> 3[0;31m X, y, fit_intercept = False)
31 | [0m
32 | [0;32m/opt/anaconda3/lib/python3.7/site-packages/sklearn/linear_model/_coordinate_descent.py[0m in [0;36mlasso_path[0;34m(X, y, eps, n_alphas, alphas, precompute, Xy, copy_X, coef_init, verbose, return_n_iter, positive, **params)[0m
33 | [1;32m 359[0m [0mpositive[0m[0;34m=[0m[0mpositive[0m[0;34m,[0m[0;34m[0m[0;34m[0m[0m
34 | [1;32m 360[0m [0mreturn_n_iter[0m[0;34m=[0m[0mreturn_n_iter[0m[0;34m,[0m[0;34m[0m[0;34m[0m[0m
35 | [0;32m--> 361[0;31m [0;34m**[0m[0mparams[0m[0;34m,[0m[0;34m[0m[0;34m[0m[0m
36 | [0m[1;32m 362[0m )
37 | [1;32m 363[0m [0;34m[0m[0m
38 |
39 | [0;32m/opt/anaconda3/lib/python3.7/site-packages/sklearn/linear_model/_coordinate_descent.py[0m in [0;36menet_path[0;34m(X, y, l1_ratio, eps, n_alphas, alphas, precompute, Xy, copy_X, coef_init, verbose, return_n_iter, positive, check_input, **params)[0m
40 | [1;32m 505[0m [0;34m[0m[0m
41 | [1;32m 506[0m [0;32mif[0m [0mlen[0m[0;34m([0m[0mparams[0m[0;34m)[0m [0;34m>[0m [0;36m0[0m[0;34m:[0m[0;34m[0m[0;34m[0m[0m
42 | [0;32m--> 507[0;31m [0;32mraise[0m [0mValueError[0m[0;34m([0m[0;34m"Unexpected parameters in params"[0m[0;34m,[0m [0mparams[0m[0;34m.[0m[0mkeys[0m[0;34m([0m[0;34m)[0m[0;34m)[0m[0;34m[0m[0;34m[0m[0m
43 | [0m[1;32m 508[0m [0;34m[0m[0m
44 | [1;32m 509[0m [0;31m# We expect X and y to be already Fortran ordered when bypassing[0m[0;34m[0m[0;34m[0m[0;34m[0m[0m
45 |
46 | [0;31mValueError[0m: ('Unexpected parameters in params', dict_keys(['fit_intercept']))
47 | ValueError: ('Unexpected parameters in params', dict_keys(['fit_intercept']))
48 |
49 |
--------------------------------------------------------------------------------
/_build/jupyter_execute/2-2-认识Jupyter-Notebook.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "# 第 2 章 Python 与 Jupyter Notebook 基础\n",
8 | "\n",
9 | "## 第 2 节 认识 Jupyter Notebook"
10 | ]
11 | },
12 | {
13 | "cell_type": "markdown",
14 | "metadata": {},
15 | "source": [
16 | "### 3. 执行代码"
17 | ]
18 | },
19 | {
20 | "cell_type": "code",
21 | "execution_count": 1,
22 | "metadata": {
23 | "collapsed": true
24 | },
25 | "outputs": [
26 | {
27 | "data": {
28 | "text/plain": [
29 | "1"
30 | ]
31 | },
32 | "execution_count": 1,
33 | "metadata": {},
34 | "output_type": "execute_result"
35 | }
36 | ],
37 | "source": [
38 | "1"
39 | ]
40 | },
41 | {
42 | "cell_type": "markdown",
43 | "metadata": {},
44 | "source": [
45 | "### 5. Markdown 的用法"
46 | ]
47 | },
48 | {
49 | "cell_type": "markdown",
50 | "metadata": {
51 | "collapsed": true
52 | },
53 | "source": [
54 | "```\n",
55 | "# 一级标题\n",
56 | "## 二级标题\n",
57 | "### 三级标题\n",
58 | "#### 四级标题\n",
59 | "\n",
60 | "-----------------\n",
61 | "- 列表条目\n",
62 | "- 列表条目\n",
63 | "\n",
64 | "-----------------\n",
65 | "\n",
66 | "1. 序号条目\n",
67 | "2. 序号条目\n",
68 | "```"
69 | ]
70 | },
71 | {
72 | "cell_type": "code",
73 | "execution_count": null,
74 | "metadata": {
75 | "collapsed": true
76 | },
77 | "outputs": [],
78 | "source": []
79 | }
80 | ],
81 | "metadata": {
82 | "kernelspec": {
83 | "display_name": "Python 3",
84 | "language": "python",
85 | "name": "python3"
86 | },
87 | "language_info": {
88 | "codemirror_mode": {
89 | "name": "ipython",
90 | "version": 3
91 | },
92 | "file_extension": ".py",
93 | "mimetype": "text/x-python",
94 | "name": "python",
95 | "nbconvert_exporter": "python",
96 | "pygments_lexer": "ipython3",
97 | "version": "3.7.6"
98 | },
99 | "toc": {
100 | "base_numbering": 1,
101 | "nav_menu": {},
102 | "number_sections": false,
103 | "sideBar": true,
104 | "skip_h1_title": false,
105 | "title_cell": "Table of Contents",
106 | "title_sidebar": "Contents",
107 | "toc_cell": false,
108 | "toc_position": {},
109 | "toc_section_display": true,
110 | "toc_window_display": true
111 | }
112 | },
113 | "nbformat": 4,
114 | "nbformat_minor": 2
115 | }
--------------------------------------------------------------------------------
/_build/jupyter_execute/2-2-认识Jupyter-Notebook.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # coding: utf-8
3 |
4 | # # 第 2 章 Python 与 Jupyter Notebook 基础
5 | #
6 | # ## 第 2 节 认识 Jupyter Notebook
7 |
8 | # ### 3. 执行代码
9 |
10 | # In[1]:
11 |
12 |
13 | 1
14 |
15 |
16 | # ### 5. Markdown 的用法
17 |
18 | # ```
19 | # # 一级标题
20 | # ## 二级标题
21 | # ### 三级标题
22 | # #### 四级标题
23 | #
24 | # -----------------
25 | # - 列表条目
26 | # - 列表条目
27 | #
28 | # -----------------
29 | #
30 | # 1. 序号条目
31 | # 2. 序号条目
32 | # ```
33 |
34 | # In[ ]:
35 |
36 |
37 |
38 |
39 |
--------------------------------------------------------------------------------
/_build/jupyter_execute/2-3-Python编程基础.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # coding: utf-8
3 |
4 | # # 第 3 节 Python 编程基础
5 | #
6 | # ## 第 2 章 Python 与 Jupyter Notebook 基础|用 Python 动手学统计学
7 |
8 | # ### 1. 实现:四则运算
9 |
10 | # In[1]:
11 |
12 |
13 | 1 + 1
14 |
15 |
16 | # In[2]:
17 |
18 |
19 | 5 - 2
20 |
21 |
22 | # In[3]:
23 |
24 |
25 | 2 * 3
26 |
27 |
28 | # In[4]:
29 |
30 |
31 | 2 ** 3
32 |
33 |
34 | # In[5]:
35 |
36 |
37 | 6 / 3
38 |
39 |
40 | # In[6]:
41 |
42 |
43 | 7 // 3
44 |
45 |
46 | # ### 2. 实现:编写注释
47 |
48 | # In[7]:
49 |
50 |
51 | # 1 + 1
52 |
53 |
54 | # ### 3. 实现:数据类型
55 |
56 | # In[8]:
57 |
58 |
59 | "A"
60 |
61 |
62 | # In[9]:
63 |
64 |
65 | 'A'
66 |
67 |
68 | # In[10]:
69 |
70 |
71 | # 字符串
72 | type("A")
73 |
74 |
75 | # In[11]:
76 |
77 |
78 | type('A')
79 |
80 |
81 | # In[12]:
82 |
83 |
84 | # 整型
85 | type(1)
86 |
87 |
88 | # In[13]:
89 |
90 |
91 | # 浮点型
92 | type(2.4)
93 |
94 |
95 | # In[14]:
96 |
97 |
98 | # 布尔型
99 | type(True)
100 |
101 |
102 | # In[15]:
103 |
104 |
105 | # 布尔型
106 | type(False)
107 |
108 |
109 | # In[16]:
110 |
111 |
112 | "A" + 1
113 |
114 |
115 | # ### 4. 实现:比较运算符
116 |
117 | # In[17]:
118 |
119 |
120 | 1 > 0.89
121 |
122 |
123 | # In[18]:
124 |
125 |
126 | 3 >= 2
127 |
128 |
129 | # In[19]:
130 |
131 |
132 | 3 < 2
133 |
134 |
135 | # In[20]:
136 |
137 |
138 | 3 <= 2
139 |
140 |
141 | # In[21]:
142 |
143 |
144 | 3 == 2
145 |
146 |
147 | # In[22]:
148 |
149 |
150 | 3 != 2
151 |
152 |
153 | # ### 5. 实现:变量
154 |
155 | # In[23]:
156 |
157 |
158 | x = 2
159 | x + 1
160 |
161 |
162 | # ### 6. 实现:函数
163 |
164 | # In[24]:
165 |
166 |
167 | (x + 2) * 4
168 |
169 |
170 | # In[25]:
171 |
172 |
173 | def sample_function(data):
174 | return((data + 2) * 4)
175 |
176 |
177 | # In[26]:
178 |
179 |
180 | sample_function(x)
181 |
182 |
183 | # In[27]:
184 |
185 |
186 | sample_function(3)
187 |
188 |
189 | # In[28]:
190 |
191 |
192 | sample_function(x) + sample_function(3)
193 |
194 |
195 | # ### 7. 实现:类与实例
196 |
197 | # In[29]:
198 |
199 |
200 | class Sample_Class:
201 | def __init__(self, data1, data2):
202 | self.data1 = data1
203 | self.data2 = data2
204 |
205 | def method2(self):
206 | return(self.data1 + self.data2)
207 |
208 |
209 | # In[30]:
210 |
211 |
212 | sample_instance = Sample_Class(data1 = 2, data2 = 3)
213 |
214 |
215 | # In[31]:
216 |
217 |
218 | sample_instance.data1
219 |
220 |
221 | # In[32]:
222 |
223 |
224 | sample_instance.method2()
225 |
226 |
227 | # ### 8. 实现:基于 if 语句的程序分支
228 |
229 | # In[3]:
230 |
231 |
232 | data = 1
233 | if(data < 2):
234 | print("数字小于 2")
235 | else:
236 | print("数字不小于 2")
237 |
238 |
239 | # In[2]:
240 |
241 |
242 | data = 3
243 | if(data < 2):
244 | print("数字小于 2")
245 | else:
246 | print("数字不小于 2")
247 |
248 |
249 | # ### 9. 实现:基于 for 语句的循环
250 |
251 | # In[35]:
252 |
253 |
254 | range(0, 3)
255 |
256 |
257 | # In[36]:
258 |
259 |
260 | for i in range(0, 3):
261 | print(i)
262 |
263 |
264 | # In[37]:
265 |
266 |
267 | for i in range(0, 3):
268 | print("hello")
269 |
270 |
271 | # In[ ]:
272 |
273 |
274 |
275 |
276 |
--------------------------------------------------------------------------------
/_build/jupyter_execute/2-4-认识numpy与pandas.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # coding: utf-8
3 |
4 | # # 第 4 节 认识 numpy 与 pandas
5 | #
6 | # ## 第 2 章 Python 与 Jupyter Notebook 基础|用 Python 动手学统计学
7 |
8 | # ### 1. 导入用于分析的功能
9 |
10 | # In[1]:
11 |
12 |
13 | import numpy as np
14 | import pandas as pd
15 |
16 |
17 | # ### 3. 实现:列表
18 |
19 | # In[2]:
20 |
21 |
22 | sample_list = [1,2,3,4,5]
23 | sample_list
24 |
25 |
26 | # ### 5. 实现:数组
27 |
28 | # In[3]:
29 |
30 |
31 | sample_array = np.array([1,2,3,4,5])
32 | sample_array
33 |
34 |
35 | # In[4]:
36 |
37 |
38 | sample_array + 2
39 |
40 |
41 | # In[5]:
42 |
43 |
44 | sample_array * 2
45 |
46 |
47 | # In[6]:
48 |
49 |
50 | np.array([1 ,2, "A"])
51 |
52 |
53 | # In[7]:
54 |
55 |
56 | # 矩阵
57 | sample_array_2 = np.array(
58 | [[1,2,3,4,5],
59 | [6,7,8,9,10]])
60 | sample_array_2
61 |
62 |
63 | # In[8]:
64 |
65 |
66 | # 获取行数与列数
67 | sample_array_2.shape
68 |
69 |
70 | # ### 6. 实现:生成等差数列的方法
71 |
72 | # In[9]:
73 |
74 |
75 | np.arange(start = 1, stop = 6, step = 1)
76 |
77 |
78 | # In[10]:
79 |
80 |
81 | np.arange(start = 0.1, stop = 0.8, step = 0.2)
82 |
83 |
84 | # In[11]:
85 |
86 |
87 | np.arange(0.1, 0.8, 0.2)
88 |
89 |
90 | # ### 7. 实现:多种生成数组的方式
91 |
92 | # In[12]:
93 |
94 |
95 | # 元素相同的数组
96 | np.tile("A", 5)
97 |
98 |
99 | # In[13]:
100 |
101 |
102 | # 存放 4 个 0
103 | np.tile(0, 4)
104 |
105 |
106 | # In[14]:
107 |
108 |
109 | # 只有 0 的数组
110 | np.zeros(4)
111 |
112 |
113 | # In[15]:
114 |
115 |
116 | # 二维数组
117 | np.zeros([2,3])
118 |
119 |
120 | # In[16]:
121 |
122 |
123 | # 只有 1 的数组
124 | np.ones(3)
125 |
126 |
127 | # ### 8. 实现:切片
128 |
129 | # In[17]:
130 |
131 |
132 | # 一维数组
133 | d1_array = np.array([1,2,3,4,5])
134 | d1_array
135 |
136 |
137 | # In[18]:
138 |
139 |
140 | # 取得第一个元素
141 | d1_array[0]
142 |
143 |
144 | # In[19]:
145 |
146 |
147 | # 获取索引中的 1 号和 2 号元素
148 | d1_array[1:3]
149 |
150 |
151 | # In[20]:
152 |
153 |
154 | # 二维数组
155 | d2_array = np.array(
156 | [[1,2,3,4,5],
157 | [6,7,8,9,10]])
158 | d2_array
159 |
160 |
161 | # In[21]:
162 |
163 |
164 | d2_array[0, 3]
165 |
166 |
167 | # In[22]:
168 |
169 |
170 | d2_array[1, 2:4]
171 |
172 |
173 | # ### 9. 实现:数据帧
174 |
175 | # In[23]:
176 |
177 |
178 | sample_df = pd.DataFrame({
179 | 'col1' : sample_array,
180 | 'col2' : sample_array * 2,
181 | 'col3' : ["A", "B", "C", "D", "E"]
182 | })
183 | print(sample_df)
184 |
185 |
186 | # In[24]:
187 |
188 |
189 | sample_df
190 |
191 |
192 | # ### 10. 实现:读取文件中的数据
193 |
194 | # In[25]:
195 |
196 |
197 | file_data = pd.read_csv("2-4-1-sample_data.csv")
198 | print(file_data)
199 |
200 |
201 | # In[26]:
202 |
203 |
204 | type(file_data)
205 |
206 |
207 | # ### 11. 实现:连接数据帧
208 |
209 | # In[27]:
210 |
211 |
212 | df_1 = pd.DataFrame({
213 | 'col1' : np.array([1, 2, 3]),
214 | 'col2' : np.array(["A", "B", "C"])
215 | })
216 | df_2 = pd.DataFrame({
217 | 'col1' : np.array([4, 5, 6]),
218 | 'col2' : np.array(["D", "E", "F"])
219 | })
220 |
221 |
222 | # In[28]:
223 |
224 |
225 | # 在纵向上连接
226 | print(pd.concat([df_1, df_2]))
227 |
228 |
229 | # In[29]:
230 |
231 |
232 | # 在横向上连接
233 | print(pd.concat([df_1, df_2], axis = 1))
234 |
235 |
236 | # ### 12. 实现:数据帧的列操作
237 |
238 | # In[30]:
239 |
240 |
241 | # 对象数据
242 | print(sample_df)
243 |
244 |
245 | # In[31]:
246 |
247 |
248 | # 按列名获取数据
249 | print(sample_df.col2)
250 |
251 |
252 | # In[32]:
253 |
254 |
255 | print(sample_df["col2"])
256 |
257 |
258 | # In[33]:
259 |
260 |
261 | print(sample_df[["col2", "col3"]])
262 |
263 |
264 | # In[34]:
265 |
266 |
267 | # 删除指定的列
268 | print(sample_df.drop("col1", axis = 1))
269 |
270 |
271 | # ### 13. 实现:数据帧的行操作
272 |
273 | # In[35]:
274 |
275 |
276 | # 获取前 3 行
277 | print(sample_df.head(n = 3))
278 |
279 |
280 | # In[36]:
281 |
282 |
283 | # 获取第 1 行
284 | print(sample_df.query('index == 0'))
285 |
286 |
287 | # In[37]:
288 |
289 |
290 | # 通过多种条件获取数据
291 | print(sample_df.query('col3 == "A"'))
292 |
293 |
294 | # In[38]:
295 |
296 |
297 | # 按 OR 条件获取数据
298 | print(sample_df.query('col3 == "A" | col3 == "D"'))
299 |
300 |
301 | # In[39]:
302 |
303 |
304 | # 按 AND 条件获取数据
305 | print(sample_df.query('col3 == "A" & col1 == 3'))
306 |
307 |
308 | # In[40]:
309 |
310 |
311 | # 同时指定行和列的条件
312 | print(sample_df.query('col3 == "A"')[["col2", "col3"]])
313 |
314 |
315 | # ### 14. 补充:序列
316 |
317 | # In[41]:
318 |
319 |
320 | type(sample_df)
321 |
322 |
323 | # In[42]:
324 |
325 |
326 | type(sample_df.col1)
327 |
328 |
329 | # In[43]:
330 |
331 |
332 | # 转换为数组
333 | type(np.array(sample_df.col1))
334 |
335 |
336 | # In[44]:
337 |
338 |
339 | type(sample_df.col1.values)
340 |
341 |
342 | # ### 15. 补充:函数文档
343 |
344 | # In[45]:
345 |
346 |
347 | help(sample_df.query)
348 |
349 |
350 | # In[ ]:
351 |
352 |
353 |
354 |
355 |
--------------------------------------------------------------------------------
/_build/jupyter_execute/3-1-使用Python进行描述统计单变量.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # coding: utf-8
3 |
4 | #
5 | # # 第 3 章 使用 Python 进行数据分析
6 | #
7 | # ## 第 1 节 使用 Python 进行描述统计:单变量
8 | #
9 | #
10 |
11 | # ### 1. 统计分析与 scipy
12 |
13 | # In[1]:
14 |
15 |
16 | # 用于数值计算的库
17 | import numpy as np
18 | import scipy as sp
19 |
20 | # 设置浮点数打印精度
21 | get_ipython().run_line_magic('precision', '3')
22 |
23 |
24 | # ### 2. 单变量数据的操作
25 |
26 | # In[2]:
27 |
28 |
29 | fish_data = np.array([2,3,3,4,4,4,4,5,5,6])
30 | fish_data
31 |
32 |
33 | # ### 3. 实现:总和与样本容量
34 |
35 | # In[3]:
36 |
37 |
38 | # 总和
39 | sp.sum(fish_data)
40 |
41 |
42 | # In[4]:
43 |
44 |
45 | # 参考
46 | np.sum(fish_data)
47 |
48 |
49 | # In[5]:
50 |
51 |
52 | # 参考
53 | fish_data.sum()
54 |
55 |
56 | # In[6]:
57 |
58 |
59 | # 参考
60 | sum(fish_data)
61 |
62 |
63 | # In[7]:
64 |
65 |
66 | # 样本容量
67 | len(fish_data)
68 |
69 |
70 | # ### 4. 实现:均值(期望值)
71 |
72 | # In[8]:
73 |
74 |
75 | # 计算均值
76 | N = len(fish_data)
77 | sum_value = sp.sum(fish_data)
78 | mu = sum_value / N
79 | mu
80 |
81 |
82 | # In[9]:
83 |
84 |
85 | # 计算均值的函数
86 | sp.mean(fish_data)
87 |
88 |
89 | # ### 5. 实现:样本方差
90 |
91 | # In[10]:
92 |
93 |
94 | # 样本方差
95 | sigma_2_sample = sp.sum((fish_data - mu) ** 2) / N
96 | sigma_2_sample
97 |
98 |
99 | # In[11]:
100 |
101 |
102 | fish_data
103 |
104 |
105 | # In[12]:
106 |
107 |
108 | fish_data - mu
109 |
110 |
111 | # In[13]:
112 |
113 |
114 | (fish_data - mu) ** 2
115 |
116 |
117 | # In[14]:
118 |
119 |
120 | sp.sum((fish_data - mu) ** 2)
121 |
122 |
123 | # In[15]:
124 |
125 |
126 | # 计算样本方差的函数
127 | sp.var(fish_data, ddof = 0)
128 |
129 |
130 | # ### 6. 实现:无偏方差
131 |
132 | # In[16]:
133 |
134 |
135 | # 无偏方差
136 | sigma_2 = sp.sum((fish_data - mu) ** 2) / (N - 1)
137 | sigma_2
138 |
139 |
140 | # In[17]:
141 |
142 |
143 | # 无偏方差
144 | sp.var(fish_data, ddof = 1)
145 |
146 |
147 | # ### 7. 实现:标准差
148 |
149 | # In[18]:
150 |
151 |
152 | # 标准差
153 | sigma = sp.sqrt(sigma_2)
154 | sigma
155 |
156 |
157 | # In[19]:
158 |
159 |
160 | # 计算标准差的函数
161 | sp.std(fish_data, ddof = 1)
162 |
163 |
164 | # ### 8. 补充:标准化
165 |
166 | # In[20]:
167 |
168 |
169 | fish_data - mu
170 |
171 |
172 | # In[21]:
173 |
174 |
175 | sp.mean(fish_data - mu)
176 |
177 |
178 | # In[22]:
179 |
180 |
181 | fish_data / sigma
182 |
183 |
184 | # In[23]:
185 |
186 |
187 | sp.std(fish_data / sigma, ddof = 1)
188 |
189 |
190 | # In[24]:
191 |
192 |
193 | standard = (fish_data - mu) / sigma
194 | standard
195 |
196 |
197 | # In[25]:
198 |
199 |
200 | sp.mean(standard)
201 |
202 |
203 | # In[26]:
204 |
205 |
206 | sp.std(standard, ddof = 1)
207 |
208 |
209 | # ### 9. 补充:其他统计量
210 |
211 | # In[27]:
212 |
213 |
214 | # 最大值
215 | sp.amax(fish_data)
216 |
217 |
218 | # In[28]:
219 |
220 |
221 | # 最小值
222 | sp.amin(fish_data)
223 |
224 |
225 | # In[29]:
226 |
227 |
228 | # 中位数
229 | sp.median(fish_data)
230 |
231 |
232 | # In[30]:
233 |
234 |
235 | fish_data_2 = np.array([2,3,3,4,4,4,4,5,5,100])
236 |
237 |
238 | # In[31]:
239 |
240 |
241 | sp.mean(fish_data_2)
242 |
243 |
244 | # In[32]:
245 |
246 |
247 | sp.median(fish_data_2)
248 |
249 |
250 | # ### 10. 实现:scipy.stats 与四分位数
251 |
252 | # In[33]:
253 |
254 |
255 | from scipy import stats
256 |
257 |
258 | # In[34]:
259 |
260 |
261 | fish_data_3 = np.array([1,2,3,4,5,6,7,8,9])
262 | stats.scoreatpercentile(fish_data_3, 25)
263 |
264 |
265 | # In[35]:
266 |
267 |
268 | stats.scoreatpercentile(fish_data_3, 75)
269 |
270 |
--------------------------------------------------------------------------------
/_build/jupyter_execute/3-1-使用Python进行描述统计:单变量.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # coding: utf-8
3 |
4 | # # 第 1 节 使用 Python 进行描述统计:单变量
5 | #
6 | # ## 第 3 章 使用 Python 进行数据分析|用 Python 动手学统计学
7 |
8 | # ### 1. 统计分析与 scipy
9 |
10 | # In[1]:
11 |
12 |
13 | # 用于数值计算的库
14 | import numpy as np
15 | import scipy as sp
16 |
17 | # 设置浮点数打印精度
18 | get_ipython().run_line_magic('precision', '3')
19 |
20 |
21 | # ### 2. 单变量数据的操作
22 |
23 | # In[2]:
24 |
25 |
26 | fish_data = np.array([2,3,3,4,4,4,4,5,5,6])
27 | fish_data
28 |
29 |
30 | # ### 3. 实现:总和与样本容量
31 |
32 | # In[3]:
33 |
34 |
35 | # 总和
36 | sp.sum(fish_data)
37 |
38 |
39 | # In[4]:
40 |
41 |
42 | # 参考
43 | np.sum(fish_data)
44 |
45 |
46 | # In[5]:
47 |
48 |
49 | # 参考
50 | fish_data.sum()
51 |
52 |
53 | # In[6]:
54 |
55 |
56 | # 参考
57 | sum(fish_data)
58 |
59 |
60 | # In[7]:
61 |
62 |
63 | # 样本容量
64 | len(fish_data)
65 |
66 |
67 | # ### 4. 实现:均值(期望值)
68 |
69 | # In[8]:
70 |
71 |
72 | # 计算均值
73 | N = len(fish_data)
74 | sum_value = sp.sum(fish_data)
75 | mu = sum_value / N
76 | mu
77 |
78 |
79 | # In[9]:
80 |
81 |
82 | # 计算均值的函数
83 | sp.mean(fish_data)
84 |
85 |
86 | # ### 5. 实现:样本方差
87 |
88 | # In[10]:
89 |
90 |
91 | # 样本方差
92 | sigma_2_sample = sp.sum((fish_data - mu) ** 2) / N
93 | sigma_2_sample
94 |
95 |
96 | # In[11]:
97 |
98 |
99 | fish_data
100 |
101 |
102 | # In[12]:
103 |
104 |
105 | fish_data - mu
106 |
107 |
108 | # In[13]:
109 |
110 |
111 | (fish_data - mu) ** 2
112 |
113 |
114 | # In[14]:
115 |
116 |
117 | sp.sum((fish_data - mu) ** 2)
118 |
119 |
120 | # In[15]:
121 |
122 |
123 | # 计算样本方差的函数
124 | sp.var(fish_data, ddof = 0)
125 |
126 |
127 | # ### 6. 实现:无偏方差
128 |
129 | # In[16]:
130 |
131 |
132 | # 无偏方差
133 | sigma_2 = sp.sum((fish_data - mu) ** 2) / (N - 1)
134 | sigma_2
135 |
136 |
137 | # In[17]:
138 |
139 |
140 | # 无偏方差
141 | sp.var(fish_data, ddof = 1)
142 |
143 |
144 | # ### 7. 实现:标准差
145 |
146 | # In[18]:
147 |
148 |
149 | # 标准差
150 | sigma = sp.sqrt(sigma_2)
151 | sigma
152 |
153 |
154 | # In[19]:
155 |
156 |
157 | # 计算标准差的函数
158 | sp.std(fish_data, ddof = 1)
159 |
160 |
161 | # ### 8. 补充:标准化
162 |
163 | # In[20]:
164 |
165 |
166 | fish_data - mu
167 |
168 |
169 | # In[21]:
170 |
171 |
172 | sp.mean(fish_data - mu)
173 |
174 |
175 | # In[22]:
176 |
177 |
178 | fish_data / sigma
179 |
180 |
181 | # In[23]:
182 |
183 |
184 | sp.std(fish_data / sigma, ddof = 1)
185 |
186 |
187 | # In[24]:
188 |
189 |
190 | standard = (fish_data - mu) / sigma
191 | standard
192 |
193 |
194 | # In[25]:
195 |
196 |
197 | sp.mean(standard)
198 |
199 |
200 | # In[26]:
201 |
202 |
203 | sp.std(standard, ddof = 1)
204 |
205 |
206 | # ### 9. 补充:其他统计量
207 |
208 | # In[27]:
209 |
210 |
211 | # 最大值
212 | sp.amax(fish_data)
213 |
214 |
215 | # In[28]:
216 |
217 |
218 | # 最小值
219 | sp.amin(fish_data)
220 |
221 |
222 | # In[29]:
223 |
224 |
225 | # 中位数
226 | sp.median(fish_data)
227 |
228 |
229 | # In[30]:
230 |
231 |
232 | fish_data_2 = np.array([2,3,3,4,4,4,4,5,5,100])
233 |
234 |
235 | # In[31]:
236 |
237 |
238 | sp.mean(fish_data_2)
239 |
240 |
241 | # In[32]:
242 |
243 |
244 | sp.median(fish_data_2)
245 |
246 |
247 | # ### 10. 实现:scipy.stats 与四分位数
248 |
249 | # In[33]:
250 |
251 |
252 | from scipy import stats
253 |
254 |
255 | # In[34]:
256 |
257 |
258 | fish_data_3 = np.array([1,2,3,4,5,6,7,8,9])
259 | stats.scoreatpercentile(fish_data_3, 25)
260 |
261 |
262 | # In[35]:
263 |
264 |
265 | stats.scoreatpercentile(fish_data_3, 75)
266 |
267 |
--------------------------------------------------------------------------------
/_build/jupyter_execute/3-10-列联表检验.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "# 第 10 节 列联表检验\n",
8 | "\n",
9 | "## 第 3 章 使用 Python 进行数据分析|用 Python 动手学统计学"
10 | ]
11 | },
12 | {
13 | "cell_type": "markdown",
14 | "metadata": {},
15 | "source": [
16 | "### 5. 实现:计算 p 值"
17 | ]
18 | },
19 | {
20 | "cell_type": "code",
21 | "execution_count": 1,
22 | "metadata": {
23 | "collapsed": true
24 | },
25 | "outputs": [],
26 | "source": [
27 | "# 用于数值计算的库\n",
28 | "import numpy as np\n",
29 | "import pandas as pd\n",
30 | "import scipy as sp\n",
31 | "from scipy import stats\n",
32 | "\n",
33 | "# 用于绘图的库\n",
34 | "from matplotlib import pyplot as plt\n",
35 | "import seaborn as sns\n",
36 | "sns.set()\n",
37 | "\n",
38 | "# 设置浮点数打印精度\n",
39 | "%precision 3\n",
40 | "# 在 Jupyter Notebook 里显示图形\n",
41 | "%matplotlib inline"
42 | ]
43 | },
44 | {
45 | "cell_type": "code",
46 | "execution_count": 2,
47 | "metadata": {},
48 | "outputs": [
49 | {
50 | "data": {
51 | "text/plain": [
52 | "0.009821437357809604"
53 | ]
54 | },
55 | "execution_count": 2,
56 | "metadata": {},
57 | "output_type": "execute_result"
58 | }
59 | ],
60 | "source": [
61 | "# 计算 p 值\n",
62 | "1 - sp.stats.chi2.cdf(x = 6.667, df = 1)"
63 | ]
64 | },
65 | {
66 | "cell_type": "markdown",
67 | "metadata": {},
68 | "source": [
69 | "### 6. 实现:列联表检验"
70 | ]
71 | },
72 | {
73 | "cell_type": "code",
74 | "execution_count": 3,
75 | "metadata": {
76 | "scrolled": true
77 | },
78 | "outputs": [
79 | {
80 | "name": "stdout",
81 | "output_type": "stream",
82 | "text": [
83 | " color click freq\n",
84 | "0 blue click 20\n",
85 | "1 blue not 230\n",
86 | "2 red click 10\n",
87 | "3 red not 40\n"
88 | ]
89 | }
90 | ],
91 | "source": [
92 | "# 读入数据\n",
93 | "click_data = pd.read_csv(\"3-10-1-click_data.csv\")\n",
94 | "print(click_data)"
95 | ]
96 | },
97 | {
98 | "cell_type": "code",
99 | "execution_count": 4,
100 | "metadata": {},
101 | "outputs": [
102 | {
103 | "name": "stdout",
104 | "output_type": "stream",
105 | "text": [
106 | "click click not\n",
107 | "color \n",
108 | "blue 20 230\n",
109 | "red 10 40\n"
110 | ]
111 | }
112 | ],
113 | "source": [
114 | "# 转换为列联表\n",
115 | "cross = pd.pivot_table(\n",
116 | " data = click_data,\n",
117 | " values = \"freq\",\n",
118 | " aggfunc = \"sum\",\n",
119 | " index = \"color\",\n",
120 | " columns = \"click\"\n",
121 | ")\n",
122 | "print(cross)"
123 | ]
124 | },
125 | {
126 | "cell_type": "code",
127 | "execution_count": 5,
128 | "metadata": {},
129 | "outputs": [
130 | {
131 | "data": {
132 | "text/plain": [
133 | "(6.666666666666666,\n",
134 | " 0.009823274507519247,\n",
135 | " 1,\n",
136 | " array([[ 25., 225.],\n",
137 | " [ 5., 45.]]))"
138 | ]
139 | },
140 | "execution_count": 5,
141 | "metadata": {},
142 | "output_type": "execute_result"
143 | }
144 | ],
145 | "source": [
146 | "# 进行检验\n",
147 | "sp.stats.chi2_contingency(cross, correction = False)"
148 | ]
149 | }
150 | ],
151 | "metadata": {
152 | "kernelspec": {
153 | "display_name": "Python 3",
154 | "language": "python",
155 | "name": "python3"
156 | },
157 | "language_info": {
158 | "codemirror_mode": {
159 | "name": "ipython",
160 | "version": 3
161 | },
162 | "file_extension": ".py",
163 | "mimetype": "text/x-python",
164 | "name": "python",
165 | "nbconvert_exporter": "python",
166 | "pygments_lexer": "ipython3",
167 | "version": "3.7.6"
168 | },
169 | "toc": {
170 | "base_numbering": 1,
171 | "nav_menu": {},
172 | "number_sections": false,
173 | "sideBar": true,
174 | "skip_h1_title": false,
175 | "title_cell": "Table of Contents",
176 | "title_sidebar": "Contents",
177 | "toc_cell": false,
178 | "toc_position": {},
179 | "toc_section_display": true,
180 | "toc_window_display": true
181 | }
182 | },
183 | "nbformat": 4,
184 | "nbformat_minor": 2
185 | }
--------------------------------------------------------------------------------
/_build/jupyter_execute/3-10-列联表检验.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # coding: utf-8
3 |
4 | # # 第 10 节 列联表检验
5 | #
6 | # ## 第 3 章 使用 Python 进行数据分析|用 Python 动手学统计学
7 |
8 | # ### 5. 实现:计算 p 值
9 |
10 | # In[1]:
11 |
12 |
13 | # 用于数值计算的库
14 | import numpy as np
15 | import pandas as pd
16 | import scipy as sp
17 | from scipy import stats
18 |
19 | # 用于绘图的库
20 | from matplotlib import pyplot as plt
21 | import seaborn as sns
22 | sns.set()
23 |
24 | # 设置浮点数打印精度
25 | get_ipython().run_line_magic('precision', '3')
26 | # 在 Jupyter Notebook 里显示图形
27 | get_ipython().run_line_magic('matplotlib', 'inline')
28 |
29 |
30 | # In[2]:
31 |
32 |
33 | # 计算 p 值
34 | 1 - sp.stats.chi2.cdf(x = 6.667, df = 1)
35 |
36 |
37 | # ### 6. 实现:列联表检验
38 |
39 | # In[3]:
40 |
41 |
42 | # 读入数据
43 | click_data = pd.read_csv("3-10-1-click_data.csv")
44 | print(click_data)
45 |
46 |
47 | # In[4]:
48 |
49 |
50 | # 转换为列联表
51 | cross = pd.pivot_table(
52 | data = click_data,
53 | values = "freq",
54 | aggfunc = "sum",
55 | index = "color",
56 | columns = "click"
57 | )
58 | print(cross)
59 |
60 |
61 | # In[5]:
62 |
63 |
64 | # 进行检验
65 | sp.stats.chi2_contingency(cross, correction = False)
66 |
67 |
--------------------------------------------------------------------------------
/_build/jupyter_execute/3-2-使用Python进行描述统计多变量.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # coding: utf-8
3 |
4 | # # 第 2 节 使用 Python 进行描述统计:多变量
5 | #
6 | # ## 第 3 章 使用 Python 进行数据分析|用 Python 动手学统计学
7 | #
8 | #
9 |
10 | # ### 4. 多变量数据的管理
11 |
12 | # In[1]:
13 |
14 |
15 | # 用于数值计算的库
16 | import pandas as pd
17 | import scipy as sp
18 |
19 | # 设置浮点数打印精度
20 | get_ipython().run_line_magic('precision', '3')
21 |
22 |
23 | # ### 5. 实现:求各分组的统计量
24 |
25 | # In[2]:
26 |
27 |
28 | fish_multi = pd.read_csv("3-2-1-fish_multi.csv")
29 | print(fish_multi)
30 |
31 |
32 | # In[3]:
33 |
34 |
35 | # 按鱼的种类计算
36 | group = fish_multi.groupby("species")
37 | print(group.mean())
38 |
39 |
40 | # In[4]:
41 |
42 |
43 | print(group.std(ddof = 1))
44 |
45 |
46 | # In[5]:
47 |
48 |
49 | group.describe()
50 |
51 |
52 | # ### 6. 实现:列联表
53 |
54 | # In[6]:
55 |
56 |
57 | shoes = pd.read_csv("3-2-2-shoes.csv")
58 | print(shoes)
59 |
60 |
61 | # In[7]:
62 |
63 |
64 | cross = pd.pivot_table(
65 | data = shoes,
66 | values = "sales",
67 | aggfunc = "sum",
68 | index = "store",
69 | columns = "color"
70 | )
71 | print(cross)
72 |
73 |
74 | # ### 9. 实现:协方差
75 |
76 | # In[8]:
77 |
78 |
79 | cov_data = pd.read_csv("3-2-3-cov.csv")
80 | print(cov_data)
81 |
82 |
83 | # In[9]:
84 |
85 |
86 | # 读取数据的列
87 | x = cov_data["x"]
88 | y = cov_data["y"]
89 |
90 | # 求样本容量
91 | N = len(cov_data)
92 |
93 | # 求各变量均值
94 | mu_x = sp.mean(x)
95 | mu_y = sp.mean(y)
96 |
97 |
98 | # In[10]:
99 |
100 |
101 | # 样本协方差
102 | cov_sample = sum((x - mu_x) * (y - mu_y)) / N
103 | cov_sample
104 |
105 |
106 | # In[11]:
107 |
108 |
109 | # 无偏协方差
110 | cov = sum((x - mu_x) * (y - mu_y)) / (N - 1)
111 | cov
112 |
113 |
114 | # ### 10. 实现:协方差矩阵
115 |
116 | # In[12]:
117 |
118 |
119 | # 样本协方差
120 | sp.cov(x, y, ddof = 0)
121 |
122 |
123 | # In[13]:
124 |
125 |
126 | # 无偏协方差
127 | sp.cov(x, y, ddof = 1)
128 |
129 |
130 | # ### 13. 实现:皮尔逊积矩相关系数
131 |
132 | # In[14]:
133 |
134 |
135 | # 计算两个变量的方差
136 | sigma_2_x = sp.var(x, ddof = 1)
137 | sigma_2_y = sp.var(y, ddof = 1)
138 |
139 | # 计算相关系数
140 | rho = cov / sp.sqrt(sigma_2_x * sigma_2_y)
141 | rho
142 |
143 |
144 | # In[15]:
145 |
146 |
147 | # 计算两个变量的方差
148 | sigma_2_x_sample = sp.var(x, ddof = 0)
149 | sigma_2_y_sample = sp.var(y, ddof = 0)
150 |
151 | # 计算相关系数
152 | cov_sample / sp.sqrt(sigma_2_x_sample * sigma_2_y_sample)
153 |
154 |
155 | # In[16]:
156 |
157 |
158 | # 相关矩阵
159 | sp.corrcoef(x, y)
160 |
161 |
162 | # In[ ]:
163 |
164 |
165 |
166 |
167 |
--------------------------------------------------------------------------------
/_build/jupyter_execute/3-2-使用Python进行描述统计:多变量.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # coding: utf-8
3 |
4 | #
5 | #
6 | # # 第 2 节 使用 Python 进行描述统计:多变量
7 | #
8 | # ## 第 3 章 使用 Python 进行数据分析|用 Python 动手学统计学
9 |
10 | # ### 4. 多变量数据的管理
11 |
12 | # In[1]:
13 |
14 |
15 | # 用于数值计算的库
16 | import pandas as pd
17 | import scipy as sp
18 |
19 | # 设置浮点数打印精度
20 | get_ipython().run_line_magic('precision', '3')
21 |
22 |
23 | # ### 5. 实现:求各分组的统计量
24 |
25 | # In[2]:
26 |
27 |
28 | fish_multi = pd.read_csv("3-2-1-fish_multi.csv")
29 | print(fish_multi)
30 |
31 |
32 | # In[3]:
33 |
34 |
35 | # 按鱼的种类计算
36 | group = fish_multi.groupby("species")
37 | print(group.mean())
38 |
39 |
40 | # In[4]:
41 |
42 |
43 | print(group.std(ddof = 1))
44 |
45 |
46 | # In[5]:
47 |
48 |
49 | group.describe()
50 |
51 |
52 | # ### 6. 实现:列联表
53 |
54 | # In[6]:
55 |
56 |
57 | shoes = pd.read_csv("3-2-2-shoes.csv")
58 | print(shoes)
59 |
60 |
61 | # In[7]:
62 |
63 |
64 | cross = pd.pivot_table(
65 | data = shoes,
66 | values = "sales",
67 | aggfunc = "sum",
68 | index = "store",
69 | columns = "color"
70 | )
71 | print(cross)
72 |
73 |
74 | # ### 9. 实现:协方差
75 |
76 | # In[8]:
77 |
78 |
79 | cov_data = pd.read_csv("3-2-3-cov.csv")
80 | print(cov_data)
81 |
82 |
83 | # In[9]:
84 |
85 |
86 | # 读取数据的列
87 | x = cov_data["x"]
88 | y = cov_data["y"]
89 |
90 | # 求样本容量
91 | N = len(cov_data)
92 |
93 | # 求各变量均值
94 | mu_x = sp.mean(x)
95 | mu_y = sp.mean(y)
96 |
97 |
98 | # In[10]:
99 |
100 |
101 | # 样本协方差
102 | cov_sample = sum((x - mu_x) * (y - mu_y)) / N
103 | cov_sample
104 |
105 |
106 | # In[11]:
107 |
108 |
109 | # 无偏协方差
110 | cov = sum((x - mu_x) * (y - mu_y)) / (N - 1)
111 | cov
112 |
113 |
114 | # ### 10. 实现:协方差矩阵
115 |
116 | # In[12]:
117 |
118 |
119 | # 样本协方差
120 | sp.cov(x, y, ddof = 0)
121 |
122 |
123 | # In[13]:
124 |
125 |
126 | # 无偏协方差
127 | sp.cov(x, y, ddof = 1)
128 |
129 |
130 | # ### 13. 实现:皮尔逊积矩相关系数
131 |
132 | # In[14]:
133 |
134 |
135 | # 计算两个变量的方差
136 | sigma_2_x = sp.var(x, ddof = 1)
137 | sigma_2_y = sp.var(y, ddof = 1)
138 |
139 | # 计算相关系数
140 | rho = cov / sp.sqrt(sigma_2_x * sigma_2_y)
141 | rho
142 |
143 |
144 | # In[15]:
145 |
146 |
147 | # 计算两个变量的方差
148 | sigma_2_x_sample = sp.var(x, ddof = 0)
149 | sigma_2_y_sample = sp.var(y, ddof = 0)
150 |
151 | # 计算相关系数
152 | cov_sample / sp.sqrt(sigma_2_x_sample * sigma_2_y_sample)
153 |
154 |
155 | # In[16]:
156 |
157 |
158 | # 相关矩阵
159 | sp.corrcoef(x, y)
160 |
161 |
162 | # In[ ]:
163 |
164 |
165 |
166 |
167 |
--------------------------------------------------------------------------------
/_build/jupyter_execute/3-3-基于matplotlib-seaborn的数据可视化.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # coding: utf-8
3 |
4 | # # 第 3 节 基于 matplotlib、seaborn 的数据可视化
5 | #
6 | # ## 第 3 章 使用 Python 进行数据分析|用 Python 动手学统计学
7 |
8 | # ### 2. 实现:数据可视化的环境准备
9 |
10 | # In[1]:
11 |
12 |
13 | # 用于数值计算的库
14 | import numpy as np
15 | import pandas as pd
16 |
17 | # 设置浮点数打印精度
18 | get_ipython().run_line_magic('precision', '3')
19 |
20 | # 用于绘图的库
21 | from matplotlib import pyplot as plt
22 |
23 | # 在 Jupyter Notebook 里显示图形
24 | get_ipython().run_line_magic('matplotlib', 'inline')
25 |
26 |
27 | # ### 3. 实现:用 pyplot 绘制折线图
28 |
29 | # In[2]:
30 |
31 |
32 | x = np.array([0,1,2,3,4,5,6,7,8,9])
33 | y = np.array([2,3,4,3,5,4,6,7,4,8])
34 |
35 |
36 | # In[3]:
37 |
38 |
39 | plt.plot(x, y, color = 'black')
40 | plt.title("lineplot matplotlib")
41 | plt.xlabel("x")
42 | plt.ylabel("y")
43 |
44 |
45 | # ### 4. 实现:用 seaborn 和 pyplot 绘制折线图
46 |
47 | # In[4]:
48 |
49 |
50 | import seaborn as sns
51 | sns.set()
52 |
53 |
54 | # In[5]:
55 |
56 |
57 | plt.plot(x, y, color = 'black')
58 | plt.title("lineplot seaborn")
59 | plt.xlabel("x")
60 | plt.ylabel("y")
61 |
62 |
63 | # ### 5. 实现:用 seaborn 绘制直方图
64 |
65 | # In[6]:
66 |
67 |
68 | fish_data = np.array([2,3,3,4,4,4,4,5,5,6])
69 | fish_data
70 |
71 |
72 | # In[7]:
73 |
74 |
75 | sns.distplot(fish_data, bins = 5,
76 | color = 'black', kde = False)
77 |
78 |
79 | # ### 6. 实现:通过核密度估计将直方图平滑化
80 |
81 | # In[8]:
82 |
83 |
84 | sns.distplot(fish_data, bins = 1,
85 | color = 'black', kde = False)
86 |
87 |
88 | # In[9]:
89 |
90 |
91 | sns.distplot(fish_data, color = 'black')
92 |
93 |
94 | # ### 7. 实现:两个变量的直方图
95 |
96 | # In[10]:
97 |
98 |
99 | fish_multi = pd.read_csv("3-3-2-fish_multi_2.csv")
100 | print(fish_multi)
101 |
102 |
103 | # In[11]:
104 |
105 |
106 | print(fish_multi.groupby("species").describe())
107 |
108 |
109 | # In[12]:
110 |
111 |
112 | # 按鱼的种类区分数据
113 | length_a = fish_multi.query('species == "A"')["length"]
114 | length_b = fish_multi.query('species == "B"')["length"]
115 |
116 |
117 | # In[13]:
118 |
119 |
120 | # 绘制这两个直方图
121 | sns.distplot(length_a, bins = 5,
122 | color = 'black', kde = False)
123 | sns.distplot(length_b, bins = 5,
124 | color = 'gray', kde = False)
125 |
126 |
127 | # ### 9. 实现:箱形图
128 |
129 | # In[14]:
130 |
131 |
132 | # 箱形图
133 | sns.boxplot(x = "species", y = "length",
134 | data = fish_multi, color = 'gray')
135 |
136 |
137 | # In[15]:
138 |
139 |
140 | fish_multi.groupby("species").describe()
141 |
142 |
143 | # ### 10. 实现:小提琴图
144 |
145 | # In[16]:
146 |
147 |
148 | sns.violinplot(x = "species", y = "length",
149 | data = fish_multi, color = 'gray')
150 |
151 |
152 | # ### 11. 实现:条形图
153 |
154 | # In[17]:
155 |
156 |
157 | sns.barplot(x = "species", y = "length",
158 | data = fish_multi, color = 'gray')
159 |
160 |
161 | # ### 12. 实现:散点图
162 |
163 | # In[18]:
164 |
165 |
166 | cov_data = pd.read_csv("3-2-3-cov.csv")
167 | print(cov_data)
168 |
169 |
170 | # In[19]:
171 |
172 |
173 | sns.jointplot(x = "x", y = "y",
174 | data = cov_data, color = 'black')
175 |
176 |
177 | # ### 13. 实现:散点图矩阵
178 |
179 | # In[20]:
180 |
181 |
182 | # 导入 seaborn 内置的鸢尾花数据
183 | iris = sns.load_dataset("iris")
184 | iris.head(n = 3)
185 |
186 |
187 | # In[21]:
188 |
189 |
190 | # 每种类鸢尾花各个规格的均值
191 | iris.groupby("species").mean()
192 |
193 |
194 | # In[22]:
195 |
196 |
197 | # 散点图矩阵
198 | sns.pairplot(iris, hue="species", palette='gray')
199 |
200 |
201 | # In[ ]:
202 |
203 |
204 |
205 |
206 |
--------------------------------------------------------------------------------
/_build/jupyter_execute/3-3-基于matplotlib-seaborn的数据可视化_11_2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/SocratesClub/statsbook/c5a012a41d83a0b48ce414d1e7b418cddf5ee90a/_build/jupyter_execute/3-3-基于matplotlib-seaborn的数据可视化_11_2.png
--------------------------------------------------------------------------------
/_build/jupyter_execute/3-3-基于matplotlib-seaborn的数据可视化_13_1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/SocratesClub/statsbook/c5a012a41d83a0b48ce414d1e7b418cddf5ee90a/_build/jupyter_execute/3-3-基于matplotlib-seaborn的数据可视化_13_1.png
--------------------------------------------------------------------------------
/_build/jupyter_execute/3-3-基于matplotlib-seaborn的数据可视化_14_1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/SocratesClub/statsbook/c5a012a41d83a0b48ce414d1e7b418cddf5ee90a/_build/jupyter_execute/3-3-基于matplotlib-seaborn的数据可视化_14_1.png
--------------------------------------------------------------------------------
/_build/jupyter_execute/3-3-基于matplotlib-seaborn的数据可视化_19_2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/SocratesClub/statsbook/c5a012a41d83a0b48ce414d1e7b418cddf5ee90a/_build/jupyter_execute/3-3-基于matplotlib-seaborn的数据可视化_19_2.png
--------------------------------------------------------------------------------
/_build/jupyter_execute/3-3-基于matplotlib-seaborn的数据可视化_21_1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/SocratesClub/statsbook/c5a012a41d83a0b48ce414d1e7b418cddf5ee90a/_build/jupyter_execute/3-3-基于matplotlib-seaborn的数据可视化_21_1.png
--------------------------------------------------------------------------------
/_build/jupyter_execute/3-3-基于matplotlib-seaborn的数据可视化_24_1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/SocratesClub/statsbook/c5a012a41d83a0b48ce414d1e7b418cddf5ee90a/_build/jupyter_execute/3-3-基于matplotlib-seaborn的数据可视化_24_1.png
--------------------------------------------------------------------------------
/_build/jupyter_execute/3-3-基于matplotlib-seaborn的数据可视化_26_1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/SocratesClub/statsbook/c5a012a41d83a0b48ce414d1e7b418cddf5ee90a/_build/jupyter_execute/3-3-基于matplotlib-seaborn的数据可视化_26_1.png
--------------------------------------------------------------------------------
/_build/jupyter_execute/3-3-基于matplotlib-seaborn的数据可视化_29_1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/SocratesClub/statsbook/c5a012a41d83a0b48ce414d1e7b418cddf5ee90a/_build/jupyter_execute/3-3-基于matplotlib-seaborn的数据可视化_29_1.png
--------------------------------------------------------------------------------
/_build/jupyter_execute/3-3-基于matplotlib-seaborn的数据可视化_33_1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/SocratesClub/statsbook/c5a012a41d83a0b48ce414d1e7b418cddf5ee90a/_build/jupyter_execute/3-3-基于matplotlib-seaborn的数据可视化_33_1.png
--------------------------------------------------------------------------------
/_build/jupyter_execute/3-3-基于matplotlib-seaborn的数据可视化_5_1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/SocratesClub/statsbook/c5a012a41d83a0b48ce414d1e7b418cddf5ee90a/_build/jupyter_execute/3-3-基于matplotlib-seaborn的数据可视化_5_1.png
--------------------------------------------------------------------------------
/_build/jupyter_execute/3-3-基于matplotlib-seaborn的数据可视化_8_1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/SocratesClub/statsbook/c5a012a41d83a0b48ce414d1e7b418cddf5ee90a/_build/jupyter_execute/3-3-基于matplotlib-seaborn的数据可视化_8_1.png
--------------------------------------------------------------------------------
/_build/jupyter_execute/3-4-用Python模拟抽样.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # coding: utf-8
3 |
4 | # # 第 4 节 用 Python 模拟抽样
5 | #
6 | # ## 第 3 章 使用 Python 进行数据分析|用 Python 动手学统计学
7 | #
8 | #
9 |
10 | # ### 1. 环境准备
11 |
12 | # In[1]:
13 |
14 |
15 | # 用于数值计算的库
16 | import numpy as np
17 | import pandas as pd
18 | import scipy as sp
19 | from scipy import stats
20 |
21 | # 用于绘图的库
22 | from matplotlib import pyplot as plt
23 | import seaborn as sns
24 | sns.set()
25 |
26 | # 设置浮点数打印精度
27 | get_ipython().run_line_magic('precision', '3')
28 | # 在 Jupyter Notebook 里显示图形
29 | get_ipython().run_line_magic('matplotlib', 'inline')
30 |
31 |
32 | # ### 3. 在只有 5 条鱼的湖中抽样
33 |
34 | # In[2]:
35 |
36 |
37 | fish_5 = np.array([2,3,4,5,6])
38 | fish_5
39 |
40 |
41 | # In[3]:
42 |
43 |
44 | # 从总体中随机抽样
45 | np.random.choice(fish_5, size = 1, replace = False)
46 |
47 |
48 | # In[4]:
49 |
50 |
51 | # 从总体中随机抽样
52 | np.random.choice(fish_5, size = 3, replace = False)
53 |
54 |
55 | # In[5]:
56 |
57 |
58 | np.random.choice(fish_5, size = 3, replace = False)
59 |
60 |
61 | # In[6]:
62 |
63 |
64 | # 设定随机数种子以得到相同结果
65 | np.random.seed(1)
66 | np.random.choice(fish_5, size = 3, replace = False)
67 |
68 |
69 | # In[7]:
70 |
71 |
72 | np.random.seed(1)
73 | np.random.choice(fish_5, size = 3, replace = False)
74 |
75 |
76 | # In[8]:
77 |
78 |
79 | # 计算样本均值
80 | np.random.seed(1)
81 | sp.mean(
82 | np.random.choice(fish_5, size = 3, replace = False)
83 | )
84 |
85 |
86 | # ### 6. 从鱼较多的湖中抽样
87 |
88 | # In[9]:
89 |
90 |
91 | # 鱼较多的总体
92 | fish_100000 = pd.read_csv(
93 | "3-4-1-fish_length_100000.csv")["length"]
94 | fish_100000.head()
95 |
96 |
97 | # In[10]:
98 |
99 |
100 | len(fish_100000)
101 |
102 |
103 | # In[11]:
104 |
105 |
106 | # 抽样模拟实验
107 | sampling_result = np.random.choice(
108 | fish_100000, size = 10, replace = False)
109 | sampling_result
110 |
111 |
112 | # In[12]:
113 |
114 |
115 | # 样本均值
116 | sp.mean(sampling_result)
117 |
118 |
119 | # ### 7. 总体分布
120 |
121 | # In[13]:
122 |
123 |
124 | sp.mean(fish_100000)
125 |
126 |
127 | # In[14]:
128 |
129 |
130 | sp.std(fish_100000, ddof = 0)
131 |
132 |
133 | # In[15]:
134 |
135 |
136 | sp.var(fish_100000, ddof = 0)
137 |
138 |
139 | # In[16]:
140 |
141 |
142 | sns.distplot(fish_100000, kde = False, color = 'black')
143 |
144 |
145 | # ### 8. 对比总体分布和正态分布的概率密度函数
146 |
147 | # In[17]:
148 |
149 |
150 | x = np.arange(start = 1, stop = 7.1, step = 0.1)
151 | x
152 |
153 |
154 | # In[18]:
155 |
156 |
157 | stats.norm.pdf(x = x, loc = 4, scale = 0.8)
158 |
159 |
160 | # In[19]:
161 |
162 |
163 | plt.plot(x,
164 | stats.norm.pdf(x = x, loc = 4, scale = 0.8),
165 | color = 'black')
166 |
167 |
168 | # In[20]:
169 |
170 |
171 | # 把正态分布的概率密度和总体的直方图重合
172 | sns.distplot(fish_100000, kde = False,
173 | norm_hist = True, color = 'black')
174 | plt.plot(x,
175 | stats.norm.pdf(x = x, loc = 4, scale = 0.8),
176 | color = 'black')
177 |
178 |
179 | # ### 9. 抽样过程的抽象描述
180 |
181 | # In[21]:
182 |
183 |
184 | sampling_norm = stats.norm.rvs(
185 | loc = 4, scale = 0.8, size = 10)
186 | sampling_norm
187 |
188 |
189 | # In[22]:
190 |
191 |
192 | # 样本均值
193 | sp.mean(sampling_norm)
194 |
195 |
196 | # In[ ]:
197 |
198 |
199 |
200 |
201 |
--------------------------------------------------------------------------------
/_build/jupyter_execute/3-4-用Python模拟抽样_20_2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/SocratesClub/statsbook/c5a012a41d83a0b48ce414d1e7b418cddf5ee90a/_build/jupyter_execute/3-4-用Python模拟抽样_20_2.png
--------------------------------------------------------------------------------
/_build/jupyter_execute/3-4-用Python模拟抽样_24_1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/SocratesClub/statsbook/c5a012a41d83a0b48ce414d1e7b418cddf5ee90a/_build/jupyter_execute/3-4-用Python模拟抽样_24_1.png
--------------------------------------------------------------------------------
/_build/jupyter_execute/3-4-用Python模拟抽样_25_2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/SocratesClub/statsbook/c5a012a41d83a0b48ce414d1e7b418cddf5ee90a/_build/jupyter_execute/3-4-用Python模拟抽样_25_2.png
--------------------------------------------------------------------------------
/_build/jupyter_execute/3-5-样本统计量的性质_11_2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/SocratesClub/statsbook/c5a012a41d83a0b48ce414d1e7b418cddf5ee90a/_build/jupyter_execute/3-5-样本统计量的性质_11_2.png
--------------------------------------------------------------------------------
/_build/jupyter_execute/3-5-样本统计量的性质_16_1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/SocratesClub/statsbook/c5a012a41d83a0b48ce414d1e7b418cddf5ee90a/_build/jupyter_execute/3-5-样本统计量的性质_16_1.png
--------------------------------------------------------------------------------
/_build/jupyter_execute/3-5-样本统计量的性质_22_1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/SocratesClub/statsbook/c5a012a41d83a0b48ce414d1e7b418cddf5ee90a/_build/jupyter_execute/3-5-样本统计量的性质_22_1.png
--------------------------------------------------------------------------------
/_build/jupyter_execute/3-5-样本统计量的性质_27_1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/SocratesClub/statsbook/c5a012a41d83a0b48ce414d1e7b418cddf5ee90a/_build/jupyter_execute/3-5-样本统计量的性质_27_1.png
--------------------------------------------------------------------------------
/_build/jupyter_execute/3-5-样本统计量的性质_30_1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/SocratesClub/statsbook/c5a012a41d83a0b48ce414d1e7b418cddf5ee90a/_build/jupyter_execute/3-5-样本统计量的性质_30_1.png
--------------------------------------------------------------------------------
/_build/jupyter_execute/3-5-样本统计量的性质_41_1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/SocratesClub/statsbook/c5a012a41d83a0b48ce414d1e7b418cddf5ee90a/_build/jupyter_execute/3-5-样本统计量的性质_41_1.png
--------------------------------------------------------------------------------
/_build/jupyter_execute/3-5-样本统计量的性质_43_3.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/SocratesClub/statsbook/c5a012a41d83a0b48ce414d1e7b418cddf5ee90a/_build/jupyter_execute/3-5-样本统计量的性质_43_3.png
--------------------------------------------------------------------------------
/_build/jupyter_execute/3-6-正态分布及其应用.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # coding: utf-8
3 |
4 | # # 第 6 节 正态分布及其应用
5 | #
6 | # ## 第 3 章 使用 Python 进行数据分析|用 Python 动手学统计学
7 | #
8 | #
9 |
10 | # ### 1. 导入函数库
11 |
12 | # In[1]:
13 |
14 |
15 | # 用于数值计算的库
16 | import numpy as np
17 | import pandas as pd
18 | import scipy as sp
19 | from scipy import stats
20 |
21 | # 用于绘图的库
22 | from matplotlib import pyplot as plt
23 | import seaborn as sns
24 | sns.set()
25 |
26 | # 设置浮点数打印精度
27 | get_ipython().run_line_magic('precision', '3')
28 | # 在 Jupyter Notebook 里显示图形
29 | get_ipython().run_line_magic('matplotlib', 'inline')
30 |
31 |
32 | # ### 2. 实现:概率密度
33 |
34 | # In[2]:
35 |
36 |
37 | # 圆周率
38 | sp.pi
39 |
40 |
41 | # In[3]:
42 |
43 |
44 | # 指数函数
45 | sp.exp(1)
46 |
47 |
48 | # In[4]:
49 |
50 |
51 | # 均值为 4 标准差为 0.8 的正态分布在随机变量为 3 时的概率密度
52 | x = 3
53 | mu = 4
54 | sigma = 0.8
55 |
56 | 1 / (sp.sqrt(2 * sp.pi * sigma**2)) * sp.exp(- ((x - mu)**2) / (2 * sigma**2))
57 |
58 |
59 | # In[5]:
60 |
61 |
62 | stats.norm.pdf(loc = 4, scale = 0.8, x = 3)
63 |
64 |
65 | # In[6]:
66 |
67 |
68 | norm_dist = stats.norm(loc = 4, scale = 0.8)
69 | norm_dist.pdf(x = 3)
70 |
71 |
72 | # In[7]:
73 |
74 |
75 | x_plot = np.arange(start = 1, stop = 7.1, step = 0.1)
76 | plt.plot(
77 | x_plot,
78 | stats.norm.pdf(x = x_plot, loc = 4, scale = 0.8),
79 | color = 'black'
80 | )
81 |
82 |
83 | # ### 3. 样本小于等于某值的比例
84 |
85 | # In[8]:
86 |
87 |
88 | np.random.seed(1)
89 | simulated_sample = stats.norm.rvs(
90 | loc = 4, scale = 0.8, size = 100000)
91 | simulated_sample
92 |
93 |
94 | # In[9]:
95 |
96 |
97 | sp.sum(simulated_sample <= 3)
98 |
99 |
100 | # In[10]:
101 |
102 |
103 | sp.sum(simulated_sample <= 3) / len(simulated_sample)
104 |
105 |
106 | # ### 5. 实现:累积分布函数
107 |
108 | # In[11]:
109 |
110 |
111 | stats.norm.cdf(loc = 4, scale = 0.8, x = 3)
112 |
113 |
114 | # In[12]:
115 |
116 |
117 | stats.norm.cdf(loc = 4, scale = 0.8, x = 4)
118 |
119 |
120 | # ### 7. 实现:百分位数
121 |
122 | # In[13]:
123 |
124 |
125 | stats.norm.ppf(loc = 4, scale = 0.8, q = 0.025)
126 |
127 |
128 | # In[14]:
129 |
130 |
131 | left = stats.norm.cdf(loc = 4, scale = 0.8, x = 3)
132 | stats.norm.ppf(loc = 4, scale = 0.8, q = left)
133 |
134 |
135 | # In[15]:
136 |
137 |
138 | stats.norm.ppf(loc = 4, scale = 0.8, q = 0.5)
139 |
140 |
141 | # ### 10. t 值的样本分布
142 |
143 | # In[16]:
144 |
145 |
146 | # 随机数种子
147 | np.random.seed(1)
148 | # 存放 t 值的空间
149 | t_value_array = np.zeros(10000)
150 | # 实例化一个正态分布
151 | norm_dist = stats.norm(loc = 4, scale = 0.8)
152 | # 开始实验
153 | for i in range(0, 10000):
154 | sample = norm_dist.rvs(size = 10)
155 | sample_mean = sp.mean(sample)
156 | sample_std = sp.std(sample, ddof = 1)
157 | sample_se = sample_std / sp.sqrt(len(sample))
158 | t_value_array[i] = (sample_mean - 4) / sample_se
159 |
160 |
161 | # In[17]:
162 |
163 |
164 | # t 值的直方图
165 | sns.distplot(t_value_array, color = 'black')
166 |
167 | # 标准正态分布的概率密度
168 | x = np.arange(start = -8, stop = 8.1, step = 0.1)
169 | plt.plot(x, stats.norm.pdf(x = x),
170 | color = 'black', linestyle = 'dotted')
171 |
172 |
173 | # ### 12. 实现:t 分布
174 |
175 | # In[18]:
176 |
177 |
178 | plt.plot(x, stats.norm.pdf(x = x),
179 | color = 'black', linestyle = 'dotted')
180 | plt.plot(x, stats.t.pdf(x = x, df = 9),
181 | color = 'black')
182 |
183 |
184 | # In[19]:
185 |
186 |
187 | sns.distplot(t_value_array,
188 | color = 'black', norm_hist = True)
189 | plt.plot(x, stats.t.pdf(x = x, df = 9),
190 | color = 'black', linestyle = 'dotted')
191 |
192 |
193 | # In[ ]:
194 |
195 |
196 |
197 |
198 |
--------------------------------------------------------------------------------
/_build/jupyter_execute/3-6-正态分布及其应用_23_2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/SocratesClub/statsbook/c5a012a41d83a0b48ce414d1e7b418cddf5ee90a/_build/jupyter_execute/3-6-正态分布及其应用_23_2.png
--------------------------------------------------------------------------------
/_build/jupyter_execute/3-6-正态分布及其应用_25_1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/SocratesClub/statsbook/c5a012a41d83a0b48ce414d1e7b418cddf5ee90a/_build/jupyter_execute/3-6-正态分布及其应用_25_1.png
--------------------------------------------------------------------------------
/_build/jupyter_execute/3-6-正态分布及其应用_26_2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/SocratesClub/statsbook/c5a012a41d83a0b48ce414d1e7b418cddf5ee90a/_build/jupyter_execute/3-6-正态分布及其应用_26_2.png
--------------------------------------------------------------------------------
/_build/jupyter_execute/3-6-正态分布及其应用_9_1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/SocratesClub/statsbook/c5a012a41d83a0b48ce414d1e7b418cddf5ee90a/_build/jupyter_execute/3-6-正态分布及其应用_9_1.png
--------------------------------------------------------------------------------
/_build/jupyter_execute/3-7-参数估计.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # coding: utf-8
3 |
4 | # # 第 7 节 参数估计
5 | # ## 第 3 章 使用 Python 进行数据分析|用 Python 动手学统计学
6 | #
7 | #
8 |
9 | # ### 2. 环境准备
10 |
11 | # In[1]:
12 |
13 |
14 | # 用于数值计算的库
15 | import numpy as np
16 | import pandas as pd
17 | import scipy as sp
18 | from scipy import stats
19 |
20 | # 用于绘图的库
21 | from matplotlib import pyplot as plt
22 | import seaborn as sns
23 | sns.set()
24 |
25 | # 设置浮点数打印精度
26 | get_ipython().run_line_magic('precision', '3')
27 | # 在 Jupyter Notebook 里显示图形
28 | get_ipython().run_line_magic('matplotlib', 'inline')
29 |
30 |
31 | # In[2]:
32 |
33 |
34 | # 读入数据
35 | fish = pd.read_csv("3-7-1-fish_length.csv")["length"]
36 | fish
37 |
38 |
39 | # ### 4. 实现:点估计
40 |
41 | # In[3]:
42 |
43 |
44 | # 总体均值的点估计
45 | mu = sp.mean(fish)
46 | mu
47 |
48 |
49 | # In[4]:
50 |
51 |
52 | # 总体方差的点估计
53 | sigma_2 = sp.var(fish, ddof = 1)
54 | sigma_2
55 |
56 |
57 | # ### 9. 实现:区间估计
58 |
59 | # In[5]:
60 |
61 |
62 | # 自由度
63 | df = len(fish) - 1
64 | df
65 |
66 |
67 | # In[6]:
68 |
69 |
70 | # 标准误差
71 | se = sigma / sp.sqrt(len(fish))
72 | se
73 |
74 |
75 | # In[8]:
76 |
77 |
78 | # 区间估计
79 | interval = stats.t.interval(
80 | alpha = 0.95, df = df, loc = mu, scale = se)
81 | interval
82 |
83 |
84 | # ### 10. 补充:置信区间的求解细节
85 |
86 | # In[9]:
87 |
88 |
89 | # 97.5% 分位数
90 | t_975 = stats.t.ppf(q = 0.975, df = df)
91 | t_975
92 |
93 |
94 | # In[10]:
95 |
96 |
97 | # 下置信界限
98 | lower = mu - t_975 * se
99 | lower
100 |
101 |
102 | # In[11]:
103 |
104 |
105 | # 上置信界限
106 | upper = mu + t_975 * se
107 | upper
108 |
109 |
110 | # ### 11. 决定置信区间大小的因素
111 |
112 | # In[12]:
113 |
114 |
115 | # 样本方差越大, 置信区间越大
116 | se2 = (sigma*10) / sp.sqrt(len(fish))
117 | stats.t.interval(
118 | alpha = 0.95, df = df, loc = mu, scale = se2)
119 |
120 |
121 | # In[13]:
122 |
123 |
124 | # 样本容量越大, 置信区间越小
125 | df2 = (len(fish)*10) - 1
126 | se3 = sigma / sp.sqrt(len(fish)*10)
127 | stats.t.interval(
128 | alpha = 0.95, df = df2, loc = mu, scale = se3)
129 |
130 |
131 | # In[14]:
132 |
133 |
134 | # 99% 置信区间
135 | stats.t.interval(
136 | alpha = 0.99, df = df, loc = mu, scale = se)
137 |
138 |
139 | # ### 12. 区间估计结果的解读
140 |
141 | # In[19]:
142 |
143 |
144 | # 如果置信区间包含总体均值 (4) 就取 True
145 | be_included_array = np.zeros(20000, dtype = "bool")
146 | be_included_array
147 |
148 |
149 | # In[20]:
150 |
151 |
152 | # 执行 20,000 次求 95% 置信区间的操作
153 | # 如果置信区间包含总体均值 (4) 就取 True
154 | np.random.seed(1)
155 | norm_dist = stats.norm(loc = 4, scale = 0.8)
156 | for i in range(0, 20000):
157 | sample = norm_dist.rvs(size = 10)
158 | df = len(sample) - 1
159 | mu = sp.mean(sample)
160 | std = sp.std(sample, ddof = 1)
161 | se = std / sp.sqrt(len(sample))
162 | interval = stats.t.interval(0.95, df, mu, se)
163 | if(interval[0] <= 4 and interval[1] >= 4):
164 | be_included_array[i] = True
165 |
166 |
167 | # In[21]:
168 |
169 |
170 | sum(be_included_array) / len(be_included_array)
171 |
172 |
173 | # In[ ]:
174 |
175 |
176 |
177 |
178 |
--------------------------------------------------------------------------------
/_build/jupyter_execute/3-8-假设检验.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # coding: utf-8
3 |
4 | # # 第 8 节 假设检验
5 | # ## 第 3 章 使用 Python 进行数据分析|用 Python 动手学统计学
6 | #
7 | #
8 |
9 | # ### 13. t 检验的实现:环境准备
10 |
11 | # In[1]:
12 |
13 |
14 | # 用于数值计算的库
15 | import numpy as np
16 | import pandas as pd
17 | import scipy as sp
18 | from scipy import stats
19 |
20 | # 用于绘图的库
21 | from matplotlib import pyplot as plt
22 | import seaborn as sns
23 | sns.set()
24 |
25 | # 设置浮点数打印精度
26 | get_ipython().run_line_magic('precision', '3')
27 | # 在 Jupyter Notebook 里显示图形
28 | get_ipython().run_line_magic('matplotlib', 'inline')
29 |
30 |
31 | # In[2]:
32 |
33 |
34 | # 读入数据
35 | junk_food = pd.read_csv(
36 | "3-8-1-junk-food-weight.csv")["weight"]
37 | junk_food.head()
38 |
39 |
40 | # ### 14. t 检验的实现:计算 t 值
41 |
42 | # In[3]:
43 |
44 |
45 | # 样本均值
46 | mu = sp.mean(junk_food)
47 | mu
48 |
49 |
50 | # In[4]:
51 |
52 |
53 | # 自由度
54 | df = len(junk_food) - 1
55 | df
56 |
57 |
58 | # In[5]:
59 |
60 |
61 | # 标准误差
62 | sigma = sp.std(junk_food, ddof = 1)
63 | se = sigma / sp.sqrt(len(junk_food))
64 | se
65 |
66 |
67 | # In[6]:
68 |
69 |
70 | # t 值
71 | t_value = (mu - 50) / se
72 | t_value
73 |
74 |
75 | # ### 15. t 检验的实现:计算 p 值
76 |
77 | # In[7]:
78 |
79 |
80 | # p 值
81 | alpha = stats.t.cdf(t_value, df = df)
82 | (1 - alpha) * 2
83 |
84 |
85 | # In[8]:
86 |
87 |
88 | # t 检验
89 | stats.ttest_1samp(junk_food, 50)
90 |
91 |
92 | # ### 16. 通过模拟实验计算 p 值
93 |
94 | # In[9]:
95 |
96 |
97 | # 样本的相关信息 (一部分)
98 | size = len(junk_food)
99 | sigma = sp.std(junk_food, ddof = 1)
100 |
101 |
102 | # In[10]:
103 |
104 |
105 | # 存放 t 值的空间
106 | t_value_array = np.zeros(50000)
107 |
108 |
109 | # In[11]:
110 |
111 |
112 | # 总体均值为 50, 以接受零假设为前提进行 50,000 次抽样并计算 t 值的实验
113 | np.random.seed(1)
114 | norm_dist = stats.norm(loc = 50, scale = sigma)
115 | for i in range(0, 50000):
116 | sample = norm_dist.rvs(size = size)
117 | sample_mean = sp.mean(sample)
118 | sample_std = sp.std(sample, ddof = 1)
119 | sample_se = sample_std / sp.sqrt(size)
120 | t_value_array[i] = (sample_mean - 50) / sample_se
121 |
122 |
123 | # In[12]:
124 |
125 |
126 | (sum(t_value_array > t_value) / 50000) * 2
127 |
128 |
129 | # In[ ]:
130 |
131 |
132 |
133 |
134 |
--------------------------------------------------------------------------------
/_build/jupyter_execute/3-9-均值差的检验.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # coding: utf-8
3 |
4 | # # 第 9 节 均值差的检验
5 | # ## 第 3 章 使用 Python 进行数据分析|用 Python 动手学统计学
6 | #
7 | #
8 |
9 | # ### 3. 实现:实验准备
10 |
11 | # In[1]:
12 |
13 |
14 | # 用于数值计算的库
15 | import numpy as np
16 | import pandas as pd
17 | import scipy as sp
18 | from scipy import stats
19 |
20 | # 用于绘图的库
21 | from matplotlib import pyplot as plt
22 | import seaborn as sns
23 | sns.set()
24 |
25 | # 设置浮点数打印精度
26 | get_ipython().run_line_magic('precision', '3')
27 | # 在 Jupyter Notebook 里显示图形
28 | get_ipython().run_line_magic('matplotlib', 'inline')
29 |
30 |
31 | # In[2]:
32 |
33 |
34 | # 读入数据
35 | paired_test_data = pd.read_csv(
36 | "3-9-1-paired-t-test.csv")
37 | print(paired_test_data)
38 |
39 |
40 | # ### 4. 实现:配对样本 t 检验
41 |
42 | # In[3]:
43 |
44 |
45 | # 服药前后的样本均值
46 | before = paired_test_data.query(
47 | 'medicine == "before"')["body_temperature"]
48 | after = paired_test_data.query(
49 | 'medicine == "after"')["body_temperature"]
50 | # 转为数组类型
51 | before = np.array(before)
52 | after = np.array(after)
53 | # 计算差值
54 | diff = after - before
55 | diff
56 |
57 |
58 | # In[4]:
59 |
60 |
61 | # 检验均值是否与 0 存在差异
62 | stats.ttest_1samp(diff, 0)
63 |
64 |
65 | # In[5]:
66 |
67 |
68 | # 配对样本 t 检验
69 | stats.ttest_rel(after, before)
70 |
71 |
72 | # ### 6. 实现:独立样本 t 检验
73 |
74 | # In[6]:
75 |
76 |
77 | # 均值
78 | mean_bef = sp.mean(before)
79 | mean_aft = sp.mean(after)
80 |
81 | # 方差
82 | sigma_bef = sp.var(before, ddof = 1)
83 | sigma_aft = sp.var(after, ddof = 1)
84 |
85 | # 样本容量
86 | m = len(before)
87 | n = len(after)
88 |
89 | # t 值
90 | t_value = (mean_aft - mean_bef) / sp.sqrt((sigma_bef/m + sigma_aft/n))
91 | t_value
92 |
93 |
94 | # In[7]:
95 |
96 |
97 | stats.ttest_ind(after, before, equal_var = False)
98 |
99 |
100 | # In[ ]:
101 |
102 |
103 |
104 |
105 |
--------------------------------------------------------------------------------
/_build/jupyter_execute/5-1-一元回归.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # coding: utf-8
3 |
4 | #
5 | # # 第 5 章 正态线性模型
6 | #
7 | # ## 第 1 节 含有单个连续型解释变量的模型(一元回归)
8 |
9 | # ### 1. 环境准备
10 |
11 | # In[1]:
12 |
13 |
14 | # 用于数值计算的库
15 | import numpy as np
16 | import pandas as pd
17 | import scipy as sp
18 | from scipy import stats
19 |
20 | # 用于绘图的库
21 | from matplotlib import pyplot as plt
22 | import seaborn as sns
23 | sns.set()
24 |
25 | # 用于估计统计模型的库 (部分版本会报出警告信息)
26 | import statsmodels.formula.api as smf
27 | import statsmodels.api as sm
28 |
29 | # 设置浮点数打印精度
30 | get_ipython().run_line_magic('precision', '3')
31 | # 在 Jupyter Notebook 里显示图形
32 | get_ipython().run_line_magic('matplotlib', 'inline')
33 |
34 |
35 | # ### 2. 实现:读入数据并绘制其图形
36 |
37 | # In[2]:
38 |
39 |
40 | # 读入数据
41 | beer = pd.read_csv("5-1-1-beer.csv")
42 | print(beer.head())
43 |
44 |
45 | # In[3]:
46 |
47 |
48 | # 绘制图像
49 | sns.jointplot(x = "temperature", y = "beer",
50 | data = beer, color = 'black')
51 |
52 |
53 | # ### 4. 实现:使用 statsmodels 实现模型化
54 |
55 | # In[4]:
56 |
57 |
58 | # 建模
59 | lm_model = smf.ols(formula = "beer ~ temperature",
60 | data = beer).fit()
61 |
62 |
63 | # ### 5. 实现:打印估计结果并检验系数
64 |
65 | # In[5]:
66 |
67 |
68 | # 打印估计的结果
69 | lm_model.summary()
70 |
71 |
72 | # ### 7. 实现:使用 AIC 进行模型选择
73 |
74 | # In[6]:
75 |
76 |
77 | # 空模型
78 | null_model = smf.ols("beer ~ 1", data = beer).fit()
79 |
80 |
81 | # In[7]:
82 |
83 |
84 | # 空模型的 AIC
85 | null_model.aic
86 |
87 |
88 | # In[8]:
89 |
90 |
91 | # 含有解释变量的模型的 AIC
92 | lm_model.aic
93 |
94 |
95 | # In[9]:
96 |
97 |
98 | # 对数似然度
99 | lm_model.llf
100 |
101 |
102 | # In[10]:
103 |
104 |
105 | # 解释变量的个数
106 | lm_model.df_model
107 |
108 |
109 | # In[11]:
110 |
111 |
112 | # AIC
113 | -2*(lm_model.llf - (lm_model.df_model + 1))
114 |
115 |
116 | # ### 9. 实现:用 seaborn 绘制回归直线
117 |
118 | # In[12]:
119 |
120 |
121 | sns.lmplot(x = "temperature", y = "beer", data = beer,
122 | scatter_kws = {"color": "black"},
123 | line_kws = {"color": "black"})
124 |
125 |
126 | # ### 10. 实现:使用模型进行预测
127 |
128 | # In[13]:
129 |
130 |
131 | # 拟合值
132 | lm_model.predict()
133 |
134 |
135 | # In[14]:
136 |
137 |
138 | # 预测
139 | lm_model.predict(pd.DataFrame({"temperature":[0]}))
140 |
141 |
142 | # In[15]:
143 |
144 |
145 | # 气温为 0 度时的预测值等于截距
146 | lm_model.params
147 |
148 |
149 | # In[16]:
150 |
151 |
152 | # 预测
153 | lm_model.predict(pd.DataFrame({"temperature":[20]}))
154 |
155 |
156 | # In[17]:
157 |
158 |
159 | # 不使用 predict 函数进行预测
160 | beta0 = lm_model.params[0]
161 | beta1 = lm_model.params[1]
162 | temperature = 20
163 |
164 | beta0 + beta1 * temperature
165 |
166 |
167 | # ### 11. 实现:获取残差
168 |
169 | # In[18]:
170 |
171 |
172 | # 获得残差
173 | resid = lm_model.resid
174 | resid.head(3)
175 |
176 |
177 | # In[19]:
178 |
179 |
180 | # 计算拟合值
181 | y_hat = beta0 + beta1 * beer.temperature
182 | y_hat.head(3)
183 |
184 |
185 | # In[20]:
186 |
187 |
188 | # 获得拟合值
189 | lm_model.fittedvalues.head(3)
190 |
191 |
192 | # In[21]:
193 |
194 |
195 | # 手动计算残差
196 | (beer.beer - y_hat).head(3)
197 |
198 |
199 | # ### 13. 实现:决定系数
200 |
201 | # In[22]:
202 |
203 |
204 | # 决定系数
205 | mu = sp.mean(beer.beer)
206 | y = beer.beer
207 | yhat = lm_model.predict()
208 |
209 | sp.sum((yhat - mu)**2) / sp.sum((y - mu)**2)
210 |
211 |
212 | # In[23]:
213 |
214 |
215 | lm_model.rsquared
216 |
217 |
218 | # In[24]:
219 |
220 |
221 | sp.sum((yhat - mu)**2) + sum(resid**2)
222 |
223 |
224 | # In[25]:
225 |
226 |
227 | sp.sum((y - mu)**2)
228 |
229 |
230 | # In[26]:
231 |
232 |
233 | 1 - sp.sum(resid**2) / sp.sum((y - mu)**2)
234 |
235 |
236 | # ### 15. 实现:修正决定系数
237 |
238 | # In[27]:
239 |
240 |
241 | n = len(beer.beer)
242 | s = 1
243 | 1 - ((sp.sum(resid**2) / (n - s - 1)) /
244 | (sp.sum((y - mu)**2) / (n - 1)))
245 |
246 |
247 | # In[28]:
248 |
249 |
250 | lm_model.rsquared_adj
251 |
252 |
253 | # ### 16. 实现:残差的直方图和散点图
254 |
255 | # In[29]:
256 |
257 |
258 | # 残差的直方图
259 | sns.distplot(resid, color = 'black')
260 |
261 |
262 | # In[30]:
263 |
264 |
265 | # 残差的散点图
266 | sns.jointplot(lm_model.fittedvalues, resid,
267 | joint_kws={"color": "black"},
268 | marginal_kws={"color": "black"})
269 |
270 |
271 | # ### 18. 实现:分位图
272 |
273 | # In[31]:
274 |
275 |
276 | # 分位图
277 | fig = sm.qqplot(resid, line = "s")
278 |
279 |
280 | # In[32]:
281 |
282 |
283 | # 递增排列
284 | resid_sort = resid.sort_values()
285 | resid_sort.head()
286 |
287 |
288 | # In[33]:
289 |
290 |
291 | # 最小的数据所在位置
292 | 1 / 31
293 |
294 |
295 | # In[34]:
296 |
297 |
298 | # 按样本容量变换为 0 到 1 的范围, 得到理论累积概率
299 | #
300 | nobs = len(resid_sort)
301 | cdf = np.arange(1, nobs + 1) / (nobs + 1)
302 | cdf
303 |
304 |
305 | # In[35]:
306 |
307 |
308 | # 累积概率对应的百分位数
309 | ppf = stats.norm.ppf(cdf)
310 | ppf
311 |
312 |
313 | # In[36]:
314 |
315 |
316 | # 参考: 横轴为理论分位数, 纵轴为已排序的实际数据, 绘出的散点图就是分位图
317 | fig = sm.qqplot(resid, line = "s")
318 |
319 | plt.plot(stats.norm.ppf(cdf), resid_sort, "o", color = "black")
320 |
321 |
322 | # ### 19. 根据 summary 函数的输出分析残差
323 |
324 | # In[37]:
325 |
326 |
327 | # 打印估计的结果
328 | lm_model.summary()
329 |
330 |
--------------------------------------------------------------------------------
/_build/jupyter_execute/5-1-一元回归_18_1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/SocratesClub/statsbook/c5a012a41d83a0b48ce414d1e7b418cddf5ee90a/_build/jupyter_execute/5-1-一元回归_18_1.png
--------------------------------------------------------------------------------
/_build/jupyter_execute/5-1-一元回归_40_2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/SocratesClub/statsbook/c5a012a41d83a0b48ce414d1e7b418cddf5ee90a/_build/jupyter_execute/5-1-一元回归_40_2.png
--------------------------------------------------------------------------------
/_build/jupyter_execute/5-1-一元回归_41_2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/SocratesClub/statsbook/c5a012a41d83a0b48ce414d1e7b418cddf5ee90a/_build/jupyter_execute/5-1-一元回归_41_2.png
--------------------------------------------------------------------------------
/_build/jupyter_execute/5-1-一元回归_43_0.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/SocratesClub/statsbook/c5a012a41d83a0b48ce414d1e7b418cddf5ee90a/_build/jupyter_execute/5-1-一元回归_43_0.png
--------------------------------------------------------------------------------
/_build/jupyter_execute/5-1-一元回归_48_1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/SocratesClub/statsbook/c5a012a41d83a0b48ce414d1e7b418cddf5ee90a/_build/jupyter_execute/5-1-一元回归_48_1.png
--------------------------------------------------------------------------------
/_build/jupyter_execute/5-1-一元回归_5_1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/SocratesClub/statsbook/c5a012a41d83a0b48ce414d1e7b418cddf5ee90a/_build/jupyter_execute/5-1-一元回归_5_1.png
--------------------------------------------------------------------------------
/_build/jupyter_execute/5-2-方差分析.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # coding: utf-8
3 |
4 | # # 第 2 节 方差分析
5 | #
6 | # ## 第 5 章 正态线性模型|用 Python 动手学统计学
7 | #
8 | #
9 |
10 | # ### 8. 环境准备
11 |
12 | # In[1]:
13 |
14 |
15 | # 用于数值计算的库
16 | import numpy as np
17 | import pandas as pd
18 | import scipy as sp
19 | from scipy import stats
20 |
21 | # 用于绘图的库
22 | from matplotlib import pyplot as plt
23 | import seaborn as sns
24 | sns.set()
25 |
26 | # 用于估计统计模型的库 (部分版本会报出警告信息)
27 | import statsmodels.formula.api as smf
28 | import statsmodels.api as sm
29 |
30 | # 设置浮点数打印精度
31 | get_ipython().run_line_magic('precision', '3')
32 | # 在 Jupyter Notebook 里显示图形
33 | get_ipython().run_line_magic('matplotlib', 'inline')
34 |
35 |
36 | # ### 9. 生成数据并可视化
37 |
38 | # In[2]:
39 |
40 |
41 | # 定义一组示例数据
42 | weather = [
43 | "cloudy","cloudy",
44 | "rainy","rainy",
45 | "sunny","sunny"
46 | ]
47 | beer = [6,8,2,4,10,12]
48 |
49 | # 转换成数据帧
50 | weather_beer = pd.DataFrame({
51 | "beer" : beer,
52 | "weather": weather
53 | })
54 | print(weather_beer)
55 |
56 |
57 | # In[3]:
58 |
59 |
60 | # 绘出箱形图
61 | sns.boxplot(x = "weather",y = "beer",
62 | data = weather_beer, color='gray')
63 |
64 |
65 | # In[4]:
66 |
67 |
68 | # 每种天气下销售额的均值
69 | print(weather_beer.groupby("weather").mean())
70 |
71 |
72 | # ### 10. 实现:方差分析①:计算组间平方和与组内平方和
73 |
74 | # In[5]:
75 |
76 |
77 | # 天气的影响 (对应 weather 变量)
78 | effect = [7,7,3,3,11,11]
79 |
80 |
81 | # In[6]:
82 |
83 |
84 | # 组间偏差平方和
85 | mu_effect = sp.mean(effect)
86 | squares_model = sp.sum((effect - mu_effect) ** 2 )
87 | squares_model
88 |
89 |
90 | # In[7]:
91 |
92 |
93 | # 无法用天气来解释的部分, 即误差
94 | resid = weather_beer.beer - effect
95 | resid
96 |
97 |
98 | # In[8]:
99 |
100 |
101 | # 组内偏差平方和
102 | squares_resid = sp.sum(resid ** 2)
103 | squares_resid
104 |
105 |
106 | # ### 11. 实现:方差分析②:计算组间方差与组内方差
107 |
108 | # In[9]:
109 |
110 |
111 | df_model = 2 # 组间差异的自由度
112 | df_resid = 3 # 组内差异的自由度
113 |
114 |
115 | # In[10]:
116 |
117 |
118 | # 组间均方 (方差)
119 | variance_model = squares_model / df_model
120 | variance_model
121 |
122 |
123 | # In[11]:
124 |
125 |
126 | # 组内均方 (方差)
127 | variance_resid = squares_resid / df_resid
128 | variance_resid
129 |
130 |
131 | # ### 12. 实现:方差分析③:计算 p 值
132 |
133 | # In[12]:
134 |
135 |
136 | # F 比
137 | f_ratio = variance_model / variance_resid
138 | f_ratio
139 |
140 |
141 | # In[13]:
142 |
143 |
144 | # p 值
145 | 1 - sp.stats.f.cdf(x=f_ratio,dfn=df_model,dfd=df_resid)
146 |
147 |
148 | # ### 15. 实现:statsmodels 中的方差分析
149 |
150 | # In[14]:
151 |
152 |
153 | # 建立正态线性模型
154 | anova_model = smf.ols("beer ~ weather",
155 | data = weather_beer).fit()
156 |
157 |
158 | # In[15]:
159 |
160 |
161 | # 方差分析的结果
162 | print(sm.stats.anova_lm(anova_model, typ=2))
163 |
164 |
165 | # ### 17. 模型系数的含义
166 |
167 | # In[16]:
168 |
169 |
170 | anova_model.params
171 |
172 |
173 | # ### 18. 使用模型分离效应和误差
174 |
175 | # In[17]:
176 |
177 |
178 | # 拟合值
179 | fitted = anova_model.fittedvalues
180 | fitted
181 |
182 |
183 | # In[18]:
184 |
185 |
186 | # 残差
187 | anova_model.resid
188 |
189 |
190 | # ### 19. 回归模型中的方差分析
191 |
192 | # In[19]:
193 |
194 |
195 | # 读取数据
196 | beer = pd.read_csv("5-1-1-beer.csv")
197 |
198 | # 估计模型
199 | lm_model = smf.ols(formula = "beer ~ temperature",
200 | data = beer).fit()
201 |
202 |
203 | # In[20]:
204 |
205 |
206 | df_lm_model = 1 # 模型自由度
207 | df_lm_resid = 28 # 残差自由度
208 |
209 |
210 | # In[21]:
211 |
212 |
213 | # 拟合值
214 | lm_effect = lm_model.fittedvalues
215 | # 残差
216 | lm_resid = lm_model.resid
217 | # 气温的影响力度
218 | mu = sp.mean(lm_effect)
219 | squares_lm_model = sp.sum((lm_effect - mu) ** 2)
220 | variance_lm_model = squares_lm_model / df_lm_model
221 | # 残差的方差
222 | squares_lm_resid = sp.sum((lm_resid) ** 2)
223 | variance_lm_resid = squares_lm_resid / df_lm_resid
224 | # F 比
225 | f_value_lm = variance_lm_model / variance_lm_resid
226 | f_value_lm
227 |
228 |
229 | # In[22]:
230 |
231 |
232 | # 参考: p 值 (结果在截断后约等于 0)
233 | 1 - sp.stats.f.cdf(
234 | x=f_value_lm,dfn=df_lm_model,dfd=df_lm_resid)
235 |
236 |
237 | # In[23]:
238 |
239 |
240 | # 方差分析表
241 | print(sm.stats.anova_lm(lm_model, typ=2))
242 |
243 |
244 | # In[24]:
245 |
246 |
247 | # 模型的 summary
248 | lm_model.summary()
249 |
250 |
251 | # In[ ]:
252 |
253 |
254 |
255 |
256 |
--------------------------------------------------------------------------------
/_build/jupyter_execute/5-2-方差分析_5_1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/SocratesClub/statsbook/c5a012a41d83a0b48ce414d1e7b418cddf5ee90a/_build/jupyter_execute/5-2-方差分析_5_1.png
--------------------------------------------------------------------------------
/_build/jupyter_execute/5-3-含有多个解释变量的模型.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # coding: utf-8
3 |
4 | # # 第 3 节 含有多个解释变量的模型
5 | #
6 | # ## 第 5 章 正态线性模型|用 Python 动手学统计学
7 | #
8 | #
9 |
10 | # ### 1. 环境准备
11 |
12 | # In[1]:
13 |
14 |
15 | # 用于数值计算的库
16 | import numpy as np
17 | import pandas as pd
18 | import scipy as sp
19 | from scipy import stats
20 |
21 | # 用于绘图的库
22 | from matplotlib import pyplot as plt
23 | import seaborn as sns
24 | sns.set()
25 |
26 | # 用于估计统计模型的库 (部分版本会报出警告信息)
27 | import statsmodels.formula.api as smf
28 | import statsmodels.api as sm
29 |
30 | # 设置浮点数打印精度
31 | get_ipython().run_line_magic('precision', '3')
32 | # 在Jupyter Notebook里显示图形
33 | get_ipython().run_line_magic('matplotlib', 'inline')
34 |
35 |
36 | # In[2]:
37 |
38 |
39 | # 读入数据
40 | sales = pd.read_csv("5-3-1-lm-model.csv")
41 | print(sales.head(3))
42 |
43 |
44 | # ### 2. 实现:数据可视化
45 |
46 | # In[3]:
47 |
48 |
49 | # 数据可视化
50 | sns.pairplot(data = sales, hue = "weather",
51 | palette="gray")
52 |
53 |
54 | # ### 3. 错误的分析:建立只有 1 个变量的模型
55 |
56 | # In[4]:
57 |
58 |
59 | # 只使用价格这 1 种解释变量进行建模
60 | lm_dame = smf.ols("sales ~ price", sales).fit()
61 | lm_dame.params
62 |
63 |
64 | # In[5]:
65 |
66 |
67 | # 价格的系数与 0 存在显著性差异
68 | print(sm.stats.anova_lm(lm_dame, typ=2))
69 |
70 |
71 | # In[6]:
72 |
73 |
74 | # 价格与销售额的关系
75 | sns.lmplot(x = "price", y = "sales", data = sales,
76 | scatter_kws = {"color": "black"},
77 | line_kws = {"color": "black"})
78 |
79 |
80 | # ### 4. 分析解释变量之间的关系
81 |
82 | # In[7]:
83 |
84 |
85 | # 计算各天气下的均值
86 | print(sales.groupby("weather").mean())
87 |
88 |
89 | # In[8]:
90 |
91 |
92 | # 不同天气中销售额—价格回归直线
93 | sns.lmplot(x = "price", y = "sales", data = sales,
94 | hue="weather", palette='gray')
95 |
96 |
97 | # ### 5. 实现:多解释变量的模型
98 |
99 | # In[9]:
100 |
101 |
102 | # 估计多解释变量的模型
103 | lm_sales = smf.ols(
104 | "sales ~ weather + humidity + temperature + price",
105 | data=sales).fit()
106 | # 估计的结果
107 | lm_sales.params
108 |
109 |
110 | # ### 6. 错误的分析:使用普通方差分析
111 |
112 | # In[10]:
113 |
114 |
115 | # 普通方差分析
116 | print(sm.stats.anova_lm(lm_sales, typ=1).round(3))
117 |
118 |
119 | # In[11]:
120 |
121 |
122 | # 改变解释变量的顺序
123 | lm_sales_2 = smf.ols(
124 | "sales ~ weather + temperature + humidity + price",
125 | data=sales).fit()
126 | # 检验结果
127 | print(sm.stats.anova_lm(lm_sales_2, typ=1).round(3))
128 |
129 |
130 | # ### 7. 实现:回归系数的 t 检验
131 |
132 | # In[12]:
133 |
134 |
135 | # 模型 1 的回归系数的 t 检验
136 | lm_sales.summary().tables[1]
137 |
138 |
139 | # In[13]:
140 |
141 |
142 | # 模型 2 的回归系数的 t 检验
143 | lm_sales_2.summary().tables[1]
144 |
145 |
146 | # ### 9. 模型选择与方差分析
147 |
148 | # In[14]:
149 |
150 |
151 | # 空模型的残差平方和
152 | mod_null = smf.ols("sales ~ 1", sales).fit()
153 | resid_sq_null = sp.sum(mod_null.resid ** 2)
154 | resid_sq_null
155 |
156 |
157 | # In[15]:
158 |
159 |
160 | # 天气模型的残差平方和
161 | mod_1 = smf.ols("sales ~ weather", sales).fit()
162 | resid_sq_1 = sp.sum(mod_1.resid ** 2)
163 | resid_sq_1
164 |
165 |
166 | # In[16]:
167 |
168 |
169 | # 残差平方和的差
170 | resid_sq_null - resid_sq_1
171 |
172 |
173 | # In[17]:
174 |
175 |
176 | print(sm.stats.anova_lm(mod_1).round(3))
177 |
178 |
179 | # In[18]:
180 |
181 |
182 | # "天气 + 湿度" 模型的残差平方和
183 | mod_2 = smf.ols(
184 | "sales ~ weather + humidity", sales).fit()
185 | resid_sq_2 = sp.sum(mod_2.resid ** 2)
186 | resid_sq_2
187 |
188 |
189 | # In[19]:
190 |
191 |
192 | # 残差平方和的差
193 | resid_sq_1 - resid_sq_2
194 |
195 |
196 | # In[20]:
197 |
198 |
199 | print(sm.stats.anova_lm(mod_2).round(3))
200 |
201 |
202 | # In[21]:
203 |
204 |
205 | # "天气 + 气温" 模型的残差平方和
206 | mod_2_2 = smf.ols(
207 | "sales ~ weather + temperature", sales).fit()
208 | resid_sq_2_2 = sp.sum(mod_2_2.resid ** 2)
209 | resid_sq_2_2
210 |
211 |
212 | # In[22]:
213 |
214 |
215 | # "天气 + 气温 + 湿度" 模型的残差平方和
216 | mod_3_2 = smf.ols(
217 | "sales ~ weather + temperature + humidity",
218 | sales).fit()
219 | resid_sq_3_2 = sp.sum(mod_3_2.resid ** 2)
220 | resid_sq_3_2
221 |
222 |
223 | # In[23]:
224 |
225 |
226 | resid_sq_2_2 - resid_sq_3_2
227 |
228 |
229 | # In[24]:
230 |
231 |
232 | print(sm.stats.anova_lm(mod_3_2).round(3))
233 |
234 |
235 | # ### 11. 实现:Type II ANOVA
236 |
237 | # In[25]:
238 |
239 |
240 | # 包含所有解释变量的模型的残差平方和
241 | mod_full = smf.ols(
242 | "sales ~ weather + humidity + temperature + price",
243 | sales).fit()
244 | resid_sq_full = sp.sum(mod_full.resid ** 2)
245 | resid_sq_full
246 |
247 |
248 | # In[26]:
249 |
250 |
251 | # 不含湿度的模型的残差平方和
252 | mod_non_humi = smf.ols(
253 | "sales ~ weather + temperature + price",
254 | sales).fit()
255 | resid_sq_non_humi = sp.sum(mod_non_humi.resid ** 2)
256 | resid_sq_non_humi
257 |
258 |
259 | # In[27]:
260 |
261 |
262 | # 调整平方和
263 | resid_sq_non_humi - resid_sq_full
264 |
265 |
266 | # In[28]:
267 |
268 |
269 | # Type II ANOVA
270 | print(sm.stats.anova_lm(mod_full, typ=2).round(3))
271 |
272 |
273 | # In[29]:
274 |
275 |
276 | # 对比这两个模型
277 | mod_full.compare_f_test(mod_non_humi)
278 |
279 |
280 | # ### 13. 实现:变量选择与模型选择
281 |
282 | # In[30]:
283 |
284 |
285 | print(sm.stats.anova_lm(mod_non_humi, typ=2).round(3))
286 |
287 |
288 | # In[31]:
289 |
290 |
291 | mod_non_humi.params
292 |
293 |
294 | # ### 14. 实现:用 AIC 进行变量选择
295 |
296 | # In[32]:
297 |
298 |
299 | print("包含所有变量的模型:", mod_full.aic.round(3))
300 | print("不含湿度的模型 :", mod_non_humi.aic.round(3))
301 |
302 |
303 | # In[ ]:
304 |
305 |
306 |
307 |
308 |
--------------------------------------------------------------------------------
/_build/jupyter_execute/5-3-含有多个解释变量的模型_12_1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/SocratesClub/statsbook/c5a012a41d83a0b48ce414d1e7b418cddf5ee90a/_build/jupyter_execute/5-3-含有多个解释变量的模型_12_1.png
--------------------------------------------------------------------------------
/_build/jupyter_execute/5-3-含有多个解释变量的模型_5_1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/SocratesClub/statsbook/c5a012a41d83a0b48ce414d1e7b418cddf5ee90a/_build/jupyter_execute/5-3-含有多个解释变量的模型_5_1.png
--------------------------------------------------------------------------------
/_build/jupyter_execute/5-3-含有多个解释变量的模型_9_1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/SocratesClub/statsbook/c5a012a41d83a0b48ce414d1e7b418cddf5ee90a/_build/jupyter_execute/5-3-含有多个解释变量的模型_9_1.png
--------------------------------------------------------------------------------
/_build/jupyter_execute/6-1-各种概率分布.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # coding: utf-8
3 |
4 | # # 第 6 章 广义线性模型
5 | #
6 | # ## 第 1 节 各种概率分布
7 | #
8 | #
9 |
10 | # ### 8. 环境准备
11 |
12 | # In[1]:
13 |
14 |
15 | # 用于数值计算的库
16 | import numpy as np
17 | import pandas as pd
18 | import scipy as sp
19 | from scipy import stats
20 |
21 | # 用于绘图的库
22 | from matplotlib import pyplot as plt
23 | import seaborn as sns
24 | sns.set()
25 |
26 | # 设置浮点数打印精度
27 | get_ipython().run_line_magic('precision', '3')
28 | # 在 Jupyter Notebook 里显示图形
29 | get_ipython().run_line_magic('matplotlib', 'inline')
30 |
31 |
32 | # ### 9. 实现:二项分布
33 |
34 | # In[2]:
35 |
36 |
37 | # 二项分布的概率质量函数
38 | sp.stats.binom.pmf(k = 1, n = 2, p = 0.5)
39 |
40 |
41 | # In[3]:
42 |
43 |
44 | # 从 N = 10, p = 0.2 的二项分布中生成随机数
45 | np.random.seed(1)
46 | sp.stats.binom.rvs(n = 10, p = 0.2, size = 5)
47 |
48 |
49 | # In[4]:
50 |
51 |
52 | # N = 10, p = 0.2 的二项分布
53 | binomial = sp.stats.binom(n = 10, p = 0.2)
54 |
55 | # 生成随机数
56 | np.random.seed(1)
57 | rvs_binomial = binomial.rvs(size = 10000)
58 |
59 | # 概率质量函数
60 | m = np.arange(0,10,1)
61 | pmf_binomial = binomial.pmf(k = m)
62 |
63 | # 绘制出样本直方图与概率质量函数
64 | sns.distplot(rvs_binomial, bins = m, kde = False,
65 | norm_hist = True, color = 'gray')
66 | plt.plot(m, pmf_binomial, color = 'black')
67 |
68 |
69 | # ### 14. 实现:泊松分布
70 |
71 | # In[5]:
72 |
73 |
74 | # 泊松分布的概率质量函数
75 | sp.stats.poisson.pmf(k = 2, mu = 5)
76 |
77 |
78 | # In[6]:
79 |
80 |
81 | # 从 λ = 2 的泊松分布中生成随机数
82 | np.random.seed(1)
83 | sp.stats.poisson.rvs(mu = 2, size = 5)
84 |
85 |
86 | # In[7]:
87 |
88 |
89 | # λ = 2 的泊松分布
90 | poisson = sp.stats.poisson(mu = 2)
91 |
92 | # 生成随机数
93 | np.random.seed(1)
94 | rvs_poisson = poisson.rvs(size = 10000)
95 |
96 | # 概率质量函数
97 | pmf_poisson = poisson.pmf(k = m)
98 |
99 | # 绘制样本直方图与概率质量函数
100 | sns.distplot(rvs_poisson, bins = m, kde = False,
101 | norm_hist = True, color = 'gray')
102 | plt.plot(m, pmf_poisson, color = 'black')
103 |
104 |
105 | # In[8]:
106 |
107 |
108 | # N 非常大但 p 非常小的二项分布
109 | N = 100000000
110 | p = 0.00000002
111 | binomial_2 = sp.stats.binom(n = N, p = p)
112 |
113 | # 概率质量函数
114 | pmf_binomial_2 = binomial_2.pmf(k = m)
115 |
116 | # 绘制概率质量函数
117 | plt.plot(m, pmf_poisson, color = 'gray')
118 | plt.plot(m, pmf_binomial_2, color = 'black',
119 | linestyle = 'dotted')
120 |
121 |
122 | # In[ ]:
123 |
124 |
125 |
126 |
127 |
--------------------------------------------------------------------------------
/_build/jupyter_execute/6-1-各种概率分布_10_2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/SocratesClub/statsbook/c5a012a41d83a0b48ce414d1e7b418cddf5ee90a/_build/jupyter_execute/6-1-各种概率分布_10_2.png
--------------------------------------------------------------------------------
/_build/jupyter_execute/6-1-各种概率分布_11_1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/SocratesClub/statsbook/c5a012a41d83a0b48ce414d1e7b418cddf5ee90a/_build/jupyter_execute/6-1-各种概率分布_11_1.png
--------------------------------------------------------------------------------
/_build/jupyter_execute/6-1-各种概率分布_6_2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/SocratesClub/statsbook/c5a012a41d83a0b48ce414d1e7b418cddf5ee90a/_build/jupyter_execute/6-1-各种概率分布_6_2.png
--------------------------------------------------------------------------------
/_build/jupyter_execute/6-3-logistic回归.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # coding: utf-8
3 |
4 | # # 第 3 节 logistic 回归
5 | # ## 第 6 章 广义线性模型|用 Python 动手学统计学
6 | #
7 | #
8 |
9 | # ### 10. 环境准备
10 |
11 | # In[1]:
12 |
13 |
14 | # 用于数值计算的库
15 | import numpy as np
16 | import pandas as pd
17 | import scipy as sp
18 | from scipy import stats
19 |
20 | # 用于绘图的库
21 | from matplotlib import pyplot as plt
22 | import seaborn as sns
23 | sns.set()
24 |
25 | # 用于估计统计模型的库 (部分版本会报出警告信息)
26 | import statsmodels.formula.api as smf
27 | import statsmodels.api as sm
28 |
29 | # 设置浮点数打印精度
30 | get_ipython().run_line_magic('precision', '3')
31 | # 在 Jupyter Notebook 里显示图形
32 | get_ipython().run_line_magic('matplotlib', 'inline')
33 |
34 |
35 | # ### 11. 实现:读取数据并可视化
36 |
37 | # In[2]:
38 |
39 |
40 | # 读取数据
41 | test_result = pd.read_csv("6-3-1-logistic-regression.csv")
42 | print(test_result.head(3))
43 |
44 |
45 | # In[3]:
46 |
47 |
48 | # 可视化
49 | sns.barplot(x = "hours",y = "result",
50 | data = test_result, palette='gray_r')
51 |
52 |
53 | # In[4]:
54 |
55 |
56 | # 学习时间与合格率的关系
57 | print(test_result.groupby("hours").mean())
58 |
59 |
60 | # ### 12. 实现:logistic 回归
61 |
62 | # In[5]:
63 |
64 |
65 | # 建模
66 | mod_glm = smf.glm(formula = "result ~ hours",
67 | data = test_result,
68 | family=sm.families.Binomial()).fit()
69 |
70 |
71 | # In[6]:
72 |
73 |
74 | # 参考: 指定联系函数
75 | logistic_reg = smf.glm(formula = "result ~ hours",
76 | data = test_result,
77 | family=sm.families.Binomial(link=sm.families.links.logit)).fit()
78 |
79 |
80 | # ### 13. 实现:logistic 回归的结果
81 |
82 | # In[7]:
83 |
84 |
85 | # 打印估计的结果
86 | mod_glm.summary()
87 |
88 |
89 | # ### 14. 实现:模型选择
90 |
91 | # In[8]:
92 |
93 |
94 | # 空模型
95 | mod_glm_null = smf.glm(
96 | "result ~ 1", data = test_result,
97 | family=sm.families.Binomial()).fit()
98 |
99 |
100 | # In[9]:
101 |
102 |
103 | # 对比 AIC
104 | print("空模型 :", mod_glm_null.aic.round(3))
105 | print("学习时间模型:", mod_glm.aic.round(3))
106 |
107 |
108 | # ### 15. 实现:回归曲线
109 |
110 | # In[10]:
111 |
112 |
113 | # 用 lmplot 绘制 logistic 回归曲线
114 | sns.lmplot(x = "hours", y = "result",
115 | data = test_result,
116 | logistic = True,
117 | scatter_kws = {"color": "black"},
118 | line_kws = {"color": "black"},
119 | x_jitter = 0.1, y_jitter = 0.02)
120 |
121 |
122 | # ### 16. 实现:预测成功概率
123 |
124 | # In[11]:
125 |
126 |
127 | # 0~9 上公差为 1 的等差数列
128 | exp_val = pd.DataFrame({
129 | "hours": np.arange(0, 10, 1)
130 | })
131 | # 成功概率的预测值
132 | pred = mod_glm.predict(exp_val)
133 | pred
134 |
135 |
136 | # ### 19. logistic 回归的系数与优势比的关系
137 |
138 | # In[12]:
139 |
140 |
141 | # 学习时间为 1 小时的合格率
142 | exp_val_1 = pd.DataFrame({"hours": [1]})
143 | pred_1 = mod_glm.predict(exp_val_1)
144 |
145 | # 学习时间为 2 小时的合格率
146 | exp_val_2 = pd.DataFrame({"hours": [2]})
147 | pred_2 = mod_glm.predict(exp_val_2)
148 |
149 |
150 | # In[13]:
151 |
152 |
153 | # 优势
154 | odds_1 = pred_1 / (1 - pred_1)
155 | odds_2 = pred_2 / (1 - pred_2)
156 |
157 | # 对数优势比
158 | sp.log(odds_2 / odds_1)
159 |
160 |
161 | # In[14]:
162 |
163 |
164 | # 系数
165 | mod_glm.params["hours"]
166 |
167 |
168 | # In[15]:
169 |
170 |
171 | # 补充: 系数为 e 的指数时,其结果就是优势比
172 | sp.exp(mod_glm.params["hours"])
173 |
174 |
175 | # In[ ]:
176 |
177 |
178 |
179 |
180 |
--------------------------------------------------------------------------------
/_build/jupyter_execute/6-3-logistic回归_16_1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/SocratesClub/statsbook/c5a012a41d83a0b48ce414d1e7b418cddf5ee90a/_build/jupyter_execute/6-3-logistic回归_16_1.png
--------------------------------------------------------------------------------
/_build/jupyter_execute/6-3-logistic回归_5_1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/SocratesClub/statsbook/c5a012a41d83a0b48ce414d1e7b418cddf5ee90a/_build/jupyter_execute/6-3-logistic回归_5_1.png
--------------------------------------------------------------------------------
/_build/jupyter_execute/6-4-广义线性模型的评估.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # coding: utf-8
3 |
4 | # # 第 4 节 广义线性模型的评估
5 | #
6 | # ## 第 6 章 广义线性模型|用 Python 动手学统计学
7 | #
8 | #
9 |
10 | # ### 1. 环境准备
11 |
12 | # In[1]:
13 |
14 |
15 | # 用于数值计算的库
16 | import numpy as np
17 | import pandas as pd
18 | import scipy as sp
19 | from scipy import stats
20 |
21 | # 用于绘图的库
22 | from matplotlib import pyplot as plt
23 | import seaborn as sns
24 | sns.set()
25 |
26 | # 用于估计统计模型的库 (部分版本会报出警告信息)
27 | import statsmodels.formula.api as smf
28 | import statsmodels.api as sm
29 |
30 | # 设置浮点数打印精度
31 | get_ipython().run_line_magic('precision', '3')
32 | # 在 Jupyter Notebook 里显示图形
33 | get_ipython().run_line_magic('matplotlib', 'inline')
34 |
35 |
36 | # In[2]:
37 |
38 |
39 | # 读取数据
40 | test_result = pd.read_csv("6-3-1-logistic-regression.csv")
41 |
42 | # 模型化
43 | mod_glm = smf.glm("result ~ hours", data = test_result,
44 | family=sm.families.Binomial()).fit()
45 |
46 |
47 | # ### 4. 皮尔逊残差
48 |
49 | # In[3]:
50 |
51 |
52 | # 计算皮尔逊残差
53 |
54 | # 预测的成功概率
55 | pred = mod_glm.predict()
56 | # 响应变量 (合格情况)
57 | y = test_result.result
58 |
59 | # 皮尔逊残差
60 | peason_resid = (y - pred) / sp.sqrt(pred * (1 - pred))
61 | peason_resid.head(3)
62 |
63 |
64 | # In[4]:
65 |
66 |
67 | # 获取皮尔逊残差
68 | mod_glm.resid_pearson.head(3)
69 |
70 |
71 | # In[5]:
72 |
73 |
74 | # 皮尔逊残差的平方和
75 | sp.sum(mod_glm.resid_pearson**2)
76 |
77 |
78 | # In[6]:
79 |
80 |
81 | # 同样出现在 summary 函数的结果中
82 | mod_glm.pearson_chi2
83 |
84 |
85 | # ### 9. 偏差残差
86 |
87 | # In[7]:
88 |
89 |
90 | # 计算偏差残差
91 |
92 | # 预测的成功概率
93 | pred = mod_glm.predict()
94 | # 响应变量 (合格情况)
95 | y = test_result.result
96 |
97 | # 与完美预测了合格情况时的对数似然度的差值
98 | resid_tmp = 0 - sp.log(
99 | sp.stats.binom.pmf(k = y, n = 1, p = pred))
100 | # 偏差残差
101 | deviance_resid = sp.sqrt(
102 | 2 * resid_tmp
103 | ) * np.sign(y - pred)
104 | # 打印结果
105 | deviance_resid.head(3)
106 |
107 |
108 | # In[8]:
109 |
110 |
111 | mod_glm.resid_deviance.head(3)
112 |
113 |
114 | # In[9]:
115 |
116 |
117 | # deviance
118 | sp.sum(mod_glm.resid_deviance ** 2)
119 |
120 |
--------------------------------------------------------------------------------
/_build/jupyter_execute/6-5-泊松回归.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # coding: utf-8
3 |
4 | # # 第 5 节 泊松回归
5 | #
6 | # ## 第 6 章 广义线性模型|用 Python 动手学统计学
7 | #
8 | #
9 |
10 | # ### 4. 环境准备
11 |
12 | # In[1]:
13 |
14 |
15 | # 用于数值计算的库
16 | import numpy as np
17 | import pandas as pd
18 | import scipy as sp
19 | from scipy import stats
20 |
21 | # 用于绘图的库
22 | from matplotlib import pyplot as plt
23 | import seaborn as sns
24 | sns.set()
25 |
26 | # 用于估计统计模型的库 (部分版本会报出警告信息)
27 | import statsmodels.formula.api as smf
28 | import statsmodels.api as sm
29 |
30 | # 设置浮点数打印精度
31 | get_ipython().run_line_magic('precision', '3')
32 | # 在 Jupyter Notebook 里显示图形
33 | get_ipython().run_line_magic('matplotlib', 'inline')
34 |
35 |
36 | # In[2]:
37 |
38 |
39 | # 读取数据
40 | beer = pd.read_csv("6-5-1-poisson-regression.csv")
41 | print(beer.head(3))
42 |
43 |
44 | # ### 5. 实现:泊松回归
45 |
46 | # In[3]:
47 |
48 |
49 | # 建模
50 | mod_pois = smf.glm("beer_number ~ temperature", beer,
51 | family=sm.families.Poisson()).fit()
52 | mod_pois.summary()
53 |
54 |
55 | # ### 6. 实现:模型选择
56 |
57 | # In[4]:
58 |
59 |
60 | # 空模型
61 | mod_pois_null = smf.glm(
62 | "beer_number ~ 1", data = beer,
63 | family=sm.families.Poisson()).fit()
64 |
65 |
66 | # In[5]:
67 |
68 |
69 | # 对比 AIC
70 | print("空模型 :", mod_pois_null.aic.round(3))
71 | print("气温模型:", mod_pois.aic.round(3))
72 |
73 |
74 | # ### 7. 实现:回归曲线
75 |
76 | # In[6]:
77 |
78 |
79 | # 绘制回归曲线
80 |
81 | # 计算预测值
82 | x_plot = np.arange(0, 37)
83 | pred = mod_pois.predict(
84 | pd.DataFrame({"temperature": x_plot}))
85 |
86 | # 不含默认回归直线的 lmplot
87 | sns.lmplot(y="beer_number", x = "temperature",
88 | data = beer, fit_reg = False,
89 | scatter_kws = {"color":"black"})
90 | # 绘出回归曲线
91 | plt.plot(x_plot, pred, color="black")
92 |
93 |
94 | # ### 8. 回归系数的含义
95 |
96 | # In[7]:
97 |
98 |
99 | # 气温为 1 度时销售数量的期望
100 | exp_val_1 = pd.DataFrame({"temperature": [1]})
101 | pred_1 = mod_pois.predict(exp_val_1)
102 |
103 | # 气温为 2 度时销售数量的期望
104 | exp_val_2 = pd.DataFrame({"temperature": [2]})
105 | pred_2 = mod_pois.predict(exp_val_2)
106 |
107 | # 气温每升高 1 度, 销量变为多少倍
108 | pred_2 / pred_1
109 |
110 |
111 | # In[8]:
112 |
113 |
114 | # e 的指数为回归系数
115 | sp.exp(mod_pois.params["temperature"])
116 |
117 |
--------------------------------------------------------------------------------
/_build/jupyter_execute/6-5-泊松回归_10_1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/SocratesClub/statsbook/c5a012a41d83a0b48ce414d1e7b418cddf5ee90a/_build/jupyter_execute/6-5-泊松回归_10_1.png
--------------------------------------------------------------------------------
/_build/jupyter_execute/7-3-Python中的Ridge回归与Lasso回归.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # coding: utf-8
3 |
4 | # # 第 7 章 统计学与机器学习
5 | #
6 | # ## 第 3 节 Python 中的 Ridge 回归与 Lasso 回归
7 | #
8 | #
9 | #
10 |
11 | # ### 2. 环境准备
12 |
13 | # In[1]:
14 |
15 |
16 | # 用于数值计算的库
17 | import numpy as np
18 | import pandas as pd
19 | import scipy as sp
20 | from scipy import stats
21 |
22 | # 用于绘图的库
23 | from matplotlib import pyplot as plt
24 | import seaborn as sns
25 | sns.set()
26 |
27 | # 用于估计统计模型的库 (部分版本会报出警告信息)
28 | import statsmodels.formula.api as smf
29 | import statsmodels.api as sm
30 |
31 | # 用于机器学习的库
32 | from sklearn import linear_model
33 |
34 | # 设置浮点数打印精度
35 | get_ipython().run_line_magic('precision', '3')
36 | # 在 Jupyter Notebook 里显示图形
37 | get_ipython().run_line_magic('matplotlib', 'inline')
38 |
39 |
40 | # In[2]:
41 |
42 |
43 | # 读入示例数据
44 | X = pd.read_csv("7-3-1-large-data.csv")
45 | X.head(3)
46 |
47 |
48 | # ### 3. 实现:标准化
49 |
50 | # In[3]:
51 |
52 |
53 | # X_1 的均值
54 | sp.mean(X.X_1)
55 |
56 |
57 | # In[4]:
58 |
59 |
60 | # 所有解释变量的均值
61 | sp.mean(X, axis = 0).head(3)
62 |
63 |
64 | # In[5]:
65 |
66 |
67 | # 标准化
68 | X -= sp.mean(X, axis = 0)
69 | X /= sp.std(X, ddof = 1, axis = 0)
70 |
71 |
72 | # In[6]:
73 |
74 |
75 | # 检验
76 | sp.mean(X, axis = 0).head(3).round(3)
77 |
78 |
79 | # In[7]:
80 |
81 |
82 | # 检验
83 | sp.std(X, ddof = 1, axis = 0).head(3)
84 |
85 |
86 | # ### 4. 定义响应变量
87 |
88 | # In[8]:
89 |
90 |
91 | # 定义响应变量
92 |
93 | # 服从正态分布的噪声
94 | np.random.seed(1)
95 | noise = sp.stats.norm.rvs(loc = 0, scale = 1, size = X.shape[0])
96 |
97 | # 设正确的系数为 5, 定义响应变量
98 | y = X.X_1 * 5 + noise
99 |
100 |
101 | # In[9]:
102 |
103 |
104 | # 把响应变量和解释变量放在一起
105 | large_data = pd.concat([pd.DataFrame({"y":y}), X], axis = 1)
106 | # 绘制散点图
107 | sns.jointplot(y = "y", x = "X_1", data = large_data,
108 | color = 'black')
109 |
110 |
111 | # ### 5. 实现:普通最小二乘法
112 |
113 | # In[10]:
114 |
115 |
116 | lm_statsmodels = sm.OLS(endog = y, exog = X).fit()
117 | lm_statsmodels.params.head(3)
118 |
119 |
120 | # ### 6. 实现:使用 sklearn 实现线性回归
121 |
122 | # In[11]:
123 |
124 |
125 | # 指定模型的结构
126 | lm_sklearn = linear_model.LinearRegression()
127 | # 指定数据来源并估计模型
128 | lm_sklearn.fit(X, y)
129 | # 所估计的参数 (数组型)
130 | lm_sklearn.coef_
131 |
132 |
133 | # ### 7. 实现:Ridge 回归:惩罚项的影响
134 |
135 | # In[12]:
136 |
137 |
138 | # 生成 50 个 α
139 | n_alphas = 50
140 | ridge_alphas = np.logspace(-2, 0.7, n_alphas)
141 |
142 |
143 | # In[13]:
144 |
145 |
146 | # 参考
147 | sp.log10(ridge_alphas)
148 |
149 |
150 | # In[14]:
151 |
152 |
153 | # 对不同的 α 值进行 Ridge 回归
154 |
155 | # 存放已估计的回归系数的列表
156 | ridge_coefs = []
157 | # 使用 for 循环多次估计 Ridge 回归
158 | for a in ridge_alphas:
159 | ridge = linear_model.Ridge(alpha = a, fit_intercept = False)
160 | ridge.fit(X, y)
161 | ridge_coefs.append(ridge.coef_)
162 |
163 |
164 | # In[15]:
165 |
166 |
167 | # 转换为数组
168 | ridge_coefs = np.array(ridge_coefs)
169 | ridge_coefs.shape
170 |
171 |
172 | # In[16]:
173 |
174 |
175 | # 参考
176 | log_alphas = -sp.log10(ridge_alphas)
177 | plt.plot(log_alphas, ridge_coefs[::,0], color = 'black')
178 | plt.plot(log_alphas, ridge_coefs[::,1], color = 'black')
179 |
180 | plt.xlim([min(log_alphas)-0.1, max(log_alphas) + 0.3])
181 | plt.ylim([-8, 10.5])
182 |
183 |
184 | # In[17]:
185 |
186 |
187 | # 横轴为 -log10(α), 纵轴为系数的折线图
188 | # 无需重复 100 次即可自动得到 100 条线
189 |
190 | # 对 α 取对数
191 | log_alphas = -sp.log10(ridge_alphas)
192 | # 绘制曲线, 横轴为 -log10(α), 纵轴为系数
193 | plt.plot(log_alphas, ridge_coefs, color = 'black')
194 | # 标出解释变量 X_1 的系数
195 | plt.text(max(log_alphas) + 0.1, np.array(ridge_coefs)[0,0], "X_1")
196 | # X 轴的范围
197 | plt.xlim([min(log_alphas) - 0.1, max(log_alphas) + 0.3])
198 | # 轴标签
199 | plt.title("Ridge")
200 | plt.xlabel("- log10(alpha)")
201 | plt.ylabel("Coefficients")
202 |
203 |
204 | # ### 8. 实现:Ridge 回归:确定最佳正则化强度
205 |
206 | # In[18]:
207 |
208 |
209 | # 通过交叉验证法求最佳 α
210 | ridge_best = linear_model.RidgeCV(
211 | cv = 10, alphas = ridge_alphas, fit_intercept = False)
212 | ridge_best.fit(X, y)
213 |
214 | # 最佳的 -log10(α)
215 | -sp.log10(ridge_best.alpha_)
216 |
217 |
218 | # In[19]:
219 |
220 |
221 | # 最佳 α
222 | ridge_best.alpha_
223 |
224 |
225 | # In[20]:
226 |
227 |
228 | # 取最佳 α 时的回归系数
229 | ridge_best.coef_
230 |
231 |
232 | # ### 9. 实现:Lasso 回归:惩罚指标的影响
233 |
234 | # In[21]:
235 |
236 |
237 | # 对不同的 α 值进行 Lasso 回归
238 | lasso_alphas, lasso_coefs, _ = linear_model.lasso_path(
239 | X, y, fit_intercept = False)
240 |
241 |
242 | # In[23]:
243 |
244 |
245 | # Lasso 回归的解路径图
246 |
247 | # 对 α 取对数
248 | log_alphas = -sp.log10(lasso_alphas)
249 | # 绘制曲线, 横轴为 -log10(α), 纵轴为系数
250 | plt.plot(log_alphas, lasso_coefs.T, color = 'black')
251 | # 标出解释变量 X_1 的系数
252 | plt.text(max(log_alphas) + 0.1, lasso_coefs[0, -1], "X_1")
253 | # X 轴的范围
254 | plt.xlim([min(log_alphas)-0.1, max(log_alphas) + 0.3])
255 | # 轴标签
256 | plt.title("Lasso")
257 | plt.xlabel("- log10(alpha)")
258 | plt.ylabel("Coefficients")
259 |
260 |
261 | # ### 10. 实现:Lasso 回归:确定最佳正则化强度
262 |
263 | # In[39]:
264 |
265 |
266 | # 通过交叉验证法求最佳的 α
267 | lasso_best = linear_model.LassoCV(
268 | cv = 10, alphas = lasso_alphas, fit_intercept = False)
269 | lasso_best.fit(X, y)
270 |
271 | # 最佳的 -log(α)
272 | -sp.log10(lasso_best.alpha_)
273 |
274 |
275 | # In[40]:
276 |
277 |
278 | # 最佳的 α
279 | lasso_best.alpha_
280 |
281 |
282 | # In[41]:
283 |
284 |
285 | # 取最佳的 α 时的回归系数
286 | lasso_best.coef_
287 |
288 |
289 | # In[ ]:
290 |
291 |
292 |
293 |
294 |
--------------------------------------------------------------------------------
/_build/jupyter_execute/7-3-Python中的Ridge回归与Lasso回归_12_1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/SocratesClub/statsbook/c5a012a41d83a0b48ce414d1e7b418cddf5ee90a/_build/jupyter_execute/7-3-Python中的Ridge回归与Lasso回归_12_1.png
--------------------------------------------------------------------------------
/_build/jupyter_execute/7-3-Python中的Ridge回归与Lasso回归_22_2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/SocratesClub/statsbook/c5a012a41d83a0b48ce414d1e7b418cddf5ee90a/_build/jupyter_execute/7-3-Python中的Ridge回归与Lasso回归_22_2.png
--------------------------------------------------------------------------------
/_build/jupyter_execute/7-3-Python中的Ridge回归与Lasso回归_23_2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/SocratesClub/statsbook/c5a012a41d83a0b48ce414d1e7b418cddf5ee90a/_build/jupyter_execute/7-3-Python中的Ridge回归与Lasso回归_23_2.png
--------------------------------------------------------------------------------
/_build/jupyter_execute/7-3-Python中的Ridge回归与Lasso回归_30_1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/SocratesClub/statsbook/c5a012a41d83a0b48ce414d1e7b418cddf5ee90a/_build/jupyter_execute/7-3-Python中的Ridge回归与Lasso回归_30_1.png
--------------------------------------------------------------------------------
/_build/jupyter_execute/7-4-线性模型与神经网络.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # coding: utf-8
3 |
4 | # # 第 4 节 线性模型与神经网络
5 | #
6 | # ## 第 7 章 统计学与机器学习|用 Python 动手学统计学
7 | #
8 | #
9 |
10 | # ### 环境准备
11 |
12 | # In[1]:
13 |
14 |
15 | # 用于数值计算的库
16 | import numpy as np
17 | import pandas as pd
18 | import scipy as sp
19 |
20 | # 用于估计统计模型的库 (部分版本会有警告信息)
21 | import statsmodels.formula.api as smf
22 | import statsmodels.api as sm
23 |
24 | # 用于多层感知器的库
25 | from sklearn.neural_network import MLPClassifier
26 |
27 | # 导入示例数据
28 | from sklearn.datasets import load_iris
29 |
30 | # 区分训练集与测试集
31 | from sklearn.model_selection import train_test_split
32 |
33 | # 标准化数据
34 | from sklearn.preprocessing import StandardScaler
35 |
36 | # 设置浮点数打印精度
37 | get_ipython().run_line_magic('precision', '3')
38 |
39 |
40 | # ### 读入数据并整形
41 |
42 | # In[2]:
43 |
44 |
45 | # 导入示例数据
46 | iris = load_iris()
47 |
48 |
49 | # In[3]:
50 |
51 |
52 | # 解释变量的名称
53 | iris.feature_names
54 |
55 |
56 | # In[4]:
57 |
58 |
59 | # 响应变量的名称
60 | iris.target_names
61 |
62 |
63 | # In[5]:
64 |
65 |
66 | # 解释变量仅为萼片 (sepal)
67 | X = iris.data[50:150, 0:2]
68 | # 只取2种鸢尾花
69 | y = iris.target[50:150]
70 |
71 | print("解释变量行数与列数:", X.shape)
72 | print("响应变量行数与列数:", y.shape)
73 |
74 |
75 | # In[6]:
76 |
77 |
78 | # 把数据分为训练集与测试集
79 | X_train, X_test, y_train, y_test = train_test_split(
80 | X, y, random_state = 2)
81 |
82 | print("解释变量行数与列数:", X_train.shape)
83 | print("响应变量行数与列数:", y_train.shape)
84 |
85 |
86 | # ### 实现:logistic 回归
87 |
88 | # In[7]:
89 |
90 |
91 | # 打印响应变量
92 | y_train[0:10]
93 |
94 |
95 | # In[8]:
96 |
97 |
98 | # 数据整形
99 | # 解释变量的数据帧
100 | X_train_df = pd.DataFrame(
101 | X_train, columns = ["sepal_len", "sepal_wid"])
102 | # 响应变量的数据帧
103 | y_train_df = pd.DataFrame({"species": y_train - 1})
104 | # 连接数据帧
105 | iris_train_df = pd.concat(
106 | [y_train_df, X_train_df], axis=1)
107 | # 打印结果
108 | print(iris_train_df.head(3))
109 |
110 |
111 | # In[9]:
112 |
113 |
114 | # 模型化
115 | # 长度与宽度模型
116 | logi_mod_full = smf.glm(
117 | "species ~ sepal_len + sepal_wid", data = iris_train_df,
118 | family=sm.families.Binomial()).fit()
119 |
120 | # 长度模型
121 | logi_mod_len = smf.glm(
122 | "species ~ sepal_len", data = iris_train_df,
123 | family=sm.families.Binomial()).fit()
124 |
125 | # 宽度模型
126 | logi_mod_wid = smf.glm(
127 | "species ~ sepal_wid", data = iris_train_df,
128 | family=sm.families.Binomial()).fit()
129 |
130 | # 空模型
131 | logi_mod_null = smf.glm(
132 | "species ~ 1", data = iris_train_df,
133 | family=sm.families.Binomial()).fit()
134 |
135 | # 对比 AIC
136 | print("full", logi_mod_full.aic.round(3))
137 | print("len ", logi_mod_len.aic.round(3))
138 | print("wid ", logi_mod_wid.aic.round(3))
139 | print("null", logi_mod_null.aic.round(3))
140 |
141 |
142 | # In[10]:
143 |
144 |
145 | # 查看估计的系数等指标
146 | logi_mod_len.summary().tables[1]
147 |
148 |
149 | # In[11]:
150 |
151 |
152 | # 预测精度
153 | # 数据整形
154 | X_test_df = pd.DataFrame(
155 | X_test, columns = ["sepal_len", "sepal_wid"])
156 |
157 | # 拟合与预测
158 | logi_fit = logi_mod_len.fittedvalues.round(0)
159 | logi_pred = logi_mod_len.predict(X_test_df).round(0)
160 |
161 | # 正确数
162 | true_train = sp.sum(logi_fit == (y_train - 1))
163 | true_test = sp.sum(logi_pred == (y_test - 1))
164 |
165 | # 命中率
166 | result_train = true_train / len(y_train)
167 | result_test = true_test / len(y_test)
168 |
169 | # 打印结果
170 | print("训练集的命中率:", result_train)
171 | print("测试集的命中率:", result_test)
172 |
173 |
174 | # ### 实现:标准化
175 |
176 | # In[12]:
177 |
178 |
179 | # 准备标准化
180 | scaler = StandardScaler()
181 | scaler.fit(X_train)
182 | # 标准化
183 | X_train_scaled = scaler.transform(X_train)
184 | X_test_scaled = scaler.transform(X_test)
185 |
186 |
187 | # In[13]:
188 |
189 |
190 | np.std(X_train_scaled, axis=0)  # per-column std of the scaled training set; np.std replaces the deprecated scipy alias
191 |
192 |
193 | # In[14]:
194 |
195 |
196 | np.std(X_test_scaled, axis=0)  # per-column std of the scaled test set; np.std replaces the deprecated scipy alias
197 |
198 |
199 | # ### 实现:神经网络
200 |
201 | # In[15]:
202 |
203 |
204 | nnet = MLPClassifier(
205 | hidden_layer_sizes = (100,100),
206 | alpha = 0.07,
207 | max_iter = 10000,
208 | random_state = 0)
209 | nnet.fit(X_train_scaled, y_train)
210 |
211 | # 正确数
212 | print("训练集的命中率:", nnet.score(X_train_scaled, y_train))
213 | print("测试集的命中率:", nnet.score(X_test_scaled, y_test))
214 |
215 |
216 | # In[ ]:
217 |
218 |
219 |
220 |
221 |
--------------------------------------------------------------------------------
/_config.yml:
--------------------------------------------------------------------------------
1 | #######################################################################################
2 | # A default configuration that will be loaded for all jupyter books
3 | # Users are expected to override these values in their own `_config.yml` file.
4 | # This is also the "master list" of all allowed keys and values.
5 |
6 | #######################################################################################
7 | # Book settings
8 | title : Stats Book # The title of the book. Will be placed in the left navbar.
9 | author : The Jupyter Book community # The author of the book
10 | copyright : "2021" # Copyright year to be placed in the footer
11 | logo : "" # A path to the book logo
12 | # Patterns to skip when building the book. Can be glob-style (e.g. "*skip.ipynb")
13 | exclude_patterns : [_build, Thumbs.db, .DS_Store, "**.ipynb_checkpoints"]
14 | # Auto-exclude files not in the toc
15 | only_build_toc_files : false
16 |
17 | #######################################################################################
18 | # Execution settings
19 | execute:
20 | execute_notebooks : auto # Whether to execute notebooks at build time. Must be one of ("auto", "force", "cache", "off")
21 | cache : "" # A path to the jupyter cache that will be used to store execution artifacts. Defaults to `_build/.jupyter_cache/`
22 | exclude_patterns : [] # A list of patterns to *skip* in execution (e.g. a notebook that takes a really long time)
23 | timeout : 30 # The maximum time (in seconds) each notebook cell is allowed to run.
24 | run_in_temp : false # If `True`, then a temporary directory will be created and used as the command working directory (cwd),
25 | # otherwise the notebook's parent directory will be the cwd.
26 | allow_errors : false # If `False`, when a code cell raises an error the execution is stopped, otherwise all cells are always run.
27 | stderr_output : show # One of 'show', 'remove', 'remove-warn', 'warn', 'error', 'severe'
28 |
29 | #######################################################################################
30 | # Parse and render settings
31 | parse:
32 | myst_enable_extensions: # default extensions to enable in the myst parser. See https://myst-parser.readthedocs.io/en/latest/using/syntax-optional.html
33 | # - amsmath
34 | - colon_fence
35 | # - deflist
36 | - dollarmath
37 | # - html_admonition
38 | # - html_image
39 | - linkify
40 | # - replacements
41 | # - smartquotes
42 | - substitution
43 | - tasklist
44 | myst_url_schemes: [mailto, http, https] # URI schemes that will be recognised as external URLs in Markdown links
45 | myst_dmath_double_inline: true # Allow display math ($$) within an inline context
46 |
47 | #######################################################################################
48 | # HTML-specific settings
49 | html:
50 | favicon : "" # A path to a favicon image
51 | use_edit_page_button : false # Whether to add an "edit this page" button to pages. If `true`, repository information in repository: must be filled in
52 | use_repository_button : false # Whether to add a link to your repository button
53 | use_issues_button : false # Whether to add an "open an issue" button
54 | use_multitoc_numbering : true # Continuous numbering across parts/chapters
55 | extra_navbar : Powered by Jupyter Book # Will be displayed underneath the left navbar.
56 | extra_footer : "" # Will be displayed underneath the footer.
57 | google_analytics_id : "" # A GA id that can be used to track book views.
58 | home_page_in_navbar : true # Whether to include your home page in the left Navigation Bar
59 | baseurl : "" # The base URL where your book will be hosted. Used for creating image previews and social links. e.g.: https://mypage.com/mybook/
60 | comments:
61 | hypothesis : false
62 | utterances : false
63 |
64 | #######################################################################################
65 | # LaTeX-specific settings
66 | latex:
67 | latex_engine : xelatex # one of 'pdflatex', 'xelatex' (recommended for unicode), 'luatex', 'platex', 'uplatex'; xelatex is used because the book's content and file names are Chinese
68 | use_jupyterbook_latex : true # use sphinx-jupyterbook-latex for pdf builds as default
69 |
70 | #######################################################################################
71 | # Launch button settings
72 | launch_buttons:
73 | notebook_interface : classic # The interface interactive links will activate ["classic", "jupyterlab"]
74 | binderhub_url : https://mybinder.org # The URL of the BinderHub (e.g., https://mybinder.org)
75 | jupyterhub_url : "" # The URL of the JupyterHub (e.g., https://datahub.berkeley.edu)
76 | thebe : false # Add a thebe button to pages (requires the repository to run on Binder)
77 | colab_url : "" # The URL of Google Colab (https://colab.research.google.com)
78 |
79 | repository:
80 | url : https://github.com/socratesacademy/statsbook # The URL to your book's repository (taken from the remote named in deploy.sh; confirm)
81 | path_to_book : "" # A path to your book's folder, relative to the repository root.
82 | branch : main # Which branch of the repository should be used when creating links (deploy.sh pulls from and pushes to main)
83 |
84 | #######################################################################################
85 | # Advanced and power-user settings
86 | sphinx:
87 | extra_extensions : # A list of extra extensions to load by Sphinx (added to those already used by JB).
88 | local_extensions : # A list of local extensions to load by sphinx specified by "name: path" items
89 | config : # key-value pairs to directly over-ride the Sphinx configuration
90 |
--------------------------------------------------------------------------------
/_toc.yml:
--------------------------------------------------------------------------------
1 | format: jb-book
2 | root: README
3 |
4 | chapters:
5 |
6 | - file: 2-2-认识Jupyter-Notebook
7 | sections:
8 | - file: 2-3-Python编程基础
9 | - file: 2-4-认识numpy与pandas
10 |
11 | - file: 3-1-使用Python进行描述统计单变量
12 | sections:
13 | - file: 3-2-使用Python进行描述统计多变量
14 | - file: 3-3-基于matplotlib-seaborn的数据可视化
15 | - file: 3-4-用Python模拟抽样
16 | - file: 3-5-样本统计量的性质
17 | - file: 3-6-正态分布及其应用
18 | - file: 3-7-参数估计
19 | - file: 3-8-假设检验
20 | - file: 3-9-均值差的检验
21 | - file: 3-10-列联表检验
22 |
23 | - file: 5-1-一元回归
24 | sections:
25 | - file: 5-2-方差分析
26 | - file: 5-3-含有多个解释变量的模型
27 |
28 | - file: 6-1-各种概率分布
29 | sections:
30 | - file: 6-3-logistic回归
31 | - file: 6-4-广义线性模型的评估
32 | - file: 6-5-泊松回归
33 |
34 | - file: 7-3-Python中的Ridge回归与Lasso回归
35 | sections:
36 | - file: 7-4-线性模型与神经网络
37 |
--------------------------------------------------------------------------------
/deploy.sh:
--------------------------------------------------------------------------------
1 | # Build the book's HTML and publish it with ghp-import.
2 | # The commented-out commands below are manual git setup/sync steps kept for reference.
3 | # ssh-add ~/.ssh/id_rsa
4 | # git remote set-url origin git@github.com:socratesacademy/statsbook.git
5 | # git pull origin main
6 | # git add .
7 | # git commit -m 'this is a message'
8 | # git push origin main
9 | # open atom master branch
10 | # Stop at the first failing command so a failed build is never published.
11 | set -e
12 | jupyter-book build ../statsbook/
13 | # Publish your book's HTML manually to GitHub pages
14 | # publish the _build/html folder of the main branch to the gh-pages branch
15 | ghp-import -n -p -f _build/html
16 |
--------------------------------------------------------------------------------