├── Class1 ├── Example │ ├── Ex01 │ │ ├── C01_California Housing Price.ipynb │ │ └── dataset │ │ │ ├── house.csv │ │ │ └── house.txt │ ├── Ex02 │ │ ├── C02_MNIST Number Recognition.ipynb │ │ └── dataset │ │ │ └── 数据集说明.txt │ └── Ex03 │ │ └── C03_Boston Housing Price.ipynb ├── Figures │ ├── fig01_人工智能、机器学习和深度学习关系.png │ ├── fig02_不同学习之间区别.jpg │ ├── fig03_深度学习中的神经网络.jpg │ ├── fig04_数据结构化.jpg │ ├── fig05_深度学习优势.jpg │ ├── fig06_强化学习.jpg │ ├── fig07_回归问题与分类问题.jpg │ ├── fig08_常见的机器学习应用场景.jpg │ ├── fig09_机器学习的基本术语.jpg │ ├── fig10_加州房价数据集中的特征和标签.jpg │ ├── fig11_机器学习项目实战的5个环节.jpg │ ├── fig12_卷积神经网络实现手写数字识别.jpg │ ├── fig13_k折验证方法示意图.jpg │ └── fig14_机器学习实战流程.jpg └── README.md ├── Class2 ├── Example │ ├── C01_NumPy Arrays.ipynb │ ├── C02_Vertorization.ipynb │ └── C03_Boston Housing.ipynb ├── Figures │ ├── fig01_本章知识导图.jpg │ ├── fig02_函数是什么.jpg │ ├── fig03_激活函数.jpg │ ├── fig04_凸函数和非凸函数.jpg │ ├── fig05_张量.jpg │ ├── fig06_向量的点积运算法则.jpg │ ├── fig07_矩阵的点积规则.jpg │ ├── fig08_时间序列数据集的张量结构.jpg │ ├── fig09_图像数据集的张量结构.jpg │ ├── fig10_Python的广播.jpg │ ├── fig11_矩阵点积的运算规则.jpg │ ├── fig12_二维向量.jpg │ ├── fig13_二维向量加法.jpg │ ├── fig14_二维向量的点积.jpg │ ├── fig15_常见的机器学习模型.jpg │ ├── fig16_通过神经网络展开数据流形.png │ ├── fig17_概率的定义和计算公式.jpg │ └── fig18_正态分布形状.png └── README.md ├── Class3 ├── Example │ ├── Ex01 │ │ ├── C01 Linear Regression - Ads-n-Sales Single Var.ipynb │ │ ├── C02 Linear Regression - Ads-n-Sales Multi Vars.ipynb │ │ ├── C03 Sklearn - Ads and Sales.ipynb │ │ ├── C04 Contour Plot.ipynb │ │ ├── animation.gif │ │ └── dataset │ │ │ └── advertising.csv │ └── Ex02 │ │ ├── C01 Linear Regression - Boston Housing.ipynb │ │ └── dataset │ │ └── 数据集说明.txt ├── Figures │ ├── fig01_机器学习的实战架构.jpg │ ├── fig02_相关性的热力图.jpg │ ├── fig03_商品销售额和各种广告投放金额之间的散点图.png │ ├── fig04_数据归一化.jpg │ ├── fig05_常见归一化公式.jpg │ ├── fig06_微信广告-销售额散点图.jpg │ ├── fig07_均方误差函数的计算公式.jpg │ ├── fig08_均方误差函数的损失曲线.jpg │ ├── fig09_均方误差函(三维图).jpg │ ├── fig10_梯度下降找到损失最小时的权重.jpg │ ├── fig11_梯度下降过程.jpg │ ├── fig12_w梯度更新公式.jpg │ ├── fig13_学习速率对优化过程影响.jpg │ ├── fig14_损失曲线.jpg │ ├── fig15_轮廓图.jpg │ └── fig16_多元线性回归模型.jpg └── README.md ├── Class4 ├── Example │ ├── Ex01 │ │ ├── C01 Logistic Regression Single Class Heart - without scaler.ipynb │ │ ├── C01 Logistic Regression Single Class Heart.ipynb │ │ └── dataset │ │ │ └── heart.csv │ ├── Ex02 │ │ ├── C01 Logistic Regression Multi Classes - Iris Sepal.ipynb │ │ ├── C02 Logistic Regression Multi Classes - Iris Petal.ipynb │ │ └── dataset │ │ │ ├── Iris.csv │ │ │ └── 数据集说明.txt │ └── Ex03 │ │ ├── C01 Logistic Regression Single Class - Titanic.ipynb │ │ └── dataset │ │ ├── test.csv │ │ ├── train.csv │ │ └── 数据集说明.txt ├── Figures │ ├── fig01_逻辑函数.jpg │ ├── fig02_Sigmoid函数.jpg │ ├── fig03_逻辑回归模型示意.jpg │ ├── fig04_逻辑回归损失函数.jpg │ ├── fig05_逻辑回归损失函数曲线.jpg │ ├── fig06_逻辑回归梯度计算公式.jpg │ ├── fig07_散点图显示年龄-最大心率和标签之间的关系.jpg │ ├── fig08_训练集和测试集的损失曲线.jpg │ ├── fig09_多元分类示意.jpg │ ├── fig10_机器学习模型对数据集的拟合.jpg │ ├── fig11_寻找模型优化和泛化的平衡点.jpg │ ├── fig12_3种鸢尾花样本的分布.jpg │ ├── fig13_不同C值带来不同的分类边界.jpg │ └── fig14_不同C值来不同的分类准确率.jpg └── README.md ├── Class5 ├── Example │ ├── Ex01 │ │ ├── C01 ANN - Bank Customer.ipynb │ │ ├── C02 Using TensorBoard.ipynb │ │ └── dataset │ │ │ └── BankCustomer.csv │ └── Ex02 │ │ ├── C01 ANN - Titanic.ipynb │ │ └── dataset │ │ ├── test.csv │ │ ├── train.csv │ │ └── 数据集说明.txt ├── README.md └── figures │ ├── fig01_在大数据领域神经网络的效能显著地领先于其他算法.jpg │ ├── fig02_神经元.jpg │ ├── fig03_与逻辑判断.jpg │ ├── fig04_或逻辑判断.jpg │ ├── fig05_假设空间与拟合能力.jpg │ ├── fig06_同或数据集.jpg │ ├── fig07_非线性模型.jpg │ ├── fig08_加入网络隐层拟合同或数据集.jpg │ ├── 
fig09_银行客户数据的分布情况.jpg │ ├── fig10_神经网络结构及对应层生成的语句.jpg │ ├── fig10_训练集和验证集上的损失曲线和准确率曲线.png │ ├── fig11_数据真值与预测值对比图.png │ ├── fig12_预测值和真值对照表.png │ ├── fig13_混淆矩阵.jpg │ ├── fig14_精确率计算公式.jpg │ ├── fig15_召回率计算公式.jpg │ ├── fig16_F1分数计算公式.jpg │ ├── fig17_单层神经网络的混淆矩阵.jpg │ ├── fig18_标准化公式.jpg │ ├── fig19_特征缩放后的损失曲线和准确率曲线.png │ ├── fig20_特征缩放后的混淆矩.png │ ├── fig21_从逻辑回归到深度神经网络的演进.jpg │ ├── fig22_局部最低点和鞍点.jpg │ ├── fig23_动量SGD示意图.jpg │ ├── fig24_神经元的激活过程.jpg │ ├── fig25_Sigmoid函数图像.jpg │ ├── fig26_Tanh函数图像.jpg │ ├── fig27_ReLU函数图像.jpg │ ├── fig28_Leaky ReLU函数图像.jpg │ ├── fig29_eLU函数图像.jpg │ ├── fig30_Softmax多分类输出层的激活函数.jpg │ ├── fig31_验证集上损失的波动很大.jpg │ ├── fig32_过拟合现象仍然存在.jpg │ ├── fig33_Dropout示意图.jpg │ ├── fig34_添加Dropout层之后过拟合现象被大幅度地抑制.jpg │ └── fig35_新的混淆矩阵.png ├── Class6 ├── Example │ ├── Ex01 │ │ └── C06 CNN Dogs.ipynb │ └── Ex02 │ │ └── C06 CNN Flowers.ipynb ├── README.md └── figures │ ├── fig01_程序编译出来的卷积网络的结构信息.jpg │ ├── fig02_卷积网络的典型架构.jpg │ ├── fig03_全连接和局部连接的示意.jpg │ ├── fig04_字母X中的小模式.jpg │ ├── fig05_通过滑动窗口抽取局部特征.jpg │ ├── fig06_不同的过滤器抽取不同类型的特征.jpg │ ├── fig07_MNIST案例中第一个卷积层的输入特征图和输出特征图.jpg │ ├── fig08_MNIST案例中第二个卷积层的输入特征图和输出特征图.jpg │ ├── fig09_卷积运算示意——黑白图像.jpg │ ├── fig10_卷积运算示意——RGB图像.jpg │ ├── fig11_边界效益.jpg │ ├── fig12_填充操作.jpg │ ├── fig13_步幅为2的步进卷积.jpg │ ├── fig14_程序编译出的卷积网络结构.png │ ├── fig15_损失曲线和准确率曲线.png │ ├── fig16_第一次调优后损失曲线和准确率曲线.png │ ├── fig17_第二次调优后损失曲线和准确率曲线.png │ ├── fig18_对同一张图片进行数据增强.jpg │ ├── fig19_数据增强后损失曲线和准确率曲线.png │ ├── fig20_各种大型卷积网络性能比较.jpg │ ├── fig21_VGGNet架构.jpg │ └── fig22_Inception模块的基本架构.jpg ├── Class7 ├── Example │ ├── Ex01 │ │ └── C01 RNN - Comments.ipynb │ ├── Ex02 │ │ └── C02 CNN1D GRU - New Earth.ipynb │ └── Ex03 │ │ └── C03 RNN - Quora Queries.ipynb ├── README.md └── figures │ ├── fig01_普通网络的神经元.jpg │ ├── fig02_循环神经网络中的神经元.jpg │ ├── fig03_多个循环神经网络的神经元.jpg │ ├── fig04_时间点1读入一个特征.jpg │ ├── fig05_时间点2读入两个特征.jpg │ ├── fig06_遍历特征处理.jpg │ ├── fig07_One-hot编码.jpg │ ├── fig08_分词和词嵌入示意图.jpg │ ├── fig09_影片形成的词嵌入空间.jpg │ ├── fig10_训练集中的前五条数据.jpg │ ├── fig11_评论长度分布.jpg │ └── fig12_包含词嵌入的SimpleRNN网络结构.jpg ├── Class8 ├── Example │ ├── Ex01 │ │ ├── C01 Tools - Heart.ipynb │ │ └── dataset │ │ │ └── heart.csv │ └── Ex02 │ │ ├── C02 Tools - Bank.ipynb │ │ └── dataset │ │ └── BankCustomer.csv ├── README.md └── figures │ ├── fig01_根据KNN算法来确定支持者.jpg │ ├── fig02_欧氏距离和曼哈顿距离.jpg │ ├── fig03_KNN算法示意.jpg │ ├── fig04_不同K值时模型所取得的测试集准确率和F1分数.jpg │ ├── fig05_SVM超平面的确定.jpg │ ├── fig06_根据相亲数据集所生成的决策树.jpg │ ├── fig07_一个过拟合的决策树分类结果.jpg │ ├── fig08_Sklearn的算法官方小抄.jpg │ ├── fig09_各种算法的准确率.png │ ├── fig10_各种算法的混淆矩阵.png │ └── fig11_参数优化后随机森林算法的混淆矩阵.png ├── Class9 ├── Example │ ├── Ex01 │ │ ├── C01 Ensemble - Bank Customer.ipynb │ │ ├── C03 Stacking - Bank Customer.ipynb │ │ └── dataset │ │ │ └── BankCustomer.csv │ ├── Ex02 │ │ └── C02 Bagging Regressors.ipynb │ └── Ex03 │ │ ├── C04 Ensemble - Heart.ipynb │ │ └── dataset │ │ └── heart.csv ├── README.md └── figures │ ├── fig01_偏差和方差都低是我们对模型的追求.jpg │ ├── fig02_损失-偏差-方差与模型复杂度之间的关系.jpg │ ├── fig03_损失-偏差-方差与模型复杂度之间的关系.jpg │ ├── fig04_函数复杂度的提升拟合能力的增强会带来高方差.jpg │ ├── fig05_有放回的随机抽取数据样本.jpg │ ├── fig06_Bagging的过程.jpg │ ├── fig07_4种算法的比较.png │ └── fig08_Boosting示意图.jpg └── README.md /Class1/Example/Ex02/C02_MNIST Number Recognition.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "### 1.5.2 第2个环节:数据的收集和预处理" 8 | ] 9 | }, 10 | { 11 | "cell_type": "code", 12 | "execution_count": 1, 13 | "metadata": {}, 14 | "outputs": 
[], 15 | "source": [ 16 | "import numpy as np\n", 17 | "import pandas as pd\n", 18 | "from keras.datasets import mnist\n", 19 | "\n", 20 | "(X_train_image, y_train_label), (X_test_image, y_test_label) = mnist.load_data()" 21 | ] 22 | }, 23 | { 24 | "cell_type": "code", 25 | "execution_count": 4, 26 | "metadata": {}, 27 | "outputs": [ 28 | { 29 | "name": "stdout", 30 | "output_type": "stream", 31 | "text": [ 32 | "特征集张量形状: (60000, 28, 28)\n", 33 | "第一个数据样本:\n", 34 | " [[ 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n", 35 | " 0 0 0 0 0 0 0 0 0 0]\n", 36 | " [ 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n", 37 | " 0 0 0 0 0 0 0 0 0 0]\n", 38 | " [ 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n", 39 | " 0 0 0 0 0 0 0 0 0 0]\n", 40 | " [ 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n", 41 | " 0 0 0 0 0 0 0 0 0 0]\n", 42 | " [ 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n", 43 | " 0 0 0 0 0 0 0 0 0 0]\n", 44 | " [ 0 0 0 0 0 0 0 0 0 0 0 0 3 18 18 18 126 136\n", 45 | " 175 26 166 255 247 127 0 0 0 0]\n", 46 | " [ 0 0 0 0 0 0 0 0 30 36 94 154 170 253 253 253 253 253\n", 47 | " 225 172 253 242 195 64 0 0 0 0]\n", 48 | " [ 0 0 0 0 0 0 0 49 238 253 253 253 253 253 253 253 253 251\n", 49 | " 93 82 82 56 39 0 0 0 0 0]\n", 50 | " [ 0 0 0 0 0 0 0 18 219 253 253 253 253 253 198 182 247 241\n", 51 | " 0 0 0 0 0 0 0 0 0 0]\n", 52 | " [ 0 0 0 0 0 0 0 0 80 156 107 253 253 205 11 0 43 154\n", 53 | " 0 0 0 0 0 0 0 0 0 0]\n", 54 | " [ 0 0 0 0 0 0 0 0 0 14 1 154 253 90 0 0 0 0\n", 55 | " 0 0 0 0 0 0 0 0 0 0]\n", 56 | " [ 0 0 0 0 0 0 0 0 0 0 0 139 253 190 2 0 0 0\n", 57 | " 0 0 0 0 0 0 0 0 0 0]\n", 58 | " [ 0 0 0 0 0 0 0 0 0 0 0 11 190 253 70 0 0 0\n", 59 | " 0 0 0 0 0 0 0 0 0 0]\n", 60 | " [ 0 0 0 0 0 0 0 0 0 0 0 0 35 241 225 160 108 1\n", 61 | " 0 0 0 0 0 0 0 0 0 0]\n", 62 | " [ 0 0 0 0 0 0 0 0 0 0 0 0 0 81 240 253 253 119\n", 63 | " 25 0 0 0 0 0 0 0 0 0]\n", 64 | " [ 0 0 0 0 0 0 0 0 0 0 0 0 0 0 45 186 253 253\n", 65 | " 150 27 0 0 0 0 0 0 0 0]\n", 66 | " [ 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 16 93 252\n", 67 | " 253 187 0 0 0 0 0 0 0 0]\n", 68 | " [ 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 249\n", 69 | " 253 249 64 0 0 0 0 0 0 0]\n", 70 | " [ 0 0 0 0 0 0 0 0 0 0 0 0 0 0 46 130 183 253\n", 71 | " 253 207 2 0 0 0 0 0 0 0]\n", 72 | " [ 0 0 0 0 0 0 0 0 0 0 0 0 39 148 229 253 253 253\n", 73 | " 250 182 0 0 0 0 0 0 0 0]\n", 74 | " [ 0 0 0 0 0 0 0 0 0 0 24 114 221 253 253 253 253 201\n", 75 | " 78 0 0 0 0 0 0 0 0 0]\n", 76 | " [ 0 0 0 0 0 0 0 0 23 66 213 253 253 253 253 198 81 2\n", 77 | " 0 0 0 0 0 0 0 0 0 0]\n", 78 | " [ 0 0 0 0 0 0 18 171 219 253 253 253 253 195 80 9 0 0\n", 79 | " 0 0 0 0 0 0 0 0 0 0]\n", 80 | " [ 0 0 0 0 55 172 226 253 253 253 253 244 133 11 0 0 0 0\n", 81 | " 0 0 0 0 0 0 0 0 0 0]\n", 82 | " [ 0 0 0 0 136 253 253 253 212 135 132 16 0 0 0 0 0 0\n", 83 | " 0 0 0 0 0 0 0 0 0 0]\n", 84 | " [ 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n", 85 | " 0 0 0 0 0 0 0 0 0 0]\n", 86 | " [ 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n", 87 | " 0 0 0 0 0 0 0 0 0 0]\n", 88 | " [ 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n", 89 | " 0 0 0 0 0 0 0 0 0 0]]\n" 90 | ] 91 | } 92 | ], 93 | "source": [ 94 | "print(\"特征集张量形状:\", X_train_image.shape) # 用shape方法显示张量的形状\n", 95 | "print(\"第一个数据样本:\\n\", X_train_image[0])" 96 | ] 97 | }, 98 | { 99 | "cell_type": "code", 100 | "execution_count": 5, 101 | "metadata": {}, 102 | "outputs": [ 103 | { 104 | "name": "stdout", 105 | "output_type": "stream", 106 | "text": [ 107 | "第一个数据样本的标签: 5\n" 108 | ] 109 | } 110 | ], 111 | "source": [ 112 | "print(\"第一个数据样本的标签:\", y_train_label[0])" 113 | ] 114 | }, 115 | { 116 | "cell_type": "code", 117 | "execution_count": 
7, 118 | "metadata": {}, 119 | "outputs": [ 120 | { 121 | "name": "stdout", 122 | "output_type": "stream", 123 | "text": [ 124 | "数据集张量形状: (60000, 28, 28, 1)\n", 125 | "第一个数据标签: [0. 0. 0. 0. 0. 1. 0. 0. 0. 0.]\n" 126 | ] 127 | } 128 | ], 129 | "source": [ 130 | "# 数据转换工作\n", 131 | "from keras.utils import to_categorical\n", 132 | "X_train = X_train_image.reshape(60000, 28, 28, 1) # 给图像特征增加一个维度\n", 133 | "X_test = X_test_image.reshape(10000, 28, 28, 1)\n", 134 | "\n", 135 | "y_train = to_categorical(y_train_label, 10) # 标签转换为one-hot编码\n", 136 | "y_test = to_categorical(y_test_label, 10) # 标签转换为one-hot编码\n", 137 | "\n", 138 | "print(\"数据集张量形状:\", X_train.shape)\n", 139 | "print(\"第一个数据标签:\", y_train[0])" 140 | ] 141 | }, 142 | { 143 | "cell_type": "markdown", 144 | "metadata": {}, 145 | "source": [ 146 | "\n", 147 | "### 1.5.3 第3个环节:选择机器学习模型" 148 | ] 149 | }, 150 | { 151 | "cell_type": "code", 152 | "execution_count": 11, 153 | "metadata": {}, 154 | "outputs": [], 155 | "source": [ 156 | "# MNIST数据集手写数字识别\n", 157 | "from keras import models\n", 158 | "from keras.layers import Dense, Dropout, Flatten, Conv2D, MaxPooling2D\n", 159 | "model = models.Sequential() # 用序贯方式建模\n", 160 | "model.add(Conv2D(32, (3,3), activation='relu', # 添加Conv2D层\n", 161 | "        input_shape=(28, 28, 1))) # 指定输入数据样本张量的类型\n", 162 | "model.add(MaxPooling2D(pool_size=(2,2))) # 添加MaxPooling2D层\n", 163 | "model.add(Conv2D(64, (3,3), activation='relu')) # 添加Conv2D层\n", 164 | "model.add(MaxPooling2D(pool_size=(2, 2))) # 添加MaxPooling2D层\n", 165 | "model.add(Dropout(0.25)) # 添加Dropout层\n", 166 | "model.add(Flatten()) # 展平\n", 167 | "model.add(Dense(128, activation='relu')) # 添加全连接层\n", 168 | "model.add(Dropout(0.5))\n", 169 | "model.add(Dense(10, activation='softmax')) # softmax分类激活,输出10维分类码\n", 170 | "# 编译模型\n", 171 | "model.compile(optimizer='rmsprop', # 指定优化器\n", 172 | "        loss='categorical_crossentropy', # 指定损失函数\n", 173 | "        metrics=['accuracy']) # 指定验证过程中的评估指标" 174 | ] 175 | }, 176 | { 177 | "cell_type": "code", 178 | "execution_count": 12, 179 | "metadata": {}, 180 | "outputs": [ 181 | { 182 | "name": "stdout", 183 | "output_type": "stream", 184 | "text": [ 185 | "Epoch 1/5\n", 186 | "329/329 [==============================] - 12s 37ms/step - loss: 1.6845 - accuracy: 0.8111 - val_loss: 0.1466 - val_accuracy: 0.9574\n", 187 | "Epoch 2/5\n", 188 | "329/329 [==============================] - 13s 38ms/step - loss: 0.2017 - accuracy: 0.9452 - val_loss: 0.0779 - val_accuracy: 0.9787\n", 189 | "Epoch 3/5\n", 190 | "329/329 [==============================] - 12s 37ms/step - loss: 0.1395 - accuracy: 0.9617 - val_loss: 0.0797 - val_accuracy: 0.9798\n", 191 | "Epoch 4/5\n", 192 | "329/329 [==============================] - 12s 36ms/step - loss: 0.1189 - accuracy: 0.9677 - val_loss: 0.0827 - val_accuracy: 0.9818\n", 193 | "Epoch 5/5\n", 194 | "329/329 [==============================] - 12s 36ms/step - loss: 0.1059 - accuracy: 0.9716 - val_loss: 0.0698 - val_accuracy: 0.9837\n" 195 | ] 196 | }, 197 | { 198 | "data": { 199 | "text/plain": [ 200 | "" 201 | ] 202 | }, 203 | "execution_count": 12, 204 | "metadata": {}, 205 | "output_type": "execute_result" 206 | } 207 | ], 208 | "source": [ 209 | "# fit拟合\n", 210 | "model.fit(X_train, y_train, # 指定训练特征集和训练标签集\n", 211 | "     validation_split=0.3, # 部分训练集数据拆分成验证集\n", 212 | "     epochs=5, # 训练轮次为5轮\n", 213 | "     batch_size=128)" 214 | ] 215 | }, 216 | { 217 | "cell_type": "markdown", 218 | "metadata": {}, 219 | "source": [ 220 | "### 1.5.5 第5个环节:超参数调试和性能优化" 221 | ] 222 | }, 223 | { 224 | "cell_type": 
"code", 225 | "execution_count": 13, 226 | "metadata": {}, 227 | "outputs": [ 228 | { 229 | "name": "stdout", 230 | "output_type": "stream", 231 | "text": [ 232 | "313/313 [==============================] - 1s 2ms/step - loss: 0.0635 - accuracy: 0.9846\n", 233 | "测试集预测准确度: 0.9846000075340271\n" 234 | ] 235 | } 236 | ], 237 | "source": [ 238 | "# 在测试集上评估模型\n", 239 | "score = model.evaluate(X_test, y_test) \n", 240 | "print(\"测试集预测准确度:\", score[1])" 241 | ] 242 | }, 243 | { 244 | "cell_type": "code", 245 | "execution_count": 14, 246 | "metadata": {}, 247 | "outputs": [ 248 | { 249 | "name": "stdout", 250 | "output_type": "stream", 251 | "text": [ 252 | "[9.4449442e-15 1.8798775e-11 4.0123642e-09 8.0469409e-09 1.7034626e-12\n", 253 | " 4.3635156e-15 8.2535572e-17 1.0000000e+00 3.0502645e-12 1.1226295e-10] 转换一下格式得到: 7\n" 254 | ] 255 | }, 256 | { 257 | "data": { 258 | "text/plain": [ 259 | "" 260 | ] 261 | }, 262 | "execution_count": 14, 263 | "metadata": {}, 264 | "output_type": "execute_result" 265 | }, 266 | { 267 | "data": { 268 | "image/png": "iVBORw0KGgoAAAANSUhEUgAAAPsAAAD4CAYAAAAq5pAIAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjQuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8rg+JYAAAACXBIWXMAAAsTAAALEwEAmpwYAAANL0lEQVR4nO3dXahd9ZnH8d9vYqPBFs0xRw1p9MQieHRwknKIQaU4lAm+XMRcODRKyaBMeqHSYi98mYtGQQzDtDUXQyGdxKTasRTamAgyNoSKKWjwKGc0meAcjWea1JjsEDBWhGryzMVZmTnGs9fZ7rX2S/J8P3DYe69nvTxs8svae//X3n9HhACc/f6q1w0A6A7CDiRB2IEkCDuQBGEHkjinmwebN29eDA0NdfOQQCoTExM6evSop6tVCrvtmyWtlzRL0r9FxLqy9YeGhjQ6OlrlkABKjIyMNK21/TLe9ixJ/yrpFklXS1pl++p29wegs6q8Z18q6Z2I2B8Rf5H0K0kr6mkLQN2qhH2BpANTHh8sln2O7TW2R22PNhqNCocDUEWVsE/3IcAXrr2NiA0RMRIRI4ODgxUOB6CKKmE/KGnhlMdfl/R+tXYAdEqVsL8m6Urbi2zPlvQdSdvraQtA3doeeouIz2zfJ+lFTQ69bYqIvbV1BqBWlcbZI+IFSS/U1AuADuJyWSAJwg4kQdiBJAg7kARhB5Ig7EAShB1IgrADSRB2IAnCDiRB2IEkCDuQBGEHkiDsQBKEHUiCsANJEHYgCcIOJEHYgSQIO5AEYQeSIOxAEoQdSIKwA0kQdiAJwg4kQdiBJAg7kARhB5Ig7EASlaZstj0h6SNJJyR9FhEjdTQFoH6Vwl7424g4WsN+AHQQL+OBJKqGPST9zvbrttdMt4LtNbZHbY82Go2KhwPQrqphvyEivinpFkn32v7W6StExIaIGImIkcHBwYqHA9CuSmGPiPeL2yOStkpaWkdTAOrXdthtn2/7a6fuS1ouaU9djQGoV5VP4y+RtNX2qf38e0T8Ry1dAahd22GPiP2S/qbGXgB0EENvQBKEHUiCsANJEHYgCcIOJFHHF2FSePXVV5vW1q9fX7rtggULSutz5swpra9evbq0PjAw0FYNuXBmB5Ig7EAShB1IgrADSRB2IAnCDiRB2IEkGGdvUdlY9/j4eEeP/fjjj5fWL7jggqa1ZcuW1d3OGWNoaKhp7eGHHy7d9rLLLqu5m97jzA4kQdiBJAg7kARhB5Ig7EAShB1IgrADSTDO3qLnnnuuaW1sbKx022uuuaa0vnfv3tL67t27S+vbtm1rWnvxxRdLt120aFFp/b333iutV3HOOeX//ObPn19aP3DgQNvHLhuDl6QHH3yw7X33K87sQBKEHUiCsANJEHYgCcIOJEHYgSQIO5AE4+wtGh4ebqvWimuvvba0vmrVqtL6unXrmtYmJiZKt51pnH3//v2l9Spmz55dWp9pnH2m3huNRtPaVVddVbrt2WjGM7vtTbaP2N4zZdmA7R22x4vbuZ1tE0BVrbyM3yzp5tOWPSRpZ0RcKWln8RhAH5sx7BHxsqRjpy1eIWlLcX+LpNvrbQtA3dr9gO6SiDgkScXtxc1WtL3G9qjt0bL3UAA6q+OfxkfEhogYiYiRwcHBTh8OQBPthv2w7fmSVNweqa8lAJ3Qbti3Szr128qrJTX/jiWAvjDjOLvtZyXdJGme7YOSfiRpnaRf275H0h8l3dHJJlHuvPPOa1qrOp5c9RqCKmb6Hv/Ro0dL69ddd13T2vLly9vq6Uw2Y9gjotkVHd+uuRcAHcTlskAShB1IgrADSRB2IAnCDiTBV1zRMx9//HFpfeXKlaX1kydPltaffPLJprU5c+aUbns24swOJEHYgSQIO5AEYQeSIOxAEoQdSIKwA0kwzo6e2bx5c2n9gw8+KK1fdNFFpfXLL7/8y7Z0VuPMDiRB2IEkCDuQBGEHkiDsQBKEHUiCsANJMM6Ojnr33Xeb1h544IFK+37llVdK65deemml/Z9tOLMDSRB2IAnCDiRB2IEkCDuQBGEHkiDsQBKMs6Ojnn/++aa1Tz/9tHTbO+4onwn8iiuuaKunrGY8s9veZPuI7T1Tlq21/SfbY8XfrZ1tE0BVrbyM3yzp5mmW/zQiFhd/L9TbFoC6zRj2iHhZ0rEu9AKgg6p8QHef7TeLl/lzm61ke43tUdujjUajwuEAVNFu2H8m6RuSFks6JOnHzVaMiA0RMRIRI4ODg20eDkBVbYU9Ig5HxImIOCnp55KW1tsWgLq1FXbb86c8XClpT7N1AfSHGcfZbT8r6SZJ82wflPQjSTfZXiwpJE1I+l7nWkQ/m2msfOvWrU1r5557bum2TzzxRGl91qxZpXV83oxhj4hV0yze2IFeAHQQl8sCSRB2IAnCDiRB2IEkCDuQBF9xRSUbN5YPzOzatatp7c477yzdlq+w1oszO5AEYQeSIOxAEoQdSIKwA0kQdiAJwg4kwTg
7So2NjZXW77///tL6hRde2LT22GOPtdER2sWZHUiCsANJEHYgCcIOJEHYgSQIO5AEYQeSYJw9uU8++aS0vmrVdD8u/P9OnDhRWr/rrrua1vi+endxZgeSIOxAEoQdSIKwA0kQdiAJwg4kQdiBJBhnP8udPHmytH7bbbeV1t9+++3S+vDwcGn90UcfLa2je2Y8s9teaPv3tvfZ3mv7+8XyAds7bI8Xt3M73y6AdrXyMv4zST+MiGFJyyTda/tqSQ9J2hkRV0raWTwG0KdmDHtEHIqIN4r7H0naJ2mBpBWSthSrbZF0e4d6BFCDL/UBne0hSUsk7ZZ0SUQckib/Q5B0cZNt1tgetT3aaDQqtgugXS2H3fZXJf1G0g8i4nir20XEhogYiYiRwcHBdnoEUIOWwm77K5oM+i8j4rfF4sO25xf1+ZKOdKZFAHWYcejNtiVtlLQvIn4ypbRd0mpJ64rbbR3pEJUcO3astP7SSy9V2v/TTz9dWh8YGKi0f9SnlXH2GyR9V9JbtseKZY9oMuS/tn2PpD9KuqMjHQKoxYxhj4g/SHKT8rfrbQdAp3C5LJAEYQeSIOxAEoQdSIKwA0nwFdezwIcffti0tmzZskr7fuaZZ0rrS5YsqbR/dA9ndiAJwg4kQdiBJAg7kARhB5Ig7EAShB1IgnH2s8BTTz3VtLZ///5K+77xxhtL65M/d4AzAWd2IAnCDiRB2IEkCDuQBGEHkiDsQBKEHUiCcfYzwPj4eGl97dq13WkEZzTO7EAShB1IgrADSRB2IAnCDiRB2IEkCDuQRCvzsy+U9AtJl0o6KWlDRKy3vVbSP0pqFKs+EhEvdKrRzHbt2lVaP378eNv7Hh4eLq3PmTOn7X2jv7RyUc1nkn4YEW/Y/pqk123vKGo/jYh/6Vx7AOrSyvzshyQdKu5/ZHufpAWdbgxAvb7Ue3bbQ5KWSNpdLLrP9pu2N9me22SbNbZHbY82Go3pVgHQBS2H3fZXJf1G0g8i4rikn0n6hqTFmjzz/3i67SJiQ0SMRMTI4OBg9Y4BtKWlsNv+iiaD/suI+K0kRcThiDgREScl/VzS0s61CaCqGcPuyZ8P3ShpX0T8ZMry+VNWWylpT/3tAahLK5/G3yDpu5Lesj1WLHtE0irbiyWFpAlJ3+tAf6jo+uuvL63v2LGjtM7Q29mjlU/j/yBpuh8HZ0wdOINwBR2QBGEHkiDsQBKEHUiCsANJEHYgCX5K+gxw9913V6oDEmd2IA3CDiRB2IEkCDuQBGEHkiDsQBKEHUjCEdG9g9kNSf8zZdE8SUe71sCX06+99WtfEr21q87eLo+IaX//rath/8LB7dGIGOlZAyX6tbd+7Uuit3Z1qzdexgNJEHYgiV6HfUOPj1+mX3vr174kemtXV3rr6Xt2AN3T6zM7gC4h7EASPQm77Zttv237HdsP9aKHZmxP2H7L9pjt0R73ssn2Edt7piwbsL3D9nhxO+0cez3qba3tPxXP3ZjtW3vU20Lbv7e9z/Ze298vlvf0uSvpqyvPW9ffs9ueJem/Jf2dpIOSXpO0KiL+q6uNNGF7QtJIRPT8Agzb35L0Z0m/iIi/Lpb9s6RjEbGu+I9ybkQ82Ce9rZX0515P413MVjR/6jTjkm6X9A/q4XNX0tffqwvPWy/O7EslvRMR+yPiL5J+JWlFD/roexHxsqRjpy1eIWlLcX+LJv+xdF2T3vpCRByKiDeK+x9JOjXNeE+fu5K+uqIXYV8g6cCUxwfVX/O9h6Tf2X7d9ppeNzONSyLikDT5j0fSxT3u53QzTuPdTadNM943z107059X1YuwTzeVVD+N/90QEd+UdIuke4uXq2hNS9N4d8s004z3hXanP6+qF2E/KGnhlMdfl/R+D/qYVkS8X9wekbRV/TcV9eFTM+gWt0d63M//6adpvKebZlx98Nz1cvrzXoT9NUlX2l5ke7ak70ja3oM+vsD2+cUHJ7J9vqTl6r+pqLdLWl3cXy1pWw97+Zx+mca72TTj6vFz1/PpzyOi63+SbtXkJ/LvSvqnXvTQpK8rJP1n8be3171JelaTL+s+1eQronskXSRpp6Tx4nagj3p7WtJbkt7UZLDm96i3GzX51vBNSWPF3629fu5K+urK88blskASXEEHJEHYgSQIO5AEYQeSIOxAEoQdSIKwA0n8Lx5q4VTxgWLnAAAAAElFTkSuQmCC", 269 | "text/plain": [ 270 | "
" 271 | ] 272 | }, 273 | "metadata": { 274 | "needs_background": "light" 275 | }, 276 | "output_type": "display_data" 277 | } 278 | ], 279 | "source": [ 280 | "pred = model.predict(X_test[0].reshape(1, 28, 28, 1))\n", 281 | "print(pred[0], \"转换一下格式得到:\", pred.argmax())\n", 282 | "import matplotlib.pyplot as plt\n", 283 | "plt.imshow(X_test[0].reshape(28, 28), cmap='Greys')" 284 | ] 285 | }, 286 | { 287 | "cell_type": "code", 288 | "execution_count": null, 289 | "metadata": {}, 290 | "outputs": [], 291 | "source": [] 292 | } 293 | ], 294 | "metadata": { 295 | "interpreter": { 296 | "hash": "694cdcaedaf049a0984f27e4a849c1af591c6b1d7a3cf6d6f220830adff0acba" 297 | }, 298 | "kernelspec": { 299 | "display_name": "Python 3.8.8 ('Vuean_ML')", 300 | "language": "python", 301 | "name": "python3" 302 | }, 303 | "language_info": { 304 | "codemirror_mode": { 305 | "name": "ipython", 306 | "version": 3 307 | }, 308 | "file_extension": ".py", 309 | "mimetype": "text/x-python", 310 | "name": "python", 311 | "nbconvert_exporter": "python", 312 | "pygments_lexer": "ipython3", 313 | "version": "3.8.8" 314 | }, 315 | "orig_nbformat": 4 316 | }, 317 | "nbformat": 4, 318 | "nbformat_minor": 2 319 | } 320 | -------------------------------------------------------------------------------- /Class1/Example/Ex02/dataset/数据集说明.txt: -------------------------------------------------------------------------------- 1 | 数据集存在于keras及很多其他框架内部,可以通过下述语句导入 2 | from keras.datasets import mnist #从Keras中导入mnist数据集 -------------------------------------------------------------------------------- /Class1/Example/Ex03/C03_Boston Housing Price.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 2, 6 | "metadata": {}, 7 | "outputs": [ 8 | { 9 | "name": "stdout", 10 | "output_type": "stream", 11 | "text": [ 12 | "Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/boston_housing.npz\n", 13 | "57344/57026 [==============================] - 0s 3us/step\n" 14 | ] 15 | } 16 | ], 17 | "source": [ 18 | "from keras.datasets import boston_housing\n", 19 | "import numpy as np\n", 20 | "import pandas as pd\n", 21 | "\n", 22 | "(X_train, y_train), (X_test, y_test) = boston_housing.load_data()" 23 | ] 24 | }, 25 | { 26 | "cell_type": "code", 27 | "execution_count": 4, 28 | "metadata": {}, 29 | "outputs": [ 30 | { 31 | "name": "stdout", 32 | "output_type": "stream", 33 | "text": [ 34 | "数据集张量形状: (404, 13)\n", 35 | "第一个数据样本:\n", 36 | " [ 1.23247 0. 8.14 0. 0.538 6.142 91.7\n", 37 | " 3.9769 4. 307. 21. 
396.9 18.72 ]\n" 38 | ] 39 | } 40 | ], 41 | "source": [ 42 | "print(\"数据集张量形状:\", X_train.shape)\n", 43 | "print(\"第一个数据样本:\\n\", X_train[0])" 44 | ] 45 | }, 46 | { 47 | "cell_type": "code", 48 | "execution_count": 5, 49 | "metadata": {}, 50 | "outputs": [ 51 | { 52 | "name": "stdout", 53 | "output_type": "stream", 54 | "text": [ 55 | "第一个数据样本的标签:\n", 56 | " 15.2\n" 57 | ] 58 | } 59 | ], 60 | "source": [ 61 | "print(\"第一个数据样本的标签:\\n\", y_train[0])" 62 | ] 63 | }, 64 | { 65 | "cell_type": "code", 66 | "execution_count": 6, 67 | "metadata": {}, 68 | "outputs": [ 69 | { 70 | "data": { 71 | "text/plain": [ 72 | "LinearRegression()" 73 | ] 74 | }, 75 | "execution_count": 6, 76 | "metadata": {}, 77 | "output_type": "execute_result" 78 | } 79 | ], 80 | "source": [ 81 | "from sklearn.linear_model import LinearRegression\n", 82 | "model = LinearRegression()\n", 83 | "model.fit(X_train, y_train)" 84 | ] 85 | }, 86 | { 87 | "cell_type": "code", 88 | "execution_count": 8, 89 | "metadata": {}, 90 | "outputs": [ 91 | { 92 | "name": "stdout", 93 | "output_type": "stream", 94 | "text": [ 95 | "房价的真值(测试集) 7.2\n", 96 | "预测的房价(测试集) 9.692672389355678\n" 97 | ] 98 | } 99 | ], 100 | "source": [ 101 | "y_pred = model.predict(X_test)\n", 102 | "print(\"房价的真值(测试集)\", y_test[0])\n", 103 | "print(\"预测的房价(测试集)\", y_pred[0])" 104 | ] 105 | }, 106 | { 107 | "cell_type": "code", 108 | "execution_count": 9, 109 | "metadata": {}, 110 | "outputs": [ 111 | { 112 | "name": "stdout", 113 | "output_type": "stream", 114 | "text": [ 115 | "给预测评分: 0.7213535934621549\n" 116 | ] 117 | } 118 | ], 119 | "source": [ 120 | "print(\"给预测评分:\", model.score(X_test, y_test)) #评估预测结果" 121 | ] 122 | } 123 | ], 124 | "metadata": { 125 | "interpreter": { 126 | "hash": "694cdcaedaf049a0984f27e4a849c1af591c6b1d7a3cf6d6f220830adff0acba" 127 | }, 128 | "kernelspec": { 129 | "display_name": "Python 3.8.8 ('Vuean_ML')", 130 | "language": "python", 131 | "name": "python3" 132 | }, 133 | "language_info": { 134 | "codemirror_mode": { 135 | "name": "ipython", 136 | "version": 3 137 | }, 138 | "file_extension": ".py", 139 | "mimetype": "text/x-python", 140 | "name": "python", 141 | "nbconvert_exporter": "python", 142 | "pygments_lexer": "ipython3", 143 | "version": "3.8.8" 144 | }, 145 | "orig_nbformat": 4 146 | }, 147 | "nbformat": 4, 148 | "nbformat_minor": 2 149 | } 150 | -------------------------------------------------------------------------------- /Class1/Figures/fig01_人工智能、机器学习和深度学习关系.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Vuean/Zero-Basic-Machine-Learning/c5a44fa0fd964c11aa87c17259d1c1f5f6581946/Class1/Figures/fig01_人工智能、机器学习和深度学习关系.png -------------------------------------------------------------------------------- /Class1/Figures/fig02_不同学习之间区别.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Vuean/Zero-Basic-Machine-Learning/c5a44fa0fd964c11aa87c17259d1c1f5f6581946/Class1/Figures/fig02_不同学习之间区别.jpg -------------------------------------------------------------------------------- /Class1/Figures/fig03_深度学习中的神经网络.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Vuean/Zero-Basic-Machine-Learning/c5a44fa0fd964c11aa87c17259d1c1f5f6581946/Class1/Figures/fig03_深度学习中的神经网络.jpg -------------------------------------------------------------------------------- /Class1/Figures/fig04_数据结构化.jpg: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/Vuean/Zero-Basic-Machine-Learning/c5a44fa0fd964c11aa87c17259d1c1f5f6581946/Class1/Figures/fig04_数据结构化.jpg -------------------------------------------------------------------------------- /Class1/Figures/fig05_深度学习优势.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Vuean/Zero-Basic-Machine-Learning/c5a44fa0fd964c11aa87c17259d1c1f5f6581946/Class1/Figures/fig05_深度学习优势.jpg -------------------------------------------------------------------------------- /Class1/Figures/fig06_强化学习.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Vuean/Zero-Basic-Machine-Learning/c5a44fa0fd964c11aa87c17259d1c1f5f6581946/Class1/Figures/fig06_强化学习.jpg -------------------------------------------------------------------------------- /Class1/Figures/fig07_回归问题与分类问题.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Vuean/Zero-Basic-Machine-Learning/c5a44fa0fd964c11aa87c17259d1c1f5f6581946/Class1/Figures/fig07_回归问题与分类问题.jpg -------------------------------------------------------------------------------- /Class1/Figures/fig08_常见的机器学习应用场景.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Vuean/Zero-Basic-Machine-Learning/c5a44fa0fd964c11aa87c17259d1c1f5f6581946/Class1/Figures/fig08_常见的机器学习应用场景.jpg -------------------------------------------------------------------------------- /Class1/Figures/fig09_机器学习的基本术语.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Vuean/Zero-Basic-Machine-Learning/c5a44fa0fd964c11aa87c17259d1c1f5f6581946/Class1/Figures/fig09_机器学习的基本术语.jpg -------------------------------------------------------------------------------- /Class1/Figures/fig10_加州房价数据集中的特征和标签.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Vuean/Zero-Basic-Machine-Learning/c5a44fa0fd964c11aa87c17259d1c1f5f6581946/Class1/Figures/fig10_加州房价数据集中的特征和标签.jpg -------------------------------------------------------------------------------- /Class1/Figures/fig11_机器学习项目实战的5个环节.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Vuean/Zero-Basic-Machine-Learning/c5a44fa0fd964c11aa87c17259d1c1f5f6581946/Class1/Figures/fig11_机器学习项目实战的5个环节.jpg -------------------------------------------------------------------------------- /Class1/Figures/fig12_卷积神经网络实现手写数字识别.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Vuean/Zero-Basic-Machine-Learning/c5a44fa0fd964c11aa87c17259d1c1f5f6581946/Class1/Figures/fig12_卷积神经网络实现手写数字识别.jpg -------------------------------------------------------------------------------- /Class1/Figures/fig13_k折验证方法示意图.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Vuean/Zero-Basic-Machine-Learning/c5a44fa0fd964c11aa87c17259d1c1f5f6581946/Class1/Figures/fig13_k折验证方法示意图.jpg -------------------------------------------------------------------------------- /Class1/Figures/fig14_机器学习实战流程.jpg: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/Vuean/Zero-Basic-Machine-Learning/c5a44fa0fd964c11aa87c17259d1c1f5f6581946/Class1/Figures/fig14_机器学习实战流程.jpg -------------------------------------------------------------------------------- /Class1/README.md: -------------------------------------------------------------------------------- 1 | # 第一课 机器学习快速上手路径——唯有实战 2 | 3 | ## 1.1 机器学习的家族谱 4 | 5 | **机器学习是AI的分支技术,而深度学习是机器学习的技术之一**。从人工智能到机器学习,再到深度学习,它们之间是一种包含和被包含的关系,如下图所示。 6 | 7 | ![fig01_人工智能、机器学习和深度学习关系](https://github.com/Vuean/Zero-Basic-Machine-Learning/blob/main/Class1/Figures/fig01_%E4%BA%BA%E5%B7%A5%E6%99%BA%E8%83%BD%E3%80%81%E6%9C%BA%E5%99%A8%E5%AD%A6%E4%B9%A0%E5%92%8C%E6%B7%B1%E5%BA%A6%E5%AD%A6%E4%B9%A0%E5%85%B3%E7%B3%BB.png) 8 | 9 | ![fig01_人工智能、机器学习和深度学习关系](./Figures/fig01_人工智能、机器学习和深度学习关系.png) 10 | 11 | 造成目前AI火热的原因主要包括:**数据支撑**、**硬件支撑**以及技术良好的**可达性**。 12 | 13 | 可达性和实用性,才是机器学习和深度学习的真正价值所在。 14 | 15 | ### 1.1.2 机器学习就是从数据中发现规律 16 | 17 | 机器学习的关键内涵之一在于利用计算机的运算能力从大量的数据中发现一个“函数”或者“模型”,并通过它来模拟现实世界事物间的关系,从而实现预测或判断的功能。 18 | 19 | 即机器学习就是在已知数据集的基础上,通过反复**训练**(**train**),选择最贴切的**函数**(**function**),来描述数据集中自变量$x_1, x_2, x_3,...,x_n$(**特征,feature**)与因变量$y$(**标签,label**)之间的关系。 20 | 21 | 其中,机器学习数据集常分为:**训练数据集**(training dataset)和**测试数据集**(test dataset)。 22 | 23 | 机器学习的另外一个特质是**从错误中学习**。 24 | 
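下面补充一个极简的示意代码(非本仓库原有代码,数据为随机生成,仅用于说明“从数据中发现函数”这一思想):先按假设的规律 y = 2x + 1 构造带噪声的数据,再让机器用最小二乘法从数据中把参数“学”回来。

```python
import numpy as np

# 人为构造一批数据:特征x与标签y之间隐含规律 y = 2x + 1(加少量噪声)
rng = np.random.default_rng(0)
x = rng.uniform(0, 10, 100)               # 自变量(特征)
y = 2 * x + 1 + rng.normal(0, 0.5, 100)   # 因变量(标签)

# 训练:用最小二乘法从数据中“发现”最贴切的函数 y ≈ wx + b
w, b = np.polyfit(x, y, deg=1)
print("学到的参数: w =", round(float(w), 3), ", b =", round(float(b), 3))
# 输出应接近 w=2、b=1,即机器从数据中还原了自变量与因变量之间的关系
```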
25 | ### 1.1.3 机器学习的类别——监督学习及其他 26 | 27 | 机器学习常见分类方式为:**监督学习**(supervised learning)、**无监督学习**(unsupervised learning)和**半监督学习**(semi-supervised learning)。主要区别在于是否需要数据标签,监督学习的训练需要标签数据,而无监督学习不需要标签数据,半监督学习介于两者之间。 28 | 29 | ![fig02_不同学习之间区别](https://github.com/Vuean/Zero-Basic-Machine-Learning/blob/main/Class1/Figures/fig02_%E4%B8%8D%E5%90%8C%E5%AD%A6%E4%B9%A0%E4%B9%8B%E9%97%B4%E5%8C%BA%E5%88%AB.jpg) 30 | 31 | ![fig02_不同学习之间区别](./Figures/fig02_不同学习之间区别.jpg) 32 | 33 | 半监督学习是监督学习与无监督学习相结合的一种学习方式,当获取有标签数据的成本过高时,半监督学习使用大量的无标签数据,同时使用部分有标签数据来进行建模。 34 | 35 | ### 1.1.4 机器学习的重要分支——深度学习 36 | 37 | 从另一角度出发,根据机器学习的模型或者训练机器时采用的算法,机器学习的另一个重要内容是**深度学习**(deep learning)。 38 | 39 | 深度学习所采用的机器学习模型,其不同之处在于:**神经网络**。人工神经网络(Artificial Neural Network, ANN),是数据结构和算法形成的机器学习模型。 40 | 41 | **通常将层数较多、结构比较复杂的神经网络的机器学习技术叫做深度学习。** 42 | 43 | ![fig03_深度学习中的神经网络](https://github.com/Vuean/Zero-Basic-Machine-Learning/blob/main/Class1/Figures/fig03_%E6%B7%B1%E5%BA%A6%E5%AD%A6%E4%B9%A0%E4%B8%AD%E7%9A%84%E7%A5%9E%E7%BB%8F%E7%BD%91%E7%BB%9C.jpg) 44 | 45 | ![fig03_深度学习中的神经网络](./Figures/fig03_深度学习中的神经网络.jpg) 46 | 47 | 各种深度学习模型,如卷积神经网络(Convolutional Neural Network, CNN)、循环神经网络(Recurrent Neural Network, RNN),在计算机视觉、自然语言处理(Natural Language Processing, NLP)、音频识别等应用中得到了极好的效果。 48 | 49 | 深度学习的另一大好处是对数据特征的要求降低,自动实现非结构化数据的结构化,无须手工获取特征,减少特征工程(feature engineering)。其中,特征工程是指对数据特征的整理和优化工作,让其更容易被机器所学习。 50 | 51 | 非结构化数据的结构化:有些数据人很容易理解,但是计算机很难识别。比如下图中一个32px×32px的图片,我们一看到就知道写的是8。然而计算机并不知道这图片8背后的逻辑,计算机比较容易读入Excel表格里面的数字8,因为它是存储在计算机文件系统或者数据库中的结构化数据。但是一张图片,在计算机里面存储的形式是数字矩阵,它很难把这个32px×32px的矩阵和数字8联系起来。 52 | 53 | 通过深度学习就能够完成图片上这种从非结构化到结构化的转换,通过卷积神经网络的处理,图片‘8’变成了[0000000010]的编码,有利于计算机辨认。 54 | 55 | ![fig04_数据结构化](./Figures/fig04_数据结构化.jpg) 56 | 57 | 深度学习的过程,也是一个“**数据提纯**”的过程,可以减少手工进行的特征工程任务。深度学习通过神经网络将特征提取和分类任务一并解决。 58 | 59 | ![fig05_深度学习优势](./Figures/fig05_深度学习优势.jpg) 60 | 61 | ### 1.1.5 机器学习新热点——强化学习 62 | 63 | **强化学习**(reinforcement learning)研究的目标是**智能体**(agent)**如何基于环境而做出行动反应,以取得最大化的累积奖励**。如下图所示,智能体通过所获得的奖励(或惩罚)、环境反馈回来的状态以及动作与环境互动。 64 | 65 | ![fig06_强化学习](./Figures/fig06_强化学习.jpg) 66 | 67 | 强化学习和普通机器学习的差异在于:普通机器学习是在开放的环境中学习,如自动驾驶,每一次前进都会带给机器新的环境,新环境(新数据)永无止息;而强化学习的环境是封闭的,如AlphaGo下围棋,每落一子,棋盘就少一目,在这样的闭环中,更容易实现对机器学习所采取的策略进行奖惩。 68 | 69 | 强化学习和监督学习的差异在于:监督学习是从数据中学习,而强化学习是从环境给它的奖惩中学习。 70 | 71 | 除监督学习、无监督学习、半监督学习、深度学习、强化学习外,还有很多其他的机器学习方法,如集成学习(ensemble learning)、在线学习(online learning)、迁移学习(transfer learning)。 72 | 73 | ### 1.1.6 机器学习的两大应用场景——回归与分类 74 | 75 | 最常见的两类机器学习问题类型为:**回归**(regression)问题和**分类**(classification)问题。 76 | 77 | - 回归问题通常用来预测一个值,其标签值是**连续**的。比较常见的回归算法是线性回归(linear regression)算法和深度学习中的神经网络等。 78 | 79 | - 分类问题是将事物标记一个类别标签,结果为**离散**的。分类有二元分类和多元分类。常见的分类算法有:逻辑回归算法、决策树算法和深度学习中的神经网络等。 80 | 81 | ![fig07_回归问题与分类问题](./Figures/fig07_回归问题与分类问题.jpg) 82 | 83 | ### 1.1.7 机器学习的其他应用场景 84 | 85 | 除回归和分类问题外,机器学习的应用场景还有很多,如无监督学习中最常见的**聚类**(clustering)问题是在没有标签的情况下,把数据按照其特征的性质分成不同的簇。还有一种无监督学习是**关联学习**,通过它可以找到特征之间的影响关系。 86 | 87 | ![fig08_常见的机器学习应用场景](./Figures/fig08_常见的机器学习应用场景.jpg) 88 | 89 | ## 1.2 快捷的云实战学习模式 90 | 91 | [Ex01_加州房价预测](https://github.com/Vuean/Zero-Basic-Machine-Learning/blob/main/Class1/Example/Ex01/C01_California%20Housing%20Price.ipynb) 92 | 93 | ## 1.3 基本机器学习术语 94 | 95 | ![fig09_机器学习的基本术语](./Figures/fig09_机器学习的基本术语.jpg) 96 | 97 | 其中,最为重要的3个术语是:特征、标签和模型。 98 | 99 | ### 1.3.1 特征 100 | 101 | 特征是机器学习中的输入,原始的特征描述了数据的属性。它是有维度的。**特征的维度指的是特征的数目**(不是数据集里面样本的个数),不同的数据集中的数据特征的维度不同,有多有少。 102 | 103 | 例如,预测商品销量,把商品的类别、价格和推荐级别这3个属性定义为商品的特征,那么这个数据集就是三维特征数据集。其中一个样本的格式如下:$(x_1, x_2, x_3)$。 104 | 105 | **维:主要指的是数据集中特征X的数目** 106 | 107 | ### 1.3.2 标签 108 | 109 | 标签,也就是机器学习要输出的结果,是我们试图预测的目标。实际上,机器学习要解决什么问题,标签就是什么。如一个有标签数据样本的格式为:$(x_1, x_2, x_3, y)$。 110 | 111 | ![fig10_加州房价数据集中的特征和标签](./Figures/fig10_加州房价数据集中的特征和标签.jpg) 112 | 113 | ### 1.3.3 模型 114 | 115 | 模型将样本映射到预测标签y‘。其实模型就是函数,是执行预测的工具。函数由模型的内部参数定义,而这些内部参数通过从数据中学习规律得到。 116 | 117 | 在机器学习中,先确定模型的类型,比如线性回归模型,逻辑回归模型,神经网络模型;选定算法后,再确定模型参数。 118 | 
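以下示意代码(非本仓库原有代码,数值均为虚构)把特征、标签和模型三个术语串联起来:一个样本有3个特征$(x_1, x_2, x_3)$,模型是由内部参数(权重和偏置)定义的函数,它把特征映射为预测标签$y'$。

```python
import numpy as np

# 一个样本:3个特征 (x1, x2, x3) 和对应的真实标签 y(数值为虚构)
x = np.array([2.0, 5.0, 1.0])   # 特征向量
y = 12.0                        # 标签,即我们试图预测的目标

# 模型:一个线性函数,由内部参数 w(权重)和 b(偏置)定义
w = np.array([1.5, 1.2, 0.8])   # 权重(实际中通过训练学习得到,这里直接假设)
b = 2.0                         # 偏置

y_hat = np.dot(x, w) + b        # 模型把样本映射到预测标签 y'
print("预测值 y' =", y_hat, ",真实标签 y =", y)
# 预测值与真值之间的差距,正是训练过程要不断缩小的对象
```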
119 | ## 1.4 Python和机器学习框架 120 | 121 | ### 1.4.2 机器学习和深度学习框架 122 | 123 | Python的机器学习框架,也就是各种Python库,里面包含定义好的数据结构以及很多库函数、方法、模型等(即API)。我们只需要选择一个适合的框架,通过调用其中的API,编写少量代码,就可以快速建立机器学习模型了。 124 | 125 | 良好的框架不仅易于理解,还支持并行化计算(即硬件加速),并能够自动计算微分、链式求导。机器学习中常用的库有8个,可分为3类:Pandas和NumPy提供数据结构,支持数学运算;Matplotlib和Seaborn用于数据可视化;其余4个提供算法,Scikit-learn是机器学习框架,TensorFlow、Keras和PyTorch则提供深度学习框架。 126 | 127 | 1. Pandas 128 | 129 | Pandas中的预置数据结构有以下几种: 130 | 131 | - Series:1D数组,与NumPy中的一维数组(array)和Python中的列表(list)类似。 132 | 133 | - TimeSeries:以时间为索引的Series。 134 | 135 | - DataFrame:2D的表格型数据结构,Series的容器。 136 | 137 | - Panel:3D的数组,DataFrame的容器。 138 | 139 | 2. Numpy 140 | 141 | NumPy是Python进行科学计算的基础库,有人称它为Python的数学扩展包。它提供了一个强大的多维数组对象array,还提供了大量API支持数组运算。**本课程中将重点使用的数据结构就是NumPy中的数组。** 142 | 143 | 3. Matplotlib 144 | 145 | Matplotlib是Python及其数学扩展包NumPy的可视化操作界面,通过应用程序接口(API)向应用程序提供嵌入式绘图功能。其中还有面向其他图像处理库(如开放图形库OpenGL)的接口。 146 | 147 | 4. Seaborn 148 | 149 | Seaborn是在Matplotlib基础上设计出的绘图库,因此是更高级的视觉化工具,可以画出特别酷炫的数学统计图形。 150 | 151 | 5. Scikit-learn 152 | 153 | 简称Sklearn,是一个相当强大的Python机器学习库,也是简单有效的数据挖掘和数据分析工具。Sklearn基于NumPy、SciPy和Matplotlib构建,其功能涵盖了从数据预处理到训练模型,再到性能评估的各个方面。 154 | 155 | 6. TensorFlow 156 | 157 | TensorFlow编程建立在“图”这个抽象概念上,入门难度高。 158 | 159 | 7. Keras 160 | 161 | Keras建立在TensorFlow、CNTK或Theano这些后端框架之上。这也就是说,Keras比TensorFlow更高级。在计算机领域,高级是“简单”的代名词。高级意味着易学易用。 162 | 163 | 8. PyTorch 164 | 165 | 相对于TensorFlow而言,更为“优雅”的机器学习框架。 166 | 167 | ## 1.5 机器学习项目实战架构 168 | 169 | 机器学习项目的主要环节包括5个部分:问题定义;数据的收集和预处理;选择机器学习模型;训练机器,确定参数;超参数调试和性能优化。 170 | 171 | ![fig11_机器学习项目实战的5个环节](./Figures/fig11_机器学习项目实战的5个环节.jpg) 172 | 173 | ### 1.5.1 第1个环节:问题定义 174 | 175 | 第一个环节是对问题的构建和概念化。关注问题的痛点、现状和目标。 176 | 177 | 机器学习中常使用的数据集为**MNIST数据集**。包含有60000张训练图片和10000张测试图片,都是28px×28px的手写数字灰度图像。 178 | 179 | *灰度图像与黑白图像不同,黑白图像只有黑、白两种颜色,对应像素值为0和1;而灰度图像在黑色和白色之间还有许多灰度级别,取值为0~255。* 180 | 181 | 182 | ### 1.5.2 第2个环节:数据的收集和预处理 183 | 184 | 1. 原始数据的准备 185 | 186 | 自有数据;爬取数据;开源网站(ImageNet、Kaggle、Google Public Data Explorer等)。 187 | 188 | 2. 数据的预处理 189 | 190 | - **可视化**(visualization) 191 | 192 | - **数据向量化**(data vectorization):将数据格式化,变得可以被机器读取。 193 | 194 | - 处理**坏数据**和**缺失值** 195 | 196 | - **特征缩放**(feature scaling):特征缩放方法有很多,包括数据**标准化**(standardization)和**规范化**(normalization)。 197 | 198 |     - 标准化,是对数据特征分布的转换,目标是使其符合标准正态分布。 199 | 200 |     - 归一化,标准化的变体,将特征压缩到给定的最小值和最大值之间,多为0~1。归一化不会改变数据的分布状态。 201 | 202 |     - 规范化,将样本缩放为具有单位范数的过程,消除数据中的离群值。 203 | 204 | 数据预处理的原则: 205 | 206 | - 将数据转换成数字格式(向量、矩阵、3D、4D、5D)的数组(张量); 207 | 208 | - 大范围数据压缩成较小值,分布不均的数据进行标准化; 209 | 210 | - 异质数据同质化(homogenous),即同一个特征的数据类型保持相同。 211 | 212 | 3. 特征工程和特征提取 213 | 214 | - 特征工程:使用数据的领域知识来创建能让机器学习算法发挥作用的特征的过程。 215 | 216 | - 特征提取(feature extraction):通过子特征的选择来减少冗余特征,使初始测量数据更简洁,并保留有用信息。 217 | 218 | 4. 载入MNIST数据集 219 | 220 | shape属性显示X_train_image张量的形状。灰度图像数据集是3D张量,第一个维度是样本维(即图片张数,共60000张),后面两个是特征维(即图片的28px×28px的矩阵)。 221 | 222 | 数据格式转换: 223 | 224 | ```python 225 | # 数据转换工作 226 | from keras.utils import to_categorical 227 | X_train = X_train_image.reshape(60000, 28, 28, 1) # 给图像特征增加一个维度 228 | X_test = X_test_image.reshape(10000, 28, 28, 1) 229 | 230 | y_train = to_categorical(y_train_label, 10) # 标签转换为one-hot编码 231 | y_test = to_categorical(y_test_label, 10) # 标签转换为one-hot编码 232 | 233 | print("数据集张量形状:", X_train.shape) 234 | print("第一个数据标签:", y_train[0]) 235 | 236 | >>> 数据集张量形状: (60000, 28, 28, 1) 237 | 第一个数据标签: [0. 0. 0. 0. 0. 1. 0. 0. 0. 0.] 238 | ``` 239 | 240 | 数据格式转换原因在于: 241 | 242 | - Keras要求图像数据集导入卷积网络模型时为4阶张量,最后一阶代表颜色深度,灰度图像只有一个颜色通道,可以设置为1 243 | 244 | - 在机器学习的分类问题中,标签[0. 0. 0. 0. 0. 1. 0. 0. 0. 0.]代表5。 245 | 246 | ### 1.5.3 第3个环节:选择机器学习模型 247 | 248 | 先选择机器学习模型的算法类型,再开始训练机器确定参数。 249 | 250 | 主要的算法模型类型包括有: 251 | 252 | - 线性模型(线性回归,逻辑回归) 253 | 254 | - 非线性模型(支持向量机,k最邻近分类) 255 | 256 | - 基于树和集成的模型(决策树、随机森林、梯度提升树等) 257 | 258 | - 神经网络(人工神经网络、卷积神经网络、长短期记忆网络等) 259 | 260 | 根据不同的问题,选择不同的算法,比如,随机森林很适合处理回归问题,而神经网络则适合处理特征量巨大的数据。 261 | 262 | ```python 263 | # MNIST数据集手写数字识别 264 | from keras import models 265 | from keras.layers import Dense, Dropout, Flatten, Conv2D, MaxPooling2D 266 | model = models.Sequential() # 用序贯方式建模 267 | model.add(Conv2D(32, (3,3), activation='relu', # 添加Conv2D层 268 |     input_shape=(28, 28, 1))) # 指定输入数据样本张量的类型 269 | model.add(MaxPooling2D(pool_size=(2,2))) # 添加MaxPooling2D层 270 | model.add(Conv2D(64, (3,3), activation='relu')) # 添加Conv2D层 271 | model.add(MaxPooling2D(pool_size=(2, 2))) # 添加MaxPooling2D层 272 | model.add(Dropout(0.25)) # 添加Dropout层 273 | model.add(Flatten()) # 展平 274 | model.add(Dense(128, activation='relu')) # 添加全连接层 275 | model.add(Dropout(0.5)) 276 | model.add(Dense(10, activation='softmax')) # softmax分类激活,输出10维分类码 277 | # 编译模型 278 | model.compile(optimizer='rmsprop', # 指定优化器 279 |     loss='categorical_crossentropy', # 指定损失函数 280 |     metrics=['accuracy']) # 指定验证过程中的评估指标 281 | ``` 282 | 283 | 这段代码把数据集放入卷积神经网络进行处理。这个网络中包括两个Conv2D(二维卷积)层,两个MaxPooling2D(最大池化)层,两个Dropout层用于防止过拟合,还有Dense(全连接)层,最后通过Softmax分类器输出预测标签$y'$值,也就是所预测的分类值。这个$y'$,是一个one-hot格式的10维向量,通过与标签真实值$y$比较,以计算预测的准确率。 284 | 285 | ![fig12_卷积神经网络实现手写数字识别](./Figures/fig12_卷积神经网络实现手写数字识别.jpg) 286 | 287 | ### 1.5.4 第4个环节:训练机器,确定参数 288 | 289 | 确定机器学习模型的算法类型后,进行机器的学习,训练机器以确定最佳的模型内部参数,再使用模型对新数据集进行预测。除了确定模型**内部参数**,还有可能需要调整**超参数**。 290 | 291 | - **内部参数**:机器学习模型的具体参数值,有**权重**(weight)和**偏置**(bias)。模型内参数在机器的训练过程中被确定。 292 | 293 | - **超参数**(hyperparameter):属于训练和调试过程中的参数。例如,迭代次数、迭代时模型参数改变的速率(学习率)、正则化参数等。 294 | 295 | ```python 296 | # fit拟合 297 | model.fit(X_train, y_train, # 指定训练特征集和训练标签集 298 |     validation_split=0.3, # 部分训练集数据拆分成验证集 299 |     epochs=5, # 训练轮次为5轮 300 |     batch_size=128) 301 | 302 | >>> 303 | Epoch 1/5 304 | 329/329 [==============================] - 12s 37ms/step - loss: 1.6845 - accuracy: 0.8111 - val_loss: 0.1466 - val_accuracy: 0.9574 305 | Epoch 2/5 306 | 329/329 [==============================] - 13s 38ms/step - loss: 0.2017 - accuracy: 0.9452 - val_loss: 0.0779 - val_accuracy: 0.9787 307 | Epoch 3/5 308 | 329/329 [==============================] - 12s 37ms/step - loss: 0.1395 - accuracy: 0.9617 - val_loss: 0.0797 - val_accuracy: 0.9798 309 | Epoch 4/5 310 | 329/329 [==============================] - 12s 36ms/step - loss: 0.1189 - accuracy: 0.9677 - val_loss: 0.0827 - val_accuracy: 0.9818 311 | Epoch 5/5 312 | 329/329 [==============================] - 12s 36ms/step - loss: 0.1059 - accuracy: 0.9716 - val_loss: 0.0698 - val_accuracy: 0.9837 313 | 314 | ``` 315 | 316 | 在上面的训练过程中,fit方法自动地将训练数据预留出30%作为验证集。 317 | 318 | 训练过程中,准确率逐步提高。 319 | 320 | - accuracy:代表训练集上的预测准确率; 321 | 322 | - val_accuracy:代表验证集上的预测准确率。 323 | 324 | ### 1.5.5 第5个环节:超参数调试和性能优化 325 | 326 | 机器学习**重在评估**,只有通过评估,才能知道当前模型的效率,才能在不同模型或同一模型的不同超参数之间进行比较。 327 | 328 | 主要评估点: 329 | 330 | - 在机器训练过程中,对于模型内部参数的评估是通过**损失函数**进行的。比如回归问题的均方误差函数、分类问题的交叉熵函数。 331 | 332 | - 机器训练结束后,还要进行**验证**。验证过程采用的评估方式包括有R2分数、均方误差函数、平均绝对误差函数、交叉熵函数等。 333 | 334 | 1. 训练集、验证集和测试集 335 | 336 | 为了进行模型评估,常将数据分为3类集合:**训练集**(training set)、**验证集**(validation set)、**测试集**(test set)。在训练集上训练模型,在验证集上评估模型,完成训练后,在测试集上测试模型。 337 | 338 | 模型训练过程中,常可能出现**过拟合**(overfit)现象,即**模型泛化能力弱**。在利用训练集和验证集优化模型时,也可能导致过拟合,这种现象称为**信息泄露**(information leak)。 339 | 340 | 2. K折验证 341 | 342 | 当测试结果不够理想时,还需要继续调试和优化,那么就需要持续的大量新数据支撑。为了解决新数据获取难的问题,机器学习常通过**k折验证**方法,重用一个数据集进行多次验证(示意代码见下)。 343 | 344 | ![fig13_k折验证方法示意图](./Figures/fig13_k折验证方法示意图.jpg) 345 | 346 | **K折验证**(K-fold validation):将数据划分为大小相同的K个分区,对于每个分区,都在剩余的K-1个分区上训练模型,然后在留下的分区上评估模型。最终分数等于K个分数的平均值。 347 | 
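下面给出K折验证的一个简单示意(非本仓库原有代码,数据为随机构造,这里假设使用Sklearn的KFold配合线性回归模型):

```python
import numpy as np
from sklearn.model_selection import KFold
from sklearn.linear_model import LinearRegression

# 构造一组虚拟的回归数据:100个样本、3维特征
rng = np.random.default_rng(0)
X = rng.normal(size=(100, 3))
y = X @ np.array([1.0, -2.0, 0.5]) + rng.normal(0, 0.1, 100)

kf = KFold(n_splits=5, shuffle=True, random_state=0)   # K=5,即5折验证
scores = []
for train_index, val_index in kf.split(X):
    model = LinearRegression()
    model.fit(X[train_index], y[train_index])               # 在其余K-1个分区上训练
    scores.append(model.score(X[val_index], y[val_index]))  # 在留下的分区上评估(R2分数)

print("每折的R2分数:", np.round(scores, 4))
print("最终分数(K个分数的平均值):", np.mean(scores))
```

Sklearn还提供了cross_val_score函数,可以用一行代码完成上述循环。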
348 | 3. 模型的优化和泛化 349 | 350 | **优化**(optimization)和**泛化**(generalization)是机器学习的两个目标,是一种此消彼长的状态。 351 | 352 | - 如何成功地拟合已有数据,这是性能的**优化**; 353 | 354 | - 更为重要的是将当前模型**泛化**到其他数据集。 355 | 356 | 4. 查看预测结果 357 | 358 | 使用predict方法可获得模型的预测值。 359 | 360 | ## 1.6 本课内容小结 361 | 362 | ![fig14_机器学习实战流程](./Figures/fig14_机器学习实战流程.jpg) 363 | 364 | ## 1.7 课后练习 365 | 366 | 1. 
机器学习分类,并说明分类标准。 367 | 368 | -------------------------------------------------------------------------------- /Class2/Example/C01_NumPy Arrays.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "### 2.4.2 标量——0D(阶)张量" 8 | ] 9 | }, 10 | { 11 | "cell_type": "code", 12 | "execution_count": 2, 13 | "metadata": {}, 14 | "outputs": [ 15 | { 16 | "name": "stdout", 17 | "output_type": "stream", 18 | "text": [ 19 | "X的值: 5\n", 20 | "X的阶: 0\n", 21 | "X的数据类型: int32\n", 22 | "X的形状: ()\n" 23 | ] 24 | } 25 | ], 26 | "source": [ 27 | "import numpy as np\n", 28 | "X = np.array(5) # 创建0D张量,也就是标量\n", 29 | "print(\"X的值:\", X)\n", 30 | "print(\"X的阶:\", X.ndim)\n", 31 | "print(\"X的数据类型:\", X.dtype)\n", 32 | "print(\"X的形状:\", X.shape)\n" 33 | ] 34 | }, 35 | { 36 | "cell_type": "code", 37 | "execution_count": 3, 38 | "metadata": {}, 39 | "outputs": [ 40 | { 41 | "name": "stdout", 42 | "output_type": "stream", 43 | "text": [ 44 | "n = 1\n", 45 | "n = 2\n" 46 | ] 47 | } 48 | ], 49 | "source": [ 50 | "n = 0\n", 51 | "for gender in [0, 1]:\n", 52 | " n = n + 1\n", 53 | " print(\"n = \", n)" 54 | ] 55 | }, 56 | { 57 | "cell_type": "markdown", 58 | "metadata": {}, 59 | "source": [ 60 | "### 2.4.3 向量——1D(阶)张量" 61 | ] 62 | }, 63 | { 64 | "cell_type": "code", 65 | "execution_count": 4, 66 | "metadata": {}, 67 | "outputs": [ 68 | { 69 | "name": "stdout", 70 | "output_type": "stream", 71 | "text": [ 72 | "X的值: [5 6 7 8 9]\n", 73 | "X的阶: 1\n", 74 | "X的数据类型: int32\n", 75 | "X的形状: (5,)\n" 76 | ] 77 | } 78 | ], 79 | "source": [ 80 | "X = np.array([5, 6, 7, 8, 9])\n", 81 | "print(\"X的值:\", X)\n", 82 | "print(\"X的阶:\", X.ndim)\n", 83 | "print(\"X的数据类型:\", X.dtype)\n", 84 | "print(\"X的形状:\", X.shape)" 85 | ] 86 | }, 87 | { 88 | "cell_type": "code", 89 | "execution_count": 5, 90 | "metadata": {}, 91 | "outputs": [], 92 | "source": [ 93 | "X = np.array([5]) # 1维的向量,就是1D数组里面只有一个元素" 94 | ] 95 | }, 96 | { 97 | "cell_type": "code", 98 | "execution_count": 6, 99 | "metadata": {}, 100 | "outputs": [ 101 | { 102 | "name": "stdout", 103 | "output_type": "stream", 104 | "text": [ 105 | "X_train的形状: (404, 13)\n", 106 | "X_train中第一个样本的形状: (13,)\n", 107 | "y_train的形状: (404,)\n" 108 | ] 109 | } 110 | ], 111 | "source": [ 112 | "from keras.datasets import boston_housing\n", 113 | "(X_train, y_train), (X_test, y_test) = boston_housing.load_data()\n", 114 | "print(\"X_train的形状:\", X_train.shape)\n", 115 | "print(\"X_train中第一个样本的形状:\", X_train[0].shape)\n", 116 | "print(\"y_train的形状:\", y_train.shape)" 117 | ] 118 | }, 119 | { 120 | "cell_type": "code", 121 | "execution_count": 7, 122 | "metadata": {}, 123 | "outputs": [ 124 | { 125 | "name": "stdout", 126 | "output_type": "stream", 127 | "text": [ 128 | "函数返回结果: 23.2\n" 129 | ] 130 | } 131 | ], 132 | "source": [ 133 | "# 向量点积\n", 134 | "weight = np.array([1, -1.8, 1, 1, 2])\n", 135 | "X = np.array([1, 6, 7, 8, 9])\n", 136 | "y_hat = np.dot(X, weight)\n", 137 | "print('函数返回结果:', y_hat)" 138 | ] 139 | }, 140 | { 141 | "cell_type": "code", 142 | "execution_count": 8, 143 | "metadata": {}, 144 | "outputs": [ 145 | { 146 | "data": { 147 | "text/plain": [ 148 | "23.2" 149 | ] 150 | }, 151 | "execution_count": 8, 152 | "metadata": {}, 153 | "output_type": "execute_result" 154 | } 155 | ], 156 | "source": [ 157 | "y_hat = weight.dot(X)\n", 158 | "y_hat" 159 | ] 160 | }, 161 | { 162 | "cell_type": "markdown", 163 | "metadata": {}, 164 | "source": [ 165 | "### 2.4.4 
矩阵——2D(阶)张量" 166 | ] 167 | }, 168 | { 169 | "cell_type": "code", 170 | "execution_count": 9, 171 | "metadata": {}, 172 | "outputs": [ 173 | { 174 | "name": "stdout", 175 | "output_type": "stream", 176 | "text": [ 177 | "X_train的内容: [[1.23247e+00 0.00000e+00 8.14000e+00 ... 2.10000e+01 3.96900e+02\n", 178 | " 1.87200e+01]\n", 179 | " [2.17700e-02 8.25000e+01 2.03000e+00 ... 1.47000e+01 3.95380e+02\n", 180 | " 3.11000e+00]\n", 181 | " [4.89822e+00 0.00000e+00 1.81000e+01 ... 2.02000e+01 3.75520e+02\n", 182 | " 3.26000e+00]\n", 183 | " ...\n", 184 | " [3.46600e-02 3.50000e+01 6.06000e+00 ... 1.69000e+01 3.62250e+02\n", 185 | " 7.83000e+00]\n", 186 | " [2.14918e+00 0.00000e+00 1.95800e+01 ... 1.47000e+01 2.61950e+02\n", 187 | " 1.57900e+01]\n", 188 | " [1.43900e-02 6.00000e+01 2.93000e+00 ... 1.56000e+01 3.76700e+02\n", 189 | " 4.38000e+00]]\n" 190 | ] 191 | } 192 | ], 193 | "source": [ 194 | "print(\"X_train的内容:\", X_train) #X_train是2D张量,即矩阵" 195 | ] 196 | }, 197 | { 198 | "cell_type": "code", 199 | "execution_count": 10, 200 | "metadata": {}, 201 | "outputs": [], 202 | "source": [ 203 | "# 创建3D张量\n", 204 | "X = np.array([[[1, 22, 4, 78, 2],\n", 205 | " [2, 59, 6, 56, 1],\n", 206 | " [3, 31, 8, 54, 0]],\n", 207 | " [[4, 56, 9, 34, 1],\n", 208 | " [5, 78, 8, 35, 2],\n", 209 | " [6, 34, 7, 36, 0]],\n", 210 | " [[7, 45,5, 34, 5],\n", 211 | " [8, 53, 6, 35, 4],\n", 212 | " [9, 81, 4, 36, 5]]])" 213 | ] 214 | } 215 | ], 216 | "metadata": { 217 | "interpreter": { 218 | "hash": "694cdcaedaf049a0984f27e4a849c1af591c6b1d7a3cf6d6f220830adff0acba" 219 | }, 220 | "kernelspec": { 221 | "display_name": "Python 3.8.8 ('Vuean_ML')", 222 | "language": "python", 223 | "name": "python3" 224 | }, 225 | "language_info": { 226 | "codemirror_mode": { 227 | "name": "ipython", 228 | "version": 3 229 | }, 230 | "file_extension": ".py", 231 | "mimetype": "text/x-python", 232 | "name": "python", 233 | "nbconvert_exporter": "python", 234 | "pygments_lexer": "ipython3", 235 | "version": "3.8.8" 236 | }, 237 | "orig_nbformat": 4 238 | }, 239 | "nbformat": 4, 240 | "nbformat_minor": 2 241 | } 242 | -------------------------------------------------------------------------------- /Class2/Example/C02_Vertorization.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "### 2.5.1 机器学习中张量的创建" 8 | ] 9 | }, 10 | { 11 | "cell_type": "code", 12 | "execution_count": 1, 13 | "metadata": {}, 14 | "outputs": [ 15 | { 16 | "name": "stdout", 17 | "output_type": "stream", 18 | "text": [ 19 | "列表: [1, 2, 3, 4, 5]\n", 20 | "列表转换为数组: [1 2 3 4 5]\n", 21 | "元组转换为数组: [ 6 7 8 9 10]\n", 22 | "2D数组: [[1 2 3]\n", 23 | " [4 5 6]]\n", 24 | "数组的形状: (5,)\n" 25 | ] 26 | } 27 | ], 28 | "source": [ 29 | "import numpy as np\n", 30 | "list = [1, 2, 3, 4, 5] # 创建列表\n", 31 | "array_01 = np.array([1, 2, 3, 4, 5]) # 列表转换为数组\n", 32 | "array_02 = np.array((6, 7, 8, 9, 10)) # 元组转换为数组\n", 33 | "array_03 = np.array([[1, 2, 3], [4, 5, 6]]) # 列表转换为2D数组\n", 34 | "\n", 35 | "print('列表:', list)\n", 36 | "print('列表转换为数组:', array_01)\n", 37 | "print('元组转换为数组:', array_02)\n", 38 | "print('2D数组:', array_03)\n", 39 | "print('数组的形状:', array_01.shape)\n", 40 | "# print('列表的形状:', list.shape) # 列表无形状,报错" 41 | ] 42 | }, 43 | { 44 | "cell_type": "code", 45 | "execution_count": 2, 46 | "metadata": {}, 47 | "outputs": [ 48 | { 49 | "name": "stdout", 50 | "output_type": "stream", 51 | "text": [ 52 | "[1 2 3 4]\n", 53 | "[1. 2. 3. 4. 
5.]\n" 54 | ] 55 | } 56 | ], 57 | "source": [ 58 | "# 其他方法生成数组\n", 59 | "\n", 60 | "# arange(a, b, c)生成a~b(不包括b),间隔为c的一个数组\n", 61 | "array_04 = np.arange(1, 5, 1)\n", 62 | "\n", 63 | "# linspace(a, b, c)生成a~b(包括b),平均分成c份\n", 64 | "array_05 = np.linspace(1, 5, 5)\n", 65 | "\n", 66 | "print(array_04)\n", 67 | "print(array_05)" 68 | ] 69 | }, 70 | { 71 | "cell_type": "markdown", 72 | "metadata": {}, 73 | "source": [ 74 | "### 2.5.2 通过索引和切片访问张量中的数据" 75 | ] 76 | }, 77 | { 78 | "cell_type": "code", 79 | "execution_count": 3, 80 | "metadata": {}, 81 | "outputs": [ 82 | { 83 | "name": "stdout", 84 | "output_type": "stream", 85 | "text": [ 86 | "[0 1 2 3 4 5 6 7 8 9]\n", 87 | "第4个元素: 3\n", 88 | "第-1个(最后一个)元素: 9\n", 89 | "从0到4切片: [0 1 2 3]\n", 90 | "从0到12切片,步长为4: [0 4 8]\n" 91 | ] 92 | } 93 | ], 94 | "source": [ 95 | "array_06 = np.arange(10)\n", 96 | "print(array_06)\n", 97 | "\n", 98 | "index_01 = array_06[3]\n", 99 | "print(\"第4个元素:\", index_01)\n", 100 | "\n", 101 | "index_02 = array_06[-1]\n", 102 | "print(\"第-1个(最后一个)元素:\", index_02)\n", 103 | "\n", 104 | "slice_01 = array_06[:4]\n", 105 | "print(\"从0到4切片:\", slice_01)\n", 106 | "\n", 107 | "slice_02 = array_06[0:12:4]\n", 108 | "print(\"从0到12切片,步长为4:\", slice_02)" 109 | ] 110 | }, 111 | { 112 | "cell_type": "code", 113 | "execution_count": 4, 114 | "metadata": {}, 115 | "outputs": [ 116 | { 117 | "name": "stdout", 118 | "output_type": "stream", 119 | "text": [ 120 | "(60000, 28, 28)\n" 121 | ] 122 | } 123 | ], 124 | "source": [ 125 | "# 对MNIST数据切片\n", 126 | "from keras.datasets import mnist\n", 127 | "(X_train, y_train), (X_test, y_test) = mnist.load_data()\n", 128 | "print(X_train.shape)\n", 129 | "\n", 130 | "# 10000:15000表示把样本轴进行了切片,后面两个冒号表示,剩余的两个轴里的数据全部保留。\n", 131 | "X_train_slice = X_train[10000:15000, :, :]" 132 | ] 133 | }, 134 | { 135 | "cell_type": "code", 136 | "execution_count": 5, 137 | "metadata": {}, 138 | "outputs": [ 139 | { 140 | "name": "stdout", 141 | "output_type": "stream", 142 | "text": [ 143 | "[[4 5 6]] 它的形状是: (1, 3)\n", 144 | "[4 5 6] 它的形状又不同了: (3,)\n" 145 | ] 146 | } 147 | ], 148 | "source": [ 149 | "array_07 = np.array([[1, 2, 3], [4, 5, 6]])\n", 150 | "print(array_07[1:2], '它的形状是:', array_07[1:2].shape)\n", 151 | "print(array_07[1:2][0], '它的形状又不同了:', array_07[1:2][0].shape)" 152 | ] 153 | }, 154 | { 155 | "cell_type": "markdown", 156 | "metadata": {}, 157 | "source": [ 158 | "### 2.5.3 张量的整体操作和逐元素运算" 159 | ] 160 | }, 161 | { 162 | "cell_type": "code", 163 | "execution_count": 6, 164 | "metadata": {}, 165 | "outputs": [ 166 | { 167 | "name": "stdout", 168 | "output_type": "stream", 169 | "text": [ 170 | "[[2 3 4]\n", 171 | " [5 6 7]]\n" 172 | ] 173 | } 174 | ], 175 | "source": [ 176 | "array_07 += 1\n", 177 | "print(array_07)" 178 | ] 179 | }, 180 | { 181 | "cell_type": "code", 182 | "execution_count": 7, 183 | "metadata": {}, 184 | "outputs": [], 185 | "source": [ 186 | "# 等价于\n", 187 | "for i in range(array_07.shape[0]):\n", 188 | " for j in range(array_07.shape[1]):\n", 189 | " array_07[i, j] += 1" 190 | ] 191 | }, 192 | { 193 | "cell_type": "code", 194 | "execution_count": 8, 195 | "metadata": {}, 196 | "outputs": [ 197 | { 198 | "name": "stdout", 199 | "output_type": "stream", 200 | "text": [ 201 | "[[1.73205081 2. 
2.23606798]\n", 202 | " [2.44948974 2.64575131 2.82842712]]\n" 203 | ] 204 | } 205 | ], 206 | "source": [ 207 | "# 逐元素的平方根\n", 208 | "print(np.sqrt(array_07))" 209 | ] 210 | }, 211 | { 212 | "cell_type": "markdown", 213 | "metadata": {}, 214 | "source": [ 215 | "### 2.5.4 张量的变形和转置" 216 | ] 217 | }, 218 | { 219 | "cell_type": "code", 220 | "execution_count": 9, 221 | "metadata": {}, 222 | "outputs": [ 223 | { 224 | "name": "stdout", 225 | "output_type": "stream", 226 | "text": [ 227 | "[[3 4 5]\n", 228 | " [6 7 8]] 形状是 (2, 3)\n", 229 | "[[3 4]\n", 230 | " [5 6]\n", 231 | " [7 8]] 形状是 (3, 2)\n", 232 | "[[3 4 5]\n", 233 | " [6 7 8]] 形状是 (2, 3)\n" 234 | ] 235 | } 236 | ], 237 | "source": [ 238 | "print(array_07, \"形状是\", array_07.shape)\n", 239 | "print(array_07.reshape(3, 2), \"形状是\", array_07.reshape(3, 2).shape)\n", 240 | "\n", 241 | "# reshape方法不影响元素本身\n", 242 | "print(array_07, \"形状是\", array_07.shape)" 243 | ] 244 | }, 245 | { 246 | "cell_type": "code", 247 | "execution_count": 10, 248 | "metadata": {}, 249 | "outputs": [], 250 | "source": [ 251 | "array_07 = array_07.reshape(3, 2)" 252 | ] 253 | }, 254 | { 255 | "cell_type": "code", 256 | "execution_count": 11, 257 | "metadata": {}, 258 | "outputs": [], 259 | "source": [ 260 | "# 上述转变也称为矩阵转置(transpose)\n", 261 | "array_07 = array_07.T" 262 | ] 263 | }, 264 | { 265 | "cell_type": "code", 266 | "execution_count": 12, 267 | "metadata": {}, 268 | "outputs": [ 269 | { 270 | "name": "stdout", 271 | "output_type": "stream", 272 | "text": [ 273 | "[0 1 2 3 4 5 6 7 8 9] 形状是 (10,) 阶为 1\n", 274 | "[[0]\n", 275 | " [1]\n", 276 | " [2]\n", 277 | " [3]\n", 278 | " [4]\n", 279 | " [5]\n", 280 | " [6]\n", 281 | " [7]\n", 282 | " [8]\n", 283 | " [9]] 形状是 (10, 1) 阶为 2\n" 284 | ] 285 | } 286 | ], 287 | "source": [ 288 | "array_06 = array_06.reshape(10)\n", 289 | "print(array_06, '形状是', array_06.shape, '阶为', array_06.ndim)\n", 290 | "\n", 291 | "array_06 = array_06.reshape(10, 1)\n", 292 | "print(array_06, '形状是', array_06.shape, '阶为', array_06.ndim)" 293 | ] 294 | }, 295 | { 296 | "cell_type": "markdown", 297 | "metadata": {}, 298 | "source": [ 299 | "### 2.5.5 Python中的广播" 300 | ] 301 | }, 302 | { 303 | "cell_type": "code", 304 | "execution_count": 13, 305 | "metadata": {}, 306 | "outputs": [ 307 | { 308 | "name": "stdout", 309 | "output_type": "stream", 310 | "text": [ 311 | "array_09的形状: (1, 3)\n", 312 | "array_10的形状: (4, 1)\n", 313 | "array_12的形状: (3,)\n", 314 | "array_13的形状: (1,)\n", 315 | "array_14的形状: (1, 1) \n", 316 | "\n", 317 | "08 + 09结果: [[ 0 1 2]\n", 318 | " [10 11 12]\n", 319 | " [20 21 22]\n", 320 | " [30 31 32]] \n", 321 | "\n", 322 | "08 + 10结果: [[ 0 0 0]\n", 323 | " [11 11 11]\n", 324 | " [22 22 22]\n", 325 | " [33 33 33]] \n", 326 | "\n", 327 | "08 + 11结果: [[ 0 1 2]\n", 328 | " [10 11 12]\n", 329 | " [20 21 22]\n", 330 | " [30 31 32]] \n", 331 | "\n", 332 | "08 + 12结果: [[ 0 1 2]\n", 333 | " [10 11 12]\n", 334 | " [20 21 22]\n", 335 | " [30 31 32]] \n", 336 | "\n", 337 | "08 + 13结果: [[ 1 1 1]\n", 338 | " [11 11 11]\n", 339 | " [21 21 21]\n", 340 | " [31 31 31]] \n", 341 | "\n", 342 | "08 + 14结果: [[ 1 1 1]\n", 343 | " [11 11 11]\n", 344 | " [21 21 21]\n", 345 | " [31 31 31]] \n", 346 | "\n" 347 | ] 348 | } 349 | ], 350 | "source": [ 351 | "array_08 = np.array([[0, 0, 0], [10, 10, 10], [20, 20, 20], [30, 30, 30]])\n", 352 | "array_09 = np.array([[0, 1, 2]])\n", 353 | "array_10 = np.array([[0], [1], [2], [3]])\n", 354 | "list_11 = [[0, 1, 2]]\n", 355 | "\n", 356 | "print('array_09的形状:', array_09.shape)\n", 357 | "print('array_10的形状:', 
array_10.shape)\n", 358 | "\n", 359 | "array_12 = array_09.reshape(3)\n", 360 | "print('array_12的形状:', array_12.shape)\n", 361 | "\n", 362 | "array_13 = np.array([1])\n", 363 | "print('array_13的形状:', array_13.shape)\n", 364 | "\n", 365 | "array_14 = array_13.reshape(1, 1)\n", 366 | "print('array_14的形状:', array_14.shape, '\\n')\n", 367 | "\n", 368 | "print ('08 + 09结果:', array_08 + array_09, '\\n')\n", 369 | "print ('08 + 10结果:', array_08 + array_10, '\\n')\n", 370 | "print ('08 + 11结果:', array_08 + list_11, '\\n')\n", 371 | "print ('08 + 12结果:', array_08 + array_12, '\\n')\n", 372 | "print ('08 + 13结果:', array_08 + array_13, '\\n')\n", 373 | "print ('08 + 14结果:', array_08 + array_14, '\\n')" 374 | ] 375 | }, 376 | { 377 | "cell_type": "markdown", 378 | "metadata": {}, 379 | "source": [ 380 | "### 2.5.6 向量和矩阵的点积运算" 381 | ] 382 | }, 383 | { 384 | "cell_type": "code", 385 | "execution_count": 15, 386 | "metadata": {}, 387 | "outputs": [ 388 | { 389 | "name": "stdout", 390 | "output_type": "stream", 391 | "text": [ 392 | "vector_01的形状: (3,)\n", 393 | "vector_02的形状: (3, 1)\n", 394 | "vector_03的形状: (1,)\n", 395 | "vector_04的形状: (1, 3)\n", 396 | "01和01的点积: 14\n", 397 | "01和02的点积: [14]\n", 398 | "04和02的点积: [[14]]\n", 399 | "04和02的点积: (1, 1)\n", 400 | "01和数字的点积: [2 4 6]\n", 401 | "02和03的点积: [2 4 6]\n", 402 | "02和04的点积: [[1 2 3]\n", 403 | " [2 4 6]\n", 404 | " [3 6 9]]\n" 405 | ] 406 | } 407 | ], 408 | "source": [ 409 | "vector_01 = np.array([1, 2, 3])\n", 410 | "vector_02 = np.array([[1], [2], [3]])\n", 411 | "vector_03 = np.array([2])\n", 412 | "vector_04 = vector_02.reshape(1, 3)\n", 413 | "print('vector_01的形状:', vector_01.shape)\n", 414 | "print('vector_02的形状:', vector_02.shape)\n", 415 | "print('vector_03的形状:', vector_03.shape)\n", 416 | "print('vector_04的形状:', vector_04.shape)\n", 417 | "print('01和01的点积:',np.dot(vector_01,vector_01))\n", 418 | "print('01和02的点积:',np.dot(vector_01,vector_02))\n", 419 | "print('04和02的点积:',np.dot(vector_04,vector_02))\n", 420 | "print('01和数字的点积:',np.dot(vector_01,2))\n", 421 | "print('02和03的点积:',np.dot(vector_02,vector_03))\n", 422 | "print('02和04的点积:',np.dot(vector_02,vector_04))\n", 423 | "# print ('01和03的点积:', np.dot(vector_01,vector_03)) # 程序会报错\n", 424 | "# print ('02和02的点积:', np.dot(vector_02,vector_02))" 425 | ] 426 | }, 427 | { 428 | "cell_type": "code", 429 | "execution_count": 17, 430 | "metadata": {}, 431 | "outputs": [ 432 | { 433 | "name": "stdout", 434 | "output_type": "stream", 435 | "text": [ 436 | "[[0 1 2]\n", 437 | " [3 4 5]]\n", 438 | "[[0 1]\n", 439 | " [2 3]\n", 440 | " [4 5]]\n", 441 | "01和02的点积: [[10 13]\n", 442 | " [28 40]]\n", 443 | "02和01的点积: [[ 3 4 5]\n", 444 | " [ 9 14 19]\n", 445 | " [15 24 33]]\n" 446 | ] 447 | } 448 | ], 449 | "source": [ 450 | "# 矩阵点积\n", 451 | "matrix_01 = np.arange(0, 6).reshape(2, 3)\n", 452 | "matrix_02 = np.arange(0, 6).reshape(3, 2)\n", 453 | "print(matrix_01)\n", 454 | "print(matrix_02)\n", 455 | "print('01和02的点积:', np.dot(matrix_01, matrix_02))\n", 456 | "print('02和01的点积:', np.dot(matrix_02, matrix_01))\n", 457 | "# 失败 \n", 458 | "# print('01和01的点积:', np.dot(matrix_01, matrix_01))" 459 | ] 460 | } 461 | ], 462 | "metadata": { 463 | "interpreter": { 464 | "hash": "694cdcaedaf049a0984f27e4a849c1af591c6b1d7a3cf6d6f220830adff0acba" 465 | }, 466 | "kernelspec": { 467 | "display_name": "Python 3.8.8 ('Vuean_ML')", 468 | "language": "python", 469 | "name": "python3" 470 | }, 471 | "language_info": { 472 | "codemirror_mode": { 473 | "name": "ipython", 474 | "version": 3 475 | }, 476 | "file_extension": ".py", 477 | 
"mimetype": "text/x-python", 478 | "name": "python", 479 | "nbconvert_exporter": "python", 480 | "pygments_lexer": "ipython3", 481 | "version": "3.8.8" 482 | }, 483 | "orig_nbformat": 4 484 | }, 485 | "nbformat": 4, 486 | "nbformat_minor": 2 487 | } 488 | -------------------------------------------------------------------------------- /Class2/Example/C03_Boston Housing.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "import numpy as np\n", 10 | "import pandas as pd\n", 11 | "from keras.datasets import boston_housing\n", 12 | "(X_train, y_train), (X_test, y_test) = boston_housing.load_data()" 13 | ] 14 | }, 15 | { 16 | "cell_type": "code", 17 | "execution_count": 2, 18 | "metadata": {}, 19 | "outputs": [ 20 | { 21 | "name": "stdout", 22 | "output_type": "stream", 23 | "text": [ 24 | "数据集张量形状: (404, 13)\n" 25 | ] 26 | } 27 | ], 28 | "source": [ 29 | "print(\"数据集张量形状:\", X_train.shape)" 30 | ] 31 | }, 32 | { 33 | "cell_type": "code", 34 | "execution_count": 3, 35 | "metadata": {}, 36 | "outputs": [ 37 | { 38 | "name": "stdout", 39 | "output_type": "stream", 40 | "text": [ 41 | "第一个数据样本:\n", 42 | " [ 1.23247 0. 8.14 0. 0.538 6.142 91.7\n", 43 | " 3.9769 4. 307. 21. 396.9 18.72 ]\n" 44 | ] 45 | } 46 | ], 47 | "source": [ 48 | "print(\"第一个数据样本:\\n\", X_train[0])" 49 | ] 50 | }, 51 | { 52 | "cell_type": "code", 53 | "execution_count": 4, 54 | "metadata": {}, 55 | "outputs": [ 56 | { 57 | "name": "stdout", 58 | "output_type": "stream", 59 | "text": [ 60 | "第101到200个数据样本:\n", 61 | " [[5.75290e-01 0.00000e+00 6.20000e+00 ... 1.74000e+01 3.85910e+02\n", 62 | " 2.47000e+00]\n", 63 | " [4.75470e-01 0.00000e+00 9.90000e+00 ... 1.84000e+01 3.96230e+02\n", 64 | " 1.27300e+01]\n", 65 | " [1.27440e-01 0.00000e+00 6.91000e+00 ... 1.79000e+01 3.85410e+02\n", 66 | " 4.84000e+00]\n", 67 | " ...\n", 68 | " [9.06000e-03 9.00000e+01 2.97000e+00 ... 1.53000e+01 3.94720e+02\n", 69 | " 7.85000e+00]\n", 70 | " [2.36482e+01 0.00000e+00 1.81000e+01 ... 2.02000e+01 3.96900e+02\n", 71 | " 2.36900e+01]\n", 72 | " [4.98100e-02 2.10000e+01 5.64000e+00 ... 
1.68000e+01 3.96900e+02\n", 73 | " 8.43000e+00]]\n" 74 | ] 75 | } 76 | ], 77 | "source": [ 78 | "print(\"第101到200个数据样本:\\n\", X_train[101:199])" 79 | ] 80 | }, 81 | { 82 | "cell_type": "code", 83 | "execution_count": 5, 84 | "metadata": {}, 85 | "outputs": [ 86 | { 87 | "name": "stdout", 88 | "output_type": "stream", 89 | "text": [ 90 | "第一个数据样本的标签: 15.2\n" 91 | ] 92 | } 93 | ], 94 | "source": [ 95 | "print (\"第一个数据样本的标签:\", y_train[0])" 96 | ] 97 | }, 98 | { 99 | "cell_type": "code", 100 | "execution_count": 7, 101 | "metadata": {}, 102 | "outputs": [ 103 | { 104 | "name": "stdout", 105 | "output_type": "stream", 106 | "text": [ 107 | "A张量形状: (5,)\n", 108 | "B张量形状: (5, 1)\n", 109 | "点积操作: [35]\n" 110 | ] 111 | } 112 | ], 113 | "source": [ 114 | "A = np.array([1, 2, 3, 4, 5])\n", 115 | "B = np.array([[5], [4], [3], [2], [1]])\n", 116 | "print(\"A张量形状:\", A.shape)\n", 117 | "print(\"B张量形状:\", B.shape)\n", 118 | "print(\"点积操作:\", A.dot(B))\n", 119 | "# print(\"点积操作:\", B.dot(A)) # 报错" 120 | ] 121 | } 122 | ], 123 | "metadata": { 124 | "interpreter": { 125 | "hash": "694cdcaedaf049a0984f27e4a849c1af591c6b1d7a3cf6d6f220830adff0acba" 126 | }, 127 | "kernelspec": { 128 | "display_name": "Python 3.8.8 ('Vuean_ML')", 129 | "language": "python", 130 | "name": "python3" 131 | }, 132 | "language_info": { 133 | "codemirror_mode": { 134 | "name": "ipython", 135 | "version": 3 136 | }, 137 | "file_extension": ".py", 138 | "mimetype": "text/x-python", 139 | "name": "python", 140 | "nbconvert_exporter": "python", 141 | "pygments_lexer": "ipython3", 142 | "version": "3.8.8" 143 | }, 144 | "orig_nbformat": 4 145 | }, 146 | "nbformat": 4, 147 | "nbformat_minor": 2 148 | } 149 | -------------------------------------------------------------------------------- /Class2/Figures/fig01_本章知识导图.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Vuean/Zero-Basic-Machine-Learning/c5a44fa0fd964c11aa87c17259d1c1f5f6581946/Class2/Figures/fig01_本章知识导图.jpg -------------------------------------------------------------------------------- /Class2/Figures/fig02_函数是什么.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Vuean/Zero-Basic-Machine-Learning/c5a44fa0fd964c11aa87c17259d1c1f5f6581946/Class2/Figures/fig02_函数是什么.jpg -------------------------------------------------------------------------------- /Class2/Figures/fig03_激活函数.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Vuean/Zero-Basic-Machine-Learning/c5a44fa0fd964c11aa87c17259d1c1f5f6581946/Class2/Figures/fig03_激活函数.jpg -------------------------------------------------------------------------------- /Class2/Figures/fig04_凸函数和非凸函数.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Vuean/Zero-Basic-Machine-Learning/c5a44fa0fd964c11aa87c17259d1c1f5f6581946/Class2/Figures/fig04_凸函数和非凸函数.jpg -------------------------------------------------------------------------------- /Class2/Figures/fig05_张量.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Vuean/Zero-Basic-Machine-Learning/c5a44fa0fd964c11aa87c17259d1c1f5f6581946/Class2/Figures/fig05_张量.jpg -------------------------------------------------------------------------------- /Class2/Figures/fig06_向量的点积运算法则.jpg: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/Vuean/Zero-Basic-Machine-Learning/c5a44fa0fd964c11aa87c17259d1c1f5f6581946/Class2/Figures/fig06_向量的点积运算法则.jpg -------------------------------------------------------------------------------- /Class2/Figures/fig07_矩阵的点积规则.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Vuean/Zero-Basic-Machine-Learning/c5a44fa0fd964c11aa87c17259d1c1f5f6581946/Class2/Figures/fig07_矩阵的点积规则.jpg -------------------------------------------------------------------------------- /Class2/Figures/fig08_时间序列数据集的张量结构.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Vuean/Zero-Basic-Machine-Learning/c5a44fa0fd964c11aa87c17259d1c1f5f6581946/Class2/Figures/fig08_时间序列数据集的张量结构.jpg -------------------------------------------------------------------------------- /Class2/Figures/fig09_图像数据集的张量结构.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Vuean/Zero-Basic-Machine-Learning/c5a44fa0fd964c11aa87c17259d1c1f5f6581946/Class2/Figures/fig09_图像数据集的张量结构.jpg -------------------------------------------------------------------------------- /Class2/Figures/fig10_Python的广播.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Vuean/Zero-Basic-Machine-Learning/c5a44fa0fd964c11aa87c17259d1c1f5f6581946/Class2/Figures/fig10_Python的广播.jpg -------------------------------------------------------------------------------- /Class2/Figures/fig11_矩阵点积的运算规则.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Vuean/Zero-Basic-Machine-Learning/c5a44fa0fd964c11aa87c17259d1c1f5f6581946/Class2/Figures/fig11_矩阵点积的运算规则.jpg -------------------------------------------------------------------------------- /Class2/Figures/fig12_二维向量.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Vuean/Zero-Basic-Machine-Learning/c5a44fa0fd964c11aa87c17259d1c1f5f6581946/Class2/Figures/fig12_二维向量.jpg -------------------------------------------------------------------------------- /Class2/Figures/fig13_二维向量加法.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Vuean/Zero-Basic-Machine-Learning/c5a44fa0fd964c11aa87c17259d1c1f5f6581946/Class2/Figures/fig13_二维向量加法.jpg -------------------------------------------------------------------------------- /Class2/Figures/fig14_二维向量的点积.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Vuean/Zero-Basic-Machine-Learning/c5a44fa0fd964c11aa87c17259d1c1f5f6581946/Class2/Figures/fig14_二维向量的点积.jpg -------------------------------------------------------------------------------- /Class2/Figures/fig15_常见的机器学习模型.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Vuean/Zero-Basic-Machine-Learning/c5a44fa0fd964c11aa87c17259d1c1f5f6581946/Class2/Figures/fig15_常见的机器学习模型.jpg -------------------------------------------------------------------------------- /Class2/Figures/fig16_通过神经网络展开数据流形.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/Vuean/Zero-Basic-Machine-Learning/c5a44fa0fd964c11aa87c17259d1c1f5f6581946/Class2/Figures/fig16_通过神经网络展开数据流形.png -------------------------------------------------------------------------------- /Class2/Figures/fig17_概率的定义和计算公式.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Vuean/Zero-Basic-Machine-Learning/c5a44fa0fd964c11aa87c17259d1c1f5f6581946/Class2/Figures/fig17_概率的定义和计算公式.jpg -------------------------------------------------------------------------------- /Class2/Figures/fig18_正态分布形状.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Vuean/Zero-Basic-Machine-Learning/c5a44fa0fd964c11aa87c17259d1c1f5f6581946/Class2/Figures/fig18_正态分布形状.png -------------------------------------------------------------------------------- /Class2/README.md: --------------------------------------------------------------------------------
1 | # Class 2: Math and Python Fundamentals
2 |
3 | ![fig01_本章知识导图](./Figures/fig01_本章知识导图.jpg)
4 |
5 | ## 2.1 Functions Describe Relationships Between Things
6 |
7 | ### 2.1.1 What Is a Function
8 |
9 | A function maps every element of one set to a single, unique value in another set; it reflects the correspondence between the two sets.
10 |
11 | ![fig02_函数是什么](./Figures/fig02_函数是什么.jpg)
12 |
13 | (1) Every element X of the input set must be covered;
14 |
15 | (2) The output of a function is **unique**.
16 |
17 | ### 2.1.2 Functions in Machine Learning
18 |
19 | Machine learning is essentially the process of finding a function that carries out a specific inference from features to results. In the big-data era, machine learning cares less about the **causal logic** between features and labels and more about the **correlation** between them.
20 |
21 | **The functions found by machine learning algorithms can often see secrets hidden behind the data that the naked eye cannot.**
22 |
23 | Common functions in machine learning include:
24 |
25 | 1. Linear functions
26 |
27 | 2. Quadratic and higher-order polynomial functions
28 |
29 | 3. Activation functions
30 |
31 | An **activation function** implements a nonlinear, step-like transformation inside a machine learning algorithm.
32 |
33 | ![fig03_激活函数](./Figures/fig03_激活函数.jpg)
34 |
35 | 4. Logarithmic functions
36 |
37 | The logarithm applies a powerful rescaling to the number line.
38 |
39 | ## 2.2 Capturing a Function's Trend
40 |
41 | One of the recurring problems in machine learning is capturing how a function changes.
42 |
43 | ### 2.2.1 Continuity Is the Precondition for Differentiation
44 |
45 | Continuity is one of a function's properties, and it is the precondition for the function to be differentiable.
46 |
47 | ### 2.2.2 Differentiation Reveals How y Changes with x
48 |
49 | The **derivative** is defined on top of continuous functions. A derivative is the trend of a function's change with respect to one variable.
50 |
51 | ### 2.2.3 A Convex Function Has a Single Global Minimum
52 |
53 | Convexity is another of a function's characteristics (others include parity, monotonicity, and periodicity).
54 |
55 | A convex function has a continuous, smooth shape with only one lowest point: the whole function is bowl-shaped.
56 |
57 | ![fig04_凸函数和非凸函数](./Figures/fig04_凸函数和非凸函数.jpg)
58 |
59 | ## 2.3 Gradient Descent Is the Engine of Machine Learning
60 |
61 | ### 2.3.1 What Is a Gradient
62 |
63 | **Take the partial derivative of a multivariate function with respect to each parameter, then write the resulting partial derivatives out as a vector: that is the gradient.**
64 |
65 | Concretely, for a function $f(x_1, x_2)$ whose variables correspond to two features of a machine learning dataset, taking the partial derivatives with respect to $x_1$ and $x_2$ gives the gradient vector $(\partial f/\partial x_1, \partial f/\partial x_2)^T$, written mathematically as $\nabla f(x_1, x_2)$.
66 |
67 | Why compute the gradient: geometrically, the gradient is the direction in which the function changes — and the direction in which it changes fastest.
68 |
69 | ### 2.3.2 Gradient Descent: The Downhill Metaphor
70 |
71 | On a non-convex function, gradient descent cannot always find the global optimum; it may end up at a local optimum. On a convex function, gradient descent can in theory reach the global optimum.
72 |
73 | ### 2.3.3 What Gradient Descent Is For
74 |
75 | A few points to note about what gradient descent does (a minimal numeric sketch follows this list):
76 |
77 | - The essence of machine learning is finding the optimal function
78 |
79 | - The way to judge whether a function is optimal is to minimize the error between predicted and true values
80 |
81 | - A function can be built relating that error to the model parameters
82 |
83 | - Gradient descent can guide us to the global minimum of a convex function, i.e., to the parameters where the error is smallest
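To make the descent concrete, here is a minimal numeric sketch. It is not from the book's examples: the function, starting point, and learning rate are made up for illustration. It walks a single parameter down a convex "loss" by repeatedly stepping against the derivative:

```python
def f(w):           # a convex loss with its global minimum at w = 3
    return (w - 3) ** 2 + 2

def gradient(w):    # derivative of f with respect to w
    return 2 * (w - 3)

w = 0.0             # arbitrary initial guess
lr = 0.1            # learning rate (step size)
for _ in range(50):
    w = w - lr * gradient(w)   # step against the gradient
print(w, f(w))      # w has crept up to ~3.0 and f(w) down to ~2.0
```

Each step moves w in the direction that lowers f, and because f is convex the iteration settles at the single global minimum rather than getting trapped elsewhere.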
84 |
85 | ## 2.4 The Data Structure of Machine Learning: Tensors
86 |
87 | In machine learning, the structure used to store data is called a **tensor**.
88 |
89 | ### 2.4.1 A Tensor's Axes, Rank, and Shape
90 |
91 | A tensor is the container for numbers in a machine learning program; it is essentially an array of whatever dimensionality, as shown below. A tensor's dimensions are called **axes**, and the number of axes is its **rank** (what NumPy calls the rank of an array).
92 |
93 | ![fig05_张量](./Figures/fig05_张量.jpg)
94 |
95 | A tensor's **shape** is its rank plus the dimension of each axis (the number of elements along each axis).
96 |
97 | ### 2.4.2 Scalars: 0D (Rank-0) Tensors
98 |
99 | A tensor holding a single number is called a **scalar**, i.e., a rank-0 (0D) tensor. Scalars are mainly used for program flow control, setting parameter values, and the like.
100 |
101 | ```python
102 | import numpy as np
103 | X = np.array(5) # create a 0D tensor, i.e., a scalar
104 | print("X的值:", X)
105 | print("X的阶:", X.ndim)
106 | print("X的数据类型:", X.dtype)
107 | print("X的形状:", X.shape)
108 |
109 | >>> X的值: 5
110 | X的阶: 0
111 | X的数据类型: int32
112 | X的形状: ()
113 | ```
114 |
115 | A scalar's shape is (), i.e., its rank is 0.
116 |
117 | ### 2.4.3 Vectors: 1D (Rank-1) Tensors
118 |
119 | An array made up of a group of numbers is called a **vector**, i.e., a rank-1 (1D) tensor. A rank-1 tensor has exactly one axis.
120 |
121 | ```python
122 | X = np.array([5, 6, 7, 8, 9])
123 | print("X的值:", X)
124 | print("X的阶:", X.ndim)
125 | print("X的数据类型:", X.dtype)
126 | print("X的形状:", X.shape)
127 |
128 | >>>
129 | X的值: [5 6 7 8 9]
130 | X的阶: 1
131 | X的数据类型: int32
132 | X的形状: (5,)
133 | ```
134 |
135 | This code creates a 1D tensor with 5 elements. Be careful: in machine learning, **a vector with 5 elements is called a 5-dimensional vector**. Do not confuse a **5-dimensional vector** with a **5D tensor**.
136 |
137 | Load the boston_housing data bundled with Keras:
138 |
139 | ```python
140 | from keras.datasets import boston_housing
141 | (X_train, y_train), (X_test, y_test) = boston_housing.load_data()
142 | print("X_train的形状:", X_train.shape)
143 | print("X_train中第一个样本的形状:", X_train[0].shape)
144 | print("y_train的形状:", y_train.shape)
145 |
146 | >>>
147 | X_train的形状: (404, 13)
148 | X_train中第一个样本的形状: (13,)
149 | y_train的形状: (404,)
150 | ```
151 |
152 | The output shows that X_train is a 2D matrix — a collection of 404 samples — while y_train is a typical vector.
153 |
154 | X_train[0] is the first row of the training set X_train: a 13-dimensional vector (a 1D tensor), i.e., each row holds 13 features.
155 |
156 | The dot product rule for vectors: multiply corresponding elements of two same-dimension vectors, then sum the products. The dot product of two vectors is a scalar — a single value.
157 |
158 | ![fig06_向量的点积运算法则](./Figures/fig06_向量的点积运算法则.jpg)
159 |
160 | Linear regression, the most basic machine learning method, fits the feature-label relationship with a linear function whose parameter $w$
161 | is a vector, as is $x$: $x$ is the feature vector and $w$ is the weight vector. **Dotting the feature vector (one sample) with the weight vector yields the predicted target value** $y'$ for that sample: $y'=w_0x_0+w_1x_1+w_2x_2+...+w_nx_n$.
162 |
163 | ### 2.4.4 Matrices: 2D (Rank-2) Tensors
164 |
165 | A **matrix** is a collection of vectors — a rank-2 (2D) tensor of shape (m, n). In machine learning a matrix's shape is (samples, features): the first axis is the **sample axis** and the second the **feature axis**.
166 |
167 | Taking the Keras boston_housing data as an example, the matrix's shape is (404, 13): 404 samples, 13 features.
168 |
169 | Matrix dot product: dot each row vector of the first matrix with each column vector of the second, and place each resulting scalar into the result matrix as one of its elements.
170 |
171 | ![fig07_矩阵的点积规则](./Figures/fig07_矩阵的点积规则.jpg)
172 |
173 | ### 2.4.5 Sequence Data: 3D (Rank-3) Tensors
174 |
175 | In practice, it is **sequence datasets** that give machine learning its 3D tensors, and **time series** are the most common sequence datasets, structured as follows:
176 |
177 | ![fig08_时间序列数据集的张量结构](./Figures/fig08_时间序列数据集的张量结构.jpg)
178 |
179 |
180 | ### 2.4.6 Image Data: 4D (Rank-4) Tensors
181 |
182 | An image has height, width, and color depth; adding the dataset size, an image dataset forms a 4D tensor of shape (**samples, image height, image width, color depth**). The MNIST feature dataset, for instance, has shape (60000, 28, 28, 1).
183 |
184 | ![fig09_图像数据集的张量结构](./Figures/fig09_图像数据集的张量结构.jpg)
185 |
186 | ### 2.4.7 Video Data: 5D (Rank-5) Tensors
187 |
188 | A video can be viewed as a dataset made of color images, frame by frame.
189 |
190 | - Each frame is stored in a 3D tensor of shape (height, width, color depth).
191 |
192 | - A sequence of frames is stored in a 4D tensor of shape (frames, height, width, color depth).
193 |
194 | A video dataset therefore needs a 5D tensor to hold it, of shape (**samples, frames, height, width, color depth**).
195 |
196 | ### 2.4.8 Data Dimensions and Space Dimensions
197 |
198 | ## 2.5 Tensor Operations in Python
199 |
200 | ### 2.5.1 Creating Tensors for Machine Learning
201 |
202 | **Tensors in machine learning are mostly implemented as NumPy arrays.**
203 |
204 | Of course, a machine learning dataset is rarely created inside the program: in most cases **all samples are first read from a text file into a DataFrame, then converted with the array method (or another method) into a NumPy array — i.e., a tensor — before any further work**.
205 |
206 | ### 2.5.2 Accessing Tensor Data with Indexing and Slicing
207 |
208 | A specific value in a dataset can be accessed via **indexing**, and a range of values via **slicing**.
209 |
210 | ### 2.5.3 Whole-Tensor and Element-wise Operations
211 |
212 | Tensor arithmetic — addition, subtraction, multiplication, division, exponentiation, and so on — can be applied to tensors as a whole or element by element.
213 |
214 | ### 2.5.4 Reshaping and Transposing Tensors
215 |
216 | ### 2.5.5 Broadcasting in Python
217 |
218 | Python's **broadcasting** is NumPy's way of computing with arrays whose shapes are not identical: it automatically expands a single number into a row of values, or a lower-dimensional array into a higher-dimensional one.
219 |
220 | Broadcasting pads the smaller operand to follow the operand whose corresponding axes are larger, i.e., more complex. A picture makes this clearer, as shown below: a has shape (4, 3) (a rank-2 tensor) and b has shape (1, 3) (also rank-2), so b's single row is copied, stretching b into a tensor of shape (4, 3), which is then added to a.
221 |
222 | ![fig10_Python的广播](./Figures/fig10_Python的广播.jpg)
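As a quick check of the rule in the figure (the concrete numbers here are made up for illustration), NumPy reproduces exactly this row-copying behavior:

```python
import numpy as np

a = np.arange(12).reshape(4, 3)   # shape (4, 3)
b = np.array([[10, 20, 30]])      # shape (1, 3)
c = a + b                         # b's single row is broadcast (copied) down all 4 rows
print(c.shape)                    # (4, 3)
print(c)
```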
223 |
224 | ### 2.5.6 Dot Products of Vectors and Matrices
225 |
226 | 1. Vector dot products
227 |
228 | For vectors $a=[a_1, a_2, ... , a_n]$ and $b=[b_1, b_2, ... , b_n]$, the dot product rule is: $a \cdot b=a_1b_1+a_2b_2+...+a_nb_n$.
229 |
230 | The dot product requires a and b to have the same dimension, and the result is a scalar. Python, however, is more permissive:
231 |
232 | - A 1D vector of shape (n,) and a 1D vector of shape (n,) can be dotted — the result is a scalar, i.e., a number, and a·b=b·a.
233 |
234 | - A 1D vector of shape (n,) and a 2D tensor of shape (n,1) can be dotted (a tensor of shape (n,1) is strictly already a matrix, but since one of its axes has dimension 1 it can loosely be treated as a vector) — the result is a 1D array of shape (1,) holding a single number.
235 |
236 | - A 2D tensor of shape (1,n) and a 1D vector of shape (n,) can be dotted — the result is a 1D array of shape (1,) holding a single number.
237 |
238 | - 2D tensors of shapes (1,n) and (n,1) can also be dotted — the result is a 2D tensor of shape (1,1) holding a single number.
239 |
240 | - 2D tensors of shapes (1,n) and (1,n) cannot be dotted — the system reports shapes (1,n) and (1,n) not aligned: n (dim 1) != 1 (dim 0).
241 |
242 | - 2D tensors of shapes (n,1) and (n,1) cannot be dotted — the system reports shapes (n,1) and (n,1) not aligned: 1 (dim 1) != n (dim 0).
243 |
244 | - Tensors of shapes (n,), (n,1), or (1,n) can also be dotted with a vector of shape (1,) or with a scalar — Python broadcasts the second operand — though the results differ from case to case.
245 |
246 | 2. Matrix dot products
247 |
248 | For matrix-by-matrix dot products, just remember one principle: **the dimension of the first matrix's axis 1 must equal the dimension of the second matrix's axis 0.** That is, for two tensors of shapes (a,b) and (b,c), the shared dimension b is what makes the matrix dot product possible, and the resulting matrix has shape (a,c).
249 |
250 | ![fig11_矩阵点积的运算规则](./Figures/fig11_矩阵点积的运算规则.jpg)
251 |
252 | ## 2.6 The Geometric Meaning of Machine Learning
253 |
254 | ### 2.6.1 The Vector Space of Machine Learning
255 |
256 | A tensor can be understood as the coordinates of points in some geometric space, so the feature vectors of machine learning form a **feature space**. For example, the two-dimensional vector A=(0.5, 1) can be viewed as a point in a two-dimensional space:
257 |
258 | ![fig12_二维向量](./Figures/fig12_二维向量.jpg)
259 |
260 | Tensor operations all have geometric meaning; two-dimensional vector addition looks like this:
261 |
262 | ![fig13_二维向量加法](./Figures/fig13_二维向量加法.jpg)
263 |
264 | The geometric meaning of the two-dimensional dot product involves the angle between the two vectors and the projection of one onto the other:
265 |
266 | ![fig14_二维向量的点积](./Figures/fig14_二维向量的点积.jpg)
267 |
268 | Generalizing: **a machine learning model operates on and transforms feature vectors in a higher-dimensional geometric space, computes the distances between them, and searches for a functional fit from feature vectors to labels — that is the essence of machine learning stated geometrically.**
269 |
270 | Several common machine learning models can be described geometrically through their feature spaces, as shown below.
271 |
272 | ![fig15_常见的机器学习模型](./Figures/fig15_常见的机器学习模型.jpg)
273 |
274 | ### 2.6.2 Deep Learning and Data Manifolds
275 |
276 | The deep learning process is, in effect, a data purification process. The main motivation is that high feature dimensionality makes the feature space extremely complex, which in turn makes modeling very difficult. One line of thinking is **manifold** learning: "flatten" the distribution of samples in the high-dimensional feature space onto a lower-dimensional space while preserving the local positional relationships among the sample points of the original space.
277 |
278 | In traditional machine learning, **manifold learning is mainly used for feature extraction and dimensionality reduction** — feature extraction makes the features friendlier, and dimensionality reduction is needed because high-dimensional data is usually redundant.
279 |
280 | Since the advent of deep learning, one view holds that neural networks automatically unfold complex data manifolds on their own, reducing the need for feature extraction. Intuitively, the unfolding can be pictured as smoothing out a crumpled ball of paper, as shown below.
281 |
282 | ![fig16_通过神经网络展开数据流形](./Figures/fig16_通过神经网络展开数据流形.png)
283 |
284 | Modern **deep neural networks (DNNs) unfold the manifolds of high-dimensional data through parameter learning — which may be called the geometric meaning of deep learning**.
285 |
286 | ## 2.7 Probability and Statistics Study the Laws of Random Events
287 |
288 | ### 2.7.1 What Is Probability
289 |
290 | Events come in two kinds.
291 |
292 | - **Deterministic events**, which again come in two kinds.
293 |
294 | - **Certain events**: the sun rising in the east, or water freezing at 0°C.
295 |
296 | - **Impossible events**: rolling a 7 with an ordinary six-sided die.
297 |
298 | - Whether a great many events will occur under given conditions cannot be determined; these are **random events**.
299 |
300 | ![fig17_概率的定义和计算公式](./Figures/fig17_概率的定义和计算公式.jpg)
301 |
302 | The last formula, $P(A|B)$, is called the conditional probability, also known as the posterior probability.
303 |
304 | ### 2.7.2 The Normal Distribution
305 |
306 | A distribution is a set of probabilities — a way of displaying a common probability distribution as a continuous function curve.
307 |
308 | The normal distribution, also called the Gaussian distribution, is a common continuous probability distribution, whose curve is known as the bell curve of probability.
309 |
310 | ![fig18_正态分布形状](./Figures/fig18_正态分布形状.png)
311 |
312 | ### 2.7.3 Standard Deviation and Variance
313 |
314 | In the normal distribution, $\sigma$ denotes the **standard deviation** (SD) — the root-mean-square deviation from the mean — a statistical measure of how much the individuals within a population differ from one another.
315 |
316 | The standard deviation is the arithmetic square root of the variance. Both variance and standard deviation describe how spread out the data is relative to its expected value.
317 |
318 | **Variance** is obtained by subtracting the arithmetic mean (the **mean**, i.e., the expected value) from each actual value in a dataset, squaring the differences, summing the squares, and dividing by the count. -------------------------------------------------------------------------------- /Class3/Example/Ex01/animation.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Vuean/Zero-Basic-Machine-Learning/c5a44fa0fd964c11aa87c17259d1c1f5f6581946/Class3/Example/Ex01/animation.gif -------------------------------------------------------------------------------- /Class3/Example/Ex01/dataset/advertising.csv: -------------------------------------------------------------------------------- 1 | wechat,weibo,others,sales 2 | 304.4,93.6,294.4,9.7 3 | 1011.9,34.4,398.4,16.7 4 | 1091.1,32.8,295.2,17.3 5
| 85.5,173.6,403.2,7 6 | 1047,302.4,553.6,22.1 7 | 940.9,41.6,155.2,17.2 8 | 1277.2,111.2,296,16.1 9 | 38.2,217.6,16.8,5.7 10 | 342.6,162.4,260,11.3 11 | 347.6,6.4,118.4,9.4 12 | 980.1,188.8,460.8,17.1 13 | 39.1,16.8,8,4.8 14 | 39.6,391.2,600,7.2 15 | 889.1,381.6,423.2,22.4 16 | 633.8,116,81.6,13.4 17 | 527.8,61.6,184.8,11 18 | 203.4,206.4,164.8,10.1 19 | 499.6,382.4,411.2,16.7 20 | 633.4,114.4,204.8,12.2 21 | 437.7,118.4,311.2,12.3 22 | 334,136,103.2,10.9 23 | 1132,216.8,183.2,18.9 24 | 841.3,351.2,13.6,20.7 25 | 435.4,11.2,59.2,11.9 26 | 627.4,371.2,472,15 27 | 599.2,147.2,276.8,12.9 28 | 321.2,128,326.4,10.5 29 | 571.9,295.2,633.6,15.9 30 | 758.9,336,28.8,19.6 31 | 799.4,123.2,19.2,17.1 32 | 314,74.4,7.2,11.3 33 | 108.3,280.8,527.2,9.2 34 | 339.9,395.2,365.6,14.7 35 | 619.7,153.6,132.8,13.2 36 | 227.5,92.8,147.2,8.4 37 | 347.2,220,128,12 38 | 774.4,62.4,281.6,16.7 39 | 1003.3,265.6,303.2,20.1 40 | 60.1,127.2,396.8,5.6 41 | 88.3,128,178.4,6.6 42 | 1280.4,316.8,446.4,24.4 43 | 743.9,294.4,59.2,18 44 | 805.4,267.2,309.6,17.1 45 | 905,395.2,480,23.7 46 | 76.9,349.6,715.2,8.7 47 | 1088.8,124,218.4,20.7 48 | 670.2,191.2,152.8,14.6 49 | 513.7,139.2,308.8,11.9 50 | 1067,27.2,678.4,16.9 51 | 89.2,160.8,136,7.6 52 | 130.1,12,264,7.3 53 | 113.8,88,237.6,7.2 54 | 195.7,207.2,164,9.6 55 | 1000.1,268,360.8,19.6 56 | 283.5,100.8,146.4,9.7 57 | 1245.3,231.2,477.6,20.8 58 | 681.1,284.8,48,17.3 59 | 341.7,280,421.6,12.6 60 | 743,252.8,423.2,16.9 61 | 976.9,192,32,17.4 62 | 1308.6,344,574.4,26.2 63 | 953.7,164.8,85.6,20.9 64 | 1196.2,28,156,17 65 | 488.7,112,87.2,11.5 66 | 1027.4,65.6,452,18.4 67 | 830.8,369.6,469.6,21.2 68 | 984.6,333.6,316.8,22.6 69 | 143.3,196.8,17.6,11 70 | 1092.5,58.4,69.6,18.2 71 | 993.7,221.6,427.2,18 72 | 1290.4,336,529.6,25.5 73 | 638.4,15.2,72,10.3 74 | 355.8,374.4,276,14.6 75 | 854.5,168.8,76,20.6 76 | 3.2,316.8,69.6,1.6 77 | 615.2,333.6,367.2,17.2 78 | 53.2,295.2,361.6,7.3 79 | 401.8,204,587.2,12.9 80 | 1348.6,290.4,807.2,23.8 81 | 78.3,367.2,554.4,12 82 | 1188.9,341.6,437.6,24.2 83 | 1206.7,23.2,344,17.7 84 | 899.1,28,47.2,16.7 85 | 364.9,0,73.6,11.9 86 | 854.9,137.6,143.2,19.7 87 | 1099.7,304,185.6,21.8 88 | 909.1,20.8,169.6,15.6 89 | 1293.6,84.8,51.2,20 90 | 311.2,356,284.8,13.6 91 | 411.3,2.4,185.6,12 92 | 881.3,283.2,604.8,19.2 93 | 1091.5,332,148,23.2 94 | 18.7,92.8,45.6,3.2 95 | 921.4,178.4,252.8,16.6 96 | 1214.4,350.4,40,25.4 97 | 1038.8,135.2,209.6,20.5 98 | 427.2,348,404,15.3 99 | 116.5,312,74.4,9.5 100 | 879.1,147.2,525.6,20.2 101 | 971,196.8,104.8,17 102 | 899.1,186.4,113.6,16.6 103 | 114.2,205.6,346.4,8.5 104 | 78.3,32.8,252.8,5.9 105 | 59.6,3.2,204.8,5.3 106 | 748.5,167.2,379.2,17.5 107 | 681.6,10.4,194.4,10.1 108 | 261.6,262.4,188,11.8 109 | 1083.8,274.4,42.4,20.7 110 | 1322.7,32.8,68,17.8 111 | 753.5,80,140.8,17.6 112 | 1259.9,391.2,334.4,27 113 | 1080.2,40.8,188,17.5 114 | 33.2,224.8,331.2,5.5 115 | 909.1,24.8,276.8,16.4 116 | 1092.5,133.6,183.2,20.9 117 | 1208.5,160,2.4,17.4 118 | 766.2,56.8,102.4,16.7 119 | 467.3,236.8,67.2,14 120 | 611.1,39.2,74.4,14 121 | 202.5,314.4,360.8,10.4 122 | 24.6,239.2,75.2,5.3 123 | 442.3,12,240,13.2 124 | 1301.3,111.2,29.6,20.9 125 | 314.9,164,146.4,11.3 126 | 634.7,16.8,212.8,10.3 127 | 408.1,79.2,285.6,10.6 128 | 560.1,276.8,99.2,15.2 129 | 503.7,324.8,505.6,16 130 | 1154.8,170.4,240,17.6 131 | 1130.2,241.6,162.4,20.2 132 | 932.8,360.8,156.8,22.6 133 | 958.7,236,74.4,18.4 134 | 1044.2,258.4,593.6,19.7 135 | 1274.9,80.8,171.2,19.8 136 | 550.6,67.2,389.6,11.6 137 | 1259,18.4,189.6,16.8 138 | 196.1,213.6,280.8,10.1 139 | 
548.3,228,113.6,14.2 140 | 650.2,234.4,100.8,15 141 | 81.4,300.8,172.8,8 142 | 499.6,114.4,253.6,12.4 143 | 1033.8,126.4,399.2,19.8 144 | 219.8,376,68,11.6 145 | 971.4,344,270.4,21.7 146 | 779.4,317.6,301.6,16 147 | 1019.2,19.2,124.8,16.6 148 | 1141.6,292,578.4,22.2 149 | 994.2,43.2,219.2,17.2 150 | 986.4,351.2,217.6,22.3 151 | 1318.1,338.4,409.6,25.4 152 | 300.8,46.4,193.6,12.6 153 | 588.8,45.6,250.4,11 154 | 1056.1,68.8,69.6,18.4 155 | 179.7,328.8,46.4,10.8 156 | 1080.2,220,88,18.9 157 | 255.7,45.6,237.6,8.7 158 | 1011.9,27.2,104.8,16.5 159 | 941.4,67.2,211.2,17.9 160 | 928.7,263.2,368,19 161 | 167.9,308.8,524.8,10.8 162 | 271.2,96,344.8,9.7 163 | 822.6,86.4,467.2,17.9 164 | 1162.1,215.2,44,19.8 165 | 596.5,342.4,231.2,16 166 | 990.5,268,472,19.4 167 | 533.3,117.6,43.2,11.9 168 | 1335.9,221.6,14.4,20.7 169 | 308.5,292.8,912,12.5 170 | 1106.6,392,354.4,25.4 171 | 805.4,74.4,51.2,14.8 172 | 1002.4,392,25.6,24.7 173 | 347.6,213.6,178.4,11.8 174 | 443.6,60.8,57.6,13.7 175 | 389.9,286.4,394.4,13.3 176 | 642.9,214.4,369.6,15.5 177 | 243.4,16,171.2,8.1 178 | 841.3,168,176,20.5 179 | 35.5,311.2,404.8,6.6 180 | 85.1,96.8,187.2,6.7 181 | 784.9,144.8,245.6,16.4 182 | 428.6,39.2,64.8,14 183 | 173.8,29.6,110.4,7.6 184 | 1037.4,301.6,256,21.5 185 | 712.5,20.8,66.4,15.5 186 | 172.9,322.4,95.2,10.9 187 | 456.8,76.8,28.8,10.7 188 | 396.8,94.4,207.2,10.6 189 | 1332.7,226.4,345.6,21.4 190 | 546.9,156.8,92.8,13.2 191 | 857.2,144.8,204.8,19.9 192 | 905.9,244.8,309.6,18.3 193 | 475.9,45.6,275.2,10.4 194 | 959.1,396.8,301.6,23.8 195 | 125.1,12.8,165.6,6.9 196 | 689.3,330.4,468,16.5 197 | 869.5,229.6,145.6,17.3 198 | 1195.3,230.4,127.2,20.2 199 | 121.9,264,154.4,8.8 200 | 343.5,86.4,48,11.9 201 | 796.7,180,252,16.1 202 | -------------------------------------------------------------------------------- /Class3/Example/Ex02/C01 Linear Regression - Boston Housing.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 23, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "import numpy as np\n", 10 | "import pandas as pd\n", 11 | "from keras.datasets import boston_housing\n", 12 | "(X_train, y_train), (X_test, y_test) = boston_housing.load_data()" 13 | ] 14 | }, 15 | { 16 | "cell_type": "code", 17 | "execution_count": 24, 18 | "metadata": {}, 19 | "outputs": [], 20 | "source": [ 21 | "def cost_function(X, y, W):\n", 22 | " # X -> 是一个矩阵,形状是(N,M),N是数据集大小,M是特征数量\n", 23 | " # W -> 是一个向量,形状是(M,1)(1*)\n", 24 | " y_hat = X.dot(W.T)\n", 25 | " loss = y_hat - y\n", 26 | " cost = np.sum(loss**2) / len(X)\n", 27 | " return cost" 28 | ] 29 | }, 30 | { 31 | "cell_type": "code", 32 | "execution_count": 25, 33 | "metadata": {}, 34 | "outputs": [], 35 | "source": [ 36 | "def gradient_descent(X, y, W, alpha, iterations):\n", 37 | " l_history = np.zeros(iterations)\n", 38 | " W_history = np.zeros((iterations, len(W)))\n", 39 | " for iter in range(iterations):\n", 40 | " y_hat = X.dot(W)\n", 41 | " loss = y_hat - y\n", 42 | " derivative_W = X.T.dot(loss) / (2 * len(X))\n", 43 | " derivative_W = derivative_W.reshape(len(W))\n", 44 | " W = W - alpha * derivative_W\n", 45 | " l_history[iter] = cost_function(X, y, W)\n", 46 | " W_history[iter] = W\n", 47 | " return l_history, W_history" 48 | ] 49 | }, 50 | { 51 | "cell_type": "code", 52 | "execution_count": 32, 53 | "metadata": {}, 54 | "outputs": [ 55 | { 56 | "data": { 57 | "text/plain": [ 58 | "(404, 13)" 59 | ] 60 | }, 61 | "execution_count": 32, 62 | "metadata": {}, 63 
| "output_type": "execute_result" 64 | } 65 | ], 66 | "source": [ 67 | "X_train.shape" 68 | ] 69 | }, 70 | { 71 | "cell_type": "code", 72 | "execution_count": 33, 73 | "metadata": {}, 74 | "outputs": [ 75 | { 76 | "data": { 77 | "text/plain": [ 78 | "(13,)" 79 | ] 80 | }, 81 | "execution_count": 33, 82 | "metadata": {}, 83 | "output_type": "execute_result" 84 | } 85 | ], 86 | "source": [ 87 | "w1 = np.array([0.5,1.2,0.1,0.1,0.1,0.1,0.1,0.1,0.1,0.1,0.1,0.1,0.1])\n", 88 | "w1.shape" 89 | ] 90 | }, 91 | { 92 | "cell_type": "code", 93 | "execution_count": 40, 94 | "metadata": {}, 95 | "outputs": [ 96 | { 97 | "data": { 98 | "text/plain": [ 99 | "(404,)" 100 | ] 101 | }, 102 | "execution_count": 40, 103 | "metadata": {}, 104 | "output_type": "execute_result" 105 | } 106 | ], 107 | "source": [ 108 | "xtest_1 = X_train.dot(w1)\n", 109 | "xtest_1.shape" 110 | ] 111 | }, 112 | { 113 | "cell_type": "code", 114 | "execution_count": 39, 115 | "metadata": {}, 116 | "outputs": [ 117 | { 118 | "data": { 119 | "text/plain": [ 120 | "(404,)" 121 | ] 122 | }, 123 | "execution_count": 39, 124 | "metadata": {}, 125 | "output_type": "execute_result" 126 | } 127 | ], 128 | "source": [ 129 | "xtest_2 = X_train.dot(w1.T)\n", 130 | "xtest_2.shape" 131 | ] 132 | }, 133 | { 134 | "cell_type": "code", 135 | "execution_count": 26, 136 | "metadata": {}, 137 | "outputs": [ 138 | { 139 | "name": "stdout", 140 | "output_type": "stream", 141 | "text": [ 142 | "当前损失: 7661.442353600208\n" 143 | ] 144 | } 145 | ], 146 | "source": [ 147 | "# 首先确定参数的初始值\n", 148 | "iterations = 12000\n", 149 | "alpha = 0.00001\n", 150 | "weight = np.array([0.5,1.2,0.1,0.1,0.1,0.1,0.1,0.1,0.1,0.1,0.1,0.1,0.1])\n", 151 | "#计算一下初始值的损失\n", 152 | "print ('当前损失:',cost_function(X_train, y_train, weight))" 153 | ] 154 | }, 155 | { 156 | "cell_type": "code", 157 | "execution_count": 27, 158 | "metadata": {}, 159 | "outputs": [], 160 | "source": [ 161 | "# 定义线性回归模型\n", 162 | "def linear_regression(X, y, W, alpha, iterations):\n", 163 | " loss_history, W_history = gradient_descent(X, y, W, alpha, iterations)\n", 164 | " print(\"训练最终损失:\", loss_history[-1]) # 打印最终损失\n", 165 | " y_pred = X.dot(W_history[-1])\n", 166 | " traning_acc = 100 - np.mean(np.abs(y_pred - y) / y) * 100\n", 167 | " print(\"线性回归训练准确率: {:.2f}%\".format(traning_acc)) # 打印准确率\n", 168 | " return loss_history, W_history # 返回训练历史记录" 169 | ] 170 | }, 171 | { 172 | "cell_type": "code", 173 | "execution_count": 28, 174 | "metadata": {}, 175 | "outputs": [ 176 | { 177 | "name": "stdout", 178 | "output_type": "stream", 179 | "text": [ 180 | "训练最终损失: 46.52456005801284\n", 181 | "线性回归训练准确率: 73.92%\n" 182 | ] 183 | } 184 | ], 185 | "source": [ 186 | "loss_history, weight_history = linear_regression(X_train, y_train, weight, alpha, iterations) " 187 | ] 188 | }, 189 | { 190 | "cell_type": "code", 191 | "execution_count": 29, 192 | "metadata": {}, 193 | "outputs": [ 194 | { 195 | "name": "stdout", 196 | "output_type": "stream", 197 | "text": [ 198 | "权重历史记录: [-1.01999858e-01 1.31171778e-01 -5.73370359e-04 1.20500089e-01\n", 199 | " 1.09279291e-01 3.87936676e-01 1.13007411e-01 8.98427117e-02\n", 200 | " 1.09303884e-01 -3.73064774e-04 2.57257459e-01 3.61469088e-02\n", 201 | " -6.46577070e-01]\n", 202 | "损失历史记录: 46.52456005801284\n" 203 | ] 204 | } 205 | ], 206 | "source": [ 207 | "loss_history, weight_history = gradient_descent(X_train, y_train, weight, alpha, iterations) \n", 208 | "print(\"权重历史记录:\", weight_history[-1])\n", 209 | "print(\"损失历史记录:\", loss_history[-1])" 210 | ] 211 | } 212 | ], 213 | 
"metadata": { 214 | "interpreter": { 215 | "hash": "694cdcaedaf049a0984f27e4a849c1af591c6b1d7a3cf6d6f220830adff0acba" 216 | }, 217 | "kernelspec": { 218 | "display_name": "Python 3.8.8 ('Vuean_ML')", 219 | "language": "python", 220 | "name": "python3" 221 | }, 222 | "language_info": { 223 | "codemirror_mode": { 224 | "name": "ipython", 225 | "version": 3 226 | }, 227 | "file_extension": ".py", 228 | "mimetype": "text/x-python", 229 | "name": "python", 230 | "nbconvert_exporter": "python", 231 | "pygments_lexer": "ipython3", 232 | "version": "3.8.8" 233 | }, 234 | "orig_nbformat": 4 235 | }, 236 | "nbformat": 4, 237 | "nbformat_minor": 2 238 | } 239 | -------------------------------------------------------------------------------- /Class3/Example/Ex02/dataset/数据集说明.txt: -------------------------------------------------------------------------------- 1 | 数据集存在于keras及很多其他框架内部,可以通过下述语句导入 2 | from keras.datasets import boston_housing #从Keras中导入Boston Housing数据集 -------------------------------------------------------------------------------- /Class3/Figures/fig01_机器学习的实战架构.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Vuean/Zero-Basic-Machine-Learning/c5a44fa0fd964c11aa87c17259d1c1f5f6581946/Class3/Figures/fig01_机器学习的实战架构.jpg -------------------------------------------------------------------------------- /Class3/Figures/fig02_相关性的热力图.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Vuean/Zero-Basic-Machine-Learning/c5a44fa0fd964c11aa87c17259d1c1f5f6581946/Class3/Figures/fig02_相关性的热力图.jpg -------------------------------------------------------------------------------- /Class3/Figures/fig03_商品销售额和各种广告投放金额之间的散点图.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Vuean/Zero-Basic-Machine-Learning/c5a44fa0fd964c11aa87c17259d1c1f5f6581946/Class3/Figures/fig03_商品销售额和各种广告投放金额之间的散点图.png -------------------------------------------------------------------------------- /Class3/Figures/fig04_数据归一化.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Vuean/Zero-Basic-Machine-Learning/c5a44fa0fd964c11aa87c17259d1c1f5f6581946/Class3/Figures/fig04_数据归一化.jpg -------------------------------------------------------------------------------- /Class3/Figures/fig05_常见归一化公式.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Vuean/Zero-Basic-Machine-Learning/c5a44fa0fd964c11aa87c17259d1c1f5f6581946/Class3/Figures/fig05_常见归一化公式.jpg -------------------------------------------------------------------------------- /Class3/Figures/fig06_微信广告-销售额散点图.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Vuean/Zero-Basic-Machine-Learning/c5a44fa0fd964c11aa87c17259d1c1f5f6581946/Class3/Figures/fig06_微信广告-销售额散点图.jpg -------------------------------------------------------------------------------- /Class3/Figures/fig07_均方误差函数的计算公式.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Vuean/Zero-Basic-Machine-Learning/c5a44fa0fd964c11aa87c17259d1c1f5f6581946/Class3/Figures/fig07_均方误差函数的计算公式.jpg -------------------------------------------------------------------------------- /Class3/Figures/fig08_均方误差函数的损失曲线.jpg: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/Vuean/Zero-Basic-Machine-Learning/c5a44fa0fd964c11aa87c17259d1c1f5f6581946/Class3/Figures/fig08_均方误差函数的损失曲线.jpg -------------------------------------------------------------------------------- /Class3/Figures/fig09_均方误差函(三维图).jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Vuean/Zero-Basic-Machine-Learning/c5a44fa0fd964c11aa87c17259d1c1f5f6581946/Class3/Figures/fig09_均方误差函(三维图).jpg -------------------------------------------------------------------------------- /Class3/Figures/fig10_梯度下降找到损失最小时的权重.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Vuean/Zero-Basic-Machine-Learning/c5a44fa0fd964c11aa87c17259d1c1f5f6581946/Class3/Figures/fig10_梯度下降找到损失最小时的权重.jpg -------------------------------------------------------------------------------- /Class3/Figures/fig11_梯度下降过程.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Vuean/Zero-Basic-Machine-Learning/c5a44fa0fd964c11aa87c17259d1c1f5f6581946/Class3/Figures/fig11_梯度下降过程.jpg -------------------------------------------------------------------------------- /Class3/Figures/fig12_w梯度更新公式.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Vuean/Zero-Basic-Machine-Learning/c5a44fa0fd964c11aa87c17259d1c1f5f6581946/Class3/Figures/fig12_w梯度更新公式.jpg -------------------------------------------------------------------------------- /Class3/Figures/fig13_学习速率对优化过程影响.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Vuean/Zero-Basic-Machine-Learning/c5a44fa0fd964c11aa87c17259d1c1f5f6581946/Class3/Figures/fig13_学习速率对优化过程影响.jpg -------------------------------------------------------------------------------- /Class3/Figures/fig14_损失曲线.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Vuean/Zero-Basic-Machine-Learning/c5a44fa0fd964c11aa87c17259d1c1f5f6581946/Class3/Figures/fig14_损失曲线.jpg -------------------------------------------------------------------------------- /Class3/Figures/fig15_轮廓图.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Vuean/Zero-Basic-Machine-Learning/c5a44fa0fd964c11aa87c17259d1c1f5f6581946/Class3/Figures/fig15_轮廓图.jpg -------------------------------------------------------------------------------- /Class3/Figures/fig16_多元线性回归模型.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Vuean/Zero-Basic-Machine-Learning/c5a44fa0fd964c11aa87c17259d1c1f5f6581946/Class3/Figures/fig16_多元线性回归模型.jpg -------------------------------------------------------------------------------- /Class3/README.md: --------------------------------------------------------------------------------
1 | # Class 3: Linear Regression
2 |
3 | ![fig01_机器学习的实战架构](./Figures/fig01_机器学习的实战架构.jpg)
4 |
5 | 1. Clearly define the problem to be solved: predicting the online store's sales.
6 |
7 | 2. The data collection and preprocessing stage completes the preprocessing work in 5 steps, as follows.
8 |
9 | - Collect the data — the store's relevant records are provided.
10 |
11 | - Visualize the collected data.
12 |
13 | - Do feature engineering to make the data easier for the machine to handle.
14 |
15 | - Split the dataset into a training set and a test set.
16 |
17 | - Do feature scaling to compress the data values into a small range.
18 |
19 | 3. The model selection stage has 3 main parts.
20 |
21 | - Choose the machine learning algorithm — here, linear regression.
22 |
23 | - Choose the hypothesis function for linear regression.
24 |
25 | - Choose the loss function for linear regression.
26 |
27 | 4. Train the machine via gradient descent to determine the model's internal parameters.
28 |
29 | 5. Tune hyperparameters and optimize performance.
30 |
31 | ## 3.1 Problem Definition: How Should the Store Place Its Ads
32 |
33 | Problem definition:
34 |
35 | 1. How strongly does each type of ad correlate with product sales;
36 |
37 | 2. What kind of relationship do the ad types show with product sales;
38 |
39 | 3. Which type of ad influences product sales the most;
40 |
41 | 4. Given a specific allocation of ad spend, predict future product sales.
42 |
43 | This chapter finds that function through regression analysis. **Regression analysis** is a statistical method for determining the quantitative dependence between two or more variables — that is, studying in what form the dependent variable y changes as the independent variable x changes.
44 |
45 | In machine learning, if the linear regression involves only one independent feature x and one dependent label y whose relationship can be approximated by a straight line, it is called **simple (univariate) linear regression**. If it involves two or more independent variables, with a linear relationship between the dependent and independent variables, it is called **multiple (multivariate) linear regression**.
46 |
47 | ## 3.2 Data Collection and Preprocessing
48 |
49 | ### 3.2.1 Collecting the Store's Sales Data
50 |
51 | The dataset contains:
52 |
53 | - Three **features**: WeChat ad spend, Weibo ad spend, and other ad spend.
54 |
55 | - The **label**: sales.
56 |
57 | The spend on each ad type is one feature, so this dataset has 3 features — making this a multivariate regression problem.
58 |
59 | ### 3.2.2 Reading and Visualizing the Data
60 |
61 | ```python
62 | import numpy as np
63 | import pandas as pd
64 | # Reading with a relative path assumes the dataset/ folder sits alongside this script.
65 | df_ads = pd.read_csv('./dataset/advertising.csv')
66 | df_ads.head()
67 | ```
68 |
69 | ### 3.2.3 Correlation Analysis
70 |
71 | **Correlation analysis** shows the correlation between any pair of variables in the dataset. A correlation coefficient is a number between -1 and 1: positive values mean positive correlation and negative values mean negative correlation.
72 |
73 | ```python
74 | import matplotlib.pyplot as plt
75 | import seaborn as sns # Seaborn - statistical data visualization library
76 | # Heatmap of the pairwise correlations among all labels and features
77 | sns.heatmap(df_ads.corr(), cmap='YlGnBu', annot=True)
78 | plt.show()
79 | ```
80 |
81 | ![fig02_相关性的热力图](./Figures/fig02_相关性的热力图.jpg)
82 |
83 | The figure shows the correlation coefficients among 4 variables — 3 features plus 1 label — with darker colors marking higher correlation. It shows that putting the limited budget into WeChat ads is the most reasonable choice.
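To read the exact numbers behind the heatmap, here is a small follow-up sketch (assuming the `df_ads` DataFrame loaded in 3.2.2) that sorts each feature's correlation with the label:

```python
# Correlation of each feature with sales, strongest first
corr_with_sales = df_ads.corr()['sales'].drop('sales')
print(corr_with_sales.sort_values(ascending=False))
```

The top entry should be wechat, matching the conclusion read off the figure.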
84 |
85 | ### 3.2.4 Scatter Plots
86 |
87 | Next, scatter plots sharpen the focus by showing, pair by pair, how product sales correspond to each ad channel's spend. A scatter plot displays the data points of a regression analysis on a Cartesian plane; it is a very effective data visualization tool.
88 |
89 | ```python
90 | # Scatter plots of sales against each type of ad spend
91 | sns.pairplot(df_ads,
92 |              x_vars = ['wechat', 'weibo', 'others'],
93 |              y_vars = 'sales',
94 |              height =4, aspect=1, kind='scatter')
95 | plt.show()
96 | ```
97 |
98 | ![fig03_商品销售额和各种广告投放金额之间的散点图](./Figures/fig03_商品销售额和各种广告投放金额之间的散点图.png)
99 |
100 | ### 3.2.5 Data Cleaning and Regularization
101 |
102 | The correlations and scatter plots show that, of the 3 features in this case, WeChat ad spend correlates most strongly with sales. To simplify the model, we therefore temporarily ignore the Weibo and other-ads features and keep only the WeChat ad spend. This reduces the multivariate regression analysis to a univariate one.
103 |
104 | ```python
105 | # Build the feature set, containing only the wechat feature
106 | X = np.array(df_ads.wechat)
107 | # Build the label set
108 | y = np.array(df_ads.sales)
109 |
110 | print("张量X的阶:", X.ndim)
111 | print("张量X的形状:", X.shape)
112 | print("张量X的内容:", X)
113 | ```
114 |
115 | For a numeric regression dataset, **the standard format a machine learning model reads in is a 2D tensor — a matrix — of shape (number of samples, number of features)**, where **rows are samples and columns are features**.
116 |
117 | ```python
118 | # reshape turns the vectors into matrices; len returns the number of samples
119 | X = X.reshape(len(X), 1)
120 | y = y.reshape(len(y), 1)
121 |
122 | print("张量X的形状:", X.shape)
123 | print("张量X的内容:", X)
124 | ```
125 |
126 | ### 3.2.6 Splitting the Dataset into Training and Test Sets
127 |
128 | Split the dataset 80% training / 20% test:
129 |
130 | ```python
131 | # Split the dataset 80% training / 20% test:
132 | from sklearn.model_selection import train_test_split
133 | X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)
134 | ```
135 |
136 | Sklearn's `train_test_split` function is the usual tool for splitting datasets in machine learning.
137 |
138 | - `test_size=0.2` makes the test set 20% of all samples.
139 |
140 | - Because the `shuffle` parameter defaults to True, the dataset is shuffled before splitting.
141 |
142 | - The `random_state` parameter seeds the randomization of the split. Passing an integer — a random seed — guarantees the same training and test sets on every run with that seed; otherwise the split is random each time.
143 |
144 | ### 3.2.7 Data Normalization
145 |
146 | Feature scaling methods include standardization, data compression (also called normalization), regularization, and so on.
147 |
148 | ![fig04_数据归一化](./Figures/fig04_数据归一化.jpg)
149 |
150 | ![fig05_常见归一化公式](./Figures/fig05_常见归一化公式.jpg)
151 |
152 | Normalization can be done with `MinMaxScaler` from Sklearn's `preprocessing` toolbox, or with a hand-rolled function:
153 |
154 | ```python
155 | def scaler(train, test):
156 |     min = train.min(axis=0)
157 |     max = train.max(axis=0)
158 |     gap = max - min
159 |     train -= min
160 |     train /= gap
161 |     test -= min
162 |     test /= gap
163 |     return train, test
164 | ```
165 |
166 | Note: **no value computed in any intermediate step of feature scaling may use information from the test set** (a quick sanity check of this follows the section).
167 |
168 | Scatter plot of WeChat ads vs. sales after feature scaling:
169 |
170 | ![fig06_微信广告-销售额散点图](./Figures/fig06_微信广告-销售额散点图.jpg)
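That caution is worth verifying. A minimal sketch (with made-up numbers) shows the scaler above squashing the training set into [0, 1], while the test set is transformed with the *training* set's min and max — so test values may legitimately fall outside [0, 1]:

```python
import numpy as np

train = np.array([[10.0], [20.0], [30.0]])
test = np.array([[15.0], [35.0]])
train_s, test_s = scaler(train, test)   # the scaler defined above
print(train_s.ravel())  # [0.  0.5 1. ]  -- scaled with train's own min/max
print(test_s.ravel())   # [0.25 1.25]    -- scaled with TRAIN's min/max, may exceed 1
```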
171 |
172 | ## 3.3 Choosing a Machine Learning Model
173 |
174 | Establishing a machine learning model involves two main steps.
175 |
176 | 1. Decide which type of model to use.
177 |
178 | 2. Determine the model's concrete parameters.
179 |
180 | ### 3.3.1 Settling on a Linear Regression Model
181 |
182 | In machine learning: y=wx+b, where w is the weight and b the bias.
183 |
184 | ### 3.3.2 The Hypothesis Function: h(x)
185 |
186 | The **hypothesis function** is also called the **prediction function**.
187 |
188 | ### 3.3.3 The Loss Function: L(w,b)
189 |
190 | **Loss is the penalty for a bad prediction.** Loss is the error, also called cost. Geometrically, a sample's loss can basically be understood as the distance between y and y': the larger the average distance, the larger the error and the worse the model.
191 |
192 | The **loss function** computes the average loss and is a function of the parameters w and b. **If the average loss is small, the parameters are good; if it is large, the model or its parameters need further adjustment.**
193 |
194 | Common loss functions:
195 |
196 | - For regression
197 |
198 |   - Mean squared error (MSE), also called quadratic loss or L2 loss.
199 |
200 |   - Mean absolute error (MAE), also called L1 loss.
201 |
202 |   - Mean bias error.
203 |
204 | - For classification
205 |
206 |   - Cross-entropy loss;
207 |
208 |   - Multi-class SVM loss.
209 |
210 | The mean squared error formula:
211 |
212 | ![fig07_均方误差函数的计算公式](./Figures/fig07_均方误差函数的计算公式.jpg)
213 |
214 | ```python
215 | def loss_function(X, y, weight, bias):
216 |     y_hat = weight * X + bias
217 |     loss = y_hat - y
218 |     cost = np.sum(loss**2) / (2 * len(X))  # average the squared losses, matching the MSE formula
219 |     return cost
220 | ```
221 |
222 | **For a given training set, L is a function of the weight w and bias b, and its value changes as w and b change.**
223 |
224 | The squaring is what makes L(w,b) convex with respect to w and b, which is what enables gradient descent.
225 |
226 | ## 3.4 Finding the Best Parameters via Gradient Descent
227 |
228 | ### 3.4.1 Training the Machine Needs the Right Direction
229 |
230 | For linear regression there is a method that keeps each guess moving in the right direction, so it always finds a combination of w and b with a smaller error than the previous guess: **gradient descent** on the loss function.
231 |
232 | ### 3.4.2 Convexity Guarantees a Minimum-Loss Point
233 |
234 | From the MSE formula, the relationship between the average loss L and w looks like this:
235 |
236 | ![fig08_均方误差函数的损失曲线](./Figures/fig08_均方误差函数的损失曲线.jpg)
237 |
238 | ![fig09_均方误差函(三维图)](./Figures/fig09_均方误差函(三维图).jpg)
239 |
240 | We call this graph the **loss curve**; it is a convex function. A convex function's graph smoothly and continuously forms a global minimum with respect to the y-axis — a **global minimum-loss point** exists. **This is why MSE is chosen here as the loss function for linear regression.**
241 |
242 | ### 3.4.3 Implementing Gradient Descent
243 |
244 | Gradient descent nudges the parameters w and b little by little in the program so that L, the loss value, approaches its minimum — the machine learning **optimum**.
245 |
246 | The program computes the gradient of the loss curve at the current point by differentiation. The gradient is the vector of derivatives of the loss curve; it tells us which direction is "closer to" or "farther from" the target.
247 |
248 | - If the gradient is positive, L is increasing as w increases, so w should be decreased to get a smaller loss.
249 |
250 | - If the gradient is negative, L is decreasing as w increases, so w should be increased to get a smaller loss.
251 |
252 | A gradient has two key characteristics:
253 |
254 | - direction, i.e., the gradient's sign;
255 |
256 | - magnitude, i.e., how steeply the tangent slopes.
257 |
258 | These two characteristics — especially direction — guarantee that the gradient always points where the loss function grows fastest. **Gradient descent takes a step in the direction of the negative gradient to reduce the loss**, as shown in the figure.
259 |
260 | ![fig10_梯度下降找到损失最小时的权重](./Figures/fig10_梯度下降找到损失最小时的权重.jpg)
261 |
262 | With gradient descent, if the initial estimate of w lands to the left of the optimum, descent increases w toward the minimum; if it lands to the right, descent decreases w toward the minimum. This gradual approach to the optimum is called the **convergence** of the loss function.
263 |
264 | ![fig11_梯度下降过程](./Figures/fig11_梯度下降过程.jpg)
265 |
266 | ```python
267 | def gradient_descent(X, y, w, b, lr, iter):
268 |     l_history = np.zeros(iter)
269 |     w_history = np.zeros(iter)
270 |     b_history = np.zeros(iter)
271 |     for i in range(iter):
272 |         y_hat = w*X + b
273 |         loss = y_hat - y
274 |         derivative_w = X.T.dot(loss) / len(X)
275 |         derivative_b = sum(loss) / len(X)
276 |         w = w - lr * derivative_w
277 |         b = b - lr * derivative_b
278 |         l_history[i] = loss_function(X, y, w, b)
279 |         w_history[i] = w
280 |         b_history[i] = b
281 |     return l_history, w_history, b_history
282 | ```
283 |
284 | ### 3.4.4 The Learning Rate Also Matters
285 |
286 | The **learning rate**, written α, multiplied by the differentiated value of the loss curve gives the **step size** of one gradient update. It controls the pace of the current descent, fast or slow, as w is updated and refined on every iteration.
287 |
288 | With the learning rate introduced, the gradient update rule for w becomes:
289 |
290 | ![fig12_w梯度更新公式](./Figures/fig12_w梯度更新公式.jpg)
291 |
292 | In practice you rarely have to code any of this yourself. What most machine learning practitioners really spend much of their time tuning are parameters like the learning rate and the iteration count; such manually tunable parameters living outside the model are called **hyperparameters**.
293 |
294 | If the chosen learning rate is too small, the machine takes a very long time — many iterations — to reach the bottom of the loss function, as in the left figure below. If it is too large, L changes so much that it overshoots the lowest point of the loss curve, and the next point bounces around the bottom of the U-shaped curve forever; the loss may even grow and grow, as in the right figure.
295 |
296 | ![fig13_学习速率对优化过程影响](./Figures/fig13_学习速率对优化过程影响.jpg)
297 |
298 | Finding a good learning rate takes experience and feel. A common strategy: set a larger rate at the very start of training to approach the best weights quickly in a few steps, then shrink the rate as the weights get close, to avoid overshooting the optimum. (A runnable comparison follows this section.)
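As promised, here is a runnable comparison of a too-small and a too-large learning rate. The toy data is made up for illustration, and the sketch assumes the `loss_function` and `gradient_descent` defined in this section:

```python
import numpy as np

X = np.array([0.1, 0.3, 0.5, 0.7, 0.9])
y = 2.0 * X + 1.0                 # ground truth: w = 2, b = 1, no noise
# A tiny learning rate inches downhill: the loss shrinks, but very slowly
l_small, _, _ = gradient_descent(X, y, w=0.0, b=0.0, lr=0.01, iter=50)
# An oversized learning rate overshoots the minimum on every step
l_large, _, _ = gradient_descent(X, y, w=0.0, b=0.0, lr=2.0, iter=50)
print("lr=0.01 final loss:", l_small[-1])  # still well above zero
print("lr=2.0  final loss:", l_large[-1])  # has grown instead of converging
```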
299 |
300 | ## 3.5 Implementing Univariate Linear Regression and Tuning Hyperparameters
301 |
302 | ### 3.5.6 Depicting the Relationship of L, w, and b with a Contour Plot
303 |
304 | A **contour plot**: the loss curve depicts the relationship between loss and iteration count, while the contour plot depicts the relationship among all three of L, w, and b — only then is it clear how the loss value falls step by step as w and b change.
305 |
306 | ![fig14_损失曲线](./Figures/fig14_损失曲线.jpg)
307 |
308 | ![fig15_轮廓图](./Figures/fig15_轮廓图.jpg)
309 |
310 | ## 3.6 Implementing Multivariate Linear Regression
311 |
312 | Multivariate means multiple variables, i.e., multi-dimensional features. This case involves 3 features, so the hypothesis function can be written: $y'=h(x)=b+w_1x_1+w_2x_2+w_3x_3$.
313 |
314 | ### 3.6.1 Vectorized Dot-Product Computation
315 |
316 | Vectorized form: $y'=h(x)=b+w^Tx$. Writing the bias b as $w_0x_0$ turns the formula into $y'=w^Tx$.
317 |
318 | ![fig16_多元线性回归模型](./Figures/fig16_多元线性回归模型.jpg)
319 |
320 | First keep all fields — wechat, weibo, others — when building the feature dataset, then drop the label field with NumPy's `delete` method.
321 |
322 | ```python
323 | import numpy as np
324 | import pandas as pd
325 | df_ads = pd.read_csv('./dataset/advertising.csv')
326 | df_ads.head()
327 |
328 | X = np.array(df_ads)
329 | X = np.delete(X, [3], axis=1) # drop the label column
330 | y = np.array(df_ads.sales)
331 | print ("张量X的阶:",X.ndim)
332 | print ("张量X的形状:", X.shape)
333 | print (X)
334 | ```
335 |
336 | Prepend a new column to the front of X — a dummy feature whose values are all 1 — and treat the bias as w0.
337 |
338 | ```python
339 | x0_train = np.ones((len(X_train), 1)) # all-ones column of X_train's length, to dot with the bias
340 | X_train = np.append(x0_train, X_train, axis=1) # prepend the column of 1s to X
341 | x0_test = np.ones((len(X_test), 1))
342 | X_test = np.append(x0_test, X_test, axis=1)
343 | print ("张量X的形状:", X_train.shape)
344 | print (X_train)
345 | ```
346 |
347 | ### 3.6.2 Multivariate Loss Function and Gradient Descent
348 |
349 | The loss function:
350 |
351 | ```python
352 | # Hand-rolled MSE function; W is now a vector
353 | def loss_function(X, y, W):
354 |     y_hat = X.dot(W.T)
355 |     loss = y_hat.reshape((len(y_hat), 1)) - y
356 |     cost = np.sum(loss**2) / (2*len(X))
357 |     return cost
358 | ```
359 |
360 | Wrap gradient descent in a function:
361 |
362 | ```python
363 | # Define the gradient descent function
364 | def gradient_descent(X, y, W, alpha, iterations):
365 |     l_history = np.zeros(iterations)
366 |     W_history = np.zeros((iterations, len(W)))
367 |     for iter in range(iterations):
368 |         y_hat = X.dot(W)
369 |         loss = y_hat.reshape((len(y_hat), 1)) - y # intermediate step: difference between y_hat and the true y
370 |         derivative_W = X.T.dot(loss) / (2*len(X)) # the gradient vector of the multivariate model
371 |         derivative_W = derivative_W.reshape(len(W))
72 |         W = W - alpha * derivative_W
373 |         l_history[iter] = loss_function(X, y, W)
374 |         W_history[iter] = W
375 |     return l_history, W_history
376 | ```
377 |
378 | ### 3.6.3 Building a Linear Regression Model Function
379 |
380 | Before training the machine, wrap gradient descent and the training procedure into one linear regression function, so the machine is trained by calling the model and the code stays tidy.
381 |
382 | ```python
383 | # Define the linear regression model
384 | def linear_regression(X, y, weight, alpha, iterations):
385 |     loss_history, weight_history = gradient_descent(X, y,
386 |                                                     weight, alpha, iterations)
387 |     print("训练最终损失:", loss_history[-1])
388 |     y_pred = X.dot(weight_history[-1])
389 |     training_acc = 100 - np.mean(np.abs(y_pred - y)/y)*100 # compute training accuracy
390 |     print("线性回归训练准确率:{:.2f}%".format(training_acc))
391 |     return loss_history, weight_history
392 | ```
393 |
394 | ### 3.6.4 Initializing Weights and Training the Machine
395 |
396 | Set the initial parameter values:
397 |
398 | ```python
399 | # First set the initial parameter values
400 | iterations = 300
401 | alpha = 0.5
402 | weight = np.array([0.5, 1, 1, 1])
403 | print('当前损失:', loss_function(X_train, y_train, weight))
404 | ```
405 |
406 | Call the linear regression model, train the machine, and report the final loss together with the prediction accuracy on the training set:
407 |
408 | ```python
409 | # 调用刚才定义的线性回归模型 410 | loss_history, weight_history = linear_regression(X_train, y_train, 411 | weight, alpha, iterations) 412 | 413 | print("权重历史记录:", weight_history[-1]) 414 | print("损失历史记录:", loss_history[-1]) 415 | ``` 416 | 417 | -------------------------------------------------------------------------------- /Class4/Example/Ex01/dataset/heart.csv: -------------------------------------------------------------------------------- 1 | age,sex,cp,trestbps,chol,fbs,restecg,thalach,exang,oldpeak,slope,ca,thal,target 2 | 63,1,3,145,233,1,0,150,0,2.3,0,0,1,1 3 | 37,1,2,130,250,0,1,187,0,3.5,0,0,2,1 4 | 41,0,1,130,204,0,0,172,0,1.4,2,0,2,1 5 | 56,1,1,120,236,0,1,178,0,0.8,2,0,2,1 6 | 57,0,0,120,354,0,1,163,1,0.6,2,0,2,1 7 | 57,1,0,140,192,0,1,148,0,0.4,1,0,1,1 8 | 56,0,1,140,294,0,0,153,0,1.3,1,0,2,1 9 | 44,1,1,120,263,0,1,173,0,0,2,0,3,1 10 | 52,1,2,172,199,1,1,162,0,0.5,2,0,3,1 11 | 57,1,2,150,168,0,1,174,0,1.6,2,0,2,1 12 | 54,1,0,140,239,0,1,160,0,1.2,2,0,2,1 13 | 48,0,2,130,275,0,1,139,0,0.2,2,0,2,1 14 | 49,1,1,130,266,0,1,171,0,0.6,2,0,2,1 15 | 64,1,3,110,211,0,0,144,1,1.8,1,0,2,1 16 | 58,0,3,150,283,1,0,162,0,1,2,0,2,1 17 | 50,0,2,120,219,0,1,158,0,1.6,1,0,2,1 18 | 58,0,2,120,340,0,1,172,0,0,2,0,2,1 19 | 66,0,3,150,226,0,1,114,0,2.6,0,0,2,1 20 | 43,1,0,150,247,0,1,171,0,1.5,2,0,2,1 21 | 69,0,3,140,239,0,1,151,0,1.8,2,2,2,1 22 | 59,1,0,135,234,0,1,161,0,0.5,1,0,3,1 23 | 44,1,2,130,233,0,1,179,1,0.4,2,0,2,1 24 | 42,1,0,140,226,0,1,178,0,0,2,0,2,1 25 | 61,1,2,150,243,1,1,137,1,1,1,0,2,1 26 | 40,1,3,140,199,0,1,178,1,1.4,2,0,3,1 27 | 71,0,1,160,302,0,1,162,0,0.4,2,2,2,1 28 | 59,1,2,150,212,1,1,157,0,1.6,2,0,2,1 29 | 51,1,2,110,175,0,1,123,0,0.6,2,0,2,1 30 | 65,0,2,140,417,1,0,157,0,0.8,2,1,2,1 31 | 53,1,2,130,197,1,0,152,0,1.2,0,0,2,1 32 | 41,0,1,105,198,0,1,168,0,0,2,1,2,1 33 | 65,1,0,120,177,0,1,140,0,0.4,2,0,3,1 34 | 44,1,1,130,219,0,0,188,0,0,2,0,2,1 35 | 54,1,2,125,273,0,0,152,0,0.5,0,1,2,1 36 | 51,1,3,125,213,0,0,125,1,1.4,2,1,2,1 37 | 46,0,2,142,177,0,0,160,1,1.4,0,0,2,1 38 | 54,0,2,135,304,1,1,170,0,0,2,0,2,1 39 | 54,1,2,150,232,0,0,165,0,1.6,2,0,3,1 40 | 65,0,2,155,269,0,1,148,0,0.8,2,0,2,1 41 | 65,0,2,160,360,0,0,151,0,0.8,2,0,2,1 42 | 51,0,2,140,308,0,0,142,0,1.5,2,1,2,1 43 | 48,1,1,130,245,0,0,180,0,0.2,1,0,2,1 44 | 45,1,0,104,208,0,0,148,1,3,1,0,2,1 45 | 53,0,0,130,264,0,0,143,0,0.4,1,0,2,1 46 | 39,1,2,140,321,0,0,182,0,0,2,0,2,1 47 | 52,1,1,120,325,0,1,172,0,0.2,2,0,2,1 48 | 44,1,2,140,235,0,0,180,0,0,2,0,2,1 49 | 47,1,2,138,257,0,0,156,0,0,2,0,2,1 50 | 53,0,2,128,216,0,0,115,0,0,2,0,0,1 51 | 53,0,0,138,234,0,0,160,0,0,2,0,2,1 52 | 51,0,2,130,256,0,0,149,0,0.5,2,0,2,1 53 | 66,1,0,120,302,0,0,151,0,0.4,1,0,2,1 54 | 62,1,2,130,231,0,1,146,0,1.8,1,3,3,1 55 | 44,0,2,108,141,0,1,175,0,0.6,1,0,2,1 56 | 63,0,2,135,252,0,0,172,0,0,2,0,2,1 57 | 52,1,1,134,201,0,1,158,0,0.8,2,1,2,1 58 | 48,1,0,122,222,0,0,186,0,0,2,0,2,1 59 | 45,1,0,115,260,0,0,185,0,0,2,0,2,1 60 | 34,1,3,118,182,0,0,174,0,0,2,0,2,1 61 | 57,0,0,128,303,0,0,159,0,0,2,1,2,1 62 | 71,0,2,110,265,1,0,130,0,0,2,1,2,1 63 | 54,1,1,108,309,0,1,156,0,0,2,0,3,1 64 | 52,1,3,118,186,0,0,190,0,0,1,0,1,1 65 | 41,1,1,135,203,0,1,132,0,0,1,0,1,1 66 | 58,1,2,140,211,1,0,165,0,0,2,0,2,1 67 | 35,0,0,138,183,0,1,182,0,1.4,2,0,2,1 68 | 51,1,2,100,222,0,1,143,1,1.2,1,0,2,1 69 | 45,0,1,130,234,0,0,175,0,0.6,1,0,2,1 70 | 44,1,1,120,220,0,1,170,0,0,2,0,2,1 71 | 62,0,0,124,209,0,1,163,0,0,2,0,2,1 72 | 54,1,2,120,258,0,0,147,0,0.4,1,0,3,1 73 | 51,1,2,94,227,0,1,154,1,0,2,1,3,1 74 | 29,1,1,130,204,0,0,202,0,0,2,0,2,1 75 | 
51,1,0,140,261,0,0,186,1,0,2,0,2,1 76 | 43,0,2,122,213,0,1,165,0,0.2,1,0,2,1 77 | 55,0,1,135,250,0,0,161,0,1.4,1,0,2,1 78 | 51,1,2,125,245,1,0,166,0,2.4,1,0,2,1 79 | 59,1,1,140,221,0,1,164,1,0,2,0,2,1 80 | 52,1,1,128,205,1,1,184,0,0,2,0,2,1 81 | 58,1,2,105,240,0,0,154,1,0.6,1,0,3,1 82 | 41,1,2,112,250,0,1,179,0,0,2,0,2,1 83 | 45,1,1,128,308,0,0,170,0,0,2,0,2,1 84 | 60,0,2,102,318,0,1,160,0,0,2,1,2,1 85 | 52,1,3,152,298,1,1,178,0,1.2,1,0,3,1 86 | 42,0,0,102,265,0,0,122,0,0.6,1,0,2,1 87 | 67,0,2,115,564,0,0,160,0,1.6,1,0,3,1 88 | 68,1,2,118,277,0,1,151,0,1,2,1,3,1 89 | 46,1,1,101,197,1,1,156,0,0,2,0,3,1 90 | 54,0,2,110,214,0,1,158,0,1.6,1,0,2,1 91 | 58,0,0,100,248,0,0,122,0,1,1,0,2,1 92 | 48,1,2,124,255,1,1,175,0,0,2,2,2,1 93 | 57,1,0,132,207,0,1,168,1,0,2,0,3,1 94 | 52,1,2,138,223,0,1,169,0,0,2,4,2,1 95 | 54,0,1,132,288,1,0,159,1,0,2,1,2,1 96 | 45,0,1,112,160,0,1,138,0,0,1,0,2,1 97 | 53,1,0,142,226,0,0,111,1,0,2,0,3,1 98 | 62,0,0,140,394,0,0,157,0,1.2,1,0,2,1 99 | 52,1,0,108,233,1,1,147,0,0.1,2,3,3,1 100 | 43,1,2,130,315,0,1,162,0,1.9,2,1,2,1 101 | 53,1,2,130,246,1,0,173,0,0,2,3,2,1 102 | 42,1,3,148,244,0,0,178,0,0.8,2,2,2,1 103 | 59,1,3,178,270,0,0,145,0,4.2,0,0,3,1 104 | 63,0,1,140,195,0,1,179,0,0,2,2,2,1 105 | 42,1,2,120,240,1,1,194,0,0.8,0,0,3,1 106 | 50,1,2,129,196,0,1,163,0,0,2,0,2,1 107 | 68,0,2,120,211,0,0,115,0,1.5,1,0,2,1 108 | 69,1,3,160,234,1,0,131,0,0.1,1,1,2,1 109 | 45,0,0,138,236,0,0,152,1,0.2,1,0,2,1 110 | 50,0,1,120,244,0,1,162,0,1.1,2,0,2,1 111 | 50,0,0,110,254,0,0,159,0,0,2,0,2,1 112 | 64,0,0,180,325,0,1,154,1,0,2,0,2,1 113 | 57,1,2,150,126,1,1,173,0,0.2,2,1,3,1 114 | 64,0,2,140,313,0,1,133,0,0.2,2,0,3,1 115 | 43,1,0,110,211,0,1,161,0,0,2,0,3,1 116 | 55,1,1,130,262,0,1,155,0,0,2,0,2,1 117 | 37,0,2,120,215,0,1,170,0,0,2,0,2,1 118 | 41,1,2,130,214,0,0,168,0,2,1,0,2,1 119 | 56,1,3,120,193,0,0,162,0,1.9,1,0,3,1 120 | 46,0,1,105,204,0,1,172,0,0,2,0,2,1 121 | 46,0,0,138,243,0,0,152,1,0,1,0,2,1 122 | 64,0,0,130,303,0,1,122,0,2,1,2,2,1 123 | 59,1,0,138,271,0,0,182,0,0,2,0,2,1 124 | 41,0,2,112,268,0,0,172,1,0,2,0,2,1 125 | 54,0,2,108,267,0,0,167,0,0,2,0,2,1 126 | 39,0,2,94,199,0,1,179,0,0,2,0,2,1 127 | 34,0,1,118,210,0,1,192,0,0.7,2,0,2,1 128 | 47,1,0,112,204,0,1,143,0,0.1,2,0,2,1 129 | 67,0,2,152,277,0,1,172,0,0,2,1,2,1 130 | 52,0,2,136,196,0,0,169,0,0.1,1,0,2,1 131 | 74,0,1,120,269,0,0,121,1,0.2,2,1,2,1 132 | 54,0,2,160,201,0,1,163,0,0,2,1,2,1 133 | 49,0,1,134,271,0,1,162,0,0,1,0,2,1 134 | 42,1,1,120,295,0,1,162,0,0,2,0,2,1 135 | 41,1,1,110,235,0,1,153,0,0,2,0,2,1 136 | 41,0,1,126,306,0,1,163,0,0,2,0,2,1 137 | 49,0,0,130,269,0,1,163,0,0,2,0,2,1 138 | 60,0,2,120,178,1,1,96,0,0,2,0,2,1 139 | 62,1,1,128,208,1,0,140,0,0,2,0,2,1 140 | 57,1,0,110,201,0,1,126,1,1.5,1,0,1,1 141 | 64,1,0,128,263,0,1,105,1,0.2,1,1,3,1 142 | 51,0,2,120,295,0,0,157,0,0.6,2,0,2,1 143 | 43,1,0,115,303,0,1,181,0,1.2,1,0,2,1 144 | 42,0,2,120,209,0,1,173,0,0,1,0,2,1 145 | 67,0,0,106,223,0,1,142,0,0.3,2,2,2,1 146 | 76,0,2,140,197,0,2,116,0,1.1,1,0,2,1 147 | 70,1,1,156,245,0,0,143,0,0,2,0,2,1 148 | 44,0,2,118,242,0,1,149,0,0.3,1,1,2,1 149 | 60,0,3,150,240,0,1,171,0,0.9,2,0,2,1 150 | 44,1,2,120,226,0,1,169,0,0,2,0,2,1 151 | 42,1,2,130,180,0,1,150,0,0,2,0,2,1 152 | 66,1,0,160,228,0,0,138,0,2.3,2,0,1,1 153 | 71,0,0,112,149,0,1,125,0,1.6,1,0,2,1 154 | 64,1,3,170,227,0,0,155,0,0.6,1,0,3,1 155 | 66,0,2,146,278,0,0,152,0,0,1,1,2,1 156 | 39,0,2,138,220,0,1,152,0,0,1,0,2,1 157 | 58,0,0,130,197,0,1,131,0,0.6,1,0,2,1 158 | 47,1,2,130,253,0,1,179,0,0,2,0,2,1 159 | 35,1,1,122,192,0,1,174,0,0,2,0,2,1 160 | 
58,1,1,125,220,0,1,144,0,0.4,1,4,3,1 161 | 56,1,1,130,221,0,0,163,0,0,2,0,3,1 162 | 56,1,1,120,240,0,1,169,0,0,0,0,2,1 163 | 55,0,1,132,342,0,1,166,0,1.2,2,0,2,1 164 | 41,1,1,120,157,0,1,182,0,0,2,0,2,1 165 | 38,1,2,138,175,0,1,173,0,0,2,4,2,1 166 | 38,1,2,138,175,0,1,173,0,0,2,4,2,1 167 | 67,1,0,160,286,0,0,108,1,1.5,1,3,2,0 168 | 67,1,0,120,229,0,0,129,1,2.6,1,2,3,0 169 | 62,0,0,140,268,0,0,160,0,3.6,0,2,2,0 170 | 63,1,0,130,254,0,0,147,0,1.4,1,1,3,0 171 | 53,1,0,140,203,1,0,155,1,3.1,0,0,3,0 172 | 56,1,2,130,256,1,0,142,1,0.6,1,1,1,0 173 | 48,1,1,110,229,0,1,168,0,1,0,0,3,0 174 | 58,1,1,120,284,0,0,160,0,1.8,1,0,2,0 175 | 58,1,2,132,224,0,0,173,0,3.2,2,2,3,0 176 | 60,1,0,130,206,0,0,132,1,2.4,1,2,3,0 177 | 40,1,0,110,167,0,0,114,1,2,1,0,3,0 178 | 60,1,0,117,230,1,1,160,1,1.4,2,2,3,0 179 | 64,1,2,140,335,0,1,158,0,0,2,0,2,0 180 | 43,1,0,120,177,0,0,120,1,2.5,1,0,3,0 181 | 57,1,0,150,276,0,0,112,1,0.6,1,1,1,0 182 | 55,1,0,132,353,0,1,132,1,1.2,1,1,3,0 183 | 65,0,0,150,225,0,0,114,0,1,1,3,3,0 184 | 61,0,0,130,330,0,0,169,0,0,2,0,2,0 185 | 58,1,2,112,230,0,0,165,0,2.5,1,1,3,0 186 | 50,1,0,150,243,0,0,128,0,2.6,1,0,3,0 187 | 44,1,0,112,290,0,0,153,0,0,2,1,2,0 188 | 60,1,0,130,253,0,1,144,1,1.4,2,1,3,0 189 | 54,1,0,124,266,0,0,109,1,2.2,1,1,3,0 190 | 50,1,2,140,233,0,1,163,0,0.6,1,1,3,0 191 | 41,1,0,110,172,0,0,158,0,0,2,0,3,0 192 | 51,0,0,130,305,0,1,142,1,1.2,1,0,3,0 193 | 58,1,0,128,216,0,0,131,1,2.2,1,3,3,0 194 | 54,1,0,120,188,0,1,113,0,1.4,1,1,3,0 195 | 60,1,0,145,282,0,0,142,1,2.8,1,2,3,0 196 | 60,1,2,140,185,0,0,155,0,3,1,0,2,0 197 | 59,1,0,170,326,0,0,140,1,3.4,0,0,3,0 198 | 46,1,2,150,231,0,1,147,0,3.6,1,0,2,0 199 | 67,1,0,125,254,1,1,163,0,0.2,1,2,3,0 200 | 62,1,0,120,267,0,1,99,1,1.8,1,2,3,0 201 | 65,1,0,110,248,0,0,158,0,0.6,2,2,1,0 202 | 44,1,0,110,197,0,0,177,0,0,2,1,2,0 203 | 60,1,0,125,258,0,0,141,1,2.8,1,1,3,0 204 | 58,1,0,150,270,0,0,111,1,0.8,2,0,3,0 205 | 68,1,2,180,274,1,0,150,1,1.6,1,0,3,0 206 | 62,0,0,160,164,0,0,145,0,6.2,0,3,3,0 207 | 52,1,0,128,255,0,1,161,1,0,2,1,3,0 208 | 59,1,0,110,239,0,0,142,1,1.2,1,1,3,0 209 | 60,0,0,150,258,0,0,157,0,2.6,1,2,3,0 210 | 49,1,2,120,188,0,1,139,0,2,1,3,3,0 211 | 59,1,0,140,177,0,1,162,1,0,2,1,3,0 212 | 57,1,2,128,229,0,0,150,0,0.4,1,1,3,0 213 | 61,1,0,120,260,0,1,140,1,3.6,1,1,3,0 214 | 39,1,0,118,219,0,1,140,0,1.2,1,0,3,0 215 | 61,0,0,145,307,0,0,146,1,1,1,0,3,0 216 | 56,1,0,125,249,1,0,144,1,1.2,1,1,2,0 217 | 43,0,0,132,341,1,0,136,1,3,1,0,3,0 218 | 62,0,2,130,263,0,1,97,0,1.2,1,1,3,0 219 | 63,1,0,130,330,1,0,132,1,1.8,2,3,3,0 220 | 65,1,0,135,254,0,0,127,0,2.8,1,1,3,0 221 | 48,1,0,130,256,1,0,150,1,0,2,2,3,0 222 | 63,0,0,150,407,0,0,154,0,4,1,3,3,0 223 | 55,1,0,140,217,0,1,111,1,5.6,0,0,3,0 224 | 65,1,3,138,282,1,0,174,0,1.4,1,1,2,0 225 | 56,0,0,200,288,1,0,133,1,4,0,2,3,0 226 | 54,1,0,110,239,0,1,126,1,2.8,1,1,3,0 227 | 70,1,0,145,174,0,1,125,1,2.6,0,0,3,0 228 | 62,1,1,120,281,0,0,103,0,1.4,1,1,3,0 229 | 35,1,0,120,198,0,1,130,1,1.6,1,0,3,0 230 | 59,1,3,170,288,0,0,159,0,0.2,1,0,3,0 231 | 64,1,2,125,309,0,1,131,1,1.8,1,0,3,0 232 | 47,1,2,108,243,0,1,152,0,0,2,0,2,0 233 | 57,1,0,165,289,1,0,124,0,1,1,3,3,0 234 | 55,1,0,160,289,0,0,145,1,0.8,1,1,3,0 235 | 64,1,0,120,246,0,0,96,1,2.2,0,1,2,0 236 | 70,1,0,130,322,0,0,109,0,2.4,1,3,2,0 237 | 51,1,0,140,299,0,1,173,1,1.6,2,0,3,0 238 | 58,1,0,125,300,0,0,171,0,0,2,2,3,0 239 | 60,1,0,140,293,0,0,170,0,1.2,1,2,3,0 240 | 77,1,0,125,304,0,0,162,1,0,2,3,2,0 241 | 35,1,0,126,282,0,0,156,1,0,2,0,3,0 242 | 70,1,2,160,269,0,1,112,1,2.9,1,1,3,0 243 | 59,0,0,174,249,0,1,143,1,0,1,0,2,0 244 | 
64,1,0,145,212,0,0,132,0,2,1,2,1,0 245 | 57,1,0,152,274,0,1,88,1,1.2,1,1,3,0 246 | 56,1,0,132,184,0,0,105,1,2.1,1,1,1,0 247 | 48,1,0,124,274,0,0,166,0,0.5,1,0,3,0 248 | 56,0,0,134,409,0,0,150,1,1.9,1,2,3,0 249 | 66,1,1,160,246,0,1,120,1,0,1,3,1,0 250 | 54,1,1,192,283,0,0,195,0,0,2,1,3,0 251 | 69,1,2,140,254,0,0,146,0,2,1,3,3,0 252 | 51,1,0,140,298,0,1,122,1,4.2,1,3,3,0 253 | 43,1,0,132,247,1,0,143,1,0.1,1,4,3,0 254 | 62,0,0,138,294,1,1,106,0,1.9,1,3,2,0 255 | 67,1,0,100,299,0,0,125,1,0.9,1,2,2,0 256 | 59,1,3,160,273,0,0,125,0,0,2,0,2,0 257 | 45,1,0,142,309,0,0,147,1,0,1,3,3,0 258 | 58,1,0,128,259,0,0,130,1,3,1,2,3,0 259 | 50,1,0,144,200,0,0,126,1,0.9,1,0,3,0 260 | 62,0,0,150,244,0,1,154,1,1.4,1,0,2,0 261 | 38,1,3,120,231,0,1,182,1,3.8,1,0,3,0 262 | 66,0,0,178,228,1,1,165,1,1,1,2,3,0 263 | 52,1,0,112,230,0,1,160,0,0,2,1,2,0 264 | 53,1,0,123,282,0,1,95,1,2,1,2,3,0 265 | 63,0,0,108,269,0,1,169,1,1.8,1,2,2,0 266 | 54,1,0,110,206,0,0,108,1,0,1,1,2,0 267 | 66,1,0,112,212,0,0,132,1,0.1,2,1,2,0 268 | 55,0,0,180,327,0,2,117,1,3.4,1,0,2,0 269 | 49,1,2,118,149,0,0,126,0,0.8,2,3,2,0 270 | 54,1,0,122,286,0,0,116,1,3.2,1,2,2,0 271 | 56,1,0,130,283,1,0,103,1,1.6,0,0,3,0 272 | 46,1,0,120,249,0,0,144,0,0.8,2,0,3,0 273 | 61,1,3,134,234,0,1,145,0,2.6,1,2,2,0 274 | 67,1,0,120,237,0,1,71,0,1,1,0,2,0 275 | 58,1,0,100,234,0,1,156,0,0.1,2,1,3,0 276 | 47,1,0,110,275,0,0,118,1,1,1,1,2,0 277 | 52,1,0,125,212,0,1,168,0,1,2,2,3,0 278 | 58,1,0,146,218,0,1,105,0,2,1,1,3,0 279 | 57,1,1,124,261,0,1,141,0,0.3,2,0,3,0 280 | 58,0,1,136,319,1,0,152,0,0,2,2,2,0 281 | 61,1,0,138,166,0,0,125,1,3.6,1,1,2,0 282 | 42,1,0,136,315,0,1,125,1,1.8,1,0,1,0 283 | 52,1,0,128,204,1,1,156,1,1,1,0,0,0 284 | 59,1,2,126,218,1,1,134,0,2.2,1,1,1,0 285 | 40,1,0,152,223,0,1,181,0,0,2,0,3,0 286 | 61,1,0,140,207,0,0,138,1,1.9,2,1,3,0 287 | 46,1,0,140,311,0,1,120,1,1.8,1,2,3,0 288 | 59,1,3,134,204,0,1,162,0,0.8,2,2,2,0 289 | 57,1,1,154,232,0,0,164,0,0,2,1,2,0 290 | 57,1,0,110,335,0,1,143,1,3,1,1,3,0 291 | 55,0,0,128,205,0,2,130,1,2,1,1,3,0 292 | 61,1,0,148,203,0,1,161,0,0,2,1,3,0 293 | 58,1,0,114,318,0,2,140,0,4.4,0,3,1,0 294 | 58,0,0,170,225,1,0,146,1,2.8,1,2,1,0 295 | 67,1,2,152,212,0,0,150,0,0.8,1,0,3,0 296 | 44,1,0,120,169,0,1,144,1,2.8,0,0,1,0 297 | 63,1,0,140,187,0,0,144,1,4,2,2,3,0 298 | 63,0,0,124,197,0,1,136,1,0,1,0,2,0 299 | 59,1,0,164,176,1,0,90,0,1,1,2,1,0 300 | 57,0,0,140,241,0,1,123,1,0.2,1,0,3,0 301 | 45,1,3,110,264,0,1,132,0,1.2,1,0,3,0 302 | 68,1,0,144,193,1,1,141,0,3.4,1,2,3,0 303 | 57,1,0,130,131,0,1,115,1,1.2,1,1,3,0 304 | 57,0,1,130,236,0,0,174,0,0,1,1,2,0 305 | -------------------------------------------------------------------------------- /Class4/Example/Ex02/dataset/Iris.csv: -------------------------------------------------------------------------------- 1 | Id,SepalLengthCm,SepalWidthCm,PetalLengthCm,PetalWidthCm,Species 2 | 1,5.1,3.5,1.4,0.2,Iris-setosa 3 | 2,4.9,3.0,1.4,0.2,Iris-setosa 4 | 3,4.7,3.2,1.3,0.2,Iris-setosa 5 | 4,4.6,3.1,1.5,0.2,Iris-setosa 6 | 5,5.0,3.6,1.4,0.2,Iris-setosa 7 | 6,5.4,3.9,1.7,0.4,Iris-setosa 8 | 7,4.6,3.4,1.4,0.3,Iris-setosa 9 | 8,5.0,3.4,1.5,0.2,Iris-setosa 10 | 9,4.4,2.9,1.4,0.2,Iris-setosa 11 | 10,4.9,3.1,1.5,0.1,Iris-setosa 12 | 11,5.4,3.7,1.5,0.2,Iris-setosa 13 | 12,4.8,3.4,1.6,0.2,Iris-setosa 14 | 13,4.8,3.0,1.4,0.1,Iris-setosa 15 | 14,4.3,3.0,1.1,0.1,Iris-setosa 16 | 15,5.8,4.0,1.2,0.2,Iris-setosa 17 | 16,5.7,4.4,1.5,0.4,Iris-setosa 18 | 17,5.4,3.9,1.3,0.4,Iris-setosa 19 | 18,5.1,3.5,1.4,0.3,Iris-setosa 20 | 19,5.7,3.8,1.7,0.3,Iris-setosa 21 | 20,5.1,3.8,1.5,0.3,Iris-setosa 22 | 
21,5.4,3.4,1.7,0.2,Iris-setosa 23 | 22,5.1,3.7,1.5,0.4,Iris-setosa 24 | 23,4.6,3.6,1.0,0.2,Iris-setosa 25 | 24,5.1,3.3,1.7,0.5,Iris-setosa 26 | 25,4.8,3.4,1.9,0.2,Iris-setosa 27 | 26,5.0,3.0,1.6,0.2,Iris-setosa 28 | 27,5.0,3.4,1.6,0.4,Iris-setosa 29 | 28,5.2,3.5,1.5,0.2,Iris-setosa 30 | 29,5.2,3.4,1.4,0.2,Iris-setosa 31 | 30,4.7,3.2,1.6,0.2,Iris-setosa 32 | 31,4.8,3.1,1.6,0.2,Iris-setosa 33 | 32,5.4,3.4,1.5,0.4,Iris-setosa 34 | 33,5.2,4.1,1.5,0.1,Iris-setosa 35 | 34,5.5,4.2,1.4,0.2,Iris-setosa 36 | 35,4.9,3.1,1.5,0.1,Iris-setosa 37 | 36,5.0,3.2,1.2,0.2,Iris-setosa 38 | 37,5.5,3.5,1.3,0.2,Iris-setosa 39 | 38,4.9,3.1,1.5,0.1,Iris-setosa 40 | 39,4.4,3.0,1.3,0.2,Iris-setosa 41 | 40,5.1,3.4,1.5,0.2,Iris-setosa 42 | 41,5.0,3.5,1.3,0.3,Iris-setosa 43 | 42,4.5,2.3,1.3,0.3,Iris-setosa 44 | 43,4.4,3.2,1.3,0.2,Iris-setosa 45 | 44,5.0,3.5,1.6,0.6,Iris-setosa 46 | 45,5.1,3.8,1.9,0.4,Iris-setosa 47 | 46,4.8,3.0,1.4,0.3,Iris-setosa 48 | 47,5.1,3.8,1.6,0.2,Iris-setosa 49 | 48,4.6,3.2,1.4,0.2,Iris-setosa 50 | 49,5.3,3.7,1.5,0.2,Iris-setosa 51 | 50,5.0,3.3,1.4,0.2,Iris-setosa 52 | 51,7.0,3.2,4.7,1.4,Iris-versicolor 53 | 52,6.4,3.2,4.5,1.5,Iris-versicolor 54 | 53,6.9,3.1,4.9,1.5,Iris-versicolor 55 | 54,5.5,2.3,4.0,1.3,Iris-versicolor 56 | 55,6.5,2.8,4.6,1.5,Iris-versicolor 57 | 56,5.7,2.8,4.5,1.3,Iris-versicolor 58 | 57,6.3,3.3,4.7,1.6,Iris-versicolor 59 | 58,4.9,2.4,3.3,1.0,Iris-versicolor 60 | 59,6.6,2.9,4.6,1.3,Iris-versicolor 61 | 60,5.2,2.7,3.9,1.4,Iris-versicolor 62 | 61,5.0,2.0,3.5,1.0,Iris-versicolor 63 | 62,5.9,3.0,4.2,1.5,Iris-versicolor 64 | 63,6.0,2.2,4.0,1.0,Iris-versicolor 65 | 64,6.1,2.9,4.7,1.4,Iris-versicolor 66 | 65,5.6,2.9,3.6,1.3,Iris-versicolor 67 | 66,6.7,3.1,4.4,1.4,Iris-versicolor 68 | 67,5.6,3.0,4.5,1.5,Iris-versicolor 69 | 68,5.8,2.7,4.1,1.0,Iris-versicolor 70 | 69,6.2,2.2,4.5,1.5,Iris-versicolor 71 | 70,5.6,2.5,3.9,1.1,Iris-versicolor 72 | 71,5.9,3.2,4.8,1.8,Iris-versicolor 73 | 72,6.1,2.8,4.0,1.3,Iris-versicolor 74 | 73,6.3,2.5,4.9,1.5,Iris-versicolor 75 | 74,6.1,2.8,4.7,1.2,Iris-versicolor 76 | 75,6.4,2.9,4.3,1.3,Iris-versicolor 77 | 76,6.6,3.0,4.4,1.4,Iris-versicolor 78 | 77,6.8,2.8,4.8,1.4,Iris-versicolor 79 | 78,6.7,3.0,5.0,1.7,Iris-versicolor 80 | 79,6.0,2.9,4.5,1.5,Iris-versicolor 81 | 80,5.7,2.6,3.5,1.0,Iris-versicolor 82 | 81,5.5,2.4,3.8,1.1,Iris-versicolor 83 | 82,5.5,2.4,3.7,1.0,Iris-versicolor 84 | 83,5.8,2.7,3.9,1.2,Iris-versicolor 85 | 84,6.0,2.7,5.1,1.6,Iris-versicolor 86 | 85,5.4,3.0,4.5,1.5,Iris-versicolor 87 | 86,6.0,3.4,4.5,1.6,Iris-versicolor 88 | 87,6.7,3.1,4.7,1.5,Iris-versicolor 89 | 88,6.3,2.3,4.4,1.3,Iris-versicolor 90 | 89,5.6,3.0,4.1,1.3,Iris-versicolor 91 | 90,5.5,2.5,4.0,1.3,Iris-versicolor 92 | 91,5.5,2.6,4.4,1.2,Iris-versicolor 93 | 92,6.1,3.0,4.6,1.4,Iris-versicolor 94 | 93,5.8,2.6,4.0,1.2,Iris-versicolor 95 | 94,5.0,2.3,3.3,1.0,Iris-versicolor 96 | 95,5.6,2.7,4.2,1.3,Iris-versicolor 97 | 96,5.7,3.0,4.2,1.2,Iris-versicolor 98 | 97,5.7,2.9,4.2,1.3,Iris-versicolor 99 | 98,6.2,2.9,4.3,1.3,Iris-versicolor 100 | 99,5.1,2.5,3.0,1.1,Iris-versicolor 101 | 100,5.7,2.8,4.1,1.3,Iris-versicolor 102 | 101,6.3,3.3,6.0,2.5,Iris-virginica 103 | 102,5.8,2.7,5.1,1.9,Iris-virginica 104 | 103,7.1,3.0,5.9,2.1,Iris-virginica 105 | 104,6.3,2.9,5.6,1.8,Iris-virginica 106 | 105,6.5,3.0,5.8,2.2,Iris-virginica 107 | 106,7.6,3.0,6.6,2.1,Iris-virginica 108 | 107,4.9,2.5,4.5,1.7,Iris-virginica 109 | 108,7.3,2.9,6.3,1.8,Iris-virginica 110 | 109,6.7,2.5,5.8,1.8,Iris-virginica 111 | 110,7.2,3.6,6.1,2.5,Iris-virginica 112 | 111,6.5,3.2,5.1,2.0,Iris-virginica 113 | 
112,6.4,2.7,5.3,1.9,Iris-virginica
114 | 113,6.8,3.0,5.5,2.1,Iris-virginica
115 | 114,5.7,2.5,5.0,2.0,Iris-virginica
116 | 115,5.8,2.8,5.1,2.4,Iris-virginica
117 | 116,6.4,3.2,5.3,2.3,Iris-virginica
118 | 117,6.5,3.0,5.5,1.8,Iris-virginica
119 | 118,7.7,3.8,6.7,2.2,Iris-virginica
120 | 119,7.7,2.6,6.9,2.3,Iris-virginica
121 | 120,6.0,2.2,5.0,1.5,Iris-virginica
122 | 121,6.9,3.2,5.7,2.3,Iris-virginica
123 | 122,5.6,2.8,4.9,2.0,Iris-virginica
124 | 123,7.7,2.8,6.7,2.0,Iris-virginica
125 | 124,6.3,2.7,4.9,1.8,Iris-virginica
126 | 125,6.7,3.3,5.7,2.1,Iris-virginica
127 | 126,7.2,3.2,6.0,1.8,Iris-virginica
128 | 127,6.2,2.8,4.8,1.8,Iris-virginica
129 | 128,6.1,3.0,4.9,1.8,Iris-virginica
130 | 129,6.4,2.8,5.6,2.1,Iris-virginica
131 | 130,7.2,3.0,5.8,1.6,Iris-virginica
132 | 131,7.4,2.8,6.1,1.9,Iris-virginica
133 | 132,7.9,3.8,6.4,2.0,Iris-virginica
134 | 133,6.4,2.8,5.6,2.2,Iris-virginica
135 | 134,6.3,2.8,5.1,1.5,Iris-virginica
136 | 135,6.1,2.6,5.6,1.4,Iris-virginica
137 | 136,7.7,3.0,6.1,2.3,Iris-virginica
138 | 137,6.3,3.4,5.6,2.4,Iris-virginica
139 | 138,6.4,3.1,5.5,1.8,Iris-virginica
140 | 139,6.0,3.0,4.8,1.8,Iris-virginica
141 | 140,6.9,3.1,5.4,2.1,Iris-virginica
142 | 141,6.7,3.1,5.6,2.4,Iris-virginica
143 | 142,6.9,3.1,5.1,2.3,Iris-virginica
144 | 143,5.8,2.7,5.1,1.9,Iris-virginica
145 | 144,6.8,3.2,5.9,2.3,Iris-virginica
146 | 145,6.7,3.3,5.7,2.5,Iris-virginica
147 | 146,6.7,3.0,5.2,2.3,Iris-virginica
148 | 147,6.3,2.5,5.0,1.9,Iris-virginica
149 | 148,6.5,3.0,5.2,2.0,Iris-virginica
150 | 149,6.2,3.4,5.4,2.3,Iris-virginica
151 | 150,5.9,3.0,5.1,1.8,Iris-virginica
152 | 
--------------------------------------------------------------------------------
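Since Iris.csv above pairs four numeric features with a string label, a minimal sketch of reading it for the Class4 multi-class exercise might look like this (the file path is the one used in this repo; pandas and scikit-learn are assumed available, and scikit-learn's LogisticRegression handles the three classes directly):

import pandas as pd
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split

df = pd.read_csv('Class4/Example/Ex02/dataset/Iris.csv')      # the file dumped above
X = df[['SepalLengthCm', 'SepalWidthCm',                      # four numeric features
        'PetalLengthCm', 'PetalWidthCm']].values
y = df['Species'].values                                      # three string labels
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)
clf = LogisticRegression(max_iter=200).fit(X_train, y_train)  # multi-class by default
print(clf.score(X_test, y_test))                              # accuracy on the held-out split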
/Class4/Example/Ex02/dataset/数据集说明.txt:
--------------------------------------------------------------------------------
1 | The Iris flower dataset also ships with the common Python ML libraries (scikit-learn exposes it as load_iris), so there are two options for this exercise: upload the Iris.csv file and read it into a DataFrame, or load the built-in dataset directly as NumPy tensors.
2 | To keep things simple, the examples in the source package load the built-in dataset directly.
--------------------------------------------------------------------------------
/Class4/Example/Ex03/dataset/数据集说明.txt:
--------------------------------------------------------------------------------
1 | The page for this competition dataset also explains how to take part in Kaggle machine learning competitions and how to submit a results file. Readers are encouraged to give it a try.
--------------------------------------------------------------------------------
/Class4/Figures/fig01_逻辑函数.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Vuean/Zero-Basic-Machine-Learning/c5a44fa0fd964c11aa87c17259d1c1f5f6581946/Class4/Figures/fig01_逻辑函数.jpg
--------------------------------------------------------------------------------
/Class4/Figures/fig02_Sigmoid函数.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Vuean/Zero-Basic-Machine-Learning/c5a44fa0fd964c11aa87c17259d1c1f5f6581946/Class4/Figures/fig02_Sigmoid函数.jpg
--------------------------------------------------------------------------------
/Class4/Figures/fig03_逻辑回归模型示意.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Vuean/Zero-Basic-Machine-Learning/c5a44fa0fd964c11aa87c17259d1c1f5f6581946/Class4/Figures/fig03_逻辑回归模型示意.jpg
--------------------------------------------------------------------------------
/Class4/Figures/fig04_逻辑回归损失函数.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Vuean/Zero-Basic-Machine-Learning/c5a44fa0fd964c11aa87c17259d1c1f5f6581946/Class4/Figures/fig04_逻辑回归损失函数.jpg
--------------------------------------------------------------------------------
/Class4/Figures/fig05_逻辑回归损失函数曲线.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Vuean/Zero-Basic-Machine-Learning/c5a44fa0fd964c11aa87c17259d1c1f5f6581946/Class4/Figures/fig05_逻辑回归损失函数曲线.jpg
--------------------------------------------------------------------------------
/Class4/Figures/fig06_逻辑回归梯度计算公式.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Vuean/Zero-Basic-Machine-Learning/c5a44fa0fd964c11aa87c17259d1c1f5f6581946/Class4/Figures/fig06_逻辑回归梯度计算公式.jpg
--------------------------------------------------------------------------------
/Class4/Figures/fig07_散点图显示年龄-最大心率和标签之间的关系.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Vuean/Zero-Basic-Machine-Learning/c5a44fa0fd964c11aa87c17259d1c1f5f6581946/Class4/Figures/fig07_散点图显示年龄-最大心率和标签之间的关系.jpg
--------------------------------------------------------------------------------
/Class4/Figures/fig08_训练集和测试集的损失曲线.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Vuean/Zero-Basic-Machine-Learning/c5a44fa0fd964c11aa87c17259d1c1f5f6581946/Class4/Figures/fig08_训练集和测试集的损失曲线.jpg
--------------------------------------------------------------------------------
/Class4/Figures/fig09_多元分类示意.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Vuean/Zero-Basic-Machine-Learning/c5a44fa0fd964c11aa87c17259d1c1f5f6581946/Class4/Figures/fig09_多元分类示意.jpg
--------------------------------------------------------------------------------
/Class4/Figures/fig10_机器学习模型对数据集的拟合.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Vuean/Zero-Basic-Machine-Learning/c5a44fa0fd964c11aa87c17259d1c1f5f6581946/Class4/Figures/fig10_机器学习模型对数据集的拟合.jpg
--------------------------------------------------------------------------------
/Class4/Figures/fig11_寻找模型优化和泛化的平衡点.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Vuean/Zero-Basic-Machine-Learning/c5a44fa0fd964c11aa87c17259d1c1f5f6581946/Class4/Figures/fig11_寻找模型优化和泛化的平衡点.jpg
--------------------------------------------------------------------------------
/Class4/Figures/fig12_3种鸢尾花样本的分布.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Vuean/Zero-Basic-Machine-Learning/c5a44fa0fd964c11aa87c17259d1c1f5f6581946/Class4/Figures/fig12_3种鸢尾花样本的分布.jpg
--------------------------------------------------------------------------------
/Class4/Figures/fig13_不同C值带来不同的分类边界.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Vuean/Zero-Basic-Machine-Learning/c5a44fa0fd964c11aa87c17259d1c1f5f6581946/Class4/Figures/fig13_不同C值带来不同的分类边界.jpg
--------------------------------------------------------------------------------
/Class4/Figures/fig14_不同C值来不同的分类准确率.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Vuean/Zero-Basic-Machine-Learning/c5a44fa0fd964c11aa87c17259d1c1f5f6581946/Class4/Figures/fig14_不同C值来不同的分类准确率.jpg
--------------------------------------------------------------------------------
/Class5/Example/Ex01/C02 Using TensorBoard.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "Using TensorBoard"
8 | ]
9 | },
10 | {
11 | "cell_type": "code",
12 | "execution_count": 1,
13 | "metadata": {},
14 | "outputs": [
15 | {
16 | "data": {
17 | "text/html": [
18 | "[TensorBoard iframe HTML stripped in this dump]"
33 | ],
34 | "text/plain": [
35 | "[IPython IFrame object repr stripped in this dump]"
36 | ]
37 | },
38 | "metadata": {},
39 | "output_type": "display_data"
40 | }
41 | ],
42 | "source": [
43 | "# Load and launch TensorBoard\n",
44 | "%load_ext tensorboard\n",
45 | "%tensorboard --logdir logs"
46 | ]
47 | },
48 | {
49 | "cell_type": "code",
50 | "execution_count": 2,
51 | "metadata": {},
52 | "outputs": [],
53 | "source": [
54 | "# Build the machine learning model\n",
55 | "import tensorflow as tf\n",
56 | "mnist = tf.keras.datasets.mnist\n",
57 | "((x_train, y_train), (x_test, y_test)) = mnist.load_data()\n",
58 | "(x_train, x_test) = (x_train / 255.0, x_test / 255.0)\n",
59 | "model = tf.keras.models.Sequential([\n",
60 | " tf.keras.layers.Flatten(input_shape=(28, 28)),\n",
61 | " tf.keras.layers.Dense(512, activation=tf.nn.relu),\n",
62 | " tf.keras.layers.Dropout(0.2),\n",
63 | " tf.keras.layers.Dense(10, activation=tf.nn.softmax)\n",
64 | "])\n",
65 | "model.compile(\n",
66 | " optimizer='adam',\n",
67 | " loss='sparse_categorical_crossentropy',\n",
68 | " metrics=['accuracy'],\n",
69 | ")"
70 | ]
71 | },
72 | {
73 | "cell_type": "code",
74 | "execution_count": 3,
75 | "metadata": {},
76 | "outputs": [
77 | {
78 | "name": "stdout",
79 | "output_type": "stream",
80 | "text": [
81 | "Epoch 1/5\n",
82 | " 1/1875 [..............................] - ETA: 0s - loss: 2.3535 - accuracy: 0.1562WARNING:tensorflow:From d:\\Anaconda3\\envs\\Vuean_ML\\lib\\site-packages\\tensorflow\\python\\ops\\summary_ops_v2.py:1277: stop (from tensorflow.python.eager.profiler) is deprecated and will be removed after 2020-07-01.\n",
83 | "Instructions for updating:\n",
84 | "use `tf.profiler.experimental.stop` instead.\n",
85 | " 2/1875 [..............................] - ETA: 46s - loss: 2.2921 - accuracy: 0.1719WARNING:tensorflow:Callbacks method `on_train_batch_end` is slow compared to the batch time (batch time: 0.0030s vs `on_train_batch_end` time: 0.0470s). Check your callbacks.\n",
86 | "1875/1875 [==============================] - 4s 2ms/step - loss: 0.2189 - accuracy: 0.9342\n",
87 | "Epoch 2/5\n",
88 | "1875/1875 [==============================] - 3s 1ms/step - loss: 0.0972 - accuracy: 0.9702\n",
89 | "Epoch 3/5\n",
90 | "1875/1875 [==============================] - 3s 2ms/step - loss: 0.0686 - accuracy: 0.9783\n",
91 | "Epoch 4/5\n",
92 | "1875/1875 [==============================] - 3s 2ms/step - loss: 0.0535 - accuracy: 0.9825\n",
93 | "Epoch 5/5\n",
94 | "1875/1875 [==============================] - 3s 2ms/step - loss: 0.0420 - accuracy: 0.9864\n"
95 | ]
96 | },
97 | {
98 | "data": {
99 | "text/plain": [
100 | "[Keras History object repr stripped in this dump]"
101 | ]
102 | },
103 | "execution_count": 3,
104 | "metadata": {},
105 | "output_type": "execute_result"
106 | }
107 | ],
108 | "source": [
109 | "# Register the TensorBoard callback\n",
110 | "tensorboard_callback = tf.keras.callbacks.TensorBoard(\"logs\")\n",
111 | "model.fit(\n",
112 | " x_train,\n",
113 | " y_train,\n",
114 | " epochs=5,\n",
115 | " callbacks=[tensorboard_callback],\n",
116 | ")"
117 | ]
118 | }
119 | ],
120 | "metadata": {
121 | "interpreter": {
122 | "hash": "96bbb65fb5df4d9cc0d3b437c46ecfe8c6742e111c114025f0cea31b14306341"
123 | },
124 | "kernelspec": {
125 | "display_name": "Python 3.8.8 ('Vuean_ML')",
126 | "language": "python",
127 | "name": "python3"
128 | },
129 | "language_info": {
130 | "codemirror_mode": {
131 | "name": "ipython",
132 | "version": 3
133 | },
134 | "file_extension": ".py",
135 | "mimetype": "text/x-python",
136 | "name": "python",
137 | "nbconvert_exporter": "python",
138 | "pygments_lexer": "ipython3",
139 | "version": "3.8.8"
140 | },
141 | "orig_nbformat": 4
142 | },
143 | "nbformat": 4,
144 | "nbformat_minor": 2
145 | }
146 | 
--------------------------------------------------------------------------------
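The notebook above registers TensorBoard through a bare tf.keras.callbacks.TensorBoard("logs"). A common refinement, sketched here under the same TF 2.x API (the timestamped log-directory layout is a convention, not something the notebook itself uses), is to give each run its own subdirectory so TensorBoard can overlay runs:

import datetime
import tensorflow as tf

log_dir = 'logs/fit/' + datetime.datetime.now().strftime('%Y%m%d-%H%M%S')  # one subdirectory per run
tensorboard_callback = tf.keras.callbacks.TensorBoard(
    log_dir=log_dir,
    histogram_freq=1,  # also record weight histograms once per epoch
)
# model.fit(x_train, y_train, epochs=5, callbacks=[tensorboard_callback])
# In a notebook, point the extension at the parent directory: %tensorboard --logdir logs/fit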
/Class5/Example/Ex02/dataset/数据集说明.txt:
--------------------------------------------------------------------------------
1 | The page for this competition dataset also explains how to take part in Kaggle machine learning competitions and how to submit a results file. Readers are encouraged to give it a try.
--------------------------------------------------------------------------------
/Class5/figures/fig01_在大数据领域神经网络的效能显著地领先于其他算法.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Vuean/Zero-Basic-Machine-Learning/c5a44fa0fd964c11aa87c17259d1c1f5f6581946/Class5/figures/fig01_在大数据领域神经网络的效能显著地领先于其他算法.jpg
--------------------------------------------------------------------------------
/Class5/figures/fig02_神经元.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Vuean/Zero-Basic-Machine-Learning/c5a44fa0fd964c11aa87c17259d1c1f5f6581946/Class5/figures/fig02_神经元.jpg
--------------------------------------------------------------------------------
/Class5/figures/fig03_与逻辑判断.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Vuean/Zero-Basic-Machine-Learning/c5a44fa0fd964c11aa87c17259d1c1f5f6581946/Class5/figures/fig03_与逻辑判断.jpg
--------------------------------------------------------------------------------
/Class5/figures/fig04_或逻辑判断.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Vuean/Zero-Basic-Machine-Learning/c5a44fa0fd964c11aa87c17259d1c1f5f6581946/Class5/figures/fig04_或逻辑判断.jpg
--------------------------------------------------------------------------------
/Class5/figures/fig05_假设空间与拟合能力.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Vuean/Zero-Basic-Machine-Learning/c5a44fa0fd964c11aa87c17259d1c1f5f6581946/Class5/figures/fig05_假设空间与拟合能力.jpg
--------------------------------------------------------------------------------
/Class5/figures/fig06_同或数据集.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Vuean/Zero-Basic-Machine-Learning/c5a44fa0fd964c11aa87c17259d1c1f5f6581946/Class5/figures/fig06_同或数据集.jpg
--------------------------------------------------------------------------------
/Class5/figures/fig07_非线性模型.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Vuean/Zero-Basic-Machine-Learning/c5a44fa0fd964c11aa87c17259d1c1f5f6581946/Class5/figures/fig07_非线性模型.jpg
--------------------------------------------------------------------------------
/Class5/figures/fig08_加入网络隐层拟合同或数据集.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Vuean/Zero-Basic-Machine-Learning/c5a44fa0fd964c11aa87c17259d1c1f5f6581946/Class5/figures/fig08_加入网络隐层拟合同或数据集.jpg
--------------------------------------------------------------------------------
/Class5/figures/fig09_银行客户数据的分布情况.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Vuean/Zero-Basic-Machine-Learning/c5a44fa0fd964c11aa87c17259d1c1f5f6581946/Class5/figures/fig09_银行客户数据的分布情况.jpg
--------------------------------------------------------------------------------
/Class5/figures/fig10_神经网络结构及对应层生成的语句.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Vuean/Zero-Basic-Machine-Learning/c5a44fa0fd964c11aa87c17259d1c1f5f6581946/Class5/figures/fig10_神经网络结构及对应层生成的语句.jpg
--------------------------------------------------------------------------------
/Class5/figures/fig10_训练集和验证集上的损失曲线和准确率曲线.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Vuean/Zero-Basic-Machine-Learning/c5a44fa0fd964c11aa87c17259d1c1f5f6581946/Class5/figures/fig10_训练集和验证集上的损失曲线和准确率曲线.png
--------------------------------------------------------------------------------
/Class5/figures/fig11_数据真值与预测值对比图.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Vuean/Zero-Basic-Machine-Learning/c5a44fa0fd964c11aa87c17259d1c1f5f6581946/Class5/figures/fig11_数据真值与预测值对比图.png
--------------------------------------------------------------------------------
/Class5/figures/fig12_预测值和真值对照表.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Vuean/Zero-Basic-Machine-Learning/c5a44fa0fd964c11aa87c17259d1c1f5f6581946/Class5/figures/fig12_预测值和真值对照表.png
--------------------------------------------------------------------------------
/Class5/figures/fig13_混淆矩阵.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Vuean/Zero-Basic-Machine-Learning/c5a44fa0fd964c11aa87c17259d1c1f5f6581946/Class5/figures/fig13_混淆矩阵.jpg
--------------------------------------------------------------------------------
/Class5/figures/fig14_精确率计算公式.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Vuean/Zero-Basic-Machine-Learning/c5a44fa0fd964c11aa87c17259d1c1f5f6581946/Class5/figures/fig14_精确率计算公式.jpg -------------------------------------------------------------------------------- /Class5/figures/fig15_召回率计算公式.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Vuean/Zero-Basic-Machine-Learning/c5a44fa0fd964c11aa87c17259d1c1f5f6581946/Class5/figures/fig15_召回率计算公式.jpg -------------------------------------------------------------------------------- /Class5/figures/fig16_F1分数计算公式.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Vuean/Zero-Basic-Machine-Learning/c5a44fa0fd964c11aa87c17259d1c1f5f6581946/Class5/figures/fig16_F1分数计算公式.jpg -------------------------------------------------------------------------------- /Class5/figures/fig17_单层神经网络的混淆矩阵.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Vuean/Zero-Basic-Machine-Learning/c5a44fa0fd964c11aa87c17259d1c1f5f6581946/Class5/figures/fig17_单层神经网络的混淆矩阵.jpg -------------------------------------------------------------------------------- /Class5/figures/fig18_标准化公式.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Vuean/Zero-Basic-Machine-Learning/c5a44fa0fd964c11aa87c17259d1c1f5f6581946/Class5/figures/fig18_标准化公式.jpg -------------------------------------------------------------------------------- /Class5/figures/fig19_特征缩放后的损失曲线和准确率曲线.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Vuean/Zero-Basic-Machine-Learning/c5a44fa0fd964c11aa87c17259d1c1f5f6581946/Class5/figures/fig19_特征缩放后的损失曲线和准确率曲线.png -------------------------------------------------------------------------------- /Class5/figures/fig20_特征缩放后的混淆矩.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Vuean/Zero-Basic-Machine-Learning/c5a44fa0fd964c11aa87c17259d1c1f5f6581946/Class5/figures/fig20_特征缩放后的混淆矩.png -------------------------------------------------------------------------------- /Class5/figures/fig21_从逻辑回归到深度神经网络的演进.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Vuean/Zero-Basic-Machine-Learning/c5a44fa0fd964c11aa87c17259d1c1f5f6581946/Class5/figures/fig21_从逻辑回归到深度神经网络的演进.jpg -------------------------------------------------------------------------------- /Class5/figures/fig22_局部最低点和鞍点.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Vuean/Zero-Basic-Machine-Learning/c5a44fa0fd964c11aa87c17259d1c1f5f6581946/Class5/figures/fig22_局部最低点和鞍点.jpg -------------------------------------------------------------------------------- /Class5/figures/fig23_动量SGD示意图.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Vuean/Zero-Basic-Machine-Learning/c5a44fa0fd964c11aa87c17259d1c1f5f6581946/Class5/figures/fig23_动量SGD示意图.jpg -------------------------------------------------------------------------------- /Class5/figures/fig24_神经元的激活过程.jpg: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/Vuean/Zero-Basic-Machine-Learning/c5a44fa0fd964c11aa87c17259d1c1f5f6581946/Class5/figures/fig24_神经元的激活过程.jpg -------------------------------------------------------------------------------- /Class5/figures/fig25_Sigmoid函数图像.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Vuean/Zero-Basic-Machine-Learning/c5a44fa0fd964c11aa87c17259d1c1f5f6581946/Class5/figures/fig25_Sigmoid函数图像.jpg -------------------------------------------------------------------------------- /Class5/figures/fig26_Tanh函数图像.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Vuean/Zero-Basic-Machine-Learning/c5a44fa0fd964c11aa87c17259d1c1f5f6581946/Class5/figures/fig26_Tanh函数图像.jpg -------------------------------------------------------------------------------- /Class5/figures/fig27_ReLU函数图像.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Vuean/Zero-Basic-Machine-Learning/c5a44fa0fd964c11aa87c17259d1c1f5f6581946/Class5/figures/fig27_ReLU函数图像.jpg -------------------------------------------------------------------------------- /Class5/figures/fig28_Leaky ReLU函数图像.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Vuean/Zero-Basic-Machine-Learning/c5a44fa0fd964c11aa87c17259d1c1f5f6581946/Class5/figures/fig28_Leaky ReLU函数图像.jpg -------------------------------------------------------------------------------- /Class5/figures/fig29_eLU函数图像.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Vuean/Zero-Basic-Machine-Learning/c5a44fa0fd964c11aa87c17259d1c1f5f6581946/Class5/figures/fig29_eLU函数图像.jpg -------------------------------------------------------------------------------- /Class5/figures/fig30_Softmax多分类输出层的激活函数.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Vuean/Zero-Basic-Machine-Learning/c5a44fa0fd964c11aa87c17259d1c1f5f6581946/Class5/figures/fig30_Softmax多分类输出层的激活函数.jpg -------------------------------------------------------------------------------- /Class5/figures/fig31_验证集上损失的波动很大.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Vuean/Zero-Basic-Machine-Learning/c5a44fa0fd964c11aa87c17259d1c1f5f6581946/Class5/figures/fig31_验证集上损失的波动很大.jpg -------------------------------------------------------------------------------- /Class5/figures/fig32_过拟合现象仍然存在.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Vuean/Zero-Basic-Machine-Learning/c5a44fa0fd964c11aa87c17259d1c1f5f6581946/Class5/figures/fig32_过拟合现象仍然存在.jpg -------------------------------------------------------------------------------- /Class5/figures/fig33_Dropout示意图.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Vuean/Zero-Basic-Machine-Learning/c5a44fa0fd964c11aa87c17259d1c1f5f6581946/Class5/figures/fig33_Dropout示意图.jpg -------------------------------------------------------------------------------- /Class5/figures/fig34_添加Dropout层之后过拟合现象被大幅度地抑制.jpg: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/Vuean/Zero-Basic-Machine-Learning/c5a44fa0fd964c11aa87c17259d1c1f5f6581946/Class5/figures/fig34_添加Dropout层之后过拟合现象被大幅度地抑制.jpg -------------------------------------------------------------------------------- /Class5/figures/fig35_新的混淆矩阵.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Vuean/Zero-Basic-Machine-Learning/c5a44fa0fd964c11aa87c17259d1c1f5f6581946/Class5/figures/fig35_新的混淆矩阵.png -------------------------------------------------------------------------------- /Class6/figures/fig01_程序编译出来的卷积网络的结构信息.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Vuean/Zero-Basic-Machine-Learning/c5a44fa0fd964c11aa87c17259d1c1f5f6581946/Class6/figures/fig01_程序编译出来的卷积网络的结构信息.jpg -------------------------------------------------------------------------------- /Class6/figures/fig02_卷积网络的典型架构.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Vuean/Zero-Basic-Machine-Learning/c5a44fa0fd964c11aa87c17259d1c1f5f6581946/Class6/figures/fig02_卷积网络的典型架构.jpg -------------------------------------------------------------------------------- /Class6/figures/fig03_全连接和局部连接的示意.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Vuean/Zero-Basic-Machine-Learning/c5a44fa0fd964c11aa87c17259d1c1f5f6581946/Class6/figures/fig03_全连接和局部连接的示意.jpg -------------------------------------------------------------------------------- /Class6/figures/fig04_字母X中的小模式.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Vuean/Zero-Basic-Machine-Learning/c5a44fa0fd964c11aa87c17259d1c1f5f6581946/Class6/figures/fig04_字母X中的小模式.jpg -------------------------------------------------------------------------------- /Class6/figures/fig05_通过滑动窗口抽取局部特征.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Vuean/Zero-Basic-Machine-Learning/c5a44fa0fd964c11aa87c17259d1c1f5f6581946/Class6/figures/fig05_通过滑动窗口抽取局部特征.jpg -------------------------------------------------------------------------------- /Class6/figures/fig06_不同的过滤器抽取不同类型的特征.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Vuean/Zero-Basic-Machine-Learning/c5a44fa0fd964c11aa87c17259d1c1f5f6581946/Class6/figures/fig06_不同的过滤器抽取不同类型的特征.jpg -------------------------------------------------------------------------------- /Class6/figures/fig07_MNIST案例中第一个卷积层的输入特征图和输出特征图.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Vuean/Zero-Basic-Machine-Learning/c5a44fa0fd964c11aa87c17259d1c1f5f6581946/Class6/figures/fig07_MNIST案例中第一个卷积层的输入特征图和输出特征图.jpg -------------------------------------------------------------------------------- /Class6/figures/fig08_MNIST案例中第二个卷积层的输入特征图和输出特征图.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Vuean/Zero-Basic-Machine-Learning/c5a44fa0fd964c11aa87c17259d1c1f5f6581946/Class6/figures/fig08_MNIST案例中第二个卷积层的输入特征图和输出特征图.jpg -------------------------------------------------------------------------------- /Class6/figures/fig09_卷积运算示意——黑白图像.jpg: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/Vuean/Zero-Basic-Machine-Learning/c5a44fa0fd964c11aa87c17259d1c1f5f6581946/Class6/figures/fig09_卷积运算示意——黑白图像.jpg -------------------------------------------------------------------------------- /Class6/figures/fig10_卷积运算示意——RGB图像.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Vuean/Zero-Basic-Machine-Learning/c5a44fa0fd964c11aa87c17259d1c1f5f6581946/Class6/figures/fig10_卷积运算示意——RGB图像.jpg -------------------------------------------------------------------------------- /Class6/figures/fig11_边界效益.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Vuean/Zero-Basic-Machine-Learning/c5a44fa0fd964c11aa87c17259d1c1f5f6581946/Class6/figures/fig11_边界效益.jpg -------------------------------------------------------------------------------- /Class6/figures/fig12_填充操作.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Vuean/Zero-Basic-Machine-Learning/c5a44fa0fd964c11aa87c17259d1c1f5f6581946/Class6/figures/fig12_填充操作.jpg -------------------------------------------------------------------------------- /Class6/figures/fig13_步幅为2的步进卷积.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Vuean/Zero-Basic-Machine-Learning/c5a44fa0fd964c11aa87c17259d1c1f5f6581946/Class6/figures/fig13_步幅为2的步进卷积.jpg -------------------------------------------------------------------------------- /Class6/figures/fig14_程序编译出的卷积网络结构.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Vuean/Zero-Basic-Machine-Learning/c5a44fa0fd964c11aa87c17259d1c1f5f6581946/Class6/figures/fig14_程序编译出的卷积网络结构.png -------------------------------------------------------------------------------- /Class6/figures/fig15_损失曲线和准确率曲线.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Vuean/Zero-Basic-Machine-Learning/c5a44fa0fd964c11aa87c17259d1c1f5f6581946/Class6/figures/fig15_损失曲线和准确率曲线.png -------------------------------------------------------------------------------- /Class6/figures/fig16_第一次调优后损失曲线和准确率曲线.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Vuean/Zero-Basic-Machine-Learning/c5a44fa0fd964c11aa87c17259d1c1f5f6581946/Class6/figures/fig16_第一次调优后损失曲线和准确率曲线.png -------------------------------------------------------------------------------- /Class6/figures/fig17_第二次调优后损失曲线和准确率曲线.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Vuean/Zero-Basic-Machine-Learning/c5a44fa0fd964c11aa87c17259d1c1f5f6581946/Class6/figures/fig17_第二次调优后损失曲线和准确率曲线.png -------------------------------------------------------------------------------- /Class6/figures/fig18_对同一张图片进行数据增强.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Vuean/Zero-Basic-Machine-Learning/c5a44fa0fd964c11aa87c17259d1c1f5f6581946/Class6/figures/fig18_对同一张图片进行数据增强.jpg -------------------------------------------------------------------------------- /Class6/figures/fig19_数据增强后损失曲线和准确率曲线.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/Vuean/Zero-Basic-Machine-Learning/c5a44fa0fd964c11aa87c17259d1c1f5f6581946/Class6/figures/fig19_数据增强后损失曲线和准确率曲线.png
--------------------------------------------------------------------------------
/Class6/figures/fig20_各种大型卷积网络性能比较.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Vuean/Zero-Basic-Machine-Learning/c5a44fa0fd964c11aa87c17259d1c1f5f6581946/Class6/figures/fig20_各种大型卷积网络性能比较.jpg
--------------------------------------------------------------------------------
/Class6/figures/fig21_VGGNet架构.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Vuean/Zero-Basic-Machine-Learning/c5a44fa0fd964c11aa87c17259d1c1f5f6581946/Class6/figures/fig21_VGGNet架构.jpg
--------------------------------------------------------------------------------
/Class6/figures/fig22_Inception模块的基本架构.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Vuean/Zero-Basic-Machine-Learning/c5a44fa0fd964c11aa87c17259d1c1f5f6581946/Class6/figures/fig22_Inception模块的基本架构.jpg
--------------------------------------------------------------------------------
/Class7/Example/Ex01/C01 RNN - Comments.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "code",
5 | "execution_count": 1,
6 | "metadata": {},
7 | "outputs": [
8 | {
9 | "name": "stdout",
10 | "output_type": "stream",
11 | "text": [
12 | "Found 13 words\n",
13 | "Index sequences of the 3 sentences [[2, 3, 4, 1, 5, 6], [7, 8, 9, 1, 10, 11], [12, 13]]\n",
14 | "One-hot encoding of the 3 sentences [[0. 1. 1. 1. 1. 1. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.\n",
15 | "  0. 0. 0. 0. 0. 0.]\n",
16 | " [0. 1. 0. 0. 0. 0. 0. 1. 1. 1. 1. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.\n",
17 | "  0. 0. 0. 0. 0. 0.]\n",
18 | " [0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 1. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.\n",
19 | "  0. 0. 0. 0. 0. 0.]]\n"
20 | ]
21 | }
22 | ],
23 | "source": [
24 | "from keras.preprocessing.text import Tokenizer # Import the Tokenizer tool\n",
25 | "words = ['Lao Wang has a Wechat account.', 'He is not a nice person.', 'Be careful.']\n",
26 | "tokenizer = Tokenizer(num_words=30) # Cap the dictionary at 30 words (the corpus is tiny)\n",
27 | "tokenizer.fit_on_texts(words) # Build the dictionary from the 3 sentences\n",
28 | "sequences = tokenizer.texts_to_sequences(words) # Encode the 3 sentences as dictionary indices\n",
29 | "one_hot_matrix = tokenizer.texts_to_matrix(words, mode='binary') # One-hot encode them\n",
30 | "word_index = tokenizer.word_index # The dictionary mapping words to indices\n",
31 | "print('Found %s words' % len(word_index))\n",
32 | "print('Index sequences of the 3 sentences', sequences)\n",
33 | "print('One-hot encoding of the 3 sentences', one_hot_matrix)"
34 | ]
35 | },
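{
"cell_type": "markdown",
"metadata": {},
"source": [
"A quick aside (a sketch, not a cell from the original notebook): num_words only caps the indices that texts_to_sequences and texts_to_matrix keep, and words unseen at fit time are silently dropped unless the Tokenizer is given an oov_token, which reserves index 1 for out-of-vocabulary words."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"tok = Tokenizer(num_words=30, oov_token='<OOV>') # Reserve index 1 for unseen words\n",
"tok.fit_on_texts(words) # Fit on the same 3 sentences as above\n",
"print(tok.texts_to_sequences(['Lao Wang met a stranger.'])) # 'met' and 'stranger' map to the OOV index\n"
]
},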
\n", 52 | "\n", 65 | "\n", 66 | " \n", 67 | " \n", 68 | " \n", 69 | " \n", 70 | " \n", 71 | " \n", 72 | " \n", 73 | " \n", 74 | " \n", 75 | " \n", 76 | " \n", 77 | " \n", 78 | " \n", 79 | " \n", 80 | " \n", 81 | " \n", 82 | " \n", 83 | " \n", 84 | " \n", 85 | " \n", 86 | " \n", 87 | " \n", 88 | " \n", 89 | " \n", 90 | " \n", 91 | " \n", 92 | " \n", 93 | " \n", 94 | " \n", 95 | " \n", 96 | " \n", 97 | " \n", 98 | " \n", 99 | " \n", 100 | " \n", 101 | " \n", 102 | " \n", 103 | " \n", 104 | " \n", 105 | " \n", 106 | "
idReview TextRating
00Absolutely wonderful - silky and comfortable4
11Love this dress! it's sooo pretty. i happene...5
22I had such high hopes for this dress and reall...3
33I love, love, love this jumpsuit. it's fun, fl...5
44This shirt is very flattering to all due to th...5
\n", 107 | "
" 108 | ], 109 | "text/plain": [ 110 | " id Review Text Rating\n", 111 | "0 0 Absolutely wonderful - silky and comfortable 4\n", 112 | "1 1 Love this dress! it's sooo pretty. i happene... 5\n", 113 | "2 2 I had such high hopes for this dress and reall... 3\n", 114 | "3 3 I love, love, love this jumpsuit. it's fun, fl... 5\n", 115 | "4 4 This shirt is very flattering to all due to th... 5" 116 | ] 117 | }, 118 | "execution_count": 2, 119 | "metadata": {}, 120 | "output_type": "execute_result" 121 | } 122 | ], 123 | "source": [ 124 | "# 读入这个评论文本数据集:\n", 125 | "import numpy as np\n", 126 | "import pandas as pd\n", 127 | "dir = 'dataset/'\n", 128 | "dir_train = dir + 'Clothing Reviews.csv'\n", 129 | "df_train = pd.read_csv(dir_train)\n", 130 | "df_train.head()" 131 | ] 132 | }, 133 | { 134 | "cell_type": "code", 135 | "execution_count": 3, 136 | "metadata": {}, 137 | "outputs": [], 138 | "source": [ 139 | "# 然后对数据集进行分词工作。词典的大小设定为2万。\n", 140 | "from keras.preprocessing.text import Tokenizer # 导入分词工具\n", 141 | "X_train_lst = df_train[\"Review Text\"].values # 将评论读入张量(训练集)\n", 142 | "y_train = df_train[\"Rating\"].values # 构建标签集\n", 143 | "dictionary_size = 20000 # 设定词典的大小\n", 144 | "tokenizer = Tokenizer(num_words=dictionary_size) # 初始化词典\n", 145 | "tokenizer.fit_on_texts( X_train_lst ) # 使用训练集创建词典索引\n", 146 | "# 为所有的单词分配索引值,完成分词工作\n", 147 | "X_train_tokenized_lst = tokenizer.texts_to_sequences(X_train_lst)" 148 | ] 149 | }, 150 | { 151 | "cell_type": "code", 152 | "execution_count": 4, 153 | "metadata": {}, 154 | "outputs": [ 155 | { 156 | "data": { 157 | "image/png": "iVBORw0KGgoAAAANSUhEUgAAAX4AAAD4CAYAAADrRI2NAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjQuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8rg+JYAAAACXBIWXMAAAsTAAALEwEAmpwYAAASz0lEQVR4nO3db+yV533f8fen2HVQE6t2/bNHAQ0WUanYWnGNGJKnyUuimibTcB5EItJiHlgjshwt0SpNkEpr8gDJmZpk9TRbIo1lvCVBSElklNhrKWsURXJNf06xATvMtGYxBZlfG0XBT1iNv3twLtQjfPj952B+1/sl3Tr3+d7Xdc51Hcsfbq5zn5tUFZKkfvzStR6AJGm8DH5J6ozBL0mdMfglqTMGvyR15oZrPYCZ3HbbbbVmzZprPQxJuq68+OKLf1dVE6OOveeDf82aNUxOTl7rYUjSdSXJ/73SMZd6JKkzBr8kdcbgl6TOzBj8Sd6X5HCSl5IcT/LFVv9Ckr9NcqRtHx3qsyvJySQnktw/VL8nydF27LEkuTrTkiRdyWy+3L0AfKiq3kpyI/CjJM+1Y1+tqj8cbpxkPbANuBP4deDPkvxGVV0EngB2AH8BPAtsAZ5DkjQ2M57x18Bb7emNbZvuzm5bgX1VdaGqXgdOApuSrABurqrna3BnuKeBBxY0eknSnM1qjT/JsiRHgHPAwap6oR36TJKXkzyZ5JZWWwm8MdT9dKutbPuX10e9344kk0kmp6amZj8bSdKMZhX8VXWxqjYAqxicvd/FYNnmg8AG4Czw5dZ81Lp9TVMf9X57qmpjVW2cmBj5+wNJ0jzN6aqeqvo58ANgS1W92f5AeAf4GrCpNTsNrB7qtgo40+qrRtQlSWM045e7SSaAf6iqnydZDnwE+FKSFVV1tjX7OHCs7R8AvpnkKwy+3F0HHK6qi0nOJ9kMvAA8CPy3RZ5PV9bs/P7I+qlHPzbmkUi6nszmqp4VwN4kyxj8DWF/VX0vyf9IsoHBcs0p4NMAVXU8yX7gFeBt4JF2RQ/Aw8BTwHIGV/N4RY8kjdmMwV9VLwN3j6h/apo+u4HdI+qTwF1zHKMkaRH5y11J6ozBL0mdMfglqTMGvyR1xuCXpM4Y/JLUGYNfkjpj8EtSZwx+SeqMwS9JnTH4JakzBr8kdcbgl6TOGPyS1BmDX5I6Y/BLUmcMfknqjMEvSZ0x+CWpMwa/JHXG4JekzswY/Enel+RwkpeSHE/yxVa/NcnBJK+1x1uG+uxKcjLJiST3D9XvSXK0HXssSa7OtCRJVzKbM/4LwIeq6reADcCWJJuBncChqloHHGrPSbIe2AbcCWwBHk+yrL3WE8AOYF3btizeVCRJszFj8NfAW+3pjW0rYCuwt9X3Ag+0/a3Avqq6UFWvAyeBTUlWADdX1fNVVcDTQ30kSWMyqzX+JMuSHAHOAQer6gXgjqo6C9Aeb2/NVwJvDHU/3Wor2/7l9VHvtyPJZJLJqampOUxHkjSTWQV/VV2sqg3AKgZn73dN03zUun1NUx/1fnuqamNVbZyYmJjNECVJszSnq3qq6ufADxiszb/Zlm9oj+das9PA6qFuq4Azrb5qRF2SNEazuapnIsmvtv3lwEeAnwAHgO2t2XbgmbZ/ANiW5KYkaxl8iXu4LQedT7K5Xc3z4FAfSdKY3DCLNiuAve3KnF8C9lfV95I8D+xP8hDwU+ATAFV1PMl+4BXgbeCRqrrYXuth4ClgOfBc2yRJYzRj8FfVy8DdI+p/D3z4Cn12A7tH1CeB6b4fkCRdZf5yV5I6Y/BLUmcMfknqjMEvSZ0x+CWpMwa/JHXG4Jekzhj8ktQZg1+SOmPwS1JnDH5J6ozBL0mdMfglqTMGvyR1xuCXpM4Y/JLUGYNfkjpj8EtSZwx+SeqMwS9JnTH4JakzN8zUIMlq4GngnwDvAHuq6o+SfAH498BUa/r5qnq29dkF
PARcBP5DVf1Jq98DPAUsB54FPltVtZgTGrc1O78/sn7q0Y+NeSSSNDszBj/wNvB7VfXjJB8AXkxysB37alX94XDjJOuBbcCdwK8Df5bkN6rqIvAEsAP4CwbBvwV4bnGmIkmajRmDv6rOAmfb/vkkrwIrp+myFdhXVReA15OcBDYlOQXcXFXPAyR5GniA6yT4r3RmL0nXmzmt8SdZA9wNvNBKn0nycpInk9zSaiuBN4a6nW61lW3/8vqo99mRZDLJ5NTU1KgmkqR5mnXwJ3k/8G3gc1X1CwbLNh8ENjD4G8GXLzUd0b2mqb+7WLWnqjZW1caJiYnZDlGSNAuzCv4kNzII/W9U1XcAqurNqrpYVe8AXwM2teangdVD3VcBZ1p91Yi6JGmMZgz+JAG+DrxaVV8Zqq8YavZx4FjbPwBsS3JTkrXAOuBw+67gfJLN7TUfBJ5ZpHlIkmZpNlf13At8Cjia5EirfR74ZJINDJZrTgGfBqiq40n2A68wuCLokXZFD8DD/OPlnM9xnXyxK0lLyWyu6vkRo9fnn52mz25g94j6JHDXXAYoSVpc/nJXkjpj8EtSZwx+SeqMwS9JnZnNVT2aB2/eJum9yjN+SeqMwS9JnTH4JakzBr8kdcbgl6TOGPyS1BmDX5I6Y/BLUmcMfknqjMEvSZ3xlg1DrnSbhXG8h7dykDQunvFLUmc841+C/FuFpOl4xi9JnTH4JakzBr8kdWbG4E+yOsmfJ3k1yfEkn231W5McTPJae7xlqM+uJCeTnEhy/1D9niRH27HHkuTqTEuSdCWzOeN/G/i9qvpNYDPwSJL1wE7gUFWtAw6157Rj24A7gS3A40mWtdd6AtgBrGvblkWciyRpFmYM/qo6W1U/bvvngVeBlcBWYG9rthd4oO1vBfZV1YWqeh04CWxKsgK4uaqer6oCnh7qI0kakzmt8SdZA9wNvADcUVVnYfCHA3B7a7YSeGOo2+lWW9n2L6+Pep8dSSaTTE5NTc1liJKkGcw6+JO8H/g28Lmq+sV0TUfUapr6u4tVe6pqY1VtnJiYmO0QJUmzMKvgT3Ijg9D/RlV9p5XfbMs3tMdzrX4aWD3UfRVwptVXjahLksZoNlf1BPg68GpVfWXo0AFge9vfDjwzVN+W5KYkaxl8iXu4LQedT7K5veaDQ30kSWMym1s23At8Cjia5EirfR54FNif5CHgp8AnAKrqeJL9wCsMrgh6pKoutn4PA08By4Hn2iZJGqMZg7+qfsTo9XmAD1+hz25g94j6JHDXXAYoSVpc/nJXkjpj8EtSZwx+SeqMwS9JnfEfYnmPmO6fffQfUJG0mDzjl6TOGPyS1BmDX5I6Y/BLUmcMfknqjMEvSZ0x+CWpMwa/JHXGH3BdB6b7cZckzZVn/JLUGYNfkjpj8EtSZwx+SepMl1/u+mWppJ55xi9JnTH4JakzMwZ/kieTnEtybKj2hSR/m+RI2z46dGxXkpNJTiS5f6h+T5Kj7dhjSbL405EkzWQ2Z/xPAVtG1L9aVRva9ixAkvXANuDO1ufxJMta+yeAHcC6to16TUnSVTZj8FfVD4GfzfL1tgL7qupCVb0OnAQ2JVkB3FxVz1dVAU8DD8xzzJKkBVjIGv9nkrzcloJuabWVwBtDbU632sq2f3l9pCQ7kkwmmZyamlrAECVJl5tv8D8BfBDYAJwFvtzqo9bta5r6SFW1p6o2VtXGiYmJeQ5RkjTKvIK/qt6sqotV9Q7wNWBTO3QaWD3UdBVwptVXjahLksZsXsHf1uwv+Thw6YqfA8C2JDclWcvgS9zDVXUWOJ9kc7ua50HgmQWMW5I0TzP+cjfJt4D7gNuSnAb+ALgvyQYGyzWngE8DVNXxJPuBV4C3gUeq6mJ7qYcZXCG0HHiubZKkMZsx+KvqkyPKX5+m/W5g94j6JHDXnEYnSVp0/nJXkjpj8EtSZwx+SeqMwS9JnTH4JakzBr8kdcbgl6TOGPyS1BmDX5I6Y/BLUmcMfknqjMEvSZ0x+CWpMwa/JHXG4Jekzhj8ktQZg1+SOmPwS1JnDH5J6ozBL0mdMfglqTMzBn+SJ5OcS3JsqHZrkoNJXmuPtwwd25XkZJITSe4fqt+T5Gg79liSLP50JEkzmc0Z/1PAlstqO4FDVbUOONSek2Q9sA24s/V5PMmy1ucJYAewrm2Xv6YkaQxmDP6q+iHws8vKW4G9bX8v8MBQfV9VXaiq14GTwKYkK4Cbq+r5qirg6aE+kqQxmu8a/x1VdRagPd7e6iuBN4banW61lW3/8vpISXYkmUwyOTU1Nc8hSpJGWewvd0et29c09ZGqak9VbayqjRMTE4s2OEnS/IP/zbZ8Q3s81+qngdVD7VYBZ1p91Yi6JGnM5hv8B4DtbX878MxQfVuSm5KsZfAl7uG2HHQ+yeZ2Nc+DQ30kSWN0w0wNknwLuA+4Lclp4A+AR4H9SR4Cfgp8AqCqjifZD7wCvA08UlUX20s9zOAKoeXAc22TJI3ZjMFfVZ+8wqEPX6H9bmD3iPokcNecRidJWnT+cleSOmPwS1JnDH5J6ozBL0mdMfglqTMGvyR1xuCXpM4Y/JLUGYNfkjpj8EtSZwx+SeqMwS9JnTH4JakzBr8kdcbgl6TOGPyS1BmDX5I6Y/BLUmcMfknqjMEvSZ0x+CWpMwsK/iSnkhxNciTJZKvdmuRgktfa4y1D7XclOZnkRJL7Fzp4SdLcLcYZ/7+uqg1VtbE93wkcqqp1wKH2nCTrgW3AncAW4PEkyxbh/SVJc3A1lnq2Anvb/l7ggaH6vqq6UFWvAyeBTVfh/SVJ01ho8Bfwp0leTLKj1e6oqrMA7fH2Vl8JvDHU93SrvUuSHUkmk0xOTU0tcIiSpGE3LLD/vVV1JsntwMEkP5mmbUbUalTDqtoD7AHYuHHjyDaSpPlZ0Bl/VZ1pj+eA7zJYunkzyQqA9niuNT8NrB7qvgo4s5D3lyTN3byDP8mvJPnApX3gd4BjwAFge2u2HXim7R8AtiW5KclaYB1weL7vL0man4Us9dwBfDfJpdf5ZlX9ryR/CexP8hDwU+ATAFV1PMl+4BXgbeCRqrq4oNFLkuZs3sFfVX8D/NaI+t8DH75Cn93A7vm+pyRp4fzlriR1xuCXpM4Y/JLUGYNfkjpj8EtSZwx+SeqMwS9JnTH4JakzBr8kdcbgl6TOLPS2zO9pa3Z+/1oPQZLeczzjl6TOGPyS1BmDX5I6Y/BLUmcMfknqjMEvSZ0x+CWpMwa/JHXG4Jekzhj8ktSZsd+yIckW4I+AZcAfV9Wj4x5Dr650C4tTj35szCORdC2N9Yw/yTLgvwO/C6wHPplk/TjHIEm9G/dSzybgZFX9TVX9P2AfsHXMY5Ckro17qWcl8MbQ89PAv7i8UZIdwI729K0kJ+b5frcBfzfPvtezOc07X7qKIxkv/3v3xXlP759e6cC4gz8javWuQtUeYM+C3yyZrKqNC32d643z7ovz7stizHvcSz2ngdVDz1cBZ8Y8Bknq2riD/y+BdUnWJvllYBtwYMxjkKSujXWpp6reTvIZ4E8YXM75ZFU
dv4pvueDlouuU8+6L8+7LwpfBq961xC5JWsL85a4kdcbgl6TOLMngT7IlyYkkJ5PsvNbjWWxJnkxyLsmxodqtSQ4mea093jJ0bFf7LE4kuf/ajHphkqxO8udJXk1yPMlnW32pz/t9SQ4neanN+4utvqTnfUmSZUn+Ksn32vNe5n0qydEkR5JMttrizb2qltTG4Evjvwb+GfDLwEvA+ms9rkWe478Cfhs4NlT7L8DOtr8T+FLbX98+g5uAte2zWXat5zCPOa8AfrvtfwD4P21uS33eAd7f9m8EXgA2L/V5D83/PwLfBL7Xnvcy71PAbZfVFm3uS/GMf8nfFqKqfgj87LLyVmBv298LPDBU31dVF6rqdeAkg8/oulJVZ6vqx23/PPAqg1+CL/V5V1W91Z7e2LZiic8bIMkq4GPAHw+Vl/y8p7Foc1+KwT/qthArr9FYxumOqjoLg5AEbm/1Jfd5JFkD3M3g7HfJz7stdxwBzgEHq6qLeQP/FfhPwDtDtR7mDYM/3P80yYvtFjawiHMf+22Zx2BWt4XoyJL6PJK8H/g28Lmq+kUyanqDpiNq1+W8q+oisCHJrwLfTXLXNM2XxLyT/BvgXFW9mOS+2XQZUbvu5j3k3qo6k+R24GCSn0zTds5zX4pn/L3eFuLNJCsA2uO5Vl8yn0eSGxmE/jeq6jutvOTnfUlV/Rz4AbCFpT/ve4F/m+QUg+XaDyX5nyz9eQNQVWfa4znguwyWbhZt7ksx+Hu9LcQBYHvb3w48M1TfluSmJGuBdcDhazC+Bcng1P7rwKtV9ZWhQ0t93hPtTJ8ky4GPAD9hic+7qnZV1aqqWsPg/+H/XVX/jiU+b4Akv5LkA5f2gd8BjrGYc7/W315fpW/EP8rgqo+/Bn7/Wo/nKszvW8BZ4B8Y/Gn/EPBrwCHgtfZ461D732+fxQngd6/1+Oc553/J4K+vLwNH2vbRDub9z4G/avM+BvznVl/S877sM7iPf7yqZ8nPm8EViS+17filDFvMuXvLBknqzFJc6pEkTcPgl6TOGPyS1BmDX5I6Y/BLUmcMfknqjMEvSZ35//icL3YyQbniAAAAAElFTkSuQmCC", 158 | "text/plain": [ 159 | "
" 160 | ] 161 | }, 162 | "metadata": { 163 | "needs_background": "light" 164 | }, 165 | "output_type": "display_data" 166 | } 167 | ], 168 | "source": [ 169 | "import matplotlib.pyplot as plt # 导入matplotlib\n", 170 | "word_per_comment = [len(comment) for comment in X_train_tokenized_lst]\n", 171 | "plt.hist(word_per_comment, bins = np.arange(0,500,10)) # 显示评论长度分布\n", 172 | "plt.show()" 173 | ] 174 | }, 175 | { 176 | "cell_type": "code", 177 | "execution_count": 5, 178 | "metadata": {}, 179 | "outputs": [], 180 | "source": [ 181 | "from keras.preprocessing.sequence import pad_sequences \n", 182 | "max_comment_length = 100 # 设定评论输入长度为100,并填充默认值(如字数少于100)\n", 183 | "X_train = pad_sequences(X_train_tokenized_lst, maxlen=max_comment_length)" 184 | ] 185 | }, 186 | { 187 | "cell_type": "markdown", 188 | "metadata": {}, 189 | "source": [ 190 | "### 7.4.2 构建包含词嵌入的SimpleRNN" 191 | ] 192 | }, 193 | { 194 | "cell_type": "code", 195 | "execution_count": null, 196 | "metadata": {}, 197 | "outputs": [], 198 | "source": [ 199 | "from keras.models import Sequential # 导入贯序模型\n", 200 | "from keras.layers.embeddings import Embedding #导入词嵌入层\n", 201 | "from keras.layers import Dense #导入全连接层\n", 202 | "from keras.layers import SimpleRNN #导入SimpleRNN层\n", 203 | "embedding_vecor_length = 60 # 设定词嵌入向量长度为60\n", 204 | "rnn = Sequential() # 贯序模型\n", 205 | "rnn.add(Embedding(dictionary_size, embedding_vecor_length, \n", 206 | " input_length=max_comment_length)) # 加入词嵌入层\n", 207 | "rnn.add(SimpleRNN(100)) # 加入SimpleRNN层\n", 208 | "rnn.add(Dense(10, activation='relu')) # 加入全连接层\n", 209 | "rnn.add(Dense(6, activation='softmax')) # 加入分类输出层\n", 210 | "rnn.compile(loss='sparse_categorical_crossentropy', #损失函数\n", 211 | " optimizer='adam', # 优化器\n", 212 | " metrics=['acc']) # 评估指标\n", 213 | "print(rnn.summary()) #打印网络模型 \n" 214 | ] 215 | }, 216 | { 217 | "cell_type": "code", 218 | "execution_count": null, 219 | "metadata": {}, 220 | "outputs": [], 221 | "source": [ 222 | "history = rnn.fit(X_train, y_train,\n", 223 | " validation_split = 0.3,\n", 224 | " epochs = 10,\n", 225 | " batch_size = 64)" 226 | ] 227 | }, 228 | { 229 | "cell_type": "markdown", 230 | "metadata": {}, 231 | "source": [ 232 | "## 7.6 用LSTM鉴定评论文本" 233 | ] 234 | }, 235 | { 236 | "cell_type": "code", 237 | "execution_count": null, 238 | "metadata": {}, 239 | "outputs": [], 240 | "source": [ 241 | "from keras.models import Sequential # 导入贯序模型\n", 242 | "from keras.layers.embeddings import Embedding #导入词嵌入层\n", 243 | "from keras.layers import Dense #导入全连接层\n", 244 | "from keras.layers import LSTM #导入LSTM层\n", 245 | "embedding_vecor_length = 60 # 设定词嵌入向量长度为60\n", 246 | "lstm = Sequential() # 贯序模型\n", 247 | "lstm.add(Embedding(dictionary_size, embedding_vecor_length, \n", 248 | " input_length=max_comment_length)) # 加入词嵌入层\n", 249 | "lstm.add(LSTM(100)) # 加入LSTM层\n", 250 | "lstm.add(Dense(10, activation='relu')) # 加入全连接层\n", 251 | "lstm.add(Dense(6, activation='softmax')) # 加入分类输出层\n", 252 | "lstm.compile(loss='sparse_categorical_crossentropy', #损失函数\n", 253 | " optimizer = 'adam', # 优化器\n", 254 | " metrics = ['acc']) # 评估指标\n", 255 | "history = rnn.fit(X_train, y_train, \n", 256 | " validation_split = 0.3,\n", 257 | " epochs=10, \n", 258 | " batch_size=64)" 259 | ] 260 | } 261 | ], 262 | "metadata": { 263 | "interpreter": { 264 | "hash": "96bbb65fb5df4d9cc0d3b437c46ecfe8c6742e111c114025f0cea31b14306341" 265 | }, 266 | "kernelspec": { 267 | "display_name": "Python 3.8.8 ('Vuean_ML')", 268 | "language": "python", 269 | "name": "python3" 270 | }, 271 | 
"language_info": { 272 | "codemirror_mode": { 273 | "name": "ipython", 274 | "version": 3 275 | }, 276 | "file_extension": ".py", 277 | "mimetype": "text/x-python", 278 | "name": "python", 279 | "nbconvert_exporter": "python", 280 | "pygments_lexer": "ipython3", 281 | "version": "3.8.8" 282 | }, 283 | "orig_nbformat": 4 284 | }, 285 | "nbformat": 4, 286 | "nbformat_minor": 2 287 | } 288 | -------------------------------------------------------------------------------- /Class7/Example/Ex02/C02 CNN1D GRU - New Earth.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "\n", 8 | "### 7.8.1 时序数据的导入和处理" 9 | ] 10 | }, 11 | { 12 | "cell_type": "code", 13 | "execution_count": 1, 14 | "metadata": {}, 15 | "outputs": [ 16 | { 17 | "name": "stdout", 18 | "output_type": "stream", 19 | "text": [ 20 | " LABEL FLUX.1 FLUX.2 FLUX.3 FLUX.4 FLUX.5 FLUX.6 FLUX.7 \\\n", 21 | "0 2 93.85 83.81 20.10 -26.98 -39.56 -124.71 -135.18 \n", 22 | "1 2 -38.88 -33.83 -58.54 -40.09 -79.31 -72.81 -86.55 \n", 23 | "2 2 532.64 535.92 513.73 496.92 456.45 466.00 464.50 \n", 24 | "3 2 326.52 347.39 302.35 298.13 317.74 312.70 322.33 \n", 25 | "4 2 -1107.21 -1112.59 -1118.95 -1095.10 -1057.55 -1034.48 -998.34 \n", 26 | "\n", 27 | " FLUX.8 FLUX.9 ... FLUX.3188 FLUX.3189 FLUX.3190 FLUX.3191 \\\n", 28 | "0 -96.27 -79.89 ... -78.07 -102.15 -102.15 25.13 \n", 29 | "1 -85.33 -83.97 ... -3.28 -32.21 -32.21 -24.89 \n", 30 | "2 486.39 436.56 ... -71.69 13.31 13.31 -29.89 \n", 31 | "3 311.31 312.42 ... 5.71 -3.73 -3.73 30.05 \n", 32 | "4 -1022.71 -989.57 ... -594.37 -401.66 -401.66 -357.24 \n", 33 | "\n", 34 | " FLUX.3192 FLUX.3193 FLUX.3194 FLUX.3195 FLUX.3196 FLUX.3197 \n", 35 | "0 48.57 92.54 39.32 61.42 5.08 -39.54 \n", 36 | "1 -4.86 0.76 -11.70 6.46 16.00 19.93 \n", 37 | "2 -20.88 5.06 -11.80 -28.91 -70.02 -96.67 \n", 38 | "3 20.03 -12.67 -8.77 -17.31 -17.35 13.98 \n", 39 | "4 -443.76 -438.54 -399.71 -384.65 -411.79 -510.54 \n", 40 | "\n", 41 | "[5 rows x 3198 columns]\n", 42 | "\n", 43 | "RangeIndex: 5087 entries, 0 to 5086\n", 44 | "Columns: 3198 entries, LABEL to FLUX.3197\n", 45 | "dtypes: float64(3197), int64(1)\n", 46 | "memory usage: 124.1 MB\n", 47 | "None\n" 48 | ] 49 | } 50 | ], 51 | "source": [ 52 | "# 首先把数据从文件中读入Dataframe:\n", 53 | "import numpy as np\n", 54 | "import pandas as pd\n", 55 | "df_train = pd.read_csv('./dataset/exoTrain.csv')\n", 56 | "df_test = pd.read_csv('./dataset/exoTest.csv')\n", 57 | "print(df_train.head()) # 输入头几行数据\n", 58 | "print(df_train.info()) # 输出训练集信息" 59 | ] 60 | }, 61 | { 62 | "cell_type": "code", 63 | "execution_count": 2, 64 | "metadata": {}, 65 | "outputs": [], 66 | "source": [ 67 | "# 数据集是预先排过序的,下面的代码将其进行乱序排列:\n", 68 | "from sklearn.utils import shuffle # 导入乱序工具\n", 69 | "df_train = shuffle(df_train)\n", 70 | "df_test = shuffle(df_test)" 71 | ] 72 | }, 73 | { 74 | "cell_type": "code", 75 | "execution_count": 3, 76 | "metadata": {}, 77 | "outputs": [ 78 | { 79 | "name": "stdout", 80 | "output_type": "stream", 81 | "text": [ 82 | "[[ 9.17 -29.55 -49.19 ... 22.86 55.46 58.34]\n", 83 | " [ -5.63 -11.97 -7.33 ... 1.64 2.09 4.24]\n", 84 | " [ -91.24 -88.34 -89.36 ... -12.44 -5.5 -7.75]\n", 85 | " ...\n", 86 | " [-112.91 -108.05 -149.54 ... 52.41 23.74 56.08]\n", 87 | " [ -3.27 -8.6 -9.61 ... 2.81 -3.27 5.53]\n", 88 | " [ -53.26 -27.11 -25.67 ... 17.07 -7.19 11.14]]\n", 89 | "[0 0 0 ... 
/Class7/Example/Ex02/C02 CNN1D GRU - New Earth.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "\n",
8 | "### 7.8.1 Importing and processing the time-series data"
9 | ]
10 | },
11 | {
12 | "cell_type": "code",
13 | "execution_count": 1,
14 | "metadata": {},
15 | "outputs": [
16 | {
17 | "name": "stdout",
18 | "output_type": "stream",
19 | "text": [
20 | " LABEL FLUX.1 FLUX.2 FLUX.3 FLUX.4 FLUX.5 FLUX.6 FLUX.7 \\\n",
21 | "0 2 93.85 83.81 20.10 -26.98 -39.56 -124.71 -135.18 \n",
22 | "1 2 -38.88 -33.83 -58.54 -40.09 -79.31 -72.81 -86.55 \n",
23 | "2 2 532.64 535.92 513.73 496.92 456.45 466.00 464.50 \n",
24 | "3 2 326.52 347.39 302.35 298.13 317.74 312.70 322.33 \n",
25 | "4 2 -1107.21 -1112.59 -1118.95 -1095.10 -1057.55 -1034.48 -998.34 \n",
26 | "\n",
27 | " FLUX.8 FLUX.9 ... FLUX.3188 FLUX.3189 FLUX.3190 FLUX.3191 \\\n",
28 | "0 -96.27 -79.89 ... -78.07 -102.15 -102.15 25.13 \n",
29 | "1 -85.33 -83.97 ... -3.28 -32.21 -32.21 -24.89 \n",
30 | "2 486.39 436.56 ... -71.69 13.31 13.31 -29.89 \n",
31 | "3 311.31 312.42 ... 5.71 -3.73 -3.73 30.05 \n",
32 | "4 -1022.71 -989.57 ... -594.37 -401.66 -401.66 -357.24 \n",
33 | "\n",
34 | " FLUX.3192 FLUX.3193 FLUX.3194 FLUX.3195 FLUX.3196 FLUX.3197 \n",
35 | "0 48.57 92.54 39.32 61.42 5.08 -39.54 \n",
36 | "1 -4.86 0.76 -11.70 6.46 16.00 19.93 \n",
37 | "2 -20.88 5.06 -11.80 -28.91 -70.02 -96.67 \n",
38 | "3 20.03 -12.67 -8.77 -17.31 -17.35 13.98 \n",
39 | "4 -443.76 -438.54 -399.71 -384.65 -411.79 -510.54 \n",
40 | "\n",
41 | "[5 rows x 3198 columns]\n",
42 | "<class 'pandas.core.frame.DataFrame'>\n",
43 | "RangeIndex: 5087 entries, 0 to 5086\n",
44 | "Columns: 3198 entries, LABEL to FLUX.3197\n",
45 | "dtypes: float64(3197), int64(1)\n",
46 | "memory usage: 124.1 MB\n",
47 | "None\n"
48 | ]
49 | }
50 | ],
51 | "source": [
52 | "# First, read the data files into DataFrames:\n",
53 | "import numpy as np\n",
54 | "import pandas as pd\n",
55 | "df_train = pd.read_csv('./dataset/exoTrain.csv')\n",
56 | "df_test = pd.read_csv('./dataset/exoTest.csv')\n",
57 | "print(df_train.head()) # Print the first few rows\n",
58 | "print(df_train.info()) # Print the training-set info"
59 | ]
60 | },
61 | {
62 | "cell_type": "code",
63 | "execution_count": 2,
64 | "metadata": {},
65 | "outputs": [],
66 | "source": [
67 | "# The dataset comes pre-sorted, so the code below shuffles it:\n",
68 | "from sklearn.utils import shuffle # Import the shuffle utility\n",
69 | "df_train = shuffle(df_train)\n",
70 | "df_test = shuffle(df_test)"
71 | ]
72 | },
73 | {
74 | "cell_type": "code",
75 | "execution_count": 3,
76 | "metadata": {},
77 | "outputs": [
78 | {
79 | "name": "stdout",
80 | "output_type": "stream",
81 | "text": [
82 | "[[ 9.17 -29.55 -49.19 ... 22.86 55.46 58.34]\n",
83 | " [ -5.63 -11.97 -7.33 ... 1.64 2.09 4.24]\n",
84 | " [ -91.24 -88.34 -89.36 ... -12.44 -5.5 -7.75]\n",
85 | " ...\n",
86 | " [-112.91 -108.05 -149.54 ... 52.41 23.74 56.08]\n",
87 | " [ -3.27 -8.6 -9.61 ... 2.81 -3.27 5.53]\n",
88 | " [ -53.26 -27.11 -25.67 ... 17.07 -7.19 11.14]]\n",
89 | "[0 0 0 ... 0 0 0]\n"
90 | ]
91 | }
92 | ],
93 | "source": [
94 | "X_train = df_train.iloc[:,1:].values # Build the feature set (training)\n",
95 | "y_train = df_train.iloc[:,0].values # Build the label set (training)\n",
96 | "X_test = df_test.iloc[:,1:].values # Build the feature set (test)\n",
97 | "y_test = df_test.iloc[:,0].values # Build the label set (test)\n",
98 | "y_train = y_train - 1 # Convert the labels to the conventional (0,1) classes\n",
99 | "y_test = y_test - 1\n",
100 | "print (X_train) # Print the training-set features\n",
101 | "print (y_train) # Print the training-set labels"
102 | ]
103 | },
104 | {
105 | "cell_type": "code",
106 | "execution_count": 4,
107 | "metadata": {},
108 | "outputs": [
109 | {
110 | "name": "stdout",
111 | "output_type": "stream",
112 | "text": [
113 | "(5087, 3197, 1)\n"
114 | ]
115 | }
116 | ],
117 | "source": [
118 | "X_train = np.expand_dims(X_train, axis=2) # Add a rank to the tensor, as sequence datasets require\n",
119 | "X_test = np.expand_dims(X_test, axis=2) # Add a rank to the tensor, as sequence datasets require\n",
120 | "print(X_train.shape)"
121 | ]
122 | },
123 | {
124 | "cell_type": "code",
125 | "execution_count": null,
126 | "metadata": {},
127 | "outputs": [],
128 | "source": [
129 | "from keras.models import Sequential # Import the sequential model\n",
130 | "from keras import layers # Import all layer types\n",
131 | "from keras.optimizers import Adam # Import the optimizer\n",
132 | "model = Sequential() # Sequential model\n",
133 | "model.add(layers.Conv1D(32, kernel_size = 10, strides = 4,\n",
134 | " input_shape = (3197, 1))) # 1D convolution layer\n",
135 | "model.add(layers.MaxPooling1D(pool_size = 4, strides = 2)) # Pooling layer\n",
136 | "model.add(layers.GRU(256, return_sequences=True)) # Key point: the GRU layer needs to be large enough\n",
137 | "model.add(layers.Flatten()) # Flatten\n",
138 | "model.add(layers.Dropout(0.5)) # Dropout layer\n",
139 | "model.add(layers.BatchNormalization()) # Batch normalization\n",
140 | "model.add(layers.Dense(1, activation='sigmoid')) # Classification output layer\n",
141 | "opt = Adam(lr = 0.0001, beta_1=0.9, beta_2=0.999, decay=0.01)\n",
142 | "model.compile(optimizer=opt, # Optimizer\n",
143 | " loss = 'binary_crossentropy', # Cross entropy\n",
144 | " metrics = ['accuracy']) # Accuracy"
145 | ]
146 | },
147 | {
148 | "cell_type": "code",
149 | "execution_count": null,
150 | "metadata": {},
151 | "outputs": [],
152 | "source": [
153 | "history = model.fit(X_train,y_train, # Training set\n",
154 | " validation_split = 0.2, # Split part of the training data off as a validation set\n",
155 | " batch_size = 128, # Batch size\n",
156 | " epochs = 4, # Training epochs\n",
157 | " shuffle = True) # Shuffle"
158 | ]
159 | },
160 | {
161 | "cell_type": "markdown",
162 | "metadata": {},
163 | "source": [
164 | "### 7.8.3 Adjusting the output threshold"
165 | ]
166 | },
167 | {
168 | "cell_type": "code",
169 | "execution_count": null,
170 | "metadata": {},
171 | "outputs": [],
172 | "source": [
173 | "from sklearn.metrics import classification_report # Classification report\n",
174 | "from sklearn.metrics import confusion_matrix # Confusion matrix\n",
175 | "y_prob = model.predict(X_test) # Predict on the test set\n",
176 | "y_pred = np.where(y_prob > 0.5, 1, 0) # Convert probabilities to class labels\n",
177 | "cm = confusion_matrix(y_test, y_pred)\n",
178 | "print('Confusion matrix:\\n', cm, '\\n')\n",
179 | "print(classification_report(y_test, y_pred))"
180 | ]
181 | },
182 | {
183 | "cell_type": "markdown",
184 | "metadata": {},
185 | "source": [
186 | "Threshold adjustment"
187 | ]
188 | },
189 | {
190 | "cell_type": "code",
191 | "execution_count": null,
192 | "metadata": {},
193 | "outputs": [],
194 | "source": [
195 | "y_pred = np.where(y_prob > 0.15, 1, 0) # Adjust the threshold\n",
196 | "cm = confusion_matrix(y_test, y_pred) \n",
197 | "print('Confusion matrix:\\n', cm, '\\n')\n",
198 | "print(classification_report(y_test, y_pred))"
199 | ]
200 | },
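{
"cell_type": "markdown",
"metadata": {},
"source": [
"Rather than trying thresholds one at a time, the whole precision/recall trade-off can be scanned in a single pass (a sketch, not a cell from the original notebook; it assumes scikit-learn's precision_recall_curve and the y_prob array computed above):"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from sklearn.metrics import precision_recall_curve # Scan every threshold in one pass\n",
"precision, recall, thresholds = precision_recall_curve(y_test, y_prob.ravel())\n",
"for p, r, t in list(zip(precision, recall, thresholds))[::200]: # Sample every 200th point\n",
"    print('threshold=%.3f precision=%.3f recall=%.3f' % (t, p, r))"
]
},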
"code", 210 | "execution_count": null, 211 | "metadata": {}, 212 | "outputs": [], 213 | "source": [ 214 | "from keras import layers # 导入各种层\n", 215 | "from keras.models import Model # 导入模型\n", 216 | "from keras.optimizers import Adam # 导入Adam优化器\n", 217 | "input = layers.Input(shape=(3197, 1)) # Input\n", 218 | "# 通过函数式API构建模型\n", 219 | "x = layers.Conv1D(32, kernel_size=10, strides=4)(input)\n", 220 | "x = layers.MaxPooling1D(pool_size=4, strides=2)(x)\n", 221 | "x = layers.GRU(256, return_sequences=True)(x)\n", 222 | "x = layers.Flatten()(x)\n", 223 | "x = layers.Dropout(0.5)(x)\n", 224 | "x = layers.BatchNormalization()(x)\n", 225 | "output = layers.Dense(1, activation='sigmoid')(x) # Output\n", 226 | "model = Model(input, output) \n", 227 | "model.summary() # 显示模型的输出\n", 228 | "opt = Adam(lr=0.0001, beta_1=0.9, beta_2=0.999, decay=0.01) # 设置优化器\n", 229 | "model.compile(optimizer=opt, # 优化器\n", 230 | " loss = 'binary_crossentropy', # 交叉熵\n", 231 | " metrics=['accuracy']) # 准确率" 232 | ] 233 | }, 234 | { 235 | "cell_type": "markdown", 236 | "metadata": {}, 237 | "source": [ 238 | "双向RNN模型:" 239 | ] 240 | }, 241 | { 242 | "cell_type": "code", 243 | "execution_count": null, 244 | "metadata": {}, 245 | "outputs": [], 246 | "source": [ 247 | "# 首先在给输入数据集升维之前数据集进行逆序:\n", 248 | "X_train_rev = [X[::-1] for X in X_train]\n", 249 | "X_test_rev = [X[::-1] for X in X_test]\n", 250 | "X_train = np.expand_dims(X_train, axis=2)\n", 251 | "X_train_rev = np.expand_dims(X_train_rev, axis=2)\n", 252 | "X_test = np.expand_dims(X_test, axis=2)\n", 253 | "X_test_rev = np.expand_dims(X_test_rev, axis=2)" 254 | ] 255 | }, 256 | { 257 | "cell_type": "code", 258 | "execution_count": null, 259 | "metadata": {}, 260 | "outputs": [], 261 | "source": [ 262 | "# 再构建多头网络:\n", 263 | "# 构建正向网络\n", 264 | "input_1 = layers.Input(shape=(3197, 1))\n", 265 | "x = layers.GRU(32, return_sequences=True)(input_1)\n", 266 | "x = layers.Flatten()(x)\n", 267 | "x = layers.Dropout(0.5)(x)\n", 268 | "# 构建逆向网络\n", 269 | "input_2 = layers.Input(shape=(3197, 1))\n", 270 | "y = layers.GRU(32, return_sequences=True)(input_2)\n", 271 | "y = layers.Flatten()(y)\n", 272 | "y = layers.Dropout(0.5)(y)\n", 273 | "# 连接两个网络\n", 274 | "z = layers.concatenate([x, y])\n", 275 | "output = layers.Dense(1, activation='sigmoid')(z)\n", 276 | "model = Model([input_1,input_2], output)\n", 277 | "model.summary()" 278 | ] 279 | }, 280 | { 281 | "cell_type": "code", 282 | "execution_count": null, 283 | "metadata": {}, 284 | "outputs": [], 285 | "source": [ 286 | "history = model.fit([X_train, X_train_rev], y_train, # 训练集\n", 287 | " validation_split = 0.2, # 部分训练集数据拆分成验证集\n", 288 | " batch_size = 128, # 批量大小\n", 289 | " epochs = 1, # 训练轮次\n", 290 | " shuffle = True) # 乱序" 291 | ] 292 | } 293 | ], 294 | "metadata": { 295 | "interpreter": { 296 | "hash": "96bbb65fb5df4d9cc0d3b437c46ecfe8c6742e111c114025f0cea31b14306341" 297 | }, 298 | "kernelspec": { 299 | "display_name": "Python 3.8.8 ('Vuean_ML')", 300 | "language": "python", 301 | "name": "python3" 302 | }, 303 | "language_info": { 304 | "codemirror_mode": { 305 | "name": "ipython", 306 | "version": 3 307 | }, 308 | "file_extension": ".py", 309 | "mimetype": "text/x-python", 310 | "name": "python", 311 | "nbconvert_exporter": "python", 312 | "pygments_lexer": "ipython3", 313 | "version": "3.8.8" 314 | }, 315 | "orig_nbformat": 4 316 | }, 317 | "nbformat": 4, 318 | "nbformat_minor": 2 319 | } 320 | -------------------------------------------------------------------------------- /Class7/Example/Ex03/C03 RNN - 
Quora Queries.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Vuean/Zero-Basic-Machine-Learning/c5a44fa0fd964c11aa87c17259d1c1f5f6581946/Class7/Example/Ex03/C03 RNN - Quora Queries.ipynb -------------------------------------------------------------------------------- /Class7/figures/fig01_普通网络的神经元.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Vuean/Zero-Basic-Machine-Learning/c5a44fa0fd964c11aa87c17259d1c1f5f6581946/Class7/figures/fig01_普通网络的神经元.jpg -------------------------------------------------------------------------------- /Class7/figures/fig02_循环神经网络中的神经元.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Vuean/Zero-Basic-Machine-Learning/c5a44fa0fd964c11aa87c17259d1c1f5f6581946/Class7/figures/fig02_循环神经网络中的神经元.jpg -------------------------------------------------------------------------------- /Class7/figures/fig03_多个循环神经网络的神经元.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Vuean/Zero-Basic-Machine-Learning/c5a44fa0fd964c11aa87c17259d1c1f5f6581946/Class7/figures/fig03_多个循环神经网络的神经元.jpg -------------------------------------------------------------------------------- /Class7/figures/fig04_时间点1读入一个特征.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Vuean/Zero-Basic-Machine-Learning/c5a44fa0fd964c11aa87c17259d1c1f5f6581946/Class7/figures/fig04_时间点1读入一个特征.jpg -------------------------------------------------------------------------------- /Class7/figures/fig05_时间点2读入两个特征.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Vuean/Zero-Basic-Machine-Learning/c5a44fa0fd964c11aa87c17259d1c1f5f6581946/Class7/figures/fig05_时间点2读入两个特征.jpg -------------------------------------------------------------------------------- /Class7/figures/fig06_遍历特征处理.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Vuean/Zero-Basic-Machine-Learning/c5a44fa0fd964c11aa87c17259d1c1f5f6581946/Class7/figures/fig06_遍历特征处理.jpg -------------------------------------------------------------------------------- /Class7/figures/fig07_One-hot编码.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Vuean/Zero-Basic-Machine-Learning/c5a44fa0fd964c11aa87c17259d1c1f5f6581946/Class7/figures/fig07_One-hot编码.jpg -------------------------------------------------------------------------------- /Class7/figures/fig08_分词和词嵌入示意图.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Vuean/Zero-Basic-Machine-Learning/c5a44fa0fd964c11aa87c17259d1c1f5f6581946/Class7/figures/fig08_分词和词嵌入示意图.jpg -------------------------------------------------------------------------------- /Class7/figures/fig09_影片形成的词嵌入空间.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Vuean/Zero-Basic-Machine-Learning/c5a44fa0fd964c11aa87c17259d1c1f5f6581946/Class7/figures/fig09_影片形成的词嵌入空间.jpg -------------------------------------------------------------------------------- /Class7/figures/fig10_训练集中的前五条数据.jpg: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/Vuean/Zero-Basic-Machine-Learning/c5a44fa0fd964c11aa87c17259d1c1f5f6581946/Class7/figures/fig10_训练集中的前五条数据.jpg -------------------------------------------------------------------------------- /Class7/figures/fig11_评论长度分布.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Vuean/Zero-Basic-Machine-Learning/c5a44fa0fd964c11aa87c17259d1c1f5f6581946/Class7/figures/fig11_评论长度分布.jpg -------------------------------------------------------------------------------- /Class7/figures/fig12_包含词嵌入的SimpleRNN网络结构.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Vuean/Zero-Basic-Machine-Learning/c5a44fa0fd964c11aa87c17259d1c1f5f6581946/Class7/figures/fig12_包含词嵌入的SimpleRNN网络结构.jpg -------------------------------------------------------------------------------- /Class8/Example/Ex01/dataset/heart.csv: -------------------------------------------------------------------------------- 1 | age,sex,cp,trestbps,chol,fbs,restecg,thalach,exang,oldpeak,slope,ca,thal,target 2 | 63,1,3,145,233,1,0,150,0,2.3,0,0,1,1 3 | 37,1,2,130,250,0,1,187,0,3.5,0,0,2,1 4 | 41,0,1,130,204,0,0,172,0,1.4,2,0,2,1 5 | 56,1,1,120,236,0,1,178,0,0.8,2,0,2,1 6 | 57,0,0,120,354,0,1,163,1,0.6,2,0,2,1 7 | 57,1,0,140,192,0,1,148,0,0.4,1,0,1,1 8 | 56,0,1,140,294,0,0,153,0,1.3,1,0,2,1 9 | 44,1,1,120,263,0,1,173,0,0,2,0,3,1 10 | 52,1,2,172,199,1,1,162,0,0.5,2,0,3,1 11 | 57,1,2,150,168,0,1,174,0,1.6,2,0,2,1 12 | 54,1,0,140,239,0,1,160,0,1.2,2,0,2,1 13 | 48,0,2,130,275,0,1,139,0,0.2,2,0,2,1 14 | 49,1,1,130,266,0,1,171,0,0.6,2,0,2,1 15 | 64,1,3,110,211,0,0,144,1,1.8,1,0,2,1 16 | 58,0,3,150,283,1,0,162,0,1,2,0,2,1 17 | 50,0,2,120,219,0,1,158,0,1.6,1,0,2,1 18 | 58,0,2,120,340,0,1,172,0,0,2,0,2,1 19 | 66,0,3,150,226,0,1,114,0,2.6,0,0,2,1 20 | 43,1,0,150,247,0,1,171,0,1.5,2,0,2,1 21 | 69,0,3,140,239,0,1,151,0,1.8,2,2,2,1 22 | 59,1,0,135,234,0,1,161,0,0.5,1,0,3,1 23 | 44,1,2,130,233,0,1,179,1,0.4,2,0,2,1 24 | 42,1,0,140,226,0,1,178,0,0,2,0,2,1 25 | 61,1,2,150,243,1,1,137,1,1,1,0,2,1 26 | 40,1,3,140,199,0,1,178,1,1.4,2,0,3,1 27 | 71,0,1,160,302,0,1,162,0,0.4,2,2,2,1 28 | 59,1,2,150,212,1,1,157,0,1.6,2,0,2,1 29 | 51,1,2,110,175,0,1,123,0,0.6,2,0,2,1 30 | 65,0,2,140,417,1,0,157,0,0.8,2,1,2,1 31 | 53,1,2,130,197,1,0,152,0,1.2,0,0,2,1 32 | 41,0,1,105,198,0,1,168,0,0,2,1,2,1 33 | 65,1,0,120,177,0,1,140,0,0.4,2,0,3,1 34 | 44,1,1,130,219,0,0,188,0,0,2,0,2,1 35 | 54,1,2,125,273,0,0,152,0,0.5,0,1,2,1 36 | 51,1,3,125,213,0,0,125,1,1.4,2,1,2,1 37 | 46,0,2,142,177,0,0,160,1,1.4,0,0,2,1 38 | 54,0,2,135,304,1,1,170,0,0,2,0,2,1 39 | 54,1,2,150,232,0,0,165,0,1.6,2,0,3,1 40 | 65,0,2,155,269,0,1,148,0,0.8,2,0,2,1 41 | 65,0,2,160,360,0,0,151,0,0.8,2,0,2,1 42 | 51,0,2,140,308,0,0,142,0,1.5,2,1,2,1 43 | 48,1,1,130,245,0,0,180,0,0.2,1,0,2,1 44 | 45,1,0,104,208,0,0,148,1,3,1,0,2,1 45 | 53,0,0,130,264,0,0,143,0,0.4,1,0,2,1 46 | 39,1,2,140,321,0,0,182,0,0,2,0,2,1 47 | 52,1,1,120,325,0,1,172,0,0.2,2,0,2,1 48 | 44,1,2,140,235,0,0,180,0,0,2,0,2,1 49 | 47,1,2,138,257,0,0,156,0,0,2,0,2,1 50 | 53,0,2,128,216,0,0,115,0,0,2,0,0,1 51 | 53,0,0,138,234,0,0,160,0,0,2,0,2,1 52 | 51,0,2,130,256,0,0,149,0,0.5,2,0,2,1 53 | 66,1,0,120,302,0,0,151,0,0.4,1,0,2,1 54 | 62,1,2,130,231,0,1,146,0,1.8,1,3,3,1 55 | 44,0,2,108,141,0,1,175,0,0.6,1,0,2,1 56 | 63,0,2,135,252,0,0,172,0,0,2,0,2,1 57 | 52,1,1,134,201,0,1,158,0,0.8,2,1,2,1 58 | 
48,1,0,122,222,0,0,186,0,0,2,0,2,1 59 | 45,1,0,115,260,0,0,185,0,0,2,0,2,1 60 | 34,1,3,118,182,0,0,174,0,0,2,0,2,1 61 | 57,0,0,128,303,0,0,159,0,0,2,1,2,1 62 | 71,0,2,110,265,1,0,130,0,0,2,1,2,1 63 | 54,1,1,108,309,0,1,156,0,0,2,0,3,1 64 | 52,1,3,118,186,0,0,190,0,0,1,0,1,1 65 | 41,1,1,135,203,0,1,132,0,0,1,0,1,1 66 | 58,1,2,140,211,1,0,165,0,0,2,0,2,1 67 | 35,0,0,138,183,0,1,182,0,1.4,2,0,2,1 68 | 51,1,2,100,222,0,1,143,1,1.2,1,0,2,1 69 | 45,0,1,130,234,0,0,175,0,0.6,1,0,2,1 70 | 44,1,1,120,220,0,1,170,0,0,2,0,2,1 71 | 62,0,0,124,209,0,1,163,0,0,2,0,2,1 72 | 54,1,2,120,258,0,0,147,0,0.4,1,0,3,1 73 | 51,1,2,94,227,0,1,154,1,0,2,1,3,1 74 | 29,1,1,130,204,0,0,202,0,0,2,0,2,1 75 | 51,1,0,140,261,0,0,186,1,0,2,0,2,1 76 | 43,0,2,122,213,0,1,165,0,0.2,1,0,2,1 77 | 55,0,1,135,250,0,0,161,0,1.4,1,0,2,1 78 | 51,1,2,125,245,1,0,166,0,2.4,1,0,2,1 79 | 59,1,1,140,221,0,1,164,1,0,2,0,2,1 80 | 52,1,1,128,205,1,1,184,0,0,2,0,2,1 81 | 58,1,2,105,240,0,0,154,1,0.6,1,0,3,1 82 | 41,1,2,112,250,0,1,179,0,0,2,0,2,1 83 | 45,1,1,128,308,0,0,170,0,0,2,0,2,1 84 | 60,0,2,102,318,0,1,160,0,0,2,1,2,1 85 | 52,1,3,152,298,1,1,178,0,1.2,1,0,3,1 86 | 42,0,0,102,265,0,0,122,0,0.6,1,0,2,1 87 | 67,0,2,115,564,0,0,160,0,1.6,1,0,3,1 88 | 68,1,2,118,277,0,1,151,0,1,2,1,3,1 89 | 46,1,1,101,197,1,1,156,0,0,2,0,3,1 90 | 54,0,2,110,214,0,1,158,0,1.6,1,0,2,1 91 | 58,0,0,100,248,0,0,122,0,1,1,0,2,1 92 | 48,1,2,124,255,1,1,175,0,0,2,2,2,1 93 | 57,1,0,132,207,0,1,168,1,0,2,0,3,1 94 | 52,1,2,138,223,0,1,169,0,0,2,4,2,1 95 | 54,0,1,132,288,1,0,159,1,0,2,1,2,1 96 | 45,0,1,112,160,0,1,138,0,0,1,0,2,1 97 | 53,1,0,142,226,0,0,111,1,0,2,0,3,1 98 | 62,0,0,140,394,0,0,157,0,1.2,1,0,2,1 99 | 52,1,0,108,233,1,1,147,0,0.1,2,3,3,1 100 | 43,1,2,130,315,0,1,162,0,1.9,2,1,2,1 101 | 53,1,2,130,246,1,0,173,0,0,2,3,2,1 102 | 42,1,3,148,244,0,0,178,0,0.8,2,2,2,1 103 | 59,1,3,178,270,0,0,145,0,4.2,0,0,3,1 104 | 63,0,1,140,195,0,1,179,0,0,2,2,2,1 105 | 42,1,2,120,240,1,1,194,0,0.8,0,0,3,1 106 | 50,1,2,129,196,0,1,163,0,0,2,0,2,1 107 | 68,0,2,120,211,0,0,115,0,1.5,1,0,2,1 108 | 69,1,3,160,234,1,0,131,0,0.1,1,1,2,1 109 | 45,0,0,138,236,0,0,152,1,0.2,1,0,2,1 110 | 50,0,1,120,244,0,1,162,0,1.1,2,0,2,1 111 | 50,0,0,110,254,0,0,159,0,0,2,0,2,1 112 | 64,0,0,180,325,0,1,154,1,0,2,0,2,1 113 | 57,1,2,150,126,1,1,173,0,0.2,2,1,3,1 114 | 64,0,2,140,313,0,1,133,0,0.2,2,0,3,1 115 | 43,1,0,110,211,0,1,161,0,0,2,0,3,1 116 | 55,1,1,130,262,0,1,155,0,0,2,0,2,1 117 | 37,0,2,120,215,0,1,170,0,0,2,0,2,1 118 | 41,1,2,130,214,0,0,168,0,2,1,0,2,1 119 | 56,1,3,120,193,0,0,162,0,1.9,1,0,3,1 120 | 46,0,1,105,204,0,1,172,0,0,2,0,2,1 121 | 46,0,0,138,243,0,0,152,1,0,1,0,2,1 122 | 64,0,0,130,303,0,1,122,0,2,1,2,2,1 123 | 59,1,0,138,271,0,0,182,0,0,2,0,2,1 124 | 41,0,2,112,268,0,0,172,1,0,2,0,2,1 125 | 54,0,2,108,267,0,0,167,0,0,2,0,2,1 126 | 39,0,2,94,199,0,1,179,0,0,2,0,2,1 127 | 34,0,1,118,210,0,1,192,0,0.7,2,0,2,1 128 | 47,1,0,112,204,0,1,143,0,0.1,2,0,2,1 129 | 67,0,2,152,277,0,1,172,0,0,2,1,2,1 130 | 52,0,2,136,196,0,0,169,0,0.1,1,0,2,1 131 | 74,0,1,120,269,0,0,121,1,0.2,2,1,2,1 132 | 54,0,2,160,201,0,1,163,0,0,2,1,2,1 133 | 49,0,1,134,271,0,1,162,0,0,1,0,2,1 134 | 42,1,1,120,295,0,1,162,0,0,2,0,2,1 135 | 41,1,1,110,235,0,1,153,0,0,2,0,2,1 136 | 41,0,1,126,306,0,1,163,0,0,2,0,2,1 137 | 49,0,0,130,269,0,1,163,0,0,2,0,2,1 138 | 60,0,2,120,178,1,1,96,0,0,2,0,2,1 139 | 62,1,1,128,208,1,0,140,0,0,2,0,2,1 140 | 57,1,0,110,201,0,1,126,1,1.5,1,0,1,1 141 | 64,1,0,128,263,0,1,105,1,0.2,1,1,3,1 142 | 51,0,2,120,295,0,0,157,0,0.6,2,0,2,1 143 | 43,1,0,115,303,0,1,181,0,1.2,1,0,2,1 144 | 
42,0,2,120,209,0,1,173,0,0,1,0,2,1 145 | 67,0,0,106,223,0,1,142,0,0.3,2,2,2,1 146 | 76,0,2,140,197,0,2,116,0,1.1,1,0,2,1 147 | 70,1,1,156,245,0,0,143,0,0,2,0,2,1 148 | 44,0,2,118,242,0,1,149,0,0.3,1,1,2,1 149 | 60,0,3,150,240,0,1,171,0,0.9,2,0,2,1 150 | 44,1,2,120,226,0,1,169,0,0,2,0,2,1 151 | 42,1,2,130,180,0,1,150,0,0,2,0,2,1 152 | 66,1,0,160,228,0,0,138,0,2.3,2,0,1,1 153 | 71,0,0,112,149,0,1,125,0,1.6,1,0,2,1 154 | 64,1,3,170,227,0,0,155,0,0.6,1,0,3,1 155 | 66,0,2,146,278,0,0,152,0,0,1,1,2,1 156 | 39,0,2,138,220,0,1,152,0,0,1,0,2,1 157 | 58,0,0,130,197,0,1,131,0,0.6,1,0,2,1 158 | 47,1,2,130,253,0,1,179,0,0,2,0,2,1 159 | 35,1,1,122,192,0,1,174,0,0,2,0,2,1 160 | 58,1,1,125,220,0,1,144,0,0.4,1,4,3,1 161 | 56,1,1,130,221,0,0,163,0,0,2,0,3,1 162 | 56,1,1,120,240,0,1,169,0,0,0,0,2,1 163 | 55,0,1,132,342,0,1,166,0,1.2,2,0,2,1 164 | 41,1,1,120,157,0,1,182,0,0,2,0,2,1 165 | 38,1,2,138,175,0,1,173,0,0,2,4,2,1 166 | 38,1,2,138,175,0,1,173,0,0,2,4,2,1 167 | 67,1,0,160,286,0,0,108,1,1.5,1,3,2,0 168 | 67,1,0,120,229,0,0,129,1,2.6,1,2,3,0 169 | 62,0,0,140,268,0,0,160,0,3.6,0,2,2,0 170 | 63,1,0,130,254,0,0,147,0,1.4,1,1,3,0 171 | 53,1,0,140,203,1,0,155,1,3.1,0,0,3,0 172 | 56,1,2,130,256,1,0,142,1,0.6,1,1,1,0 173 | 48,1,1,110,229,0,1,168,0,1,0,0,3,0 174 | 58,1,1,120,284,0,0,160,0,1.8,1,0,2,0 175 | 58,1,2,132,224,0,0,173,0,3.2,2,2,3,0 176 | 60,1,0,130,206,0,0,132,1,2.4,1,2,3,0 177 | 40,1,0,110,167,0,0,114,1,2,1,0,3,0 178 | 60,1,0,117,230,1,1,160,1,1.4,2,2,3,0 179 | 64,1,2,140,335,0,1,158,0,0,2,0,2,0 180 | 43,1,0,120,177,0,0,120,1,2.5,1,0,3,0 181 | 57,1,0,150,276,0,0,112,1,0.6,1,1,1,0 182 | 55,1,0,132,353,0,1,132,1,1.2,1,1,3,0 183 | 65,0,0,150,225,0,0,114,0,1,1,3,3,0 184 | 61,0,0,130,330,0,0,169,0,0,2,0,2,0 185 | 58,1,2,112,230,0,0,165,0,2.5,1,1,3,0 186 | 50,1,0,150,243,0,0,128,0,2.6,1,0,3,0 187 | 44,1,0,112,290,0,0,153,0,0,2,1,2,0 188 | 60,1,0,130,253,0,1,144,1,1.4,2,1,3,0 189 | 54,1,0,124,266,0,0,109,1,2.2,1,1,3,0 190 | 50,1,2,140,233,0,1,163,0,0.6,1,1,3,0 191 | 41,1,0,110,172,0,0,158,0,0,2,0,3,0 192 | 51,0,0,130,305,0,1,142,1,1.2,1,0,3,0 193 | 58,1,0,128,216,0,0,131,1,2.2,1,3,3,0 194 | 54,1,0,120,188,0,1,113,0,1.4,1,1,3,0 195 | 60,1,0,145,282,0,0,142,1,2.8,1,2,3,0 196 | 60,1,2,140,185,0,0,155,0,3,1,0,2,0 197 | 59,1,0,170,326,0,0,140,1,3.4,0,0,3,0 198 | 46,1,2,150,231,0,1,147,0,3.6,1,0,2,0 199 | 67,1,0,125,254,1,1,163,0,0.2,1,2,3,0 200 | 62,1,0,120,267,0,1,99,1,1.8,1,2,3,0 201 | 65,1,0,110,248,0,0,158,0,0.6,2,2,1,0 202 | 44,1,0,110,197,0,0,177,0,0,2,1,2,0 203 | 60,1,0,125,258,0,0,141,1,2.8,1,1,3,0 204 | 58,1,0,150,270,0,0,111,1,0.8,2,0,3,0 205 | 68,1,2,180,274,1,0,150,1,1.6,1,0,3,0 206 | 62,0,0,160,164,0,0,145,0,6.2,0,3,3,0 207 | 52,1,0,128,255,0,1,161,1,0,2,1,3,0 208 | 59,1,0,110,239,0,0,142,1,1.2,1,1,3,0 209 | 60,0,0,150,258,0,0,157,0,2.6,1,2,3,0 210 | 49,1,2,120,188,0,1,139,0,2,1,3,3,0 211 | 59,1,0,140,177,0,1,162,1,0,2,1,3,0 212 | 57,1,2,128,229,0,0,150,0,0.4,1,1,3,0 213 | 61,1,0,120,260,0,1,140,1,3.6,1,1,3,0 214 | 39,1,0,118,219,0,1,140,0,1.2,1,0,3,0 215 | 61,0,0,145,307,0,0,146,1,1,1,0,3,0 216 | 56,1,0,125,249,1,0,144,1,1.2,1,1,2,0 217 | 43,0,0,132,341,1,0,136,1,3,1,0,3,0 218 | 62,0,2,130,263,0,1,97,0,1.2,1,1,3,0 219 | 63,1,0,130,330,1,0,132,1,1.8,2,3,3,0 220 | 65,1,0,135,254,0,0,127,0,2.8,1,1,3,0 221 | 48,1,0,130,256,1,0,150,1,0,2,2,3,0 222 | 63,0,0,150,407,0,0,154,0,4,1,3,3,0 223 | 55,1,0,140,217,0,1,111,1,5.6,0,0,3,0 224 | 65,1,3,138,282,1,0,174,0,1.4,1,1,2,0 225 | 56,0,0,200,288,1,0,133,1,4,0,2,3,0 226 | 54,1,0,110,239,0,1,126,1,2.8,1,1,3,0 227 | 70,1,0,145,174,0,1,125,1,2.6,0,0,3,0 228 | 
62,1,1,120,281,0,0,103,0,1.4,1,1,3,0 229 | 35,1,0,120,198,0,1,130,1,1.6,1,0,3,0 230 | 59,1,3,170,288,0,0,159,0,0.2,1,0,3,0 231 | 64,1,2,125,309,0,1,131,1,1.8,1,0,3,0 232 | 47,1,2,108,243,0,1,152,0,0,2,0,2,0 233 | 57,1,0,165,289,1,0,124,0,1,1,3,3,0 234 | 55,1,0,160,289,0,0,145,1,0.8,1,1,3,0 235 | 64,1,0,120,246,0,0,96,1,2.2,0,1,2,0 236 | 70,1,0,130,322,0,0,109,0,2.4,1,3,2,0 237 | 51,1,0,140,299,0,1,173,1,1.6,2,0,3,0 238 | 58,1,0,125,300,0,0,171,0,0,2,2,3,0 239 | 60,1,0,140,293,0,0,170,0,1.2,1,2,3,0 240 | 77,1,0,125,304,0,0,162,1,0,2,3,2,0 241 | 35,1,0,126,282,0,0,156,1,0,2,0,3,0 242 | 70,1,2,160,269,0,1,112,1,2.9,1,1,3,0 243 | 59,0,0,174,249,0,1,143,1,0,1,0,2,0 244 | 64,1,0,145,212,0,0,132,0,2,1,2,1,0 245 | 57,1,0,152,274,0,1,88,1,1.2,1,1,3,0 246 | 56,1,0,132,184,0,0,105,1,2.1,1,1,1,0 247 | 48,1,0,124,274,0,0,166,0,0.5,1,0,3,0 248 | 56,0,0,134,409,0,0,150,1,1.9,1,2,3,0 249 | 66,1,1,160,246,0,1,120,1,0,1,3,1,0 250 | 54,1,1,192,283,0,0,195,0,0,2,1,3,0 251 | 69,1,2,140,254,0,0,146,0,2,1,3,3,0 252 | 51,1,0,140,298,0,1,122,1,4.2,1,3,3,0 253 | 43,1,0,132,247,1,0,143,1,0.1,1,4,3,0 254 | 62,0,0,138,294,1,1,106,0,1.9,1,3,2,0 255 | 67,1,0,100,299,0,0,125,1,0.9,1,2,2,0 256 | 59,1,3,160,273,0,0,125,0,0,2,0,2,0 257 | 45,1,0,142,309,0,0,147,1,0,1,3,3,0 258 | 58,1,0,128,259,0,0,130,1,3,1,2,3,0 259 | 50,1,0,144,200,0,0,126,1,0.9,1,0,3,0 260 | 62,0,0,150,244,0,1,154,1,1.4,1,0,2,0 261 | 38,1,3,120,231,0,1,182,1,3.8,1,0,3,0 262 | 66,0,0,178,228,1,1,165,1,1,1,2,3,0 263 | 52,1,0,112,230,0,1,160,0,0,2,1,2,0 264 | 53,1,0,123,282,0,1,95,1,2,1,2,3,0 265 | 63,0,0,108,269,0,1,169,1,1.8,1,2,2,0 266 | 54,1,0,110,206,0,0,108,1,0,1,1,2,0 267 | 66,1,0,112,212,0,0,132,1,0.1,2,1,2,0 268 | 55,0,0,180,327,0,2,117,1,3.4,1,0,2,0 269 | 49,1,2,118,149,0,0,126,0,0.8,2,3,2,0 270 | 54,1,0,122,286,0,0,116,1,3.2,1,2,2,0 271 | 56,1,0,130,283,1,0,103,1,1.6,0,0,3,0 272 | 46,1,0,120,249,0,0,144,0,0.8,2,0,3,0 273 | 61,1,3,134,234,0,1,145,0,2.6,1,2,2,0 274 | 67,1,0,120,237,0,1,71,0,1,1,0,2,0 275 | 58,1,0,100,234,0,1,156,0,0.1,2,1,3,0 276 | 47,1,0,110,275,0,0,118,1,1,1,1,2,0 277 | 52,1,0,125,212,0,1,168,0,1,2,2,3,0 278 | 58,1,0,146,218,0,1,105,0,2,1,1,3,0 279 | 57,1,1,124,261,0,1,141,0,0.3,2,0,3,0 280 | 58,0,1,136,319,1,0,152,0,0,2,2,2,0 281 | 61,1,0,138,166,0,0,125,1,3.6,1,1,2,0 282 | 42,1,0,136,315,0,1,125,1,1.8,1,0,1,0 283 | 52,1,0,128,204,1,1,156,1,1,1,0,0,0 284 | 59,1,2,126,218,1,1,134,0,2.2,1,1,1,0 285 | 40,1,0,152,223,0,1,181,0,0,2,0,3,0 286 | 61,1,0,140,207,0,0,138,1,1.9,2,1,3,0 287 | 46,1,0,140,311,0,1,120,1,1.8,1,2,3,0 288 | 59,1,3,134,204,0,1,162,0,0.8,2,2,2,0 289 | 57,1,1,154,232,0,0,164,0,0,2,1,2,0 290 | 57,1,0,110,335,0,1,143,1,3,1,1,3,0 291 | 55,0,0,128,205,0,2,130,1,2,1,1,3,0 292 | 61,1,0,148,203,0,1,161,0,0,2,1,3,0 293 | 58,1,0,114,318,0,2,140,0,4.4,0,3,1,0 294 | 58,0,0,170,225,1,0,146,1,2.8,1,2,1,0 295 | 67,1,2,152,212,0,0,150,0,0.8,1,0,3,0 296 | 44,1,0,120,169,0,1,144,1,2.8,0,0,1,0 297 | 63,1,0,140,187,0,0,144,1,4,2,2,3,0 298 | 63,0,0,124,197,0,1,136,1,0,1,0,2,0 299 | 59,1,0,164,176,1,0,90,0,1,1,2,1,0 300 | 57,0,0,140,241,0,1,123,1,0.2,1,0,3,0 301 | 45,1,3,110,264,0,1,132,0,1.2,1,0,3,0 302 | 68,1,0,144,193,1,1,141,0,3.4,1,2,3,0 303 | 57,1,0,130,131,0,1,115,1,1.2,1,1,3,0 304 | 57,0,1,130,236,0,0,174,0,0,1,1,2,0 305 | -------------------------------------------------------------------------------- /Class8/figures/fig01_根据KNN算法来确定支持者.jpg: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/Vuean/Zero-Basic-Machine-Learning/c5a44fa0fd964c11aa87c17259d1c1f5f6581946/Class8/figures/fig01_根据KNN算法来确定支持者.jpg -------------------------------------------------------------------------------- /Class8/figures/fig02_欧氏距离和曼哈顿距离.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Vuean/Zero-Basic-Machine-Learning/c5a44fa0fd964c11aa87c17259d1c1f5f6581946/Class8/figures/fig02_欧氏距离和曼哈顿距离.jpg -------------------------------------------------------------------------------- /Class8/figures/fig03_KNN算法示意.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Vuean/Zero-Basic-Machine-Learning/c5a44fa0fd964c11aa87c17259d1c1f5f6581946/Class8/figures/fig03_KNN算法示意.jpg -------------------------------------------------------------------------------- /Class8/figures/fig04_不同K值时模型所取得的测试集准确率和F1分数.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Vuean/Zero-Basic-Machine-Learning/c5a44fa0fd964c11aa87c17259d1c1f5f6581946/Class8/figures/fig04_不同K值时模型所取得的测试集准确率和F1分数.jpg -------------------------------------------------------------------------------- /Class8/figures/fig05_SVM超平面的确定.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Vuean/Zero-Basic-Machine-Learning/c5a44fa0fd964c11aa87c17259d1c1f5f6581946/Class8/figures/fig05_SVM超平面的确定.jpg -------------------------------------------------------------------------------- /Class8/figures/fig06_根据相亲数据集所生成的决策树.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Vuean/Zero-Basic-Machine-Learning/c5a44fa0fd964c11aa87c17259d1c1f5f6581946/Class8/figures/fig06_根据相亲数据集所生成的决策树.jpg -------------------------------------------------------------------------------- /Class8/figures/fig07_一个过拟合的决策树分类结果.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Vuean/Zero-Basic-Machine-Learning/c5a44fa0fd964c11aa87c17259d1c1f5f6581946/Class8/figures/fig07_一个过拟合的决策树分类结果.jpg -------------------------------------------------------------------------------- /Class8/figures/fig08_Sklearn的算法官方小抄.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Vuean/Zero-Basic-Machine-Learning/c5a44fa0fd964c11aa87c17259d1c1f5f6581946/Class8/figures/fig08_Sklearn的算法官方小抄.jpg -------------------------------------------------------------------------------- /Class8/figures/fig09_各种算法的准确率.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Vuean/Zero-Basic-Machine-Learning/c5a44fa0fd964c11aa87c17259d1c1f5f6581946/Class8/figures/fig09_各种算法的准确率.png -------------------------------------------------------------------------------- /Class8/figures/fig10_各种算法的混淆矩阵.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Vuean/Zero-Basic-Machine-Learning/c5a44fa0fd964c11aa87c17259d1c1f5f6581946/Class8/figures/fig10_各种算法的混淆矩阵.png -------------------------------------------------------------------------------- /Class8/figures/fig11_参数优化后随机森林算法的混淆矩阵.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/Vuean/Zero-Basic-Machine-Learning/c5a44fa0fd964c11aa87c17259d1c1f5f6581946/Class8/figures/fig11_参数优化后随机森林算法的混淆矩阵.png -------------------------------------------------------------------------------- /Class9/Example/Ex01/C03 Stacking - Bank Customer.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Vuean/Zero-Basic-Machine-Learning/c5a44fa0fd964c11aa87c17259d1c1f5f6581946/Class9/Example/Ex01/C03 Stacking - Bank Customer.ipynb -------------------------------------------------------------------------------- /Class9/Example/Ex03/C04 Ensemble - Heart.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Vuean/Zero-Basic-Machine-Learning/c5a44fa0fd964c11aa87c17259d1c1f5f6581946/Class9/Example/Ex03/C04 Ensemble - Heart.ipynb -------------------------------------------------------------------------------- /Class9/Example/Ex03/dataset/heart.csv: -------------------------------------------------------------------------------- 1 | age,sex,cp,trestbps,chol,fbs,restecg,thalach,exang,oldpeak,slope,ca,thal,target 2 | 63,1,3,145,233,1,0,150,0,2.3,0,0,1,1 3 | 37,1,2,130,250,0,1,187,0,3.5,0,0,2,1 4 | 41,0,1,130,204,0,0,172,0,1.4,2,0,2,1 5 | 56,1,1,120,236,0,1,178,0,0.8,2,0,2,1 6 | 57,0,0,120,354,0,1,163,1,0.6,2,0,2,1 7 | 57,1,0,140,192,0,1,148,0,0.4,1,0,1,1 8 | 56,0,1,140,294,0,0,153,0,1.3,1,0,2,1 9 | 44,1,1,120,263,0,1,173,0,0,2,0,3,1 10 | 52,1,2,172,199,1,1,162,0,0.5,2,0,3,1 11 | 57,1,2,150,168,0,1,174,0,1.6,2,0,2,1 12 | 54,1,0,140,239,0,1,160,0,1.2,2,0,2,1 13 | 48,0,2,130,275,0,1,139,0,0.2,2,0,2,1 14 | 49,1,1,130,266,0,1,171,0,0.6,2,0,2,1 15 | 64,1,3,110,211,0,0,144,1,1.8,1,0,2,1 16 | 58,0,3,150,283,1,0,162,0,1,2,0,2,1 17 | 50,0,2,120,219,0,1,158,0,1.6,1,0,2,1 18 | 58,0,2,120,340,0,1,172,0,0,2,0,2,1 19 | 66,0,3,150,226,0,1,114,0,2.6,0,0,2,1 20 | 43,1,0,150,247,0,1,171,0,1.5,2,0,2,1 21 | 69,0,3,140,239,0,1,151,0,1.8,2,2,2,1 22 | 59,1,0,135,234,0,1,161,0,0.5,1,0,3,1 23 | 44,1,2,130,233,0,1,179,1,0.4,2,0,2,1 24 | 42,1,0,140,226,0,1,178,0,0,2,0,2,1 25 | 61,1,2,150,243,1,1,137,1,1,1,0,2,1 26 | 40,1,3,140,199,0,1,178,1,1.4,2,0,3,1 27 | 71,0,1,160,302,0,1,162,0,0.4,2,2,2,1 28 | 59,1,2,150,212,1,1,157,0,1.6,2,0,2,1 29 | 51,1,2,110,175,0,1,123,0,0.6,2,0,2,1 30 | 65,0,2,140,417,1,0,157,0,0.8,2,1,2,1 31 | 53,1,2,130,197,1,0,152,0,1.2,0,0,2,1 32 | 41,0,1,105,198,0,1,168,0,0,2,1,2,1 33 | 65,1,0,120,177,0,1,140,0,0.4,2,0,3,1 34 | 44,1,1,130,219,0,0,188,0,0,2,0,2,1 35 | 54,1,2,125,273,0,0,152,0,0.5,0,1,2,1 36 | 51,1,3,125,213,0,0,125,1,1.4,2,1,2,1 37 | 46,0,2,142,177,0,0,160,1,1.4,0,0,2,1 38 | 54,0,2,135,304,1,1,170,0,0,2,0,2,1 39 | 54,1,2,150,232,0,0,165,0,1.6,2,0,3,1 40 | 65,0,2,155,269,0,1,148,0,0.8,2,0,2,1 41 | 65,0,2,160,360,0,0,151,0,0.8,2,0,2,1 42 | 51,0,2,140,308,0,0,142,0,1.5,2,1,2,1 43 | 48,1,1,130,245,0,0,180,0,0.2,1,0,2,1 44 | 45,1,0,104,208,0,0,148,1,3,1,0,2,1 45 | 53,0,0,130,264,0,0,143,0,0.4,1,0,2,1 46 | 39,1,2,140,321,0,0,182,0,0,2,0,2,1 47 | 52,1,1,120,325,0,1,172,0,0.2,2,0,2,1 48 | 44,1,2,140,235,0,0,180,0,0,2,0,2,1 49 | 47,1,2,138,257,0,0,156,0,0,2,0,2,1 50 | 53,0,2,128,216,0,0,115,0,0,2,0,0,1 51 | 53,0,0,138,234,0,0,160,0,0,2,0,2,1 52 | 51,0,2,130,256,0,0,149,0,0.5,2,0,2,1 53 | 66,1,0,120,302,0,0,151,0,0.4,1,0,2,1 54 | 62,1,2,130,231,0,1,146,0,1.8,1,3,3,1 55 | 44,0,2,108,141,0,1,175,0,0.6,1,0,2,1 56 | 63,0,2,135,252,0,0,172,0,0,2,0,2,1 57 | 52,1,1,134,201,0,1,158,0,0.8,2,1,2,1 58 | 48,1,0,122,222,0,0,186,0,0,2,0,2,1 59 | 
45,1,0,115,260,0,0,185,0,0,2,0,2,1 60 | 34,1,3,118,182,0,0,174,0,0,2,0,2,1 61 | 57,0,0,128,303,0,0,159,0,0,2,1,2,1 62 | 71,0,2,110,265,1,0,130,0,0,2,1,2,1 63 | 54,1,1,108,309,0,1,156,0,0,2,0,3,1 64 | 52,1,3,118,186,0,0,190,0,0,1,0,1,1 65 | 41,1,1,135,203,0,1,132,0,0,1,0,1,1 66 | 58,1,2,140,211,1,0,165,0,0,2,0,2,1 67 | 35,0,0,138,183,0,1,182,0,1.4,2,0,2,1 68 | 51,1,2,100,222,0,1,143,1,1.2,1,0,2,1 69 | 45,0,1,130,234,0,0,175,0,0.6,1,0,2,1 70 | 44,1,1,120,220,0,1,170,0,0,2,0,2,1 71 | 62,0,0,124,209,0,1,163,0,0,2,0,2,1 72 | 54,1,2,120,258,0,0,147,0,0.4,1,0,3,1 73 | 51,1,2,94,227,0,1,154,1,0,2,1,3,1 74 | 29,1,1,130,204,0,0,202,0,0,2,0,2,1 75 | 51,1,0,140,261,0,0,186,1,0,2,0,2,1 76 | 43,0,2,122,213,0,1,165,0,0.2,1,0,2,1 77 | 55,0,1,135,250,0,0,161,0,1.4,1,0,2,1 78 | 51,1,2,125,245,1,0,166,0,2.4,1,0,2,1 79 | 59,1,1,140,221,0,1,164,1,0,2,0,2,1 80 | 52,1,1,128,205,1,1,184,0,0,2,0,2,1 81 | 58,1,2,105,240,0,0,154,1,0.6,1,0,3,1 82 | 41,1,2,112,250,0,1,179,0,0,2,0,2,1 83 | 45,1,1,128,308,0,0,170,0,0,2,0,2,1 84 | 60,0,2,102,318,0,1,160,0,0,2,1,2,1 85 | 52,1,3,152,298,1,1,178,0,1.2,1,0,3,1 86 | 42,0,0,102,265,0,0,122,0,0.6,1,0,2,1 87 | 67,0,2,115,564,0,0,160,0,1.6,1,0,3,1 88 | 68,1,2,118,277,0,1,151,0,1,2,1,3,1 89 | 46,1,1,101,197,1,1,156,0,0,2,0,3,1 90 | 54,0,2,110,214,0,1,158,0,1.6,1,0,2,1 91 | 58,0,0,100,248,0,0,122,0,1,1,0,2,1 92 | 48,1,2,124,255,1,1,175,0,0,2,2,2,1 93 | 57,1,0,132,207,0,1,168,1,0,2,0,3,1 94 | 52,1,2,138,223,0,1,169,0,0,2,4,2,1 95 | 54,0,1,132,288,1,0,159,1,0,2,1,2,1 96 | 45,0,1,112,160,0,1,138,0,0,1,0,2,1 97 | 53,1,0,142,226,0,0,111,1,0,2,0,3,1 98 | 62,0,0,140,394,0,0,157,0,1.2,1,0,2,1 99 | 52,1,0,108,233,1,1,147,0,0.1,2,3,3,1 100 | 43,1,2,130,315,0,1,162,0,1.9,2,1,2,1 101 | 53,1,2,130,246,1,0,173,0,0,2,3,2,1 102 | 42,1,3,148,244,0,0,178,0,0.8,2,2,2,1 103 | 59,1,3,178,270,0,0,145,0,4.2,0,0,3,1 104 | 63,0,1,140,195,0,1,179,0,0,2,2,2,1 105 | 42,1,2,120,240,1,1,194,0,0.8,0,0,3,1 106 | 50,1,2,129,196,0,1,163,0,0,2,0,2,1 107 | 68,0,2,120,211,0,0,115,0,1.5,1,0,2,1 108 | 69,1,3,160,234,1,0,131,0,0.1,1,1,2,1 109 | 45,0,0,138,236,0,0,152,1,0.2,1,0,2,1 110 | 50,0,1,120,244,0,1,162,0,1.1,2,0,2,1 111 | 50,0,0,110,254,0,0,159,0,0,2,0,2,1 112 | 64,0,0,180,325,0,1,154,1,0,2,0,2,1 113 | 57,1,2,150,126,1,1,173,0,0.2,2,1,3,1 114 | 64,0,2,140,313,0,1,133,0,0.2,2,0,3,1 115 | 43,1,0,110,211,0,1,161,0,0,2,0,3,1 116 | 55,1,1,130,262,0,1,155,0,0,2,0,2,1 117 | 37,0,2,120,215,0,1,170,0,0,2,0,2,1 118 | 41,1,2,130,214,0,0,168,0,2,1,0,2,1 119 | 56,1,3,120,193,0,0,162,0,1.9,1,0,3,1 120 | 46,0,1,105,204,0,1,172,0,0,2,0,2,1 121 | 46,0,0,138,243,0,0,152,1,0,1,0,2,1 122 | 64,0,0,130,303,0,1,122,0,2,1,2,2,1 123 | 59,1,0,138,271,0,0,182,0,0,2,0,2,1 124 | 41,0,2,112,268,0,0,172,1,0,2,0,2,1 125 | 54,0,2,108,267,0,0,167,0,0,2,0,2,1 126 | 39,0,2,94,199,0,1,179,0,0,2,0,2,1 127 | 34,0,1,118,210,0,1,192,0,0.7,2,0,2,1 128 | 47,1,0,112,204,0,1,143,0,0.1,2,0,2,1 129 | 67,0,2,152,277,0,1,172,0,0,2,1,2,1 130 | 52,0,2,136,196,0,0,169,0,0.1,1,0,2,1 131 | 74,0,1,120,269,0,0,121,1,0.2,2,1,2,1 132 | 54,0,2,160,201,0,1,163,0,0,2,1,2,1 133 | 49,0,1,134,271,0,1,162,0,0,1,0,2,1 134 | 42,1,1,120,295,0,1,162,0,0,2,0,2,1 135 | 41,1,1,110,235,0,1,153,0,0,2,0,2,1 136 | 41,0,1,126,306,0,1,163,0,0,2,0,2,1 137 | 49,0,0,130,269,0,1,163,0,0,2,0,2,1 138 | 60,0,2,120,178,1,1,96,0,0,2,0,2,1 139 | 62,1,1,128,208,1,0,140,0,0,2,0,2,1 140 | 57,1,0,110,201,0,1,126,1,1.5,1,0,1,1 141 | 64,1,0,128,263,0,1,105,1,0.2,1,1,3,1 142 | 51,0,2,120,295,0,0,157,0,0.6,2,0,2,1 143 | 43,1,0,115,303,0,1,181,0,1.2,1,0,2,1 144 | 42,0,2,120,209,0,1,173,0,0,1,0,2,1 145 | 
67,0,0,106,223,0,1,142,0,0.3,2,2,2,1 146 | 76,0,2,140,197,0,2,116,0,1.1,1,0,2,1 147 | 70,1,1,156,245,0,0,143,0,0,2,0,2,1 148 | 44,0,2,118,242,0,1,149,0,0.3,1,1,2,1 149 | 60,0,3,150,240,0,1,171,0,0.9,2,0,2,1 150 | 44,1,2,120,226,0,1,169,0,0,2,0,2,1 151 | 42,1,2,130,180,0,1,150,0,0,2,0,2,1 152 | 66,1,0,160,228,0,0,138,0,2.3,2,0,1,1 153 | 71,0,0,112,149,0,1,125,0,1.6,1,0,2,1 154 | 64,1,3,170,227,0,0,155,0,0.6,1,0,3,1 155 | 66,0,2,146,278,0,0,152,0,0,1,1,2,1 156 | 39,0,2,138,220,0,1,152,0,0,1,0,2,1 157 | 58,0,0,130,197,0,1,131,0,0.6,1,0,2,1 158 | 47,1,2,130,253,0,1,179,0,0,2,0,2,1 159 | 35,1,1,122,192,0,1,174,0,0,2,0,2,1 160 | 58,1,1,125,220,0,1,144,0,0.4,1,4,3,1 161 | 56,1,1,130,221,0,0,163,0,0,2,0,3,1 162 | 56,1,1,120,240,0,1,169,0,0,0,0,2,1 163 | 55,0,1,132,342,0,1,166,0,1.2,2,0,2,1 164 | 41,1,1,120,157,0,1,182,0,0,2,0,2,1 165 | 38,1,2,138,175,0,1,173,0,0,2,4,2,1 166 | 38,1,2,138,175,0,1,173,0,0,2,4,2,1 167 | 67,1,0,160,286,0,0,108,1,1.5,1,3,2,0 168 | 67,1,0,120,229,0,0,129,1,2.6,1,2,3,0 169 | 62,0,0,140,268,0,0,160,0,3.6,0,2,2,0 170 | 63,1,0,130,254,0,0,147,0,1.4,1,1,3,0 171 | 53,1,0,140,203,1,0,155,1,3.1,0,0,3,0 172 | 56,1,2,130,256,1,0,142,1,0.6,1,1,1,0 173 | 48,1,1,110,229,0,1,168,0,1,0,0,3,0 174 | 58,1,1,120,284,0,0,160,0,1.8,1,0,2,0 175 | 58,1,2,132,224,0,0,173,0,3.2,2,2,3,0 176 | 60,1,0,130,206,0,0,132,1,2.4,1,2,3,0 177 | 40,1,0,110,167,0,0,114,1,2,1,0,3,0 178 | 60,1,0,117,230,1,1,160,1,1.4,2,2,3,0 179 | 64,1,2,140,335,0,1,158,0,0,2,0,2,0 180 | 43,1,0,120,177,0,0,120,1,2.5,1,0,3,0 181 | 57,1,0,150,276,0,0,112,1,0.6,1,1,1,0 182 | 55,1,0,132,353,0,1,132,1,1.2,1,1,3,0 183 | 65,0,0,150,225,0,0,114,0,1,1,3,3,0 184 | 61,0,0,130,330,0,0,169,0,0,2,0,2,0 185 | 58,1,2,112,230,0,0,165,0,2.5,1,1,3,0 186 | 50,1,0,150,243,0,0,128,0,2.6,1,0,3,0 187 | 44,1,0,112,290,0,0,153,0,0,2,1,2,0 188 | 60,1,0,130,253,0,1,144,1,1.4,2,1,3,0 189 | 54,1,0,124,266,0,0,109,1,2.2,1,1,3,0 190 | 50,1,2,140,233,0,1,163,0,0.6,1,1,3,0 191 | 41,1,0,110,172,0,0,158,0,0,2,0,3,0 192 | 51,0,0,130,305,0,1,142,1,1.2,1,0,3,0 193 | 58,1,0,128,216,0,0,131,1,2.2,1,3,3,0 194 | 54,1,0,120,188,0,1,113,0,1.4,1,1,3,0 195 | 60,1,0,145,282,0,0,142,1,2.8,1,2,3,0 196 | 60,1,2,140,185,0,0,155,0,3,1,0,2,0 197 | 59,1,0,170,326,0,0,140,1,3.4,0,0,3,0 198 | 46,1,2,150,231,0,1,147,0,3.6,1,0,2,0 199 | 67,1,0,125,254,1,1,163,0,0.2,1,2,3,0 200 | 62,1,0,120,267,0,1,99,1,1.8,1,2,3,0 201 | 65,1,0,110,248,0,0,158,0,0.6,2,2,1,0 202 | 44,1,0,110,197,0,0,177,0,0,2,1,2,0 203 | 60,1,0,125,258,0,0,141,1,2.8,1,1,3,0 204 | 58,1,0,150,270,0,0,111,1,0.8,2,0,3,0 205 | 68,1,2,180,274,1,0,150,1,1.6,1,0,3,0 206 | 62,0,0,160,164,0,0,145,0,6.2,0,3,3,0 207 | 52,1,0,128,255,0,1,161,1,0,2,1,3,0 208 | 59,1,0,110,239,0,0,142,1,1.2,1,1,3,0 209 | 60,0,0,150,258,0,0,157,0,2.6,1,2,3,0 210 | 49,1,2,120,188,0,1,139,0,2,1,3,3,0 211 | 59,1,0,140,177,0,1,162,1,0,2,1,3,0 212 | 57,1,2,128,229,0,0,150,0,0.4,1,1,3,0 213 | 61,1,0,120,260,0,1,140,1,3.6,1,1,3,0 214 | 39,1,0,118,219,0,1,140,0,1.2,1,0,3,0 215 | 61,0,0,145,307,0,0,146,1,1,1,0,3,0 216 | 56,1,0,125,249,1,0,144,1,1.2,1,1,2,0 217 | 43,0,0,132,341,1,0,136,1,3,1,0,3,0 218 | 62,0,2,130,263,0,1,97,0,1.2,1,1,3,0 219 | 63,1,0,130,330,1,0,132,1,1.8,2,3,3,0 220 | 65,1,0,135,254,0,0,127,0,2.8,1,1,3,0 221 | 48,1,0,130,256,1,0,150,1,0,2,2,3,0 222 | 63,0,0,150,407,0,0,154,0,4,1,3,3,0 223 | 55,1,0,140,217,0,1,111,1,5.6,0,0,3,0 224 | 65,1,3,138,282,1,0,174,0,1.4,1,1,2,0 225 | 56,0,0,200,288,1,0,133,1,4,0,2,3,0 226 | 54,1,0,110,239,0,1,126,1,2.8,1,1,3,0 227 | 70,1,0,145,174,0,1,125,1,2.6,0,0,3,0 228 | 62,1,1,120,281,0,0,103,0,1.4,1,1,3,0 229 | 
35,1,0,120,198,0,1,130,1,1.6,1,0,3,0 230 | 59,1,3,170,288,0,0,159,0,0.2,1,0,3,0 231 | 64,1,2,125,309,0,1,131,1,1.8,1,0,3,0 232 | 47,1,2,108,243,0,1,152,0,0,2,0,2,0 233 | 57,1,0,165,289,1,0,124,0,1,1,3,3,0 234 | 55,1,0,160,289,0,0,145,1,0.8,1,1,3,0 235 | 64,1,0,120,246,0,0,96,1,2.2,0,1,2,0 236 | 70,1,0,130,322,0,0,109,0,2.4,1,3,2,0 237 | 51,1,0,140,299,0,1,173,1,1.6,2,0,3,0 238 | 58,1,0,125,300,0,0,171,0,0,2,2,3,0 239 | 60,1,0,140,293,0,0,170,0,1.2,1,2,3,0 240 | 77,1,0,125,304,0,0,162,1,0,2,3,2,0 241 | 35,1,0,126,282,0,0,156,1,0,2,0,3,0 242 | 70,1,2,160,269,0,1,112,1,2.9,1,1,3,0 243 | 59,0,0,174,249,0,1,143,1,0,1,0,2,0 244 | 64,1,0,145,212,0,0,132,0,2,1,2,1,0 245 | 57,1,0,152,274,0,1,88,1,1.2,1,1,3,0 246 | 56,1,0,132,184,0,0,105,1,2.1,1,1,1,0 247 | 48,1,0,124,274,0,0,166,0,0.5,1,0,3,0 248 | 56,0,0,134,409,0,0,150,1,1.9,1,2,3,0 249 | 66,1,1,160,246,0,1,120,1,0,1,3,1,0 250 | 54,1,1,192,283,0,0,195,0,0,2,1,3,0 251 | 69,1,2,140,254,0,0,146,0,2,1,3,3,0 252 | 51,1,0,140,298,0,1,122,1,4.2,1,3,3,0 253 | 43,1,0,132,247,1,0,143,1,0.1,1,4,3,0 254 | 62,0,0,138,294,1,1,106,0,1.9,1,3,2,0 255 | 67,1,0,100,299,0,0,125,1,0.9,1,2,2,0 256 | 59,1,3,160,273,0,0,125,0,0,2,0,2,0 257 | 45,1,0,142,309,0,0,147,1,0,1,3,3,0 258 | 58,1,0,128,259,0,0,130,1,3,1,2,3,0 259 | 50,1,0,144,200,0,0,126,1,0.9,1,0,3,0 260 | 62,0,0,150,244,0,1,154,1,1.4,1,0,2,0 261 | 38,1,3,120,231,0,1,182,1,3.8,1,0,3,0 262 | 66,0,0,178,228,1,1,165,1,1,1,2,3,0 263 | 52,1,0,112,230,0,1,160,0,0,2,1,2,0 264 | 53,1,0,123,282,0,1,95,1,2,1,2,3,0 265 | 63,0,0,108,269,0,1,169,1,1.8,1,2,2,0 266 | 54,1,0,110,206,0,0,108,1,0,1,1,2,0 267 | 66,1,0,112,212,0,0,132,1,0.1,2,1,2,0 268 | 55,0,0,180,327,0,2,117,1,3.4,1,0,2,0 269 | 49,1,2,118,149,0,0,126,0,0.8,2,3,2,0 270 | 54,1,0,122,286,0,0,116,1,3.2,1,2,2,0 271 | 56,1,0,130,283,1,0,103,1,1.6,0,0,3,0 272 | 46,1,0,120,249,0,0,144,0,0.8,2,0,3,0 273 | 61,1,3,134,234,0,1,145,0,2.6,1,2,2,0 274 | 67,1,0,120,237,0,1,71,0,1,1,0,2,0 275 | 58,1,0,100,234,0,1,156,0,0.1,2,1,3,0 276 | 47,1,0,110,275,0,0,118,1,1,1,1,2,0 277 | 52,1,0,125,212,0,1,168,0,1,2,2,3,0 278 | 58,1,0,146,218,0,1,105,0,2,1,1,3,0 279 | 57,1,1,124,261,0,1,141,0,0.3,2,0,3,0 280 | 58,0,1,136,319,1,0,152,0,0,2,2,2,0 281 | 61,1,0,138,166,0,0,125,1,3.6,1,1,2,0 282 | 42,1,0,136,315,0,1,125,1,1.8,1,0,1,0 283 | 52,1,0,128,204,1,1,156,1,1,1,0,0,0 284 | 59,1,2,126,218,1,1,134,0,2.2,1,1,1,0 285 | 40,1,0,152,223,0,1,181,0,0,2,0,3,0 286 | 61,1,0,140,207,0,0,138,1,1.9,2,1,3,0 287 | 46,1,0,140,311,0,1,120,1,1.8,1,2,3,0 288 | 59,1,3,134,204,0,1,162,0,0.8,2,2,2,0 289 | 57,1,1,154,232,0,0,164,0,0,2,1,2,0 290 | 57,1,0,110,335,0,1,143,1,3,1,1,3,0 291 | 55,0,0,128,205,0,2,130,1,2,1,1,3,0 292 | 61,1,0,148,203,0,1,161,0,0,2,1,3,0 293 | 58,1,0,114,318,0,2,140,0,4.4,0,3,1,0 294 | 58,0,0,170,225,1,0,146,1,2.8,1,2,1,0 295 | 67,1,2,152,212,0,0,150,0,0.8,1,0,3,0 296 | 44,1,0,120,169,0,1,144,1,2.8,0,0,1,0 297 | 63,1,0,140,187,0,0,144,1,4,2,2,3,0 298 | 63,0,0,124,197,0,1,136,1,0,1,0,2,0 299 | 59,1,0,164,176,1,0,90,0,1,1,2,1,0 300 | 57,0,0,140,241,0,1,123,1,0.2,1,0,3,0 301 | 45,1,3,110,264,0,1,132,0,1.2,1,0,3,0 302 | 68,1,0,144,193,1,1,141,0,3.4,1,2,3,0 303 | 57,1,0,130,131,0,1,115,1,1.2,1,1,3,0 304 | 57,0,1,130,236,0,0,174,0,0,1,1,2,0 305 | -------------------------------------------------------------------------------- /Class9/figures/fig01_偏差和方差都低是我们对模型的追求.jpg: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/Vuean/Zero-Basic-Machine-Learning/c5a44fa0fd964c11aa87c17259d1c1f5f6581946/Class9/figures/fig01_偏差和方差都低是我们对模型的追求.jpg -------------------------------------------------------------------------------- /Class9/figures/fig02_损失-偏差-方差与模型复杂度之间的关系.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Vuean/Zero-Basic-Machine-Learning/c5a44fa0fd964c11aa87c17259d1c1f5f6581946/Class9/figures/fig02_损失-偏差-方差与模型复杂度之间的关系.jpg -------------------------------------------------------------------------------- /Class9/figures/fig03_损失-偏差-方差与模型复杂度之间的关系.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Vuean/Zero-Basic-Machine-Learning/c5a44fa0fd964c11aa87c17259d1c1f5f6581946/Class9/figures/fig03_损失-偏差-方差与模型复杂度之间的关系.jpg -------------------------------------------------------------------------------- /Class9/figures/fig04_函数复杂度的提升拟合能力的增强会带来高方差.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Vuean/Zero-Basic-Machine-Learning/c5a44fa0fd964c11aa87c17259d1c1f5f6581946/Class9/figures/fig04_函数复杂度的提升拟合能力的增强会带来高方差.jpg -------------------------------------------------------------------------------- /Class9/figures/fig05_有放回的随机抽取数据样本.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Vuean/Zero-Basic-Machine-Learning/c5a44fa0fd964c11aa87c17259d1c1f5f6581946/Class9/figures/fig05_有放回的随机抽取数据样本.jpg -------------------------------------------------------------------------------- /Class9/figures/fig06_Bagging的过程.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Vuean/Zero-Basic-Machine-Learning/c5a44fa0fd964c11aa87c17259d1c1f5f6581946/Class9/figures/fig06_Bagging的过程.jpg -------------------------------------------------------------------------------- /Class9/figures/fig07_4种算法的比较.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Vuean/Zero-Basic-Machine-Learning/c5a44fa0fd964c11aa87c17259d1c1f5f6581946/Class9/figures/fig07_4种算法的比较.png -------------------------------------------------------------------------------- /Class9/figures/fig08_Boosting示意图.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Vuean/Zero-Basic-Machine-Learning/c5a44fa0fd964c11aa87c17259d1c1f5f6581946/Class9/figures/fig08_Boosting示意图.jpg -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Zero-Basic-Machine-Learning 2 | Machine Learning for Absolute Beginners (零基础学机器学习) - Huang Jia - Posts & Telecom Press 3 | 4 | ## Preface 5 | 6 | There are many machine learning books, each packed with knowledge points; the field is vast and tangled, and a beginner can easily drown in this ocean of knowledge and feel unable to continue. This book lays out a roadmap into machine learning. It starts from the perspective of a complete beginner (the protagonist, Xiao Bing) and lets readers follow her forward step by step; the teacher, Ka-ge, answers her questions as they arise and clears away the obstacles along the road, gently leading everyone into the world of machine learning. 7 | 8 | The book also has the following features. 9 | 10 | - It targets entry-level readers: it is easy to learn from and light, even slightly humorous, to read, sweeping away the impression that machine learning is obscure and nothing but deep algorithms. Getting started with machine learning does not require studying difficult algorithms right away; that would only scare beginners off. 11 | 12 | - Although the writing style is relaxed and humorous, the content is practical and strongly hands-on. Most of the cases in the book come from real projects, so they are down to earth and easy to work through. 13 | 14 | - Broad coverage, spanning the fundamentals of machine learning, deep learning, and reinforcement learning. 15 | 16 | - Flexible presentation: all of the machine learning content is delivered through lessons, dialogues, Q&A, and exercises. 17 | 18 | Specifically, the book covers the following. 19 | 20 | - The basic principles of machine learning. 21 | 22 | - The mathematics and Python fundamentals behind machine learning. 23 | 24 | - Machine learning algorithms with hands-on cases.
- Deep learning principles with hands-on cases.
- Reinforcement learning algorithms with simple hands-on practice. 25 | 26 | ## Table of Contents 27 | 28 | 
[Lesson 1](https://github.com/Vuean/Zero-Basic-Machine-Learning/blob/main/Class1/README.md) 29 | 30 | [Lesson 2](https://github.com/Vuean/Zero-Basic-Machine-Learning/blob/main/Class2/README.md) 31 | 32 | [Lesson 3]() 33 | 34 | [Lesson 4]() 35 | 36 | [Lesson 5]() 37 | 38 | [Lesson 6]() 39 | 40 | [Lesson 7]() 41 | 42 | [Lesson 8]() 43 | 44 | [Lesson 9]() 45 | 46 | [Lesson 10]() 47 | 48 | [Lesson 11]() 49 | --------------------------------------------------------------------------------