├── .gitignore
├── README.md
├── aws.md
├── chapter10_Natural-Language-Process
│   ├── char_rnn
│   │   ├── README.md
│   │   ├── char_rnn.ipynb
│   │   ├── config.py
│   │   ├── data
│   │   │   ├── __init__.py
│   │   │   └── dataset.py
│   │   ├── dataset
│   │   │   ├── jay.txt
│   │   │   └── poetry.txt
│   │   ├── main.py
│   │   └── models
│   │       ├── __init__.py
│   │       └── char_rnn.py
│   └── seq2seq-translation
│       ├── README.md
│       ├── dataset.py
│       ├── evaluate.py
│       ├── model
│       │   ├── __init__.py
│       │   └── seq2seq.py
│       └── train.py
├── chapter2_PyTorch-Basics
│   ├── PyTorch-introduction.ipynb
│   ├── Tensor-and-Variable.ipynb
│   ├── autograd.ipynb
│   └── dynamic-graph.ipynb
├── chapter3_NN
│   ├── bp.ipynb
│   ├── deep-nn.ipynb
│   ├── linear-regression-gradient-descend.ipynb
│   ├── logistic-regression
│   │   ├── data.txt
│   │   └── logistic-regression.ipynb
│   ├── nn-sequential-module.ipynb
│   ├── optimizer
│   │   ├── adadelta.ipynb
│   │   ├── adagrad.ipynb
│   │   ├── adam.ipynb
│   │   ├── momentum.ipynb
│   │   ├── rmsprop.ipynb
│   │   └── sgd.ipynb
│   └── param_initialize.ipynb
├── chapter4_CNN
│   ├── basic_conv.ipynb
│   ├── batch-normalization.ipynb
│   ├── cat.png
│   ├── data-augumentation.ipynb
│   ├── densenet.ipynb
│   ├── googlenet.ipynb
│   ├── lr-decay.ipynb
│   ├── regularization.ipynb
│   ├── resnet.ipynb
│   ├── utils.py
│   └── vgg.ipynb
├── chapter5_RNN
│   ├── nlp
│   │   ├── n-gram.ipynb
│   │   ├── seq-lstm.ipynb
│   │   └── word-embedding.ipynb
│   ├── pytorch-rnn.ipynb
│   ├── rnn-for-image.ipynb
│   ├── time-series
│   │   ├── data.csv
│   │   └── lstm-time-series.ipynb
│   └── utils.py
├── chapter6_GAN
│   ├── autoencoder.ipynb
│   ├── gan.ipynb
│   └── vae.ipynb
├── chapter7_RL
│   ├── dqn.ipynb
│   ├── dqn.py
│   ├── mount-car.py
│   ├── open_ai_gym.ipynb
│   └── q-learning-intro.ipynb
├── chapter8_PyTorch-Advances
│   ├── data-io.ipynb
│   ├── example_data
│   │   ├── image
│   │   │   ├── class_1
│   │   │   │   ├── 1.png
│   │   │   │   ├── 2.png
│   │   │   │   └── 3.png
│   │   │   ├── class_2
│   │   │   │   ├── 10.png
│   │   │   │   ├── 11.png
│   │   │   │   └── 12.png
│   │   │   └── class_3
│   │   │       ├── 16.png
│   │   │       ├── 17.png
│   │   │       └── 18.png
│   │   └── train.txt
│   └── tensorboard.ipynb
├── chapter9_Computer-Vision
│   ├── Deep-Dream
│   │   ├── README.md
│   │   ├── backward
│   │   │   └── backward.py
│   │   ├── deepdream.py
│   │   ├── guide_image
│   │   │   ├── flower.jpg
│   │   │   ├── input.png
│   │   │   └── kitten.jpg
│   │   ├── resnet.py
│   │   ├── show_image.ipynb
│   │   ├── sky.jpg
│   │   └── util.py
│   ├── fine_tune
│   │   ├── READMD.md
│   │   ├── config.py
│   │   ├── fine-tune.ipynb
│   │   ├── get_data.sh
│   │   └── main.py
│   ├── kaggle_dog_vs_cat
│   │   ├── README.md
│   │   └── model
│   │       ├── dataset.py
│   │       ├── feature_extraction.py
│   │       ├── feature_train.py
│   │       ├── fix_train.py
│   │       ├── net.py
│   │       └── process data.ipynb
│   ├── neural-transfer
│   │   ├── README.md
│   │   ├── build_model.py
│   │   ├── demo.ipynb
│   │   ├── load_img.py
│   │   ├── loss.py
│   │   ├── picture
│   │   │   ├── content.png
│   │   │   ├── saved_picture.png
│   │   │   └── style.png
│   │   └── run_code.py
│   └── segmentation
│       ├── README.md
│       ├── config.py
│       ├── data
│       │   ├── __init__.py
│       │   └── voc.py
│       ├── fcn.ipynb
│       ├── get_data.sh
│       ├── main.py
│       └── models
│           ├── __init__.py
│           └── fcn.py
└── utils.py

--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
.ipynb_checkpoints
.idea

--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
# 深度学习入门之PyTorch

Learn Deep Learning with PyTorch

Thank you very much for purchasing this book. This GitHub repository contains the example code for [深度学习入门之PyTorch (Learn Deep Learning with PyTorch)](https://item.jd.com/17915495606.html). My own knowledge is limited, and I consulted a number of online resources while writing the book; I would like to express my respect for their authors here. Deep learning is developing rapidly, PyTorch keeps being updated, and there are many areas the book does not cover, so this repository will be updated continuously as a follow-up service for readers of the book. I hope it can be of some help on your way into deep learning.

**Note: as PyTorch versions change, the code printed in the book may break, so the code in this GitHub repository is always the authoritative version.**

![image.png](http://upload-images.jianshu.io/upload_images/3623720-7cc3a383f486d157.png?imageMogr2/auto-orient/strip%7CimageView2/2/w/1240)

## Setting up the environment

The book explains in detail how to set up a Python environment with Anaconda and how to install PyTorch. If you are working on your own machine and it has an Nvidia graphics card, you can happily enter the world of deep learning. If you do not have an Nvidia card, you will need a cloud computing platform for your deep learning journey: see [how to set up an AWS compute platform](https://github.com/SherlockLiao/code-of-learn-deep-learning-with-pytorch/blob/master/aws.md).
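Once the environment is in place, a quick sanity check saves trouble later. The following minimal sketch only assumes that `torch` and `torchvision` have been installed as described above:

```python
import torch
import torchvision

# Print the installed versions to confirm both packages are importable.
print('torch version:', torch.__version__)
print('torchvision version:', torchvision.__version__)

# Check whether PyTorch can see a CUDA-capable GPU
# (False is expected on CPU-only machines).
print('CUDA available:', torch.cuda.is_available())
```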
**The table of contents below differs from the one in the book, because the content is being updated for the second edition, which is coming soon!**

## Course catalog

### Part 1: Deep learning fundamentals
- Chapter 2: PyTorch basics
    - [Tensors and Variables](https://github.com/SherlockLiao/code-of-learn-deep-learning-with-pytorch/blob/master/chapter2_PyTorch-Basics/Tensor-and-Variable.ipynb)
    - [The autograd mechanism](https://github.com/SherlockLiao/code-of-learn-deep-learning-with-pytorch/blob/master/chapter2_PyTorch-Basics/autograd.ipynb)
    - [Dynamic vs. static graphs](https://github.com/SherlockLiao/code-of-learn-deep-learning-with-pytorch/blob/master/chapter2_PyTorch-Basics/dynamic-graph.ipynb)
- Chapter 3: Neural networks
    - [Linear models and gradient descent](https://github.com/SherlockLiao/code-of-learn-deep-learning-with-pytorch/blob/master/chapter3_NN/linear-regression-gradient-descend.ipynb)
    - [Logistic regression and optimizers](https://github.com/SherlockLiao/code-of-learn-deep-learning-with-pytorch/blob/master/chapter3_NN/logistic-regression/logistic-regression.ipynb)
    - [Multi-layer networks, Sequential and Module](https://github.com/SherlockLiao/code-of-learn-deep-learning-with-pytorch/blob/master/chapter3_NN/nn-sequential-module.ipynb)
    - [Deep neural networks](https://github.com/SherlockLiao/code-of-learn-deep-learning-with-pytorch/blob/master/chapter3_NN/deep-nn.ipynb)
    - [Parameter initialization](https://github.com/SherlockLiao/code-of-learn-deep-learning-with-pytorch/blob/master/chapter3_NN/param_initialize.ipynb)
    - Optimization algorithms
        - [SGD](https://github.com/SherlockLiao/code-of-learn-deep-learning-with-pytorch/blob/master/chapter3_NN/optimizer/sgd.ipynb)
        - [Momentum](https://github.com/SherlockLiao/code-of-learn-deep-learning-with-pytorch/blob/master/chapter3_NN/optimizer/momentum.ipynb)
        - [Adagrad](https://github.com/SherlockLiao/code-of-learn-deep-learning-with-pytorch/blob/master/chapter3_NN/optimizer/adagrad.ipynb)
        - [RMSProp](https://github.com/SherlockLiao/code-of-learn-deep-learning-with-pytorch/blob/master/chapter3_NN/optimizer/rmsprop.ipynb)
        - [Adadelta](https://github.com/SherlockLiao/code-of-learn-deep-learning-with-pytorch/blob/master/chapter3_NN/optimizer/adadelta.ipynb)
        - [Adam](https://github.com/SherlockLiao/code-of-learn-deep-learning-with-pytorch/blob/master/chapter3_NN/optimizer/adam.ipynb)
- Chapter 4: Convolutional neural networks
    - [Convolution modules in PyTorch](https://github.com/SherlockLiao/code-of-learn-deep-learning-with-pytorch/blob/master/chapter4_CNN/basic_conv.ipynb)
    - [Batch normalization](https://github.com/SherlockLiao/code-of-learn-deep-learning-with-pytorch/blob/master/chapter4_CNN/batch-normalization.ipynb)
    - [Deep networks built from repeated blocks: VGG](https://github.com/SherlockLiao/code-of-learn-deep-learning-with-pytorch/blob/master/chapter4_CNN/vgg.ipynb)
    - [A richer network structure: GoogLeNet](https://github.com/SherlockLiao/code-of-learn-deep-learning-with-pytorch/blob/master/chapter4_CNN/googlenet.ipynb)
    - [Deep residual networks: ResNet](https://github.com/SherlockLiao/code-of-learn-deep-learning-with-pytorch/blob/master/chapter4_CNN/resnet.ipynb)
    - [Densely connected networks: DenseNet](https://github.com/SherlockLiao/code-of-learn-deep-learning-with-pytorch/blob/master/chapter4_CNN/densenet.ipynb)
    - Training convolutional networks better
        - [Data augmentation](https://github.com/SherlockLiao/code-of-learn-deep-learning-with-pytorch/blob/master/chapter4_CNN/data-augumentation.ipynb)
        - [Regularization](https://github.com/SherlockLiao/code-of-learn-deep-learning-with-pytorch/blob/master/chapter4_CNN/regularization.ipynb)
        - [Learning-rate decay](https://github.com/SherlockLiao/code-of-learn-deep-learning-with-pytorch/blob/master/chapter4_CNN/lr-decay.ipynb)
- Chapter 5: Recurrent neural networks
    - [RNN modules: LSTM and GRU](https://github.com/SherlockLiao/code-of-learn-deep-learning-with-pytorch/blob/master/chapter5_RNN/pytorch-rnn.ipynb)
    - [Image classification with RNNs](https://github.com/SherlockLiao/code-of-learn-deep-learning-with-pytorch/blob/master/chapter5_RNN/rnn-for-image.ipynb)
    - [Time-series analysis with RNNs](https://github.com/SherlockLiao/code-of-learn-deep-learning-with-pytorch/blob/master/chapter5_RNN/time-series/lstm-time-series.ipynb)
    - Applications in natural language processing:
        - [Word embeddings](https://github.com/SherlockLiao/code-of-learn-deep-learning-with-pytorch/blob/master/chapter5_RNN/nlp/word-embedding.ipynb)
        - [N-gram models](https://github.com/SherlockLiao/code-of-learn-deep-learning-with-pytorch/blob/master/chapter5_RNN/nlp/n-gram.ipynb)
        - [Part-of-speech tagging with a sequence LSTM](https://github.com/SherlockLiao/code-of-learn-deep-learning-with-pytorch/blob/master/chapter5_RNN/nlp/seq-lstm.ipynb)
- Chapter 6: Generative adversarial networks
    - [Autoencoders](https://github.com/SherlockLiao/code-of-learn-deep-learning-with-pytorch/blob/master/chapter6_GAN/autoencoder.ipynb)
    - [Variational autoencoders](https://github.com/SherlockLiao/code-of-learn-deep-learning-with-pytorch/blob/master/chapter6_GAN/vae.ipynb)
    - [Generative adversarial networks](https://github.com/SherlockLiao/code-of-learn-deep-learning-with-pytorch/blob/master/chapter6_GAN/gan.ipynb)
    - Generating faces with deep convolutional GANs (DCGANs)
- Chapter 7: Deep reinforcement learning
    - [Q-learning](https://github.com/SherlockLiao/code-of-learn-deep-learning-with-pytorch/blob/master/chapter7_RL/q-learning-intro.ipynb)
    - [OpenAI Gym](https://github.com/SherlockLiao/code-of-learn-deep-learning-with-pytorch/blob/master/chapter7_RL/open_ai_gym.ipynb)
    - [Deep Q-networks](https://github.com/SherlockLiao/code-of-learn-deep-learning-with-pytorch/blob/master/chapter7_RL/dqn.ipynb)
- Chapter 8: Advanced PyTorch
    - [Visualization with TensorBoard](https://github.com/SherlockLiao/code-of-learn-deep-learning-with-pytorch/blob/master/chapter8_PyTorch-Advances/tensorboard.ipynb)
    - [Flexible data loading](https://github.com/SherlockLiao/code-of-learn-deep-learning-with-pytorch/blob/master/chapter8_PyTorch-Advances/data-io.ipynb)
    - An introduction to autograd.Function
    - Data parallelism and multi-GPU training
    - Converting models to Caffe2 with ONNX
    - Deploying a trained neural network
    - Building your own PyTorch workflow

### Part 2: Applications of deep learning
- Chapter 9: Computer vision
    - [Fine-tuning: transfer learning by fine-tuning a pretrained network](https://github.com/SherlockLiao/code-of-learn-deep-learning-with-pytorch/blob/master/chapter9_Computer-Vision/fine_tune/)
    - A first taste of Kaggle: Dogs vs. Cats
    - [Semantic segmentation: pixel-level classification with FCN](https://github.com/SherlockLiao/code-of-learn-deep-learning-with-pytorch/tree/master/chapter9_Computer-Vision/segmentation)
    - Pixel-to-pixel generative adversarial networks
    - Neural transfer: style transfer with convolutional networks
    - Deep Dream: exploring the world as seen by a convolutional network
- Chapter 10: Natural language processing
    - [Text generation with a character-level RNN (Char RNN)](https://github.com/SherlockLiao/code-of-learn-deep-learning-with-pytorch/blob/master/chapter10_Natural-Language-Process/char_rnn/)
    - Image captioning
    - Machine translation with seq2seq
    - Text recognition with CNN + RNN + attention

## Other resources

For open courses and other learning resources on deep learning and machine learning, see my [repository](https://github.com/SherlockLiao/Roadmap-of-DL-and-ML).
You can also follow my [Zhihu column](https://zhuanlan.zhihu.com/c_94953554) and [blog](https://sherlockliao.github.io/), where I regularly share articles on deep learning.

PyTorch resources:

My GitHub repo [pytorch-beginner](https://github.com/SherlockLiao/pytorch-beginner)

[pytorch-tutorial](https://github.com/yunjey/pytorch-tutorial)

[the-incredible-pytorch](https://github.com/ritchieng/the-incredible-pytorch)

[practical-pytorch](https://github.com/spro/practical-pytorch)

[PyTorchZeroToAll](https://github.com/hunkim/PyTorchZeroToAll)

[Awesome-pytorch-list](https://github.com/bharathgs/Awesome-pytorch-list)



## Acknowledgement

Some parts of the second edition of this book draw on the Chinese MXNet Gluon tutorial, [Dive into Deep Learning with MXNet/Gluon](https://zh.gluon.ai/).

Gluon is a framework very similar to PyTorch: simple and easy to pick up. I recommend taking a look at the Gluon course as well. It is taught entirely in Chinese, with videos and coding exercises, and is arguably the most complete Chinese-language deep learning course.

--------------------------------------------------------------------------------
/aws.md:
--------------------------------------------------------------------------------
## Setting up an AWS cloud computing platform

This is a help document. We will walk through, step by step, how to request and use a CPU or GPU machine on AWS starting from zero.



### Registering an account and logging in

First, register an account on the [AWS website](https://aws.amazon.com/). A credit card must be linked, so if you do not have a Mastercard or VISA card you will need to get one; if the process is unfamiliar, search for "how to register an AWS account".

Then go to the console, shown in the image below, and click "EC2".

![](https://ws1.sinaimg.cn/large/006tNc79gy1fo7xn33e4cj31kw0wo11u.jpg)


This brings you to the next screen.


![](https://ws1.sinaimg.cn/large/006tNc79gy1fo7xoznbz3j31kw0j7dmu.jpg)


Only three things on this screen matter. First, the region in the top-right corner: choose a region close to you. Within Asia-Pacific you can choose Korea, Japan, Singapore, or Mumbai. Note that instance prices differ by region; if you have a VPN, Oregon is recommended, because it is the cheapest region, four to five times cheaper than Asia-Pacific. Second, the "Limits" box on the left: if you are requesting a CPU instance you can ignore it, but for a GPU instance you must click "Limits" and submit a request, because GPU instances incur charges and Amazon needs to confirm this with you, which usually takes two to three business days.

After that you can launch an instance by clicking the button highlighted in the middle.


### Requesting and launching an instance


![](https://ws2.sinaimg.cn/large/006tNc79gy1fo7xpjsxwlj31kw0q8dp2.jpg)


On this screen you choose an operating system; we generally choose a Linux system. There are also many community AMIs, that is, systems configured by other people, which you can ignore for now. We usually pick one of the two options highlighted above: the first is a bare system with nothing installed, and the second is a deep learning system with CUDA and many frameworks preinstalled. You can choose the latter, but it will need more disk space.



After selecting, you reach the screen below.

![](https://ws4.sinaimg.cn/large/006tNc79gy1fo7xqq958cj31kw0ki112.jpg)


Here you choose the instance type. Newly registered users can use a t2.micro instance free for a year, but it has no GPU. To use a GPU instance, select "GPU compute" from the instance-type filter above to jump straight to the screen below.


![](https://ws4.sinaimg.cn/large/006tNc79gy1fo7xr45wmkj31kw0nktgt.jpg)

There are several instances here. We usually pick the first one, p2.xlarge, which has one Nvidia K80 GPU; further down there are 8-GPU and 16-GPU versions, at correspondingly higher cost. Below those there is also p3.2xlarge, which contains a newer GPU and is much faster, though also more expensive. One thing to note: p2.xlarge can only use CUDA 8, while p3.2xlarge can use CUDA 9. After choosing, continue to the instance configuration step.


![](https://ws2.sinaimg.cn/large/006tNc79gy1fo7xrl5bi9j31kw08j77v.jpg)

Here we only care about the size of the root volume, that is, the disk of the cloud machine: we need to store datasets and install frameworks, so it should be reasonably large. New users get 30 GB of storage free; we can set it to 40 GB, which is fairly cheap. Then click "Review and Launch".



On the next screen, click "Launch" in the bottom-right corner to start the instance.


![](https://ws1.sinaimg.cn/large/006tNc79gy1fo7xs8wl8hj31kw0sp13n.jpg)

A dialog box like the one below will then pop up.


![9.png](http://upload-images.jianshu.io/upload_images/3623720-4a6cd6ff1321e5fb.png?imageMogr2/auto-orient/strip%7CimageView2/2/w/1240)


Here you need to create a key pair: AWS no longer supports password login, so you must log in with a key. Enter a name in the name field and click **Download Key Pair**.



You will then see your instance starting. Click the link highlighted in the image below to go to the instance list.


![](https://ws3.sinaimg.cn/large/006tNc79gy1fo7xtcjn2fj31kw0c177o.jpg)



On the following screen you can see the instance booting. Right-click the instance row and click "Connect".

![](https://ws1.sinaimg.cn/large/006tNc79gy1fo7xtys9mej31kw0iu422.jpg)




The window below will then appear; follow its instructions. On Windows you need PuTTY to connect; my machine is a Mac, so I did not try that path. On a Mac, open a terminal, change into the directory where you saved the key, and enter `chmod 400 yourkey.pem` (my key here is `liao.pem`). This command is only needed before the first connection. Then connect to your remote Linux server with the command shown below.


![12.png](http://upload-images.jianshu.io/upload_images/3623720-1c476e3770c0eb63.png?imageMogr2/auto-orient/strip%7CimageView2/2/w/1240)




For example, this is what I typed in my terminal. The first connection asks a question; answer yes.

![13.png](http://upload-images.jianshu.io/upload_images/3623720-825156b98dba8b84.png?imageMogr2/auto-orient/strip%7CimageView2/2/w/1240)



We are now inside the system. As you can see, the highlighted prompt indicates the remote server we are connected to.

![14.png](http://upload-images.jianshu.io/upload_images/3623720-8a19f59377d88055.png?imageMogr2/auto-orient/strip%7CimageView2/2/w/1240)




### Installing Anaconda

Below is a quick demonstration of installing Anaconda in the remote environment; it requires a little familiarity with bash. First, on your own computer, go to the [Anaconda website](https://www.anaconda.com/download/#linux), right-click "Download", and copy the link address.

![15.png](http://upload-images.jianshu.io/upload_images/3623720-54ba5def9981eb27.png?imageMogr2/auto-orient/strip%7CimageView2/2/w/1240)




Then, on the remote server you just connected to, enter

```bash
wget https://repo.continuum.io/archive/Anaconda3-5.0.1-Linux-x86_64.sh
```

where the URL is the address you just copied. Press Enter and the download starts. The result is a file with the `.sh` suffix; enter `sudo sh <filename>.sh` to start the installation.


![16.png](http://upload-images.jianshu.io/upload_images/3623720-709e1ab46eb204a2.png?imageMogr2/auto-orient/strip%7CimageView2/2/w/1240)




After installation, configure the environment variables with the following commands.

```bash
echo 'export PATH="~/anaconda3/bin:$PATH"' >> ~/.bashrc

source ~/.bashrc
```

This completes the remote Anaconda installation.



### Installing CUDA

[Note] CPU-only instances can skip this step.

Go to the Nvidia website to download and install CUDA. Choose the correct version and get the download URL.

[Note] The official default is currently CUDA 9. If you chose p2.xlarge, you need to install CUDA 8 instead, which you can download and install with the following commands:

```bash
wget https://developer.nvidia.com/compute/cuda/8.0/Prod2/local_installers/cuda_8.0.61_375.26_linux-run
sudo sh cuda_8.0.61_375.26_linux-run
```



![](https://github.com/mli/gluon-tutorials-zh/blob/master/img/cuda.png?raw=true)

For CUDA 9, download it with `wget` and run the installer in the same way:

```bash
wget https://developer.nvidia.com/compute/cuda/9.1/Prod/local_installers/cuda_9.1.85_387.26_linux
sudo sh cuda_9.1.85_387.26_linux
```

The installer asks a few questions:

```
accept/decline/quit: accept
Install NVIDIA Accelerated Graphics Driver for Linux-x86_64 375.26?
(y)es/(n)o/(q)uit: y
Do you want to install the OpenGL libraries?
(y)es/(n)o/(q)uit [ default is yes ]: y
Do you want to run nvidia-xconfig?
(y)es/(n)o/(q)uit [ default is no ]: n
Install the CUDA 8.0 Toolkit?
(y)es/(n)o/(q)uit: y
Enter Toolkit Location
[ default is /usr/local/cuda-8.0 ]:
Do you want to install a symbolic link at /usr/local/cuda?
(y)es/(n)o/(q)uit: y
Install the CUDA 8.0 Samples?
(y)es/(n)o/(q)uit: n
```

After installation, run

```bash
nvidia-smi
```

and you can see the instance's GPU. Finally, add CUDA to the library path so that libraries installed later can find it.

CUDA 8:

```bash
echo "export LD_LIBRARY_PATH=\${LD_LIBRARY_PATH}:/usr/local/cuda-8.0/lib64" >>.bashrc
```

CUDA 9:

```bash
echo "export LD_LIBRARY_PATH=\${LD_LIBRARY_PATH}:/usr/local/cuda-9.1/lib64" >>.bashrc
```

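At this point it is worth checking that PyTorch itself can see the GPU, not just `nvidia-smi`. A minimal sketch, assuming PyTorch has already been installed into the Anaconda environment set up above:

```python
import torch

# True only if the driver, the CUDA toolkit, and a GPU are all visible to PyTorch.
print('CUDA available:', torch.cuda.is_available())

if torch.cuda.is_available():
    # Name of the first GPU, e.g. a Tesla-class card on a p2.xlarge instance.
    print('device 0:', torch.cuda.get_device_name(0))
    # A tiny tensor operation on the GPU as an end-to-end smoke test.
    x = torch.rand(2, 3).cuda()
    print((x + x).cpu())
```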
### Running Jupyter notebook

Next, run Jupyter notebook in the remote terminal.

```bash
jupyter notebook
```

If it works, you will see output similar to this:

![](https://github.com/mli/gluon-tutorials-zh/blob/master/img/jupyter.png?raw=true)

Because our instance does not expose port 8888, we forward it to the local machine over ssh:

```bash
ssh -L8888:localhost:8888 ubuntu@your-ip.amazonaws.com
```

Then copy the URL from the Jupyter log into your local browser.

[Note] If a Jupyter notebook is already running locally, port 8888 may be taken. Either shut down the local Jupyter or change the port mapping. For example, with AWS using the default port 8888, we can forward it to local port 8889 over ssh:

```bash
ssh -N -f -L localhost:8889:localhost:8888 ubuntu@your-ip.amazonaws.com
```

Then open localhost:8889 in the local browser; it will prompt for a token. Copy and paste the token value from the Jupyter log on AWS (for example, `...localhost:8888/?token=<token value>` in the screenshot above).



### Afterwards

Because cloud services bill by time, we usually shut the instance down when not using it and start it again the next time.



![17.png](http://upload-images.jianshu.io/upload_images/3623720-6e4fb6cb2d39d66f.png?imageMogr2/auto-orient/strip%7CimageView2/2/w/1240)


If you Stop the instance, you can resume it directly next time, but the disk space is still billed. If you Terminate it, we usually create an image (AMI) of the operating system first and start from that image next time (the tutorial above used an Ubuntu 16.04 AMI), so the whole setup does not have to be repeated.


![18.png](http://upload-images.jianshu.io/upload_images/3623720-e4aac81d991e1a28.png?imageMogr2/auto-orient/strip%7CimageView2/2/w/1240)




**The cloud is convenient but not cheap, so always remember to shut down GPU instances after use.**



That is the whole setup process. If you run into problems, feel free to open an issue.

--------------------------------------------------------------------------------
/chapter10_Natural-Language-Process/char_rnn/README.md:
--------------------------------------------------------------------------------
# Char-RNN-PyTorch
Text generation with a character-level RNN, implemented in PyTorch. [Gluon implementation](https://github.com/SherlockLiao/Char-RNN-Gluon)

## Requirements
[PyTorch 0.3](http://pytorch.org/)

[MxTorch](https://github.com/SherlockLiao/mxtorch)

[tensorboardX](https://github.com/lanpa/tensorboard-pytorch)

Install PyTorch following the official website, download mxtorch and put it in the project root, and install tensorboardX for TensorBoard visualization:

```bash
\Char-RNN-PyTorch
    \mxtorch
    \data
    \dataset
    \models
    config.py
    main.py
```



### Training the model

All configuration lives in config.py. Train the model with:

```bash
python main.py train
```

You can also override the configuration from the terminal:

```bash
python main.py train \
    --txt='./dataset/poetry.txt' \  # txt file used for training
    --batch=128 \                   # batch_size
    --max_epoch=300 \
    --len=30 \                      # sequence length fed to the RNN
    --max_vocab=5000 \              # maximum number of characters
    --embed_dim=512 \               # dimension of the word vectors
    --hidden_size=512 \             # output dimension of the network
    --num_layers=2 \                # number of RNN layers
    --dropout=0.5
```

To generate text with a trained network, use:

```bash
python main.py predict \
    --begin='天青色等烟雨' \  # seed for generation; a single character or a whole passage
    --predict_len=100 \      # desired length of the generated text
    --load_model='./checkpoints/CharRNN_best_model.pth'  # path of the trained model to load
```

## Result
Training on the classical-poetry dataset gives results like:

```bash
天青色等烟雨翩 黄望堪魄弦夜 逐奏文明际天月辉 豪天明月天趣 天外何山重满 遥天明上天 心空游无拂天外空寂室叨
```

Training on Jay Chou's lyrics gives results like:

```bash
这感觉得可能 我这玻童来 城堡药比生对这些年风天 脚剧飘逐在尘里里步的路 麦缘日下一经经 听觉得远回白择
```
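Before training, it can save time to verify that the dependencies listed in the Requirements section are importable. A small sketch; `mxtorch` must sit in the project root as described above, and the imports mirror what main.py actually uses:

```python
# Quick dependency check for the Char-RNN project.
import torch
import tensorboardX                   # TensorBoard logging backend
import fire                           # CLI dispatch used by main.py
from mxtorch.trainer import Trainer   # mxtorch must live in the project root

print('torch', torch.__version__)     # the project targets the PyTorch 0.3 era
```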
--------------------------------------------------------------------------------
/chapter10_Natural-Language-Process/char_rnn/config.py:
--------------------------------------------------------------------------------
# encoding: utf-8
"""
@author: xyliao
@contact: xyliao1993@qq.com
"""
import warnings
from pprint import pprint


class DefaultConfig(object):
    model = 'CharRNN'

    # Dataset.
    txt = './dataset/poetry.txt'
    len = 20
    max_vocab = 8000
    begin = '天青色等烟雨'  # begin word of text
    predict_len = 50  # predict length

    # Store result and save models.
    result_file = 'result.txt'
    save_file = './checkpoints/'
    save_freq = 30  # save model every N epochs
    save_best = True

    # Predict mode and generate contexts
    load_model = './checkpoints/CharRNN_best_model.pth'
    write_file = './write_context.txt'

    # Visualization parameters.
    vis_dir = './vis/'
    plot_freq = 100  # plot in tensorboard every N iterations

    # Model parameters.
    embed_dim = 512
    hidden_size = 512
    num_layers = 2
    dropout = 0.5

    # Model hyperparameters.
    use_gpu = True  # use GPU or not
    ctx = 0  # running on which cuda device
    batch_size = 128  # batch size
    num_workers = 4  # how many workers for loading data
    max_epoch = 200
    lr = 1e-3  # initial learning rate
    weight_decay = 1e-4

    def _parse(self, kwargs):
        for k, v in kwargs.items():
            if not hasattr(self, k):
                warnings.warn("Warning: opt has no attribute %s" % k)
            setattr(self, k, v)

        print('=========user config==========')
        pprint(self._state_dict())
        print('============end===============')

    def _state_dict(self):
        return {k: getattr(self, k) for k, _ in DefaultConfig.__dict__.items()
                if not k.startswith('_')}


opt = DefaultConfig()
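The `DefaultConfig` pattern above keeps every hyperparameter on one object, and `_parse` lets arbitrary keyword arguments override any attribute, which is exactly how the command-line flags reach the code. A minimal usage sketch (the override values are made up for illustration):

```python
from config import opt

# This is what `python main.py train --lr=1e-4 --batch_size=64` does via fire:
# known keys are replaced, unknown keys only trigger a warning.
opt._parse({'lr': 1e-4, 'batch_size': 64})

print(opt.lr)          # 0.0001
print(opt.batch_size)  # 64
```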
--------------------------------------------------------------------------------
/chapter10_Natural-Language-Process/char_rnn/data/__init__.py:
--------------------------------------------------------------------------------
# encoding: utf-8
"""
@author: xyliao
@contact: xyliao1993@qq.com
"""
from .dataset import TextConverter, TextDataset

--------------------------------------------------------------------------------
/chapter10_Natural-Language-Process/char_rnn/data/dataset.py:
--------------------------------------------------------------------------------
# encoding: utf-8
"""
@author: xyliao
@contact: xyliao1993@qq.com

This file is utils to convert text to index and create dataset to PyTorch training model.
"""

import numpy as np
import torch


class TextConverter(object):
    def __init__(self, text_path, max_vocab=5000):
        """Construct a text index converter.

        Args:
            text_path: txt file path.
            max_vocab: maximum number of words.
        """

        with open(text_path, 'r') as f:
            text = f.read()
        text = text.replace('\n', ' ').replace('\r', ' ').replace(',', ' ').replace('。', ' ')
        vocab = set(text)
        # If the number of words is larger than limit, clip the words with minimum frequency.
        vocab_count = {}
        for word in vocab:
            vocab_count[word] = 0
        for word in text:
            vocab_count[word] += 1
        vocab_count_list = []
        for word in vocab_count:
            vocab_count_list.append((word, vocab_count[word]))
        vocab_count_list.sort(key=lambda x: x[1], reverse=True)
        if len(vocab_count_list) > max_vocab:
            vocab_count_list = vocab_count_list[:max_vocab]
        vocab = [x[0] for x in vocab_count_list]
        self.vocab = vocab

        self.word_to_int_table = {c: i for i, c in enumerate(self.vocab)}
        self.int_to_word_table = dict(enumerate(self.vocab))

    @property
    def vocab_size(self):
        return len(self.vocab) + 1

    def word_to_int(self, word):
        if word in self.word_to_int_table:
            return self.word_to_int_table[word]
        else:
            return len(self.vocab)

    def int_to_word(self, index):
        if index == len(self.vocab):
            return '<unk>'
        elif index < len(self.vocab):
            return self.int_to_word_table[index]
        else:
            raise Exception('Unknown index!')

    def text_to_arr(self, text):
        arr = []
        for word in text:
            arr.append(self.word_to_int(word))
        return np.array(arr)

    def arr_to_text(self, arr):
        words = []
        for index in arr:
            words.append(self.int_to_word(index))
        return "".join(words)


class TextDataset(object):
    def __init__(self, text_path, n_step, arr_to_idx):

        with open(text_path, 'r') as f:
            text = f.read()
        text = text.replace('\n', ' ').replace('\r', ' ').replace(',', ' ').replace('。', ' ')
        num_seq = int(len(text) / n_step)
        self.num_seq = num_seq
        self.n_step = n_step
        # Clip more than maximum length.
        text = text[:num_seq * n_step]
        arr = arr_to_idx(text)
        arr = arr.reshape((num_seq, -1))
        self.arr = torch.from_numpy(arr)

    def __getitem__(self, item):
        x = self.arr[item, :]
        y = torch.zeros(x.shape)
        y[:-1], y[-1] = x[1:], x[0]
        return x, y

    def __len__(self):
        return self.num_seq
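Putting the two classes together: `TextConverter` builds the vocabulary and the word/index mapping, while `TextDataset` chops the corpus into fixed-length sequences whose target is the input shifted by one character. A usage sketch; the file path follows the default config and the batch size is illustrative:

```python
from torch.utils.data import DataLoader

from data import TextConverter, TextDataset

convert = TextConverter('./dataset/poetry.txt', max_vocab=5000)
dataset = TextDataset('./dataset/poetry.txt', n_step=20,
                      arr_to_idx=convert.text_to_arr)

loader = DataLoader(dataset, batch_size=128, shuffle=True)
x, y = next(iter(loader))
# x: (128, 20) character indices; y is x rotated left by one position.
print(x.shape, y.shape, convert.vocab_size)
```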
--------------------------------------------------------------------------------
/chapter10_Natural-Language-Process/char_rnn/main.py:
--------------------------------------------------------------------------------
# encoding: utf-8
"""
@author: xyliao
@contact: xyliao1993@qq.com
"""
from copy import deepcopy

import numpy as np
import torch
from mxtorch import meter
from mxtorch.trainer import Trainer, ScheduledOptim
from torch import nn
from torch.autograd import Variable
from torch.utils.data import DataLoader
from tqdm import tqdm

import models
from config import opt
from data import TextDataset, TextConverter


def get_data(convert):
    dataset = TextDataset(opt.txt, opt.len, convert.text_to_arr)
    return DataLoader(dataset, opt.batch_size, shuffle=True, num_workers=opt.num_workers)


def get_model(convert):
    model = getattr(models, opt.model)(convert.vocab_size,
                                       opt.embed_dim,
                                       opt.hidden_size,
                                       opt.num_layers,
                                       opt.dropout)
    if opt.use_gpu:
        model = model.cuda()
    return model


def get_loss(score, label):
    return nn.CrossEntropyLoss()(score, label.view(-1))


def get_optimizer(model):
    optimizer = torch.optim.Adam(model.parameters(), lr=opt.lr)
    return ScheduledOptim(optimizer)


def pick_top_n(preds, top_n=5):
    top_pred_prob, top_pred_label = torch.topk(preds, top_n, 1)
    top_pred_prob /= torch.sum(top_pred_prob)
    top_pred_prob = top_pred_prob.squeeze(0).cpu().numpy()
    top_pred_label = top_pred_label.squeeze(0).cpu().numpy()
    c = np.random.choice(top_pred_label, size=1, p=top_pred_prob)
    return c


class CharRNNTrainer(Trainer):
    def __init__(self, convert):
        self.convert = convert

        model = get_model(convert)
        criterion = get_loss
        optimizer = get_optimizer(model)
        super().__init__(model, criterion, optimizer)
        self.config += ('text: ' + opt.txt + '\n' + 'train text length: ' + str(opt.len) + '\n')
        self.config += ('predict text length: ' + str(opt.predict_len) + '\n')

        self.metric_meter['loss'] = meter.AverageValueMeter()

    def train(self, kwargs):
        self.reset_meter()
        self.model.train()
        train_data = kwargs['train_data']
        for data in tqdm(train_data):
            x, y = data
            y = y.long()
            if opt.use_gpu:
                x = x.cuda()
                y = y.cuda()
            x, y = Variable(x), Variable(y)

            # Forward.
            score, _ = self.model(x)
            loss = self.criterion(score, y)

            # Backward.
            self.optimizer.zero_grad()
            loss.backward()
            # Clip gradient.
            nn.utils.clip_grad_norm(self.model.parameters(), 5)
            self.optimizer.step()

            self.metric_meter['loss'].add(loss.data[0])

            # Update to tensorboard.
            if (self.n_iter + 1) % opt.plot_freq == 0:
                self.writer.add_scalar('perplexity', np.exp(self.metric_meter['loss'].value()[0]), self.n_plot)
                self.n_plot += 1

            self.n_iter += 1

        # Log the train metrics to dict.
        self.metric_log['perplexity'] = np.exp(self.metric_meter['loss'].value()[0])

    def test(self, kwargs):
        """Set beginning words and predicted length, using model to generate texts.

        Returns:
            predicted generating text
        """
        self.model.eval()
        begin = np.array([i for i in kwargs['begin']])
        begin = np.random.choice(begin, size=1)
        text_len = kwargs['predict_len']
        samples = [self.convert.word_to_int(c) for c in begin]
        input_txt = torch.LongTensor(samples)[None]
        if opt.use_gpu:
            input_txt = input_txt.cuda()
        input_txt = Variable(input_txt)
        _, init_state = self.model(input_txt)
        result = samples
        model_input = input_txt[:, -1][:, None]
        for i in range(text_len):
            out, init_state = self.model(model_input, init_state)
            pred = pick_top_n(out.data)
            model_input = Variable(torch.LongTensor(pred))[None]
            if opt.use_gpu:
                model_input = model_input.cuda()
            result.append(pred[0])

        # Update generating txt to tensorboard.
        self.writer.add_text('text', self.convert.arr_to_text(result), self.n_plot)
        self.n_plot += 1
        print(self.convert.arr_to_text(result))

    def predict(self, begin, predict_len):
        self.model.eval()
        samples = [self.convert.word_to_int(c) for c in begin]
        input_txt = torch.LongTensor(samples)[None]
        if opt.use_gpu:
            input_txt = input_txt.cuda()
        input_txt = Variable(input_txt)
        _, init_state = self.model(input_txt)
        result = samples
        model_input = input_txt[:, -1][:, None]
        for i in range(predict_len):
            out, init_state = self.model(model_input, init_state)
            pred = pick_top_n(out.data)
            model_input = Variable(torch.LongTensor(pred))[None]
            if opt.use_gpu:
                model_input = model_input.cuda()
            result.append(pred[0])
        text = self.convert.arr_to_text(result)
        print('Generate text is: {}'.format(text))
        with open(opt.write_file, 'a') as f:
            f.write(text)

    def load_state_dict(self, checkpoints):
        self.model.load_state_dict(torch.load(checkpoints))

    def get_best_model(self):
        if self.metric_log['perplexity'] < self.best_metric:
            self.best_model = deepcopy(self.model.state_dict())
            self.best_metric = self.metric_log['perplexity']


def train(**kwargs):
    opt._parse(kwargs)
    torch.cuda.set_device(opt.ctx)
    convert = TextConverter(opt.txt, max_vocab=opt.max_vocab)
    train_data = get_data(convert)
    char_rnn_trainer = CharRNNTrainer(convert)
    char_rnn_trainer.fit(train_data=train_data,
                         epochs=opt.max_epoch,
                         begin=opt.begin,
                         predict_len=opt.predict_len)


def predict(**kwargs):
    opt._parse(kwargs)
    torch.cuda.set_device(opt.ctx)
    convert = TextConverter(opt.txt, max_vocab=opt.max_vocab)
    char_rnn_trainer = CharRNNTrainer(convert)
    char_rnn_trainer.load_state_dict(opt.load_model)
    char_rnn_trainer.predict(opt.begin, opt.predict_len)


if __name__ == '__main__':
    import fire

    fire.Fire()
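The `pick_top_n` helper above keeps only the `top_n` most probable characters, renormalizes their probabilities, and samples one index; this is what keeps generation from always taking the argmax. A self-contained illustration of the same logic on a dummy distribution (shapes follow the model's `(1, vocab_size)` output; the vocabulary size here is made up):

```python
import numpy as np
import torch
import torch.nn.functional as F

# Fake model output for a 10-character vocabulary, turned into probabilities.
preds = F.softmax(torch.randn(1, 10), dim=1)

# Same steps as pick_top_n: keep the 5 largest values, renormalize, sample.
top_prob, top_label = torch.topk(preds, 5, 1)
top_prob /= torch.sum(top_prob)
idx = np.random.choice(top_label.squeeze(0).numpy(),
                       size=1, p=top_prob.squeeze(0).numpy())
print(idx)  # a length-1 array holding one sampled character index
```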
--------------------------------------------------------------------------------
/chapter10_Natural-Language-Process/char_rnn/models/__init__.py:
--------------------------------------------------------------------------------
# encoding: utf-8
"""
@author: xyliao
@contact: xyliao1993@qq.com
"""
from .char_rnn import CharRNN

--------------------------------------------------------------------------------
/chapter10_Natural-Language-Process/char_rnn/models/char_rnn.py:
--------------------------------------------------------------------------------
# encoding: utf-8
"""
@author: xyliao
@contact: xyliao1993@qq.com
"""
import torch
from torch import nn
from torch.autograd import Variable

from config import opt


class CharRNN(nn.Module):
    def __init__(self, num_classes, embed_dim, hidden_size, num_layers,
                 dropout):
        super().__init__()
        self.num_layers = num_layers
        self.hidden_size = hidden_size

        self.word_to_vec = nn.Embedding(num_classes, embed_dim)
        # Pass dropout as a keyword argument: the fourth positional
        # parameter of nn.GRU is `bias`, not `dropout`.
        self.rnn = nn.GRU(embed_dim, hidden_size, num_layers, dropout=dropout)
        self.project = nn.Linear(hidden_size, num_classes)

    def forward(self, x, hs=None):
        batch = x.shape[0]
        if hs is None:
            hs = Variable(
                torch.zeros(self.num_layers, batch, self.hidden_size))
            if opt.use_gpu:
                hs = hs.cuda()
        word_embed = self.word_to_vec(x)  # (batch, len, embed)
        word_embed = word_embed.permute(1, 0, 2)  # (len, batch, embed)
        out, h0 = self.rnn(word_embed, hs)  # (len, batch, hidden)
        le, mb, hd = out.shape
        out = out.view(le * mb, hd)
        out = self.project(out)
        out = out.view(le, mb, -1)
        out = out.permute(1, 0, 2).contiguous()  # (batch, len, hidden)
        return out.view(-1, out.shape[2]), h0
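To see how the permutes in `forward` fit together, here is a small shape walkthrough with made-up sizes. One caveat: the model consults `opt.use_gpu` from config.py, which defaults to True, so run this on a GPU machine or set `use_gpu = False` first.

```python
import torch
from torch.autograd import Variable

from models import CharRNN

# Tiny, made-up hyperparameters purely for inspecting shapes.
model = CharRNN(num_classes=100, embed_dim=16, hidden_size=32,
                num_layers=2, dropout=0.5)

x = Variable(torch.zeros(4, 10).long())  # 4 sequences of 10 character indices
out, h0 = model(x)
print(out.size())  # (4 * 10, 100): one score vector per character position
print(h0.size())   # (2, 4, 32): final hidden state of each GRU layer
```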
--------------------------------------------------------------------------------
/chapter10_Natural-Language-Process/seq2seq-translation/README.md:
--------------------------------------------------------------------------------
# seq2seq-translation
A PyTorch implementation of neural machine translation

--------------------------------------------------------------------------------
/chapter10_Natural-Language-Process/seq2seq-translation/dataset.py:
--------------------------------------------------------------------------------
import random
import re
import string
import unicodedata

import torch
from torch.utils.data import Dataset

SOS_token = 0
EOS_token = 1
MAX_LENGTH = 10


class Lang(object):
    def __init__(self, name):
        self.name = name
        self.word2index = {}
        self.word2count = {}
        self.index2word = {0: "SOS", 1: "EOS"}
        self.n_words = 2  # Count SOS and EOS

    def addSentence(self, sentence):
        for word in sentence.split(' '):
            self.addWord(word)

    def addWord(self, word):
        if word not in self.word2index:
            self.word2index[word] = self.n_words
            self.word2count[word] = 1
            self.index2word[self.n_words] = word
            self.n_words += 1
        else:
            self.word2count[word] += 1


def unicodeToAscii(s):
    return ''.join(
        c for c in unicodedata.normalize('NFD', s)
        if unicodedata.category(c) != 'Mn')


# Lowercase, trim, and remove non-letter characters


def normalizeString(s):
    s = unicodeToAscii(s.lower().strip())
    s = re.sub(r"([.!?])", r" \1", s)
    s = re.sub(r"[^a-zA-Z.!?]+", r" ", s)
    return s


def readLangs(lang1, lang2, reverse=False):
    print("Reading lines...")

    # Read the file and split into lines
    lines = open('data/%s-%s.txt' % (lang1, lang2), encoding='utf-8').\
        read().strip().split('\n')

    # Split every line into pairs and normalize
    pairs = [[normalizeString(s) for s in l.split('\t')] for l in lines]

    # Reverse pairs, make Lang instances
    if reverse:
        pairs = [list(reversed(p)) for p in pairs]
        input_lang = Lang(lang2)
        output_lang = Lang(lang1)
    else:
        input_lang = Lang(lang1)
        output_lang = Lang(lang2)

    return input_lang, output_lang, pairs


eng_prefixes = ("i am ", "i m ", "he is", "he s ", "she is", "she s",
                "you are", "you re ", "we are", "we re ", "they are",
                "they re ")


def filterPair(p):
    return len(p[0].split(' ')) < MAX_LENGTH and \
        len(p[1].split(' ')) < MAX_LENGTH and \
        p[1].startswith(eng_prefixes)


def filterPairs(pairs):
    return [pair for pair in pairs if filterPair(pair)]


def prepareData(lang1, lang2, reverse=False):
    input_lang, output_lang, pairs = readLangs(lang1, lang2, reverse)
    print("Read %s sentence pairs" % len(pairs))
    pairs = filterPairs(pairs)
    print("Trimmed to %s sentence pairs" % len(pairs))
    print("Counting words...")
    for pair in pairs:
        input_lang.addSentence(pair[0])
        output_lang.addSentence(pair[1])
    print("Counted words:")
    print(input_lang.name, input_lang.n_words)
    print(output_lang.name, output_lang.n_words)
    print(random.choice(pairs))
    return input_lang, output_lang, pairs


def indexesFromSentence(lang, sentence):
    return [lang.word2index[word] for word in sentence.split(' ')]


def tensorFromSentence(lang, sentence):
    indexes = indexesFromSentence(lang, sentence)
    indexes.append(EOS_token)
    result = torch.LongTensor(indexes)
    return result


def tensorFromPair(input_lang, output_lang, pair):
    input_tensor = tensorFromSentence(input_lang, pair[0])
    target_tensor = tensorFromSentence(output_lang, pair[1])
    return input_tensor, target_tensor


class TextDataset(Dataset):
    def __init__(self, dataload=prepareData, lang=['eng', 'fra']):
        self.input_lang, self.output_lang, self.pairs = dataload(
            lang[0], lang[1], reverse=True)
        self.input_lang_words = self.input_lang.n_words
        self.output_lang_words = self.output_lang.n_words

    def __getitem__(self, index):
        return tensorFromPair(self.input_lang, self.output_lang,
                              self.pairs[index])

    def __len__(self):
        return len(self.pairs)
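The `Lang` class above just accumulates a word-to-index vocabulary, and `prepareData` filters the corpus down to short sentence pairs whose English side starts with the listed prefixes. A usage sketch, assuming the tutorial's `data/eng-fra.txt` file is present:

```python
from dataset import prepareData, tensorFromSentence

# reverse=True makes French the input language and English the output.
input_lang, output_lang, pairs = prepareData('eng', 'fra', reverse=True)

fra, eng = pairs[0]
print(fra, '->', eng)

# Sentences become 1-D LongTensors of word indices terminated by EOS (index 1).
print(tensorFromSentence(input_lang, fra))
```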
--------------------------------------------------------------------------------
/chapter10_Natural-Language-Process/seq2seq-translation/evaluate.py:
--------------------------------------------------------------------------------
import random

import torch
from torch.autograd import Variable

from dataset import TextDataset
from model.seq2seq import AttnDecoderRNN, DecoderRNN, EncoderRNN
import matplotlib.pyplot as plt

SOS_token = 0
EOS_token = 1
MAX_LENGTH = 10
use_attn = True
use_cuda = torch.cuda.is_available()
lang_dataset = TextDataset()
print('*' * 10)


def evaluate(encoder, decoder, in_lang, max_length=MAX_LENGTH):
    if use_cuda:
        in_lang = in_lang.cuda()
    input_variable = Variable(in_lang)
    input_variable = input_variable.unsqueeze(0)
    input_length = input_variable.size(1)
    encoder_hidden = encoder.initHidden()

    encoder_outputs = Variable(torch.zeros(max_length, encoder.hidden_size))
    encoder_outputs = encoder_outputs.cuda() if use_cuda else encoder_outputs

    for ei in range(input_length):
        encoder_output, encoder_hidden = encoder(input_variable[:, ei],
                                                 encoder_hidden)
        encoder_outputs[ei] = encoder_output[0][0]

    decoder_input = Variable(torch.LongTensor([[SOS_token]]))  # SOS
    decoder_input = decoder_input.cuda() if use_cuda else decoder_input

    decoder_hidden = encoder_hidden

    decoded_words = []
    decoder_attentions = torch.zeros(max_length, max_length)

    if use_attn:
        for di in range(max_length):
            decoder_output, decoder_hidden, decoder_attention = decoder(
                decoder_input, decoder_hidden, encoder_outputs)
            decoder_attentions[di] = decoder_attention.data
            topv, topi = decoder_output.data.topk(1)
            ni = topi[0][0]
            if ni == EOS_token:
                decoded_words.append('<EOS>')
                break
            else:
                decoded_words.append(lang_dataset.output_lang.index2word[ni])

            decoder_input = Variable(torch.LongTensor([[ni]]))
            decoder_input = decoder_input.cuda() if use_cuda else decoder_input
    else:
        for di in range(max_length):
            decoder_output, decoder_hidden = decoder(decoder_input,
                                                     decoder_hidden)
            topv, topi = decoder_output.data.topk(1)
            ni = topi[0][0]
            if ni == EOS_token:
                decoded_words.append('<EOS>')
                break
            else:
                decoded_words.append(lang_dataset.output_lang.index2word[ni])

            decoder_input = Variable(torch.LongTensor([[ni]]))
            decoder_input = decoder_input.cuda() if use_cuda else decoder_input
    if use_attn:
        return decoded_words, decoder_attentions[:di + 1]
    else:
        return decoded_words


def evaluateRandomly(encoder, decoder, n=10):
    for i in range(n):
        pair_idx = random.choice(list(range(len(lang_dataset))))
        pair = lang_dataset.pairs[pair_idx]
        in_lang, out_lang = lang_dataset[pair_idx]
        print('>', pair[0])
        print('=', pair[1])
        if use_attn:
            output_words, attentions = evaluate(encoder, decoder, in_lang)
        else:
            output_words = evaluate(encoder, decoder, in_lang)
        output_sentence = ' '.join(output_words)
        print('<', output_sentence)
        print('')


input_size = lang_dataset.input_lang_words
hidden_size = 256
output_size = lang_dataset.output_lang_words

encoder = EncoderRNN(input_size, hidden_size)
encoder.load_state_dict(torch.load('./encoder.pth'))
if use_attn:
    decoder = AttnDecoderRNN(hidden_size, output_size, n_layers=2)
    decoder.load_state_dict(torch.load('./attn_decoder.pth'))
else:
    decoder = DecoderRNN(hidden_size, output_size, n_layers=2)
    decoder.load_state_dict(torch.load('./decoder.pth'))

if use_cuda:
    encoder = encoder.cuda()
    decoder = decoder.cuda()

evaluateRandomly(encoder, decoder)

if use_attn:
    pair_idx = random.choice(list(range(len(lang_dataset))))
    pairs = lang_dataset.pairs[pair_idx]
    print('>')
    print(pairs[0])
    in_lang, out_lang = lang_dataset[pair_idx]
    output_words, attentions = evaluate(encoder, decoder, in_lang)
    plt.matshow(attentions.cpu().numpy())
    plt.show()
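The `plt.matshow`/`plt.show` call at the end opens an interactive window, which fails over a plain ssh session on the AWS setup described earlier. A hedged alternative sketch: select the non-interactive Agg backend and save the attention map to a file instead:

```python
import matplotlib
matplotlib.use('Agg')  # must be called before pyplot is imported
import matplotlib.pyplot as plt


def save_attention(attentions, path='attention.png'):
    # attentions: the (output_len, max_length) tensor returned by evaluate().
    plt.matshow(attentions.cpu().numpy())
    plt.savefig(path)
    plt.close()
```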
--------------------------------------------------------------------------------
/chapter10_Natural-Language-Process/seq2seq-translation/model/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/L1aoXingyu/code-of-learn-deep-learning-with-pytorch/ea24810dc26265e85e3eec34e1d091d21202d800/chapter10_Natural-Language-Process/seq2seq-translation/model/__init__.py

--------------------------------------------------------------------------------
/chapter10_Natural-Language-Process/seq2seq-translation/model/seq2seq.py:
--------------------------------------------------------------------------------
import torch
import torch.nn.functional as F
from torch import nn
from torch.autograd import Variable

MAX_LENGTH = 10
use_cuda = torch.cuda.is_available()


class EncoderRNN(nn.Module):
    def __init__(self, input_size, hidden_size, n_layers=1):
        super(EncoderRNN, self).__init__()
        self.n_layers = n_layers
        self.hidden_size = hidden_size

        self.embedding = nn.Embedding(input_size, hidden_size)
        self.gru = nn.GRU(hidden_size, hidden_size)

    def forward(self, input, hidden):
        input = input.unsqueeze(1)
        embedded = self.embedding(input)  # batch, hidden
        output = embedded.permute(1, 0, 2)
        # Note: the same one-layer GRU is applied n_layers times.
        for i in range(self.n_layers):
            output, hidden = self.gru(output, hidden)
        return output, hidden

    def initHidden(self):
        result = Variable(torch.zeros(1, 1, self.hidden_size))
        if use_cuda:
            return result.cuda()
        else:
            return result


class DecoderRNN(nn.Module):
    def __init__(self, hidden_size, output_size, n_layers=1):
        super(DecoderRNN, self).__init__()
        self.n_layers = n_layers
        self.hidden_size = hidden_size

        self.embedding = nn.Embedding(output_size, hidden_size)
        self.gru = nn.GRU(hidden_size, hidden_size)
        self.out = nn.Linear(hidden_size, output_size)
        self.softmax = nn.LogSoftmax()

    def forward(self, input, hidden):
        output = self.embedding(input)  # batch, 1, hidden
        output = output.permute(1, 0, 2)  # 1, batch, hidden
        for i in range(self.n_layers):
            output = F.relu(output)
            output, hidden = self.gru(output, hidden)
        output = self.softmax(self.out(output[0]))
        return output, hidden

    def initHidden(self):
        result = Variable(torch.zeros(1, 1, self.hidden_size))
        if use_cuda:
            return result.cuda()
        else:
            return result


class AttnDecoderRNN(nn.Module):
    def __init__(self,
                 hidden_size,
                 output_size,
                 n_layers=1,
                 dropout_p=0.1,
                 max_length=MAX_LENGTH):
        super(AttnDecoderRNN, self).__init__()
        self.hidden_size = hidden_size
        self.output_size = output_size
        self.n_layers = n_layers
        self.dropout_p = dropout_p
        self.max_length = max_length

        self.embedding = nn.Embedding(self.output_size, self.hidden_size)
        self.attn = nn.Linear(self.hidden_size * 2, self.max_length)
        self.attn_combine = nn.Linear(self.hidden_size * 2, self.hidden_size)
        self.dropout = nn.Dropout(self.dropout_p)
        self.gru = nn.GRU(self.hidden_size, self.hidden_size)
        self.out = nn.Linear(self.hidden_size, self.output_size)

    def forward(self, input, hidden, encoder_outputs):
        '''
        input: batch, 1
        hidden: 1, batch, hidden
        encoder_outputs: length, hidden
        '''
        embedded = self.embedding(input)  # batch, 1, hidden
        embedded = self.dropout(embedded)
        embedded = embedded.squeeze(1)  # batch, hidden

        attn_weights = F.softmax(
            self.attn(torch.cat((embedded, hidden[0]), 1)))
        # batch, max_length
        encoder_outputs = encoder_outputs.unsqueeze(0)
        # batch, max_length, hidden
        attn_applied = torch.bmm(attn_weights.unsqueeze(1), encoder_outputs)
        # batch, 1, hidden
        output = torch.cat((embedded, attn_applied.squeeze(1)), 1)
        # batch, 2xhidden
        output = self.attn_combine(output).unsqueeze(0)
        # 1, batch, hidden

        for i in range(self.n_layers):
            output = F.relu(output)
            output, hidden = self.gru(output, hidden)

        output = F.log_softmax(self.out(output.squeeze(0)))
        return output, hidden, attn_weights

    def initHidden(self):
        result = Variable(torch.zeros(1, 1, self.hidden_size))
        if use_cuda:
            return result.cuda()
        else:
            return result
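The attention step in `AttnDecoderRNN` is a batched matrix product: `(batch, 1, max_length)` times `(batch, max_length, hidden)` gives `(batch, 1, hidden)`. A standalone shape check with dummy tensors (sizes match the defaults above):

```python
import torch
import torch.nn.functional as F

batch, max_length, hidden = 1, 10, 256

attn_weights = F.softmax(torch.randn(batch, max_length), dim=1)
encoder_outputs = torch.randn(batch, max_length, hidden)

# Same operation as in AttnDecoderRNN.forward:
attn_applied = torch.bmm(attn_weights.unsqueeze(1), encoder_outputs)
print(attn_applied.size())  # torch.Size([1, 1, 256])
```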
--------------------------------------------------------------------------------
/chapter10_Natural-Language-Process/seq2seq-translation/train.py:
--------------------------------------------------------------------------------
import time

import matplotlib.pyplot as plt
import numpy as np
import torch
from torch import nn, optim
from torch.autograd import Variable
from torch.utils.data import DataLoader

from dataset import TextDataset
from model.seq2seq import AttnDecoderRNN, DecoderRNN, EncoderRNN

SOS_token = 0
EOS_token = 1
MAX_LENGTH = 10
lang_dataset = TextDataset()
lang_dataloader = DataLoader(lang_dataset, shuffle=True)
print()

input_size = lang_dataset.input_lang_words
hidden_size = 256
output_size = lang_dataset.output_lang_words
total_epoch = 20

encoder = EncoderRNN(input_size, hidden_size)
decoder = DecoderRNN(hidden_size, output_size, n_layers=2)
attn_decoder = AttnDecoderRNN(hidden_size, output_size, n_layers=2)
use_attn = True

if torch.cuda.is_available():
    encoder = encoder.cuda()
    decoder = decoder.cuda()
    attn_decoder = attn_decoder.cuda()


def showPlot(points):
    plt.figure()
    x = np.arange(len(points))
    plt.plot(x, points)
    plt.show()


def train(encoder, decoder, total_epoch, use_attn):

    param = list(encoder.parameters()) + list(decoder.parameters())
    optimizer = optim.Adam(param, lr=1e-3)
    criterion = nn.NLLLoss()
    plot_losses = []
    for epoch in range(total_epoch):
        since = time.time()
        running_loss = 0
        print_loss_total = 0
        total_loss = 0
        for i, data in enumerate(lang_dataloader):
            in_lang, out_lang = data
            if torch.cuda.is_available():
                in_lang = in_lang.cuda()
                out_lang = out_lang.cuda()
            in_lang = Variable(in_lang)  # batch=1, length
            out_lang = Variable(out_lang)

            encoder_outputs = Variable(
                torch.zeros(MAX_LENGTH, encoder.hidden_size))
            if torch.cuda.is_available():
                encoder_outputs = encoder_outputs.cuda()
            encoder_hidden = encoder.initHidden()
            for ei in range(in_lang.size(1)):
                encoder_output, encoder_hidden = encoder(
                    in_lang[:, ei], encoder_hidden)
                encoder_outputs[ei] = encoder_output[0][0]

            decoder_input = Variable(torch.LongTensor([[SOS_token]]))
            if torch.cuda.is_available():
                decoder_input = decoder_input.cuda()
            decoder_hidden = encoder_hidden
            loss = 0
            if use_attn:
                for di in range(out_lang.size(1)):
                    decoder_output, decoder_hidden, decoder_attention = attn_decoder(
                        decoder_input, decoder_hidden, encoder_outputs)
                    loss += criterion(decoder_output, out_lang[:, di])
                    topv, topi = decoder_output.data.topk(1)
                    ni = topi[0][0]

                    decoder_input = Variable(torch.LongTensor([[ni]]))
                    if torch.cuda.is_available():
                        decoder_input = decoder_input.cuda()
                    if ni == EOS_token:
                        break
            else:
                for di in range(out_lang.size(1)):
                    decoder_output, decoder_hidden = decoder(
                        decoder_input, decoder_hidden)
                    loss += criterion(decoder_output, out_lang[:, di])
                    topv, topi = decoder_output.data.topk(1)
                    ni = topi[0][0]

                    decoder_input = Variable(torch.LongTensor([[ni]]))
                    if torch.cuda.is_available():
                        decoder_input = decoder_input.cuda()
                    if ni == EOS_token:
                        break
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            running_loss += loss.data[0]
            print_loss_total += loss.data[0]
            total_loss += loss.data[0]
            if (i + 1) % 5000 == 0:
                print('{}/{}, Loss:{:.6f}'.format(
                    i + 1, len(lang_dataloader), running_loss / 5000))
                running_loss = 0
            if (i + 1) % 100 == 0:
                plot_loss = print_loss_total / 100
                plot_losses.append(plot_loss)
                print_loss_total = 0
        during = time.time() - since
        print('Finish {}/{} , Loss:{:.6f}, Time:{:.0f}s'.format(
            epoch + 1, total_epoch, total_loss / len(lang_dataset), during))
        print()
    showPlot(plot_losses)


if use_attn:
    train(encoder, attn_decoder, total_epoch, use_attn=True)
else:
    train(encoder, decoder, total_epoch, use_attn=False)

print('finish training!')
if use_attn:
    torch.save(encoder.state_dict(), './encoder.pth')
    torch.save(attn_decoder.state_dict(), './attn_decoder.pth')
else:
    torch.save(encoder.state_dict(), './encoder.pth')
    torch.save(decoder.state_dict(), './decoder.pth')
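The decoder loop above always feeds the model's own previous prediction back in. A common variant is teacher forcing, where the ground-truth token is fed in for some fraction of sequences instead. This is a hedged sketch of how such a loop could look; `teacher_forcing_ratio` is an assumed knob, not part of this repo's code:

```python
import random

from torch.autograd import Variable


def decode_sequence(attn_decoder, criterion, decoder_input, decoder_hidden,
                    encoder_outputs, out_lang, teacher_forcing_ratio=0.5):
    """Sketch: decode one target sequence, sometimes with teacher forcing.

    teacher_forcing_ratio is an assumed hyperparameter for illustration.
    """
    use_teacher_forcing = random.random() < teacher_forcing_ratio
    loss = 0
    for di in range(out_lang.size(1)):
        decoder_output, decoder_hidden, _ = attn_decoder(
            decoder_input, decoder_hidden, encoder_outputs)
        loss += criterion(decoder_output, out_lang[:, di])
        if use_teacher_forcing:
            # Next input is the ground-truth token.
            decoder_input = out_lang[:, di].unsqueeze(1)
        else:
            # Next input is the model's own most likely prediction.
            _, topi = decoder_output.data.topk(1)
            decoder_input = Variable(topi)
    return loss, decoder_hidden
```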
--------------------------------------------------------------------------------
/chapter2_PyTorch-Basics/PyTorch-introduction.ipynb:
--------------------------------------------------------------------------------
{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "![](https://ws2.sinaimg.cn/large/006tNc79ly1fmebdrkuawj30b3032a9w.jpg)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# An introduction to PyTorch"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "PyTorch was open-sourced by the Torch7 team; it is a Python toolkit released by Facebook's AI research team. According to the project website, it is a Python-first deep learning framework that provides tensors and dynamic neural networks with strong GPU acceleration.\n",
    "\n",
    "- [Website](http://pytorch.org/)\n",
    "- [Github](https://github.com/pytorch/pytorch)\n",
    "\n",
    "Besides Facebook, a large number of organizations are already using PyTorch\n",
    "\n",
    "![](https://ws2.sinaimg.cn/large/006tNc79ly1fmebl3ayfij30kk0c2aac.jpg)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "PyTorch's predecessor is Torch, a long-established tensor library for operating on multi-dimensional arrays, widely used in machine learning and other math-intensive applications. Because its language was Lua, it always remained niche in China; now that it has returned in Python it has quickly won a large user base.\n",
    "\n",
    "PyTorch provides two high-level features:\n",
    "- tensor computation with strong GPU acceleration (similar to numpy)\n",
    "- deep neural networks built on an autograd system\n",
    "\n",
    "So there are usually two reasons to use PyTorch:\n",
    "- as a replacement for numpy, to take advantage of GPU acceleration;\n",
    "- as a deep learning research platform offering maximum flexibility and speed"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "As a Python-first dynamic-graph framework, PyTorch has the following characteristics"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Python first\n",
    "PyTorch is not a Python binding bolted onto a monolithic C++ framework; it is built deeply into Python. You can use it as naturally as numpy/scipy/scikit-learn, and you can write new neural network layers in PyTorch with your favorite libraries and packages, so you rarely have to reinvent the wheel.\n",
    "\n",
    "### An imperative experience\n",
    "PyTorch is designed to be linear, intuitive, and easy to use. When you execute a line of code, it runs faithfully; PyTorch has no asynchronous world view. When you open a debugger or receive an error message and stack trace, they are easy to understand: the stack trace points directly to the exact place where the code was defined. We do not want you to waste time debugging because of misleading pointers or an asynchronous, opaque engine.\n",
    "\n",
    "### Fast and lean\n",
    "PyTorch has a lightweight framework and integrates acceleration libraries such as Intel MKL and Nvidia's CuDNN and NCCL to optimize speed. At its core, its CPU and GPU tensor and neural-network backends (TH, THC, THNN, THCUNN) are written as independent libraries with a C99 API."
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Installation\n",
    "Installing PyTorch is very easy, either with Anaconda or with pip, for example\n",
    "\n",
    "with conda \n",
    "`conda install pytorch torchvision -c pytorch`\n",
    "\n",
    "or with pip \n",
    "`pip install http://download.pytorch.org/whl/cu80/torch-0.3.0.post4-cp36-cp36m-linux_x86_64.whl \n",
    "pip install torchvision`\n",
    "\n",
    "Currently only Mac OSX and Linux are supported; Windows support is coming soon. See the [website](http://pytorch.org/) for more details."
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "mx",
   "language": "python",
   "name": "mx"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
"file_extension": ".py", 97 | "mimetype": "text/x-python", 98 | "name": "python", 99 | "nbconvert_exporter": "python", 100 | "pygments_lexer": "ipython3", 101 | "version": "3.6.0" 102 | } 103 | }, 104 | "nbformat": 4, 105 | "nbformat_minor": 2 106 | } 107 | -------------------------------------------------------------------------------- /chapter2_PyTorch-Basics/dynamic-graph.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# 动态图和静态图\n", 8 | "目前神经网络框架分为静态图框架和动态图框架,PyTorch 和 TensorFlow、Caffe 等框架最大的区别就是他们拥有不同的计算图表现形式。 TensorFlow 使用静态图,这意味着我们先定义计算图,然后不断使用它,而在 PyTorch 中,每次都会重新构建一个新的计算图。通过这次课程,我们会了解静态图和动态图之间的优缺点。\n", 9 | "\n", 10 | "对于使用者来说,两种形式的计算图有着非常大的区别,同时静态图和动态图都有他们各自的优点,比如动态图比较方便debug,使用者能够用任何他们喜欢的方式进行debug,同时非常直观,而静态图是通过先定义后运行的方式,之后再次运行的时候就不再需要重新构建计算图,所以速度会比动态图更快。" 11 | ] 12 | }, 13 | { 14 | "cell_type": "markdown", 15 | "metadata": {}, 16 | "source": [ 17 | "![](https://ws3.sinaimg.cn/large/006tNc79ly1fmai482qumg30rs0fmq6e.gif)" 18 | ] 19 | }, 20 | { 21 | "cell_type": "markdown", 22 | "metadata": {}, 23 | "source": [ 24 | "下面我们比较 while 循环语句在 TensorFlow 和 PyTorch 中的定义" 25 | ] 26 | }, 27 | { 28 | "cell_type": "markdown", 29 | "metadata": {}, 30 | "source": [ 31 | "## TensorFlow" 32 | ] 33 | }, 34 | { 35 | "cell_type": "code", 36 | "execution_count": 1, 37 | "metadata": { 38 | "collapsed": true 39 | }, 40 | "outputs": [], 41 | "source": [ 42 | "# tensorflow\n", 43 | "import tensorflow as tf\n", 44 | "\n", 45 | "first_counter = tf.constant(0)\n", 46 | "second_counter = tf.constant(10)" 47 | ] 48 | }, 49 | { 50 | "cell_type": "code", 51 | "execution_count": 2, 52 | "metadata": { 53 | "collapsed": true 54 | }, 55 | "outputs": [], 56 | "source": [ 57 | "def cond(first_counter, second_counter, *args):\n", 58 | " return first_counter < second_counter\n", 59 | "\n", 60 | "def body(first_counter, second_counter):\n", 61 | " first_counter = tf.add(first_counter, 2)\n", 62 | " second_counter = tf.add(second_counter, 1)\n", 63 | " return first_counter, second_counter" 64 | ] 65 | }, 66 | { 67 | "cell_type": "code", 68 | "execution_count": 3, 69 | "metadata": { 70 | "collapsed": false 71 | }, 72 | "outputs": [], 73 | "source": [ 74 | "c1, c2 = tf.while_loop(cond, body, [first_counter, second_counter])" 75 | ] 76 | }, 77 | { 78 | "cell_type": "code", 79 | "execution_count": 4, 80 | "metadata": { 81 | "collapsed": true 82 | }, 83 | "outputs": [], 84 | "source": [ 85 | "with tf.Session() as sess:\n", 86 | " counter_1_res, counter_2_res = sess.run([c1, c2])" 87 | ] 88 | }, 89 | { 90 | "cell_type": "code", 91 | "execution_count": 5, 92 | "metadata": { 93 | "collapsed": false 94 | }, 95 | "outputs": [ 96 | { 97 | "name": "stdout", 98 | "output_type": "stream", 99 | "text": [ 100 | "20\n", 101 | "20\n" 102 | ] 103 | } 104 | ], 105 | "source": [ 106 | "print(counter_1_res)\n", 107 | "print(counter_2_res)" 108 | ] 109 | }, 110 | { 111 | "cell_type": "markdown", 112 | "metadata": {}, 113 | "source": [ 114 | "可以看到 TensorFlow 需要将整个图构建成静态的,换句话说,每次运行的时候图都是一样的,是不能够改变的,所以不能直接使用 Python 的 while 循环语句,需要使用辅助函数 `tf.while_loop` 写成 TensorFlow 内部的形式\n", 115 | "\n", 116 | "这是非常反直觉的,学习成本也是比较高的\n", 117 | "\n", 118 | "下面我们来看看 PyTorch 的动态图机制,这使得我们能够使用 Python 的 while 写循环,非常方便" 119 | ] 120 | }, 121 | { 122 | "cell_type": "markdown", 123 | "metadata": {}, 124 | "source": [ 125 | "## PyTorch" 126 | ] 127 | }, 128 | { 129 | "cell_type": "code", 130 | "execution_count": 6, 131 | "metadata": { 132 | 
"collapsed": true 133 | }, 134 | "outputs": [], 135 | "source": [ 136 | "# pytorch\n", 137 | "import torch\n", 138 | "first_counter = torch.Tensor([0])\n", 139 | "second_counter = torch.Tensor([10])" 140 | ] 141 | }, 142 | { 143 | "cell_type": "code", 144 | "execution_count": 11, 145 | "metadata": { 146 | "collapsed": false 147 | }, 148 | "outputs": [], 149 | "source": [ 150 | "while (first_counter < second_counter)[0]:\n", 151 | " first_counter += 2\n", 152 | " second_counter += 1" 153 | ] 154 | }, 155 | { 156 | "cell_type": "code", 157 | "execution_count": 12, 158 | "metadata": { 159 | "collapsed": false 160 | }, 161 | "outputs": [ 162 | { 163 | "name": "stdout", 164 | "output_type": "stream", 165 | "text": [ 166 | "\n", 167 | " 20\n", 168 | "[torch.FloatTensor of size 1]\n", 169 | "\n", 170 | "\n", 171 | " 20\n", 172 | "[torch.FloatTensor of size 1]\n", 173 | "\n" 174 | ] 175 | } 176 | ], 177 | "source": [ 178 | "print(first_counter)\n", 179 | "print(second_counter)" 180 | ] 181 | }, 182 | { 183 | "cell_type": "markdown", 184 | "metadata": {}, 185 | "source": [ 186 | "可以看到 PyTorch 的写法跟 Python 的写法是完全一致的,没有任何额外的学习成本\n", 187 | "\n", 188 | "上面的例子展示如何使用静态图和动态图构建 while 循环,看起来动态图的方式更加简单且直观,你觉得呢?" 189 | ] 190 | } 191 | ], 192 | "metadata": { 193 | "kernelspec": { 194 | "display_name": "mx", 195 | "language": "python", 196 | "name": "mx" 197 | }, 198 | "language_info": { 199 | "codemirror_mode": { 200 | "name": "ipython", 201 | "version": 3 202 | }, 203 | "file_extension": ".py", 204 | "mimetype": "text/x-python", 205 | "name": "python", 206 | "nbconvert_exporter": "python", 207 | "pygments_lexer": "ipython3", 208 | "version": "3.6.0" 209 | } 210 | }, 211 | "nbformat": 4, 212 | "nbformat_minor": 2 213 | } 214 | -------------------------------------------------------------------------------- /chapter3_NN/bp.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# 反向传播算法\n", 8 | "\n", 9 | "前面我们介绍了三个模型,整个处理的基本流程都是定义模型,读入数据,给出损失函数$f$,通过梯度下降法更新参数。PyTorch 提供了非常简单的自动求导帮助我们求解导数,对于比较简单的模型,我们也能手动求出参数的梯度,但是对于非常复杂的模型,比如一个 100 层的网络,我们如何能够有效地手动求出这个梯度呢?这里就需要引入反向传播算法,自动求导本质是就是一个反向传播算法。\n", 10 | "\n", 11 | "反向传播算法是一个有效地求解梯度的算法,本质上其实就是一个链式求导法则的应用,然而这个如此简单而且显而易见的方法却是在 Roseblatt 提出感知机算法后将近 30 年才被发明和普及的,对此 Bengio 这样说道:“很多看似显而易见的想法只有在事后才变得的显而易见。”\n", 12 | "\n", 13 | "下面我们就来详细将一讲什么是反向传播算法。" 14 | ] 15 | }, 16 | { 17 | "cell_type": "markdown", 18 | "metadata": {}, 19 | "source": [ 20 | "## 链式法则\n", 21 | "\n", 22 | "首先来简单地介绍一下链式法则,考虑一个简单的函数,比如\n", 23 | "$$f(x, y, z) = (x + y)z$$\n", 24 | "\n", 25 | "我们当然可以直接求出这个函数的微分,但是这里我们要使用链式法则,令\n", 26 | "$$q=x+y$$\n", 27 | "\n", 28 | "那么\n", 29 | "\n", 30 | "$$f = qz$$\n", 31 | "\n", 32 | "对于这两个式子,我们可以分别求出他们的微分 \n", 33 | "\n", 34 | "$$\\frac{\\partial f}{\\partial q} = z, \\frac{\\partial f}{\\partial z}=q$$\n", 35 | "\n", 36 | "同时$q$是$x$和$y$的求和,所以我们能够得到\n", 37 | "\n", 38 | "$$\\frac{\\partial q}{x} = 1, \\frac{\\partial q}{y} = 1$$\n", 39 | "\n", 40 | "我们关心的问题是\n", 41 | "\n", 42 | "$$\\frac{\\partial f}{\\partial x}, \\frac{\\partial f}{\\partial y}, \\frac{\\partial f}{\\partial z}$$\n", 43 | "\n", 44 | "链式法则告诉我们如何来计算出他们的值\n", 45 | "\n", 46 | "$$\n", 47 | "\\frac{\\partial f}{\\partial x} = \\frac{\\partial f}{\\partial q}\\frac{\\partial q}{\\partial x}\n", 48 | "$$\n", 49 | "$$\n", 50 | "\\frac{\\partial f}{\\partial y} = \\frac{\\partial f}{\\partial q}\\frac{\\partial q}{\\partial y}\n", 51 | "$$\n", 52 | "$$\n", 53 | "\\frac{\\partial f}{\\partial 
z} = q\n", 54 | "$$\n", 55 | "\n", 56 | "通过链式法则我们知道如果我们需要对其中的元素求导,那么我们可以一层一层求导然后将结果乘起来,这就是链式法则的核心,也是反向传播算法的核心,更多关于链式法则的算法,可以访问这个[文档](https://zh.wikipedia.org/wiki/%E9%93%BE%E5%BC%8F%E6%B3%95%E5%88%99)" 57 | ] 58 | }, 59 | { 60 | "cell_type": "markdown", 61 | "metadata": {}, 62 | "source": [ 63 | "## 反向传播算法\n", 64 | "\n", 65 | "了解了链式法则,我们就可以开始介绍反向传播算法了,本质上反向传播算法只是链式法则的一个应用。我们还是使用之前那个相同的例子$q=x+y, f=qz$,通过计算图可以将这个计算过程表达出来\n", 66 | "\n", 67 | "![](https://ws1.sinaimg.cn/large/006tNc79ly1fmiozcinyzj30c806vglk.jpg)\n", 68 | "\n", 69 | "上面绿色的数字表示其数值,下面红色的数字表示求出的梯度,我们可以一步一步看看反向传播算法的实现。首先从最后开始,梯度当然是1,然后计算\n", 70 | "\n", 71 | "$$\\frac{\\partial f}{\\partial q} = z = -4,\\ \\frac{\\partial f}{\\partial z} = q = 3$$\n", 72 | "\n", 73 | "接着我们计算\n", 74 | "$$\\frac{\\partial f}{\\partial x} = \\frac{\\partial f}{\\partial q} \\frac{\\partial q}{\\partial x} = -4 \\times 1 = -4,\\ \\frac{\\partial f}{\\partial y} = \\frac{\\partial f}{\\partial q} \\frac{\\partial q}{\\partial y} = -4 \\times 1 = -4$$\n", 75 | "\n", 76 | "这样一步一步我们就求出了$\\nabla f(x, y, z)$。\n", 77 | "\n", 78 | "直观上看反向传播算法是一个优雅的局部过程,每次求导只是对当前的运算求导,求解每层网络的参数都是通过链式法则将前面的结果求出不断迭代到这一层,所以说这是一个传播过程\n", 79 | "\n", 80 | "### Sigmoid函数举例\n", 81 | "\n", 82 | "下面我们通过Sigmoid函数来演示反向传播过程在一个复杂的函数上是如何进行的。\n", 83 | "\n", 84 | "$$\n", 85 | "f(w, x) = \\frac{1}{1+e^{-(w_0 x_0 + w_1 x_1 + w_2)}}\n", 86 | "$$\n", 87 | "\n", 88 | "我们需要求解出\n", 89 | "$$\\frac{\\partial f}{\\partial w_0}, \\frac{\\partial f}{\\partial w_1}, \\frac{\\partial f}{\\partial w_2}$$\n", 90 | "\n", 91 | "首先我们将这个函数抽象成一个计算图来表示,即\n", 92 | "$$\n", 93 | " f(x) = \\frac{1}{x} \\\\\n", 94 | " f_c(x) = 1 + x \\\\\n", 95 | " f_e(x) = e^x \\\\\n", 96 | " f_w(x) = -(w_0 x_0 + w_1 x_1 + w_2)\n", 97 | "$$\n", 98 | "\n", 99 | "这样我们就能够画出下面的计算图\n", 100 | "\n", 101 | "![](https://ws1.sinaimg.cn/large/006tNc79ly1fmip1va5qjj30lb08e0t0.jpg)\n", 102 | "\n", 103 | "同样上面绿色的数子表示数值,下面红色的数字表示梯度,我们从后往前计算一下各个参数的梯度。首先最后面的梯度是1,,然后经过$\\frac{1}{x}$这个函数,这个函数的梯度是$-\\frac{1}{x^2}$,所以往前传播的梯度是$1 \\times -\\frac{1}{1.37^2} = -0.53$,然后是$+1$这个操作,梯度不变,接着是$e^x$这个运算,它的梯度就是$-0.53 \\times e^{-1} = -0.2$,这样不断往后传播就能够求得每个参数的梯度。" 104 | ] 105 | } 106 | ], 107 | "metadata": { 108 | "kernelspec": { 109 | "display_name": "mx", 110 | "language": "python", 111 | "name": "mx" 112 | }, 113 | "language_info": { 114 | "codemirror_mode": { 115 | "name": "ipython", 116 | "version": 3 117 | }, 118 | "file_extension": ".py", 119 | "mimetype": "text/x-python", 120 | "name": "python", 121 | "nbconvert_exporter": "python", 122 | "pygments_lexer": "ipython3", 123 | "version": "3.6.0" 124 | } 125 | }, 126 | "nbformat": 4, 127 | "nbformat_minor": 2 128 | } 129 | -------------------------------------------------------------------------------- /chapter3_NN/logistic-regression/data.txt: -------------------------------------------------------------------------------- 1 | 34.62365962451697,78.0246928153624,0 2 | 30.28671076822607,43.89499752400101,0 3 | 35.84740876993872,72.90219802708364,0 4 | 60.18259938620976,86.30855209546826,1 5 | 79.0327360507101,75.3443764369103,1 6 | 45.08327747668339,56.3163717815305,0 7 | 61.10666453684766,96.51142588489624,1 8 | 75.02474556738889,46.55401354116538,1 9 | 76.09878670226257,87.42056971926803,1 10 | 84.43281996120035,43.53339331072109,1 11 | 95.86155507093572,38.22527805795094,0 12 | 75.01365838958247,30.60326323428011,0 13 | 82.30705337399482,76.48196330235604,1 14 | 69.36458875970939,97.71869196188608,1 15 | 39.53833914367223,76.03681085115882,0 16 | 53.9710521485623,89.20735013750205,1 17 | 
69.07014406283025,52.74046973016765,1 18 | 67.94685547711617,46.67857410673128,0 19 | 70.66150955499435,92.92713789364831,1 20 | 76.97878372747498,47.57596364975532,1 21 | 67.37202754570876,42.83843832029179,0 22 | 89.67677575072079,65.79936592745237,1 23 | 50.534788289883,48.85581152764205,0 24 | 34.21206097786789,44.20952859866288,0 25 | 77.9240914545704,68.9723599933059,1 26 | 62.27101367004632,69.95445795447587,1 27 | 80.1901807509566,44.82162893218353,1 28 | 93.114388797442,38.80067033713209,0 29 | 61.83020602312595,50.25610789244621,0 30 | 38.78580379679423,64.99568095539578,0 31 | 61.379289447425,72.80788731317097,1 32 | 85.40451939411645,57.05198397627122,1 33 | 52.10797973193984,63.12762376881715,0 34 | 52.04540476831827,69.43286012045222,1 35 | 40.23689373545111,71.16774802184875,0 36 | 54.63510555424817,52.21388588061123,0 37 | 33.91550010906887,98.86943574220611,0 38 | 64.17698887494485,80.90806058670817,1 39 | 74.78925295941542,41.57341522824434,0 40 | 34.1836400264419,75.2377203360134,0 41 | 83.90239366249155,56.30804621605327,1 42 | 51.54772026906181,46.85629026349976,0 43 | 94.44336776917852,65.56892160559052,1 44 | 82.36875375713919,40.61825515970618,0 45 | 51.04775177128865,45.82270145776001,0 46 | 62.22267576120188,52.06099194836679,0 47 | 77.19303492601364,70.45820000180959,1 48 | 97.77159928000232,86.7278223300282,1 49 | 62.07306379667647,96.76882412413983,1 50 | 91.56497449807442,88.69629254546599,1 51 | 79.94481794066932,74.16311935043758,1 52 | 99.2725269292572,60.99903099844988,1 53 | 90.54671411399852,43.39060180650027,1 54 | 34.52451385320009,60.39634245837173,0 55 | 50.2864961189907,49.80453881323059,0 56 | 49.58667721632031,59.80895099453265,0 57 | 97.64563396007767,68.86157272420604,1 58 | 32.57720016809309,95.59854761387875,0 59 | 74.24869136721598,69.82457122657193,1 60 | 71.79646205863379,78.45356224515052,1 61 | 75.3956114656803,85.75993667331619,1 62 | 35.28611281526193,47.02051394723416,0 63 | 56.25381749711624,39.26147251058019,0 64 | 30.05882244669796,49.59297386723685,0 65 | 44.66826172480893,66.45008614558913,0 66 | 66.56089447242954,41.09209807936973,0 67 | 40.45755098375164,97.53518548909936,1 68 | 49.07256321908844,51.88321182073966,0 69 | 80.27957401466998,92.11606081344084,1 70 | 66.74671856944039,60.99139402740988,1 71 | 32.72283304060323,43.30717306430063,0 72 | 64.0393204150601,78.03168802018232,1 73 | 72.34649422579923,96.22759296761404,1 74 | 60.45788573918959,73.09499809758037,1 75 | 58.84095621726802,75.85844831279042,1 76 | 99.82785779692128,72.36925193383885,1 77 | 47.26426910848174,88.47586499559782,1 78 | 50.45815980285988,75.80985952982456,1 79 | 60.45555629271532,42.50840943572217,0 80 | 82.22666157785568,42.71987853716458,0 81 | 88.9138964166533,69.80378889835472,1 82 | 94.83450672430196,45.69430680250754,1 83 | 67.31925746917527,66.58935317747915,1 84 | 57.23870631569862,59.51428198012956,1 85 | 80.36675600171273,90.96014789746954,1 86 | 68.46852178591112,85.59430710452014,1 87 | 42.0754545384731,78.84478600148043,0 88 | 75.47770200533905,90.42453899753964,1 89 | 78.63542434898018,96.64742716885644,1 90 | 52.34800398794107,60.76950525602592,0 91 | 94.09433112516793,77.15910509073893,1 92 | 90.44855097096364,87.50879176484702,1 93 | 55.48216114069585,35.57070347228866,0 94 | 74.49269241843041,84.84513684930135,1 95 | 89.84580670720979,45.35828361091658,1 96 | 83.48916274498238,48.38028579728175,1 97 | 42.2617008099817,87.10385094025457,1 98 | 99.31500880510394,68.77540947206617,1 99 | 55.34001756003703,64.9319380069486,1 100 | 
74.77589300092767,89.52981289513276,1 101 | -------------------------------------------------------------------------------- /chapter4_CNN/cat.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/L1aoXingyu/code-of-learn-deep-learning-with-pytorch/ea24810dc26265e85e3eec34e1d091d21202d800/chapter4_CNN/cat.png -------------------------------------------------------------------------------- /chapter4_CNN/regularization.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# 正则化\n", 8 | "前面我们讲了数据增强和 dropout,而在实际使用中,现在的网络往往不使用 dropout,而是用另外一个技术,叫正则化。\n", 9 | "\n", 10 | "正则化是机器学习中提出来的一种方法,有 L1 和 L2 正则化,目前使用较多的是 L2 正则化,引入正则化相当于在 loss 函数上面加上一项,比如\n", 11 | "\n", 12 | "$$\n", 13 | "f = loss + \\lambda \\sum_{p \\in params} ||p||_2^2\n", 14 | "$$\n", 15 | "\n", 16 | "就是在 loss 的基础上加上了参数的二范数作为一个正则化,我们在训练网络的时候,不仅要最小化 loss 函数,同时还要最小化参数的二范数,也就是说我们会对参数做一些限制,不让它变得太大。" 17 | ] 18 | }, 19 | { 20 | "cell_type": "markdown", 21 | "metadata": {}, 22 | "source": [ 23 | "如果我们对新的损失函数 f 求导进行梯度下降,就有\n", 24 | "\n", 25 | "$$\n", 26 | "\\frac{\\partial f}{\\partial p_j} = \\frac{\\partial loss}{\\partial p_j} + 2 \\lambda p_j\n", 27 | "$$\n", 28 | "\n", 29 | "那么在更新参数的时候就有\n", 30 | "\n", 31 | "$$\n", 32 | "p_j \\rightarrow p_j - \\eta (\\frac{\\partial loss}{\\partial p_j} + 2 \\lambda p_j) = p_j - \\eta \\frac{\\partial loss}{\\partial p_j} - 2 \\eta \\lambda p_j \n", 33 | "$$\n" 34 | ] 35 | }, 36 | { 37 | "cell_type": "markdown", 38 | "metadata": {}, 39 | "source": [ 40 | "可以看到 $p_j - \\eta \\frac{\\partial loss}{\\partial p_j}$ 和没加正则项要更新的部分一样,而后面的 $2\\eta \\lambda p_j$ 就是正则项的影响,可以看到加完正则项之后会对参数做更大程度的更新,这也被称为权重衰减(weight decay),在 pytorch 中正则项就是通过这种方式来加入的,比如想在随机梯度下降法中使用正则项,或者说权重衰减,`torch.optim.SGD(net.parameters(), lr=0.1, weight_decay=1e-4)` 就可以了,这个 `weight_decay` 系数就是上面公式中的 $\\lambda$,非常方便\n", 41 | "\n", 42 | "注意正则项的系数的大小非常重要,如果太大,会极大的抑制参数的更新,导致欠拟合,如果太小,那么正则项这个部分基本没有贡献,所以选择一个合适的权重衰减系数非常重要,这个需要根据具体的情况去尝试,初步尝试可以使用 `1e-4` 或者 `1e-3` \n", 43 | "\n", 44 | "下面我们在训练 cifar 10 中添加正则项" 45 | ] 46 | }, 47 | { 48 | "cell_type": "code", 49 | "execution_count": 1, 50 | "metadata": { 51 | "ExecuteTime": { 52 | "end_time": "2017-12-24T08:02:11.903459Z", 53 | "start_time": "2017-12-24T08:02:11.383170Z" 54 | }, 55 | "collapsed": true 56 | }, 57 | "outputs": [], 58 | "source": [ 59 | "import sys\n", 60 | "sys.path.append('..')\n", 61 | "\n", 62 | "import numpy as np\n", 63 | "import torch\n", 64 | "from torch import nn\n", 65 | "import torch.nn.functional as F\n", 66 | "from torch.autograd import Variable\n", 67 | "from torchvision.datasets import CIFAR10\n", 68 | "from utils import train, resnet\n", 69 | "from torchvision import transforms as tfs" 70 | ] 71 | }, 72 | { 73 | "cell_type": "code", 74 | "execution_count": 2, 75 | "metadata": { 76 | "ExecuteTime": { 77 | "end_time": "2017-12-24T08:02:13.120502Z", 78 | "start_time": "2017-12-24T08:02:11.905617Z" 79 | }, 80 | "collapsed": true 81 | }, 82 | "outputs": [], 83 | "source": [ 84 | "def data_tf(x):\n", 85 | " im_aug = tfs.Compose([\n", 86 | " tfs.Resize(96),\n", 87 | " tfs.ToTensor(),\n", 88 | " tfs.Normalize([0.5, 0.5, 0.5], [0.5, 0.5, 0.5])\n", 89 | " ])\n", 90 | " x = im_aug(x)\n", 91 | " return x\n", 92 | "\n", 93 | "train_set = CIFAR10('./data', train=True, transform=data_tf)\n", 94 | "train_data = torch.utils.data.DataLoader(train_set, batch_size=64, shuffle=True, num_workers=4)\n", 95 | 
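"# 测试集不需要打乱顺序(shuffle=False),batch size 可以适当加大\n",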
"test_set = CIFAR10('./data', train=False, transform=data_tf)\n", 96 | "test_data = torch.utils.data.DataLoader(test_set, batch_size=128, shuffle=False, num_workers=4)\n", 97 | "\n", 98 | "net = resnet(3, 10)\n", 99 | "optimizer = torch.optim.SGD(net.parameters(), lr=0.01, weight_decay=1e-4) # 增加正则项\n", 100 | "criterion = nn.CrossEntropyLoss()" 101 | ] 102 | }, 103 | { 104 | "cell_type": "code", 105 | "execution_count": 3, 106 | "metadata": { 107 | "ExecuteTime": { 108 | "end_time": "2017-12-24T08:11:36.106177Z", 109 | "start_time": "2017-12-24T08:02:13.122785Z" 110 | } 111 | }, 112 | "outputs": [ 113 | { 114 | "name": "stdout", 115 | "output_type": "stream", 116 | "text": [ 117 | "Epoch 0. Train Loss: 1.429834, Train Acc: 0.476982, Valid Loss: 1.261334, Valid Acc: 0.546776, Time 00:00:26\n", 118 | "Epoch 1. Train Loss: 0.994539, Train Acc: 0.645400, Valid Loss: 1.310620, Valid Acc: 0.554688, Time 00:00:27\n", 119 | "Epoch 2. Train Loss: 0.788570, Train Acc: 0.723585, Valid Loss: 1.256101, Valid Acc: 0.577433, Time 00:00:28\n", 120 | "Epoch 3. Train Loss: 0.629832, Train Acc: 0.780411, Valid Loss: 1.222015, Valid Acc: 0.609474, Time 00:00:27\n", 121 | "Epoch 4. Train Loss: 0.500406, Train Acc: 0.825288, Valid Loss: 0.831702, Valid Acc: 0.720332, Time 00:00:27\n", 122 | "Epoch 5. Train Loss: 0.388376, Train Acc: 0.868646, Valid Loss: 0.829582, Valid Acc: 0.726760, Time 00:00:27\n", 123 | "Epoch 6. Train Loss: 0.291237, Train Acc: 0.902094, Valid Loss: 1.499777, Valid Acc: 0.623714, Time 00:00:28\n", 124 | "Epoch 7. Train Loss: 0.222401, Train Acc: 0.925072, Valid Loss: 1.832660, Valid Acc: 0.558643, Time 00:00:28\n", 125 | "Epoch 8. Train Loss: 0.157753, Train Acc: 0.947990, Valid Loss: 1.255313, Valid Acc: 0.668117, Time 00:00:28\n", 126 | "Epoch 9. Train Loss: 0.111407, Train Acc: 0.963595, Valid Loss: 1.004693, Valid Acc: 0.724782, Time 00:00:27\n", 127 | "Epoch 10. Train Loss: 0.084960, Train Acc: 0.972926, Valid Loss: 0.867961, Valid Acc: 0.775119, Time 00:00:27\n", 128 | "Epoch 11. Train Loss: 0.066854, Train Acc: 0.979280, Valid Loss: 1.011263, Valid Acc: 0.749604, Time 00:00:28\n", 129 | "Epoch 12. Train Loss: 0.048280, Train Acc: 0.985534, Valid Loss: 2.438345, Valid Acc: 0.576938, Time 00:00:27\n", 130 | "Epoch 13. Train Loss: 0.046176, Train Acc: 0.985614, Valid Loss: 1.008425, Valid Acc: 0.756527, Time 00:00:27\n", 131 | "Epoch 14. Train Loss: 0.039515, Train Acc: 0.988411, Valid Loss: 0.945017, Valid Acc: 0.766317, Time 00:00:27\n", 132 | "Epoch 15. Train Loss: 0.025882, Train Acc: 0.992667, Valid Loss: 0.918691, Valid Acc: 0.784217, Time 00:00:27\n", 133 | "Epoch 16. Train Loss: 0.018592, Train Acc: 0.994985, Valid Loss: 1.507427, Valid Acc: 0.680281, Time 00:00:27\n", 134 | "Epoch 17. Train Loss: 0.021062, Train Acc: 0.994246, Valid Loss: 2.976452, Valid Acc: 0.558940, Time 00:00:27\n", 135 | "Epoch 18. Train Loss: 0.021458, Train Acc: 0.993926, Valid Loss: 0.927871, Valid Acc: 0.785898, Time 00:00:27\n", 136 | "Epoch 19. 
Train Loss: 0.015656, Train Acc: 0.995824, Valid Loss: 0.962502, Valid Acc: 0.782832, Time 00:00:27\n" 137 | ] 138 | } 139 | ], 140 | "source": [ 141 | "from utils import train\n", 142 | "train(net, train_data, test_data, 20, optimizer, criterion)" 143 | ] 144 | } 145 | ], 146 | "metadata": { 147 | "kernelspec": { 148 | "display_name": "Python 3", 149 | "language": "python", 150 | "name": "python3" 151 | }, 152 | "language_info": { 153 | "codemirror_mode": { 154 | "name": "ipython", 155 | "version": 3 156 | }, 157 | "file_extension": ".py", 158 | "mimetype": "text/x-python", 159 | "name": "python", 160 | "nbconvert_exporter": "python", 161 | "pygments_lexer": "ipython3", 162 | "version": "3.6.3" 163 | } 164 | }, 165 | "nbformat": 4, 166 | "nbformat_minor": 2 167 | } 168 | -------------------------------------------------------------------------------- /chapter4_CNN/utils.py: -------------------------------------------------------------------------------- 1 | from datetime import datetime 2 | 3 | import torch 4 | import torch.nn.functional as F 5 | from torch import nn 6 | from torch.autograd import Variable 7 | 8 | 9 | def get_acc(output, label): 10 | total = output.shape[0] 11 | _, pred_label = output.max(1) 12 | num_correct = (pred_label == label).sum().data[0] 13 | return num_correct / total 14 | 15 | 16 | def train(net, train_data, valid_data, num_epochs, optimizer, criterion): 17 | if torch.cuda.is_available(): 18 | net = net.cuda() 19 | prev_time = datetime.now() 20 | for epoch in range(num_epochs): 21 | train_loss = 0 22 | train_acc = 0 23 | net = net.train() 24 | for im, label in train_data: 25 | if torch.cuda.is_available(): 26 | im = Variable(im.cuda()) # (bs, 3, h, w) 27 | label = Variable(label.cuda()) # (bs, h, w) 28 | else: 29 | im = Variable(im) 30 | label = Variable(label) 31 | # forward 32 | output = net(im) 33 | loss = criterion(output, label) 34 | # backward 35 | optimizer.zero_grad() 36 | loss.backward() 37 | optimizer.step() 38 | 39 | train_loss += loss.data[0] 40 | train_acc += get_acc(output, label) 41 | 42 | cur_time = datetime.now() 43 | h, remainder = divmod((cur_time - prev_time).seconds, 3600) 44 | m, s = divmod(remainder, 60) 45 | time_str = "Time %02d:%02d:%02d" % (h, m, s) 46 | if valid_data is not None: 47 | valid_loss = 0 48 | valid_acc = 0 49 | net = net.eval() 50 | for im, label in valid_data: 51 | if torch.cuda.is_available(): 52 | im = Variable(im.cuda(), volatile=True) 53 | label = Variable(label.cuda(), volatile=True) 54 | else: 55 | im = Variable(im, volatile=True) 56 | label = Variable(label, volatile=True) 57 | output = net(im) 58 | loss = criterion(output, label) 59 | valid_loss += loss.data[0] 60 | valid_acc += get_acc(output, label) 61 | epoch_str = ( 62 | "Epoch %d. Train Loss: %f, Train Acc: %f, Valid Loss: %f, Valid Acc: %f, " 63 | % (epoch, train_loss / len(train_data), 64 | train_acc / len(train_data), valid_loss / len(valid_data), 65 | valid_acc / len(valid_data))) 66 | else: 67 | epoch_str = ("Epoch %d. 
Train Loss: %f, Train Acc: %f, " % 68 | (epoch, train_loss / len(train_data), 69 | train_acc / len(train_data))) 70 | prev_time = cur_time 71 | print(epoch_str + time_str) 72 | 73 | 74 | def conv3x3(in_channel, out_channel, stride=1): 75 | return nn.Conv2d( 76 | in_channel, out_channel, 3, stride=stride, padding=1, bias=False) 77 | 78 | 79 | class residual_block(nn.Module): 80 | def __init__(self, in_channel, out_channel, same_shape=True): 81 | super(residual_block, self).__init__() 82 | self.same_shape = same_shape 83 | stride = 1 if self.same_shape else 2 84 | 85 | self.conv1 = conv3x3(in_channel, out_channel, stride=stride) 86 | self.bn1 = nn.BatchNorm2d(out_channel) 87 | 88 | self.conv2 = conv3x3(out_channel, out_channel) 89 | self.bn2 = nn.BatchNorm2d(out_channel) 90 | if not self.same_shape: 91 | self.conv3 = nn.Conv2d(in_channel, out_channel, 1, stride=stride) 92 | 93 | def forward(self, x): 94 | out = self.conv1(x) 95 | out = F.relu(self.bn1(out), True) 96 | out = self.conv2(out) 97 | out = F.relu(self.bn2(out), True) 98 | 99 | if not self.same_shape: 100 | x = self.conv3(x) 101 | return F.relu(x + out, True) 102 | 103 | 104 | class resnet(nn.Module): 105 | def __init__(self, in_channel, num_classes, verbose=False): 106 | super(resnet, self).__init__() 107 | self.verbose = verbose 108 | 109 | self.block1 = nn.Conv2d(in_channel, 64, 7, 2) 110 | 111 | self.block2 = nn.Sequential( 112 | nn.MaxPool2d(3, 2), residual_block(64, 64), residual_block(64, 64)) 113 | 114 | self.block3 = nn.Sequential( 115 | residual_block(64, 128, False), residual_block(128, 128)) 116 | 117 | self.block4 = nn.Sequential( 118 | residual_block(128, 256, False), residual_block(256, 256)) 119 | 120 | self.block5 = nn.Sequential( 121 | residual_block(256, 512, False), 122 | residual_block(512, 512), nn.AvgPool2d(3)) 123 | 124 | self.classifier = nn.Linear(512, num_classes) 125 | 126 | def forward(self, x): 127 | x = self.block1(x) 128 | if self.verbose: 129 | print('block 1 output: {}'.format(x.shape)) 130 | x = self.block2(x) 131 | if self.verbose: 132 | print('block 2 output: {}'.format(x.shape)) 133 | x = self.block3(x) 134 | if self.verbose: 135 | print('block 3 output: {}'.format(x.shape)) 136 | x = self.block4(x) 137 | if self.verbose: 138 | print('block 4 output: {}'.format(x.shape)) 139 | x = self.block5(x) 140 | if self.verbose: 141 | print('block 5 output: {}'.format(x.shape)) 142 | x = x.view(x.shape[0], -1) 143 | x = self.classifier(x) 144 | return x 145 | -------------------------------------------------------------------------------- /chapter5_RNN/nlp/word-embedding.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# 词嵌入\n", 8 | "前面讲了循环神经网络做简单的图像分类问题和飞机流量时序预测,但是现在循环神经网络最火热的应用是自然语言处理,下面我们介绍一下自然语言处理中如果运用循环神经网络,首先我们介绍一下第一个概念,词嵌入。" 9 | ] 10 | }, 11 | { 12 | "cell_type": "markdown", 13 | "metadata": {}, 14 | "source": [ 15 | "对于图像分类问题,我们可以使用 one-hot 的类型去编码,比如一共有 5 类,那么属于第二类就可以用 (0, 1, 0, 0, 0) 去表示,对于分类问题,这样当然忒别简单,但是在自然语言处理中,因为单词的数目过多,这样做就行不通了,比如有 10000 个不同的词,那么使用 one-hot 不仅效率低,同时还没有办法表达出单词的特点,这个时候就引入了词嵌入去表达每一个单词。" 16 | ] 17 | }, 18 | { 19 | "cell_type": "markdown", 20 | "metadata": {}, 21 | "source": [ 22 | "词向量简单来说就是用一个向量去表示一个词语,但是这个向量并不是随机的,因为这样并没有任何意义,所以我们需要对每个词有一个特定的向量去表示他们,而有一些词的词性是相近的,比如”(love)喜欢”和”(like)爱”,对于这种词性相近的词,我们需要他们的向量表示也能够相近,如何去度量和定义向量之间的相近呢?非常简单,就是使用两个向量的夹角,夹角越小,越相近,这样就有了一个完备的定义。" 23 | ] 24 | }, 25 | { 26 | "cell_type": "markdown", 27 | "metadata": {}, 28 
| "source": [ 29 | "我们举一个例子,下面有 4 段话\n", 30 | "\n", 31 | "1. The cat likes playing wool.\n", 32 | "\n", 33 | "2. The kitty likes playing wool.\n", 34 | "\n", 35 | "3. The dog likes playing ball.\n", 36 | "\n", 37 | "4. The boy does not like playing ball or wool.\n", 38 | "\n", 39 | "这里面有 4 个词,分别是 cat, kitty, dog 和 boy。下面我们使用一个二维的词向量 (a, b) 来表示每一个词,其中 a,b 分别代表着这个词的一种属性,比如 a 代表是否喜欢玩球,b 代表是否喜欢玩毛线,数值越大表示越喜欢,那么我们就能够用数值来定义每一个单词。\n", 40 | "\n", 41 | "对于 cat,我们可以定义它的词嵌入为 (-1, 4),因为他不喜欢玩球,喜欢玩毛线,同时可以定义 kitty 为 (-2, 5),dog 为 (3, 2) 以及 boy 为 (-2, -3),那么把这四个向量在坐标系中表示出来,就是\n", 42 | "\n", 43 | "" 44 | ] 45 | }, 46 | { 47 | "cell_type": "markdown", 48 | "metadata": {}, 49 | "source": [ 50 | "可以看到,上面这张图就显示了不同词嵌入之间的夹角,kitty 和 cat 之间的夹角比较小,所以他们更相似,dog 和 boy 之间的夹角很大,所以他们是不相似的。\n", 51 | "\n", 52 | "下面我们看看 pytorch 中如何调用词向量" 53 | ] 54 | }, 55 | { 56 | "cell_type": "markdown", 57 | "metadata": {}, 58 | "source": [ 59 | "## PyTorch 实现\n", 60 | "词嵌入在 pytorch 中非常简单,只需要调用 `torch.nn.Embedding(m, n)` 就可以了,m 表示单词的总数目,n 表示词嵌入的维度,其实词嵌入就相当于是一个大矩阵,矩阵的每一行表示一个单词" 61 | ] 62 | }, 63 | { 64 | "cell_type": "code", 65 | "execution_count": 9, 66 | "metadata": { 67 | "collapsed": true 68 | }, 69 | "outputs": [], 70 | "source": [ 71 | "import torch\n", 72 | "from torch import nn\n", 73 | "from torch.autograd import Variable" 74 | ] 75 | }, 76 | { 77 | "cell_type": "code", 78 | "execution_count": 2, 79 | "metadata": { 80 | "collapsed": true 81 | }, 82 | "outputs": [], 83 | "source": [ 84 | "# 定义词嵌入\n", 85 | "embeds = nn.Embedding(2, 5) # 2 个单词,维度 5" 86 | ] 87 | }, 88 | { 89 | "cell_type": "code", 90 | "execution_count": 5, 91 | "metadata": {}, 92 | "outputs": [ 93 | { 94 | "data": { 95 | "text/plain": [ 96 | "Parameter containing:\n", 97 | "-1.3426 0.7316 -0.2437 0.4925 -0.0191\n", 98 | "-0.8326 0.3367 0.2135 0.5059 0.8326\n", 99 | "[torch.FloatTensor of size 2x5]" 100 | ] 101 | }, 102 | "execution_count": 5, 103 | "metadata": {}, 104 | "output_type": "execute_result" 105 | } 106 | ], 107 | "source": [ 108 | "# 得到词嵌入矩阵\n", 109 | "embeds.weight" 110 | ] 111 | }, 112 | { 113 | "cell_type": "markdown", 114 | "metadata": {}, 115 | "source": [ 116 | "我们通过 `weight` 得到了整个词嵌入的矩阵,注意,这个矩阵是一个可以改变的 parameter,在网络的训练中会不断更新,同时词嵌入的数值可以直接进行修改,比如我们可以读入一个预训练好的词嵌入等等" 117 | ] 118 | }, 119 | { 120 | "cell_type": "code", 121 | "execution_count": 8, 122 | "metadata": {}, 123 | "outputs": [ 124 | { 125 | "data": { 126 | "text/plain": [ 127 | "Parameter containing:\n", 128 | " 1 1 1 1 1\n", 129 | " 1 1 1 1 1\n", 130 | "[torch.FloatTensor of size 2x5]" 131 | ] 132 | }, 133 | "execution_count": 8, 134 | "metadata": {}, 135 | "output_type": "execute_result" 136 | } 137 | ], 138 | "source": [ 139 | "# 直接手动修改词嵌入的值\n", 140 | "embeds.weight.data = torch.ones(2, 5)\n", 141 | "embeds.weight" 142 | ] 143 | }, 144 | { 145 | "cell_type": "code", 146 | "execution_count": 11, 147 | "metadata": { 148 | "collapsed": true 149 | }, 150 | "outputs": [], 151 | "source": [ 152 | "# 访问第 50 个词的词向量\n", 153 | "embeds = nn.Embedding(100, 10)\n", 154 | "single_word_embed = embeds(Variable(torch.LongTensor([50])))" 155 | ] 156 | }, 157 | { 158 | "cell_type": "code", 159 | "execution_count": 12, 160 | "metadata": {}, 161 | "outputs": [ 162 | { 163 | "data": { 164 | "text/plain": [ 165 | "Variable containing:\n", 166 | "-1.4954 -1.8475 0.2913 -0.9674 -2.1250 -0.5783 -0.6717 0.5638 0.7038 0.4437\n", 167 | "[torch.FloatTensor of size 1x10]" 168 | ] 169 | }, 170 | "execution_count": 12, 171 | "metadata": {}, 172 | "output_type": "execute_result" 173 | } 174 | ], 175 | "source": [ 176 | 
"single_word_embed" 177 | ] 178 | }, 179 | { 180 | "cell_type": "markdown", 181 | "metadata": {}, 182 | "source": [ 183 | "可以看到如果我们要访问其中一个单词的词向量,我们可以直接调用定义好的词嵌入,但是输入必须传入一个 Variable,且类型是 LongTensor" 184 | ] 185 | }, 186 | { 187 | "cell_type": "markdown", 188 | "metadata": {}, 189 | "source": [ 190 | "虽然我们知道了如何定义词向量的相似性,但是我们仍然不知道如何得到词嵌入,因为如果一个词嵌入式 100 维,这显然不可能人为去赋值,所以为了得到词向量,需要介绍 skip-gram 模型。" 191 | ] 192 | }, 193 | { 194 | "cell_type": "markdown", 195 | "metadata": {}, 196 | "source": [ 197 | "## Skip-Gram 模型\n", 198 | "Skip Gram 模型是 [Word2Vec](https://arxiv.org/pdf/1301.3781.pdf) 这篇论文的网络架构,下面我们来讲一讲这个模型。\n", 199 | "\n", 200 | "## 模型结构\n", 201 | "skip-gram 模型非常简单,我们在一段文本中训练一个简单的网络,这个网络的任务是通过一个词周围的词来预测这个词,然而我们实际上要做的就是训练我们的词嵌入。\n", 202 | "\n", 203 | "比如我们给定一句话中的一个词,看看它周围的词,然后随机挑选一个,我们希望网络能够输出一个概率值,这个概率值能够告诉我们到底这个词离我们选择的词的远近程度,比如这么一句话 'A dog is playing with a ball',如果我们选的词是 'ball',那么 'playing' 就要比 'dog' 离我们选择的词更近。\n", 204 | "\n", 205 | "对于一段话,我们可以按照顺序选择不同的词,然后构建训练样本和 label,比如\n", 206 | "\n", 207 | "![](https://ws2.sinaimg.cn/large/006tNc79gy1fmwlpfp3loj30hh0ah75l.jpg)" 208 | ] 209 | }, 210 | { 211 | "cell_type": "markdown", 212 | "metadata": {}, 213 | "source": [ 214 | "对于这个例子,我们依次取一个词以及其周围的词构成一个训练样本,比如第一次选择的词是 'the',那么我们取其前后两个词作为训练样本,这个也可以被称为一个滑动窗口,对于第一个词,其左边没有单词,所以训练集就是三个词,然后我们在这三个词中选择 'the' 作为输入,另外两个词都是他的输出,就构成了两个训练样本,又比如选择 'fox' 这个词,那么加上其左边两个词,右边两个词,一共是 5 个词,然后选择 'fox' 作为输入,那么输出就是其周围的四个词,一共可以构成 4 个训练样本,通过这个办法,我们就能够训练出需要的词嵌入。\n", 215 | "\n", 216 | "下次课,我们会讲一讲词嵌入到底有什么用。" 217 | ] 218 | } 219 | ], 220 | "metadata": { 221 | "kernelspec": { 222 | "display_name": "Python 3", 223 | "language": "python", 224 | "name": "python3" 225 | }, 226 | "language_info": { 227 | "codemirror_mode": { 228 | "name": "ipython", 229 | "version": 3 230 | }, 231 | "file_extension": ".py", 232 | "mimetype": "text/x-python", 233 | "name": "python", 234 | "nbconvert_exporter": "python", 235 | "pygments_lexer": "ipython3", 236 | "version": "3.6.3" 237 | } 238 | }, 239 | "nbformat": 4, 240 | "nbformat_minor": 2 241 | } 242 | -------------------------------------------------------------------------------- /chapter5_RNN/rnn-for-image.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# RNN 做图像分类\n", 8 | "前面我们讲了 RNN 特别适合做序列类型的数据,那么 RNN 能不能想 CNN 一样用来做图像分类呢?下面我们用 mnist 手写字体的例子来展示一下如何用 RNN 做图像分类,但是这种方法并不是主流,这里我们只是作为举例。" 9 | ] 10 | }, 11 | { 12 | "cell_type": "markdown", 13 | "metadata": {}, 14 | "source": [ 15 | "对于一张手写字体的图片,其大小是 28 * 28,我们可以将其看做是一个长为 28 的序列,每个序列的特征都是 28,也就是\n", 16 | "\n", 17 | "![](https://ws4.sinaimg.cn/large/006tKfTcly1fmu7d0byfkj30n60djdg5.jpg)" 18 | ] 19 | }, 20 | { 21 | "cell_type": "markdown", 22 | "metadata": {}, 23 | "source": [ 24 | "这样我们解决了输入序列的问题,对于输出序列怎么办呢?其实非常简单,虽然我们的输出是一个序列,但是我们只需要保留其中一个作为输出结果就可以了,这样的话肯定保留最后一个结果是最好的,因为最后一个结果有前面所有序列的信息,就像下面这样\n", 25 | "\n", 26 | "![](https://ws3.sinaimg.cn/large/006tKfTcly1fmu7fpqri0j30c407yjr8.jpg)\n", 27 | "\n", 28 | "下面我们直接通过例子展示" 29 | ] 30 | }, 31 | { 32 | "cell_type": "code", 33 | "execution_count": 1, 34 | "metadata": { 35 | "ExecuteTime": { 36 | "end_time": "2017-12-26T08:01:44.502896Z", 37 | "start_time": "2017-12-26T08:01:44.062542Z" 38 | }, 39 | "collapsed": true 40 | }, 41 | "outputs": [], 42 | "source": [ 43 | "import sys\n", 44 | "sys.path.append('..')\n", 45 | "\n", 46 | "import torch\n", 47 | "from torch.autograd import Variable\n", 48 | "from torch import nn\n", 49 | "from torch.utils.data import DataLoader\n", 50 | 
"\n", 51 | "from torchvision import transforms as tfs\n", 52 | "from torchvision.datasets import MNIST" 53 | ] 54 | }, 55 | { 56 | "cell_type": "code", 57 | "execution_count": 2, 58 | "metadata": { 59 | "ExecuteTime": { 60 | "end_time": "2017-12-26T08:01:50.714439Z", 61 | "start_time": "2017-12-26T08:01:50.650872Z" 62 | }, 63 | "collapsed": true 64 | }, 65 | "outputs": [], 66 | "source": [ 67 | "# 定义数据\n", 68 | "data_tf = tfs.Compose([\n", 69 | " tfs.ToTensor(),\n", 70 | " tfs.Normalize([0.5], [0.5]) # 标准化\n", 71 | "])\n", 72 | "\n", 73 | "train_set = MNIST('./data', train=True, transform=data_tf)\n", 74 | "test_set = MNIST('./data', train=False, transform=data_tf)\n", 75 | "\n", 76 | "train_data = DataLoader(train_set, 64, True, num_workers=4)\n", 77 | "test_data = DataLoader(test_set, 128, False, num_workers=4)" 78 | ] 79 | }, 80 | { 81 | "cell_type": "code", 82 | "execution_count": 3, 83 | "metadata": { 84 | "ExecuteTime": { 85 | "end_time": "2017-12-26T08:01:51.165144Z", 86 | "start_time": "2017-12-26T08:01:51.115807Z" 87 | }, 88 | "collapsed": true 89 | }, 90 | "outputs": [], 91 | "source": [ 92 | "# 定义模型\n", 93 | "class rnn_classify(nn.Module):\n", 94 | " def __init__(self, in_feature=28, hidden_feature=100, num_class=10, num_layers=2):\n", 95 | " super(rnn_classify, self).__init__()\n", 96 | " self.rnn = nn.LSTM(in_feature, hidden_feature, num_layers) # 使用两层 lstm\n", 97 | " self.classifier = nn.Linear(hidden_feature, num_class) # 将最后一个 rnn 的输出使用全连接得到最后的分类结果\n", 98 | " \n", 99 | " def forward(self, x):\n", 100 | " '''\n", 101 | " x 大小为 (batch, 1, 28, 28),所以我们需要将其转换成 RNN 的输入形式,即 (28, batch, 28)\n", 102 | " '''\n", 103 | " x = x.squeeze() # 去掉 (batch, 1, 28, 28) 中的 1,变成 (batch, 28, 28)\n", 104 | " x = x.permute(2, 0, 1) # 将最后一维放到第一维,变成 (28, batch, 28)\n", 105 | " out, _ = self.rnn(x) # 使用默认的隐藏状态,得到的 out 是 (28, batch, hidden_feature)\n", 106 | " out = out[-1, :, :] # 取序列中的最后一个,大小是 (batch, hidden_feature)\n", 107 | " out = self.classifier(out) # 得到分类结果\n", 108 | " return out" 109 | ] 110 | }, 111 | { 112 | "cell_type": "code", 113 | "execution_count": 4, 114 | "metadata": { 115 | "ExecuteTime": { 116 | "end_time": "2017-12-26T08:01:51.252533Z", 117 | "start_time": "2017-12-26T08:01:51.244612Z" 118 | }, 119 | "collapsed": true 120 | }, 121 | "outputs": [], 122 | "source": [ 123 | "net = rnn_classify()\n", 124 | "criterion = nn.CrossEntropyLoss()\n", 125 | "\n", 126 | "optimzier = torch.optim.Adadelta(net.parameters(), 1e-1)" 127 | ] 128 | }, 129 | { 130 | "cell_type": "code", 131 | "execution_count": 5, 132 | "metadata": { 133 | "ExecuteTime": { 134 | "end_time": "2017-12-26T08:03:36.739732Z", 135 | "start_time": "2017-12-26T08:01:51.607967Z" 136 | } 137 | }, 138 | "outputs": [ 139 | { 140 | "name": "stdout", 141 | "output_type": "stream", 142 | "text": [ 143 | "Epoch 0. Train Loss: 1.858605, Train Acc: 0.318347, Valid Loss: 1.147508, Valid Acc: 0.578125, Time 00:00:09\n", 144 | "Epoch 1. Train Loss: 0.503072, Train Acc: 0.848514, Valid Loss: 0.300552, Valid Acc: 0.912579, Time 00:00:09\n", 145 | "Epoch 2. Train Loss: 0.224762, Train Acc: 0.934785, Valid Loss: 0.176321, Valid Acc: 0.946499, Time 00:00:09\n", 146 | "Epoch 3. Train Loss: 0.157010, Train Acc: 0.953392, Valid Loss: 0.155280, Valid Acc: 0.954015, Time 00:00:09\n", 147 | "Epoch 4. Train Loss: 0.125926, Train Acc: 0.962137, Valid Loss: 0.105295, Valid Acc: 0.969640, Time 00:00:09\n", 148 | "Epoch 5. Train Loss: 0.104938, Train Acc: 0.968450, Valid Loss: 0.091477, Valid Acc: 0.972805, Time 00:00:10\n", 149 | "Epoch 6. 
Train Loss: 0.089124, Train Acc: 0.973481, Valid Loss: 0.104799, Valid Acc: 0.969343, Time 00:00:09\n", 150 | "Epoch 7. Train Loss: 0.077920, Train Acc: 0.976679, Valid Loss: 0.084242, Valid Acc: 0.976661, Time 00:00:10\n", 151 | "Epoch 8. Train Loss: 0.070259, Train Acc: 0.978795, Valid Loss: 0.078536, Valid Acc: 0.977749, Time 00:00:09\n", 152 | "Epoch 9. Train Loss: 0.063089, Train Acc: 0.981093, Valid Loss: 0.066984, Valid Acc: 0.980716, Time 00:00:09\n" 153 | ] 154 | } 155 | ], 156 | "source": [ 157 | "# 开始训练\n", 158 | "from utils import train\n", 159 | "train(net, train_data, test_data, 10, optimzier, criterion)" 160 | ] 161 | }, 162 | { 163 | "cell_type": "markdown", 164 | "metadata": {}, 165 | "source": [ 166 | "可以看到,训练 10 次在简单的 mnist 数据集上也取得的了 98% 的准确率,所以说 RNN 也可以做做简单的图像分类,但是这并不是他的主战场,下次课我们会讲到 RNN 的一个使用场景,时间序列预测。" 167 | ] 168 | } 169 | ], 170 | "metadata": { 171 | "kernelspec": { 172 | "display_name": "Python 3", 173 | "language": "python", 174 | "name": "python3" 175 | }, 176 | "language_info": { 177 | "codemirror_mode": { 178 | "name": "ipython", 179 | "version": 3 180 | }, 181 | "file_extension": ".py", 182 | "mimetype": "text/x-python", 183 | "name": "python", 184 | "nbconvert_exporter": "python", 185 | "pygments_lexer": "ipython3", 186 | "version": "3.6.3" 187 | } 188 | }, 189 | "nbformat": 4, 190 | "nbformat_minor": 2 191 | } 192 | -------------------------------------------------------------------------------- /chapter5_RNN/time-series/data.csv: -------------------------------------------------------------------------------- 1 | "Month","International airline passengers: monthly totals in thousands. Jan 49 ? Dec 60" 2 | "1949-01",112 3 | "1949-02",118 4 | "1949-03",132 5 | "1949-04",129 6 | "1949-05",121 7 | "1949-06",135 8 | "1949-07",148 9 | "1949-08",148 10 | "1949-09",136 11 | "1949-10",119 12 | "1949-11",104 13 | "1949-12",118 14 | "1950-01",115 15 | "1950-02",126 16 | "1950-03",141 17 | "1950-04",135 18 | "1950-05",125 19 | "1950-06",149 20 | "1950-07",170 21 | "1950-08",170 22 | "1950-09",158 23 | "1950-10",133 24 | "1950-11",114 25 | "1950-12",140 26 | "1951-01",145 27 | "1951-02",150 28 | "1951-03",178 29 | "1951-04",163 30 | "1951-05",172 31 | "1951-06",178 32 | "1951-07",199 33 | "1951-08",199 34 | "1951-09",184 35 | "1951-10",162 36 | "1951-11",146 37 | "1951-12",166 38 | "1952-01",171 39 | "1952-02",180 40 | "1952-03",193 41 | "1952-04",181 42 | "1952-05",183 43 | "1952-06",218 44 | "1952-07",230 45 | "1952-08",242 46 | "1952-09",209 47 | "1952-10",191 48 | "1952-11",172 49 | "1952-12",194 50 | "1953-01",196 51 | "1953-02",196 52 | "1953-03",236 53 | "1953-04",235 54 | "1953-05",229 55 | "1953-06",243 56 | "1953-07",264 57 | "1953-08",272 58 | "1953-09",237 59 | "1953-10",211 60 | "1953-11",180 61 | "1953-12",201 62 | "1954-01",204 63 | "1954-02",188 64 | "1954-03",235 65 | "1954-04",227 66 | "1954-05",234 67 | "1954-06",264 68 | "1954-07",302 69 | "1954-08",293 70 | "1954-09",259 71 | "1954-10",229 72 | "1954-11",203 73 | "1954-12",229 74 | "1955-01",242 75 | "1955-02",233 76 | "1955-03",267 77 | "1955-04",269 78 | "1955-05",270 79 | "1955-06",315 80 | "1955-07",364 81 | "1955-08",347 82 | "1955-09",312 83 | "1955-10",274 84 | "1955-11",237 85 | "1955-12",278 86 | "1956-01",284 87 | "1956-02",277 88 | "1956-03",317 89 | "1956-04",313 90 | "1956-05",318 91 | "1956-06",374 92 | "1956-07",413 93 | "1956-08",405 94 | "1956-09",355 95 | "1956-10",306 96 | "1956-11",271 97 | "1956-12",306 98 | "1957-01",315 99 | "1957-02",301 100 | "1957-03",356 101 | "1957-04",348 
102 | "1957-05",355 103 | "1957-06",422 104 | "1957-07",465 105 | "1957-08",467 106 | "1957-09",404 107 | "1957-10",347 108 | "1957-11",305 109 | "1957-12",336 110 | "1958-01",340 111 | "1958-02",318 112 | "1958-03",362 113 | "1958-04",348 114 | "1958-05",363 115 | "1958-06",435 116 | "1958-07",491 117 | "1958-08",505 118 | "1958-09",404 119 | "1958-10",359 120 | "1958-11",310 121 | "1958-12",337 122 | "1959-01",360 123 | "1959-02",342 124 | "1959-03",406 125 | "1959-04",396 126 | "1959-05",420 127 | "1959-06",472 128 | "1959-07",548 129 | "1959-08",559 130 | "1959-09",463 131 | "1959-10",407 132 | "1959-11",362 133 | "1959-12",405 134 | "1960-01",417 135 | "1960-02",391 136 | "1960-03",419 137 | "1960-04",461 138 | "1960-05",472 139 | "1960-06",535 140 | "1960-07",622 141 | "1960-08",606 142 | "1960-09",508 143 | "1960-10",461 144 | "1960-11",390 145 | "1960-12",432 146 | 147 | International airline passengers: monthly totals in thousands. Jan 49 ? Dec 60 148 | 149 | -------------------------------------------------------------------------------- /chapter5_RNN/utils.py: -------------------------------------------------------------------------------- 1 | from datetime import datetime 2 | 3 | import torch 4 | import torch.nn.functional as F 5 | from torch import nn 6 | from torch.autograd import Variable 7 | 8 | 9 | def get_acc(output, label): 10 | total = output.shape[0] 11 | _, pred_label = output.max(1) 12 | num_correct = (pred_label == label).sum().data[0] 13 | return num_correct / total 14 | 15 | 16 | def train(net, train_data, valid_data, num_epochs, optimizer, criterion): 17 | if torch.cuda.is_available(): 18 | net = net.cuda() 19 | prev_time = datetime.now() 20 | for epoch in range(num_epochs): 21 | train_loss = 0 22 | train_acc = 0 23 | net = net.train() 24 | for im, label in train_data: 25 | if torch.cuda.is_available(): 26 | im = Variable(im.cuda()) # (bs, 3, h, w) 27 | label = Variable(label.cuda()) # (bs, h, w) 28 | else: 29 | im = Variable(im) 30 | label = Variable(label) 31 | # forward 32 | output = net(im) 33 | loss = criterion(output, label) 34 | # backward 35 | optimizer.zero_grad() 36 | loss.backward() 37 | optimizer.step() 38 | 39 | train_loss += loss.data[0] 40 | train_acc += get_acc(output, label) 41 | 42 | cur_time = datetime.now() 43 | h, remainder = divmod((cur_time - prev_time).seconds, 3600) 44 | m, s = divmod(remainder, 60) 45 | time_str = "Time %02d:%02d:%02d" % (h, m, s) 46 | if valid_data is not None: 47 | valid_loss = 0 48 | valid_acc = 0 49 | net = net.eval() 50 | for im, label in valid_data: 51 | if torch.cuda.is_available(): 52 | im = Variable(im.cuda(), volatile=True) 53 | label = Variable(label.cuda(), volatile=True) 54 | else: 55 | im = Variable(im, volatile=True) 56 | label = Variable(label, volatile=True) 57 | output = net(im) 58 | loss = criterion(output, label) 59 | valid_loss += loss.data[0] 60 | valid_acc += get_acc(output, label) 61 | epoch_str = ( 62 | "Epoch %d. Train Loss: %f, Train Acc: %f, Valid Loss: %f, Valid Acc: %f, " 63 | % (epoch, train_loss / len(train_data), 64 | train_acc / len(train_data), valid_loss / len(valid_data), 65 | valid_acc / len(valid_data))) 66 | else: 67 | epoch_str = ("Epoch %d. 
Train Loss: %f, Train Acc: %f, " % 68 | (epoch, train_loss / len(train_data), 69 | train_acc / len(train_data))) 70 | prev_time = cur_time 71 | print(epoch_str + time_str) 72 | 73 | 74 | def conv3x3(in_channel, out_channel, stride=1): 75 | return nn.Conv2d( 76 | in_channel, out_channel, 3, stride=stride, padding=1, bias=False) 77 | 78 | 79 | class residual_block(nn.Module): 80 | def __init__(self, in_channel, out_channel, same_shape=True): 81 | super(residual_block, self).__init__() 82 | self.same_shape = same_shape 83 | stride = 1 if self.same_shape else 2 84 | 85 | self.conv1 = conv3x3(in_channel, out_channel, stride=stride) 86 | self.bn1 = nn.BatchNorm2d(out_channel) 87 | 88 | self.conv2 = conv3x3(out_channel, out_channel) 89 | self.bn2 = nn.BatchNorm2d(out_channel) 90 | if not self.same_shape: 91 | self.conv3 = nn.Conv2d(in_channel, out_channel, 1, stride=stride) 92 | 93 | def forward(self, x): 94 | out = self.conv1(x) 95 | out = F.relu(self.bn1(out), True) 96 | out = self.conv2(out) 97 | out = F.relu(self.bn2(out), True) 98 | 99 | if not self.same_shape: 100 | x = self.conv3(x) 101 | return F.relu(x + out, True) 102 | 103 | 104 | class resnet(nn.Module): 105 | def __init__(self, in_channel, num_classes, verbose=False): 106 | super(resnet, self).__init__() 107 | self.verbose = verbose 108 | 109 | self.block1 = nn.Conv2d(in_channel, 64, 7, 2) 110 | 111 | self.block2 = nn.Sequential( 112 | nn.MaxPool2d(3, 2), residual_block(64, 64), residual_block(64, 64)) 113 | 114 | self.block3 = nn.Sequential( 115 | residual_block(64, 128, False), residual_block(128, 128)) 116 | 117 | self.block4 = nn.Sequential( 118 | residual_block(128, 256, False), residual_block(256, 256)) 119 | 120 | self.block5 = nn.Sequential( 121 | residual_block(256, 512, False), 122 | residual_block(512, 512), nn.AvgPool2d(3)) 123 | 124 | self.classifier = nn.Linear(512, num_classes) 125 | 126 | def forward(self, x): 127 | x = self.block1(x) 128 | if self.verbose: 129 | print('block 1 output: {}'.format(x.shape)) 130 | x = self.block2(x) 131 | if self.verbose: 132 | print('block 2 output: {}'.format(x.shape)) 133 | x = self.block3(x) 134 | if self.verbose: 135 | print('block 3 output: {}'.format(x.shape)) 136 | x = self.block4(x) 137 | if self.verbose: 138 | print('block 4 output: {}'.format(x.shape)) 139 | x = self.block5(x) 140 | if self.verbose: 141 | print('block 5 output: {}'.format(x.shape)) 142 | x = x.view(x.shape[0], -1) 143 | x = self.classifier(x) 144 | return x 145 | -------------------------------------------------------------------------------- /chapter6_GAN/vae.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": { 6 | "collapsed": true 7 | }, 8 | "source": [ 9 | "# 变分自动编码器\n", 10 | "变分编码器是自动编码器的升级版本,其结构跟自动编码器是类似的,也由编码器和解码器构成。\n", 11 | "\n", 12 | "回忆一下,自动编码器有个问题,就是并不能任意生成图片,因为我们没有办法自己去构造隐藏向量,需要通过一张图片输入编码我们才知道得到的隐含向量是什么,这时我们就可以通过变分自动编码器来解决这个问题。\n", 13 | "\n", 14 | "其实原理特别简单,只需要在编码过程给它增加一些限制,迫使其生成的隐含向量能够粗略的遵循一个标准正态分布,这就是其与一般的自动编码器最大的不同。\n", 15 | "\n", 16 | "这样我们生成一张新图片就很简单了,我们只需要给它一个标准正态分布的随机隐含向量,这样通过解码器就能够生成我们想要的图片,而不需要给它一张原始图片先编码。\n", 17 | "\n", 18 | "一般来讲,我们通过 encoder 得到的隐含向量并不是一个标准的正态分布,为了衡量两种分布的相似程度,我们使用 KL divergence,利用其来表示隐含向量与标准正态分布之间差异的 loss,另外一个 loss 仍然使用生成图片与原图片的均方误差来表示。\n", 19 | "\n", 20 | "KL divergence 的公式如下\n", 21 | "\n", 22 | "$$\n", 23 | "D{KL} (P || Q) = \\int_{-\\infty}^{\\infty} p(x) \\log \\frac{p(x)}{q(x)} dx\n", 24 | "$$" 25 | ] 26 | }, 27 | { 28 | "cell_type": "markdown", 29 | 
"metadata": {}, 30 | "source": [ 31 | "## 重参数\n", 32 | "为了避免计算 KL divergence 中的积分,我们使用重参数的技巧,不是每次产生一个隐含向量,而是生成两个向量,一个表示均值,一个表示标准差,这里我们默认编码之后的隐含向量服从一个正态分布的之后,就可以用一个标准正态分布先乘上标准差再加上均值来合成这个正态分布,最后 loss 就是希望这个生成的正态分布能够符合一个标准正态分布,也就是希望均值为 0,方差为 1\n", 33 | "\n", 34 | "所以标准的变分自动编码器如下\n", 35 | "\n", 36 | "![](https://ws4.sinaimg.cn/large/006tKfTcgy1fn15cq6n7pj30k007t0sv.jpg)" 37 | ] 38 | }, 39 | { 40 | "cell_type": "markdown", 41 | "metadata": {}, 42 | "source": [ 43 | "所以最后我们可以将我们的 loss 定义为下面的函数,由均方误差和 KL divergence 求和得到一个总的 loss\n", 44 | "\n", 45 | "```\n", 46 | "def loss_function(recon_x, x, mu, logvar):\n", 47 | " \"\"\"\n", 48 | " recon_x: generating images\n", 49 | " x: origin images\n", 50 | " mu: latent mean\n", 51 | " logvar: latent log variance\n", 52 | " \"\"\"\n", 53 | " MSE = reconstruction_function(recon_x, x)\n", 54 | " # loss = 0.5 * sum(1 + log(sigma^2) - mu^2 - sigma^2)\n", 55 | " KLD_element = mu.pow(2).add_(logvar.exp()).mul_(-1).add_(1).add_(logvar)\n", 56 | " KLD = torch.sum(KLD_element).mul_(-0.5)\n", 57 | " # KL divergence\n", 58 | " return MSE + KLD\n", 59 | "```" 60 | ] 61 | }, 62 | { 63 | "cell_type": "markdown", 64 | "metadata": {}, 65 | "source": [ 66 | "下面我们用 mnist 数据集来简单说明一下变分自动编码器" 67 | ] 68 | }, 69 | { 70 | "cell_type": "code", 71 | "execution_count": 1, 72 | "metadata": { 73 | "ExecuteTime": { 74 | "end_time": "2018-01-01T10:41:05.738797Z", 75 | "start_time": "2018-01-01T10:41:05.215490Z" 76 | }, 77 | "collapsed": true 78 | }, 79 | "outputs": [], 80 | "source": [ 81 | "import os\n", 82 | "\n", 83 | "import torch\n", 84 | "from torch.autograd import Variable\n", 85 | "import torch.nn.functional as F\n", 86 | "from torch import nn\n", 87 | "from torch.utils.data import DataLoader\n", 88 | "\n", 89 | "from torchvision.datasets import MNIST\n", 90 | "from torchvision import transforms as tfs\n", 91 | "from torchvision.utils import save_image" 92 | ] 93 | }, 94 | { 95 | "cell_type": "code", 96 | "execution_count": 2, 97 | "metadata": { 98 | "ExecuteTime": { 99 | "end_time": "2018-01-01T10:41:05.769643Z", 100 | "start_time": "2018-01-01T10:41:05.741302Z" 101 | }, 102 | "collapsed": true 103 | }, 104 | "outputs": [], 105 | "source": [ 106 | "im_tfs = tfs.Compose([\n", 107 | " tfs.ToTensor(),\n", 108 | " tfs.Normalize([0.5, 0.5, 0.5], [0.5, 0.5, 0.5]) # 标准化\n", 109 | "])\n", 110 | "\n", 111 | "train_set = MNIST('./mnist', transform=im_tfs)\n", 112 | "train_data = DataLoader(train_set, batch_size=128, shuffle=True)" 113 | ] 114 | }, 115 | { 116 | "cell_type": "code", 117 | "execution_count": 3, 118 | "metadata": { 119 | "ExecuteTime": { 120 | "end_time": "2018-01-01T10:41:06.397118Z", 121 | "start_time": "2018-01-01T10:41:06.306479Z" 122 | }, 123 | "collapsed": true 124 | }, 125 | "outputs": [], 126 | "source": [ 127 | "class VAE(nn.Module):\n", 128 | " def __init__(self):\n", 129 | " super(VAE, self).__init__()\n", 130 | "\n", 131 | " self.fc1 = nn.Linear(784, 400)\n", 132 | " self.fc21 = nn.Linear(400, 20) # mean\n", 133 | " self.fc22 = nn.Linear(400, 20) # var\n", 134 | " self.fc3 = nn.Linear(20, 400)\n", 135 | " self.fc4 = nn.Linear(400, 784)\n", 136 | "\n", 137 | " def encode(self, x):\n", 138 | " h1 = F.relu(self.fc1(x))\n", 139 | " return self.fc21(h1), self.fc22(h1)\n", 140 | "\n", 141 | " def reparametrize(self, mu, logvar):\n", 142 | " std = logvar.mul(0.5).exp_()\n", 143 | " eps = torch.FloatTensor(std.size()).normal_()\n", 144 | " if torch.cuda.is_available():\n", 145 | " eps = Variable(eps.cuda())\n", 146 | " else:\n", 147 | " eps = Variable(eps)\n", 148 | " 
return eps.mul(std).add_(mu)\n", 149 | "\n", 150 | " def decode(self, z):\n", 151 | " h3 = F.relu(self.fc3(z))\n", 152 | " return F.tanh(self.fc4(h3))\n", 153 | "\n", 154 | " def forward(self, x):\n", 155 | " mu, logvar = self.encode(x) # 编码\n", 156 | " z = self.reparametrize(mu, logvar) # 重新参数化成正态分布\n", 157 | " return self.decode(z), mu, logvar # 解码,同时输出均值方差" 158 | ] 159 | }, 160 | { 161 | "cell_type": "code", 162 | "execution_count": 4, 163 | "metadata": { 164 | "ExecuteTime": { 165 | "end_time": "2018-01-01T10:41:10.056600Z", 166 | "start_time": "2018-01-01T10:41:06.430817Z" 167 | }, 168 | "collapsed": true 169 | }, 170 | "outputs": [], 171 | "source": [ 172 | "net = VAE() # 实例化网络\n", 173 | "if torch.cuda.is_available():\n", 174 | " net = net.cuda()" 175 | ] 176 | }, 177 | { 178 | "cell_type": "code", 179 | "execution_count": 5, 180 | "metadata": { 181 | "ExecuteTime": { 182 | "end_time": "2018-01-01T10:41:10.409900Z", 183 | "start_time": "2018-01-01T10:41:10.059597Z" 184 | }, 185 | "collapsed": true 186 | }, 187 | "outputs": [], 188 | "source": [ 189 | "x, _ = train_set[0]\n", 190 | "x = x.view(x.shape[0], -1)\n", 191 | "if torch.cuda.is_available():\n", 192 | " x = x.cuda()\n", 193 | "x = Variable(x)\n", 194 | "_, mu, var = net(x)" 195 | ] 196 | }, 197 | { 198 | "cell_type": "code", 199 | "execution_count": 8, 200 | "metadata": { 201 | "ExecuteTime": { 202 | "end_time": "2018-01-01T10:41:29.753678Z", 203 | "start_time": "2018-01-01T10:41:29.749178Z" 204 | } 205 | }, 206 | "outputs": [ 207 | { 208 | "name": "stdout", 209 | "output_type": "stream", 210 | "text": [ 211 | "Variable containing:\n", 212 | "\n", 213 | "Columns 0 to 9 \n", 214 | "-0.0307 -0.1439 -0.0435 0.3472 0.0368 -0.0339 0.0274 -0.5608 0.0280 0.2742\n", 215 | "\n", 216 | "Columns 10 to 19 \n", 217 | "-0.6221 -0.0894 -0.0933 0.4241 0.1611 0.3267 0.5755 -0.0237 0.2714 -0.2806\n", 218 | "[torch.cuda.FloatTensor of size 1x20 (GPU 0)]\n", 219 | "\n" 220 | ] 221 | } 222 | ], 223 | "source": [ 224 | "print(mu)" 225 | ] 226 | }, 227 | { 228 | "cell_type": "markdown", 229 | "metadata": {}, 230 | "source": [ 231 | "可以看到,对于输入,网络可以输出隐含变量的均值和方差,这里的均值方差还没有训练\n", 232 | "\n", 233 | "下面开始训练" 234 | ] 235 | }, 236 | { 237 | "cell_type": "code", 238 | "execution_count": 6, 239 | "metadata": { 240 | "ExecuteTime": { 241 | "end_time": "2018-01-01T10:13:54.560436Z", 242 | "start_time": "2018-01-01T10:13:54.530108Z" 243 | }, 244 | "collapsed": true 245 | }, 246 | "outputs": [], 247 | "source": [ 248 | "reconstruction_function = nn.MSELoss(size_average=False)\n", 249 | "\n", 250 | "def loss_function(recon_x, x, mu, logvar):\n", 251 | " \"\"\"\n", 252 | " recon_x: generating images\n", 253 | " x: origin images\n", 254 | " mu: latent mean\n", 255 | " logvar: latent log variance\n", 256 | " \"\"\"\n", 257 | " MSE = reconstruction_function(recon_x, x)\n", 258 | " # loss = 0.5 * sum(1 + log(sigma^2) - mu^2 - sigma^2)\n", 259 | " KLD_element = mu.pow(2).add_(logvar.exp()).mul_(-1).add_(1).add_(logvar)\n", 260 | " KLD = torch.sum(KLD_element).mul_(-0.5)\n", 261 | " # KL divergence\n", 262 | " return MSE + KLD\n", 263 | "\n", 264 | "optimizer = torch.optim.Adam(net.parameters(), lr=1e-3)\n", 265 | "\n", 266 | "def to_img(x):\n", 267 | " '''\n", 268 | " 定义一个函数将最后的结果转换回图片\n", 269 | " '''\n", 270 | " x = 0.5 * (x + 1.)\n", 271 | " x = x.clamp(0, 1)\n", 272 | " x = x.view(x.shape[0], 1, 28, 28)\n", 273 | " return x" 274 | ] 275 | }, 276 | { 277 | "cell_type": "code", 278 | "execution_count": 7, 279 | "metadata": { 280 | "ExecuteTime": { 281 | "end_time": 
"2018-01-01T10:35:01.115877Z", 282 | "start_time": "2018-01-01T10:13:54.562533Z" 283 | } 284 | }, 285 | "outputs": [ 286 | { 287 | "name": "stdout", 288 | "output_type": "stream", 289 | "text": [ 290 | "epoch: 20, Loss: 61.5803\n", 291 | "epoch: 40, Loss: 62.9573\n", 292 | "epoch: 60, Loss: 63.4285\n", 293 | "epoch: 80, Loss: 64.7138\n", 294 | "epoch: 100, Loss: 63.3343\n" 295 | ] 296 | } 297 | ], 298 | "source": [ 299 | "for e in range(100):\n", 300 | " for im, _ in train_data:\n", 301 | " im = im.view(im.shape[0], -1)\n", 302 | " im = Variable(im)\n", 303 | " if torch.cuda.is_available():\n", 304 | " im = im.cuda()\n", 305 | " recon_im, mu, logvar = net(im)\n", 306 | " loss = loss_function(recon_im, im, mu, logvar) / im.shape[0] # 将 loss 平均\n", 307 | " optimizer.zero_grad()\n", 308 | " loss.backward()\n", 309 | " optimizer.step()\n", 310 | "\n", 311 | " if (e + 1) % 20 == 0:\n", 312 | " print('epoch: {}, Loss: {:.4f}'.format(e + 1, loss.data[0]))\n", 313 | " save = to_img(recon_im.cpu().data)\n", 314 | " if not os.path.exists('./vae_img'):\n", 315 | " os.mkdir('./vae_img')\n", 316 | " save_image(save, './vae_img/image_{}.png'.format(e + 1))" 317 | ] 318 | }, 319 | { 320 | "cell_type": "markdown", 321 | "metadata": {}, 322 | "source": [ 323 | "可以看看使用变分自动编码器得到的结果,可以发现效果比一般的编码器要好很多\n", 324 | "\n", 325 | "![](https://ws1.sinaimg.cn/large/006tKfTcgy1fn1ag8832zj306q0a2gmz.jpg)\n", 326 | "\n", 327 | "我们可以输出其中的均值看看" 328 | ] 329 | }, 330 | { 331 | "cell_type": "code", 332 | "execution_count": 14, 333 | "metadata": { 334 | "ExecuteTime": { 335 | "end_time": "2018-01-01T10:40:36.481622Z", 336 | "start_time": "2018-01-01T10:40:36.463332Z" 337 | }, 338 | "collapsed": true 339 | }, 340 | "outputs": [], 341 | "source": [ 342 | "x, _ = train_set[0]\n", 343 | "x = x.view(x.shape[0], -1)\n", 344 | "if torch.cuda.is_available():\n", 345 | " x = x.cuda()\n", 346 | "x = Variable(x)\n", 347 | "_, mu, _ = net(x)" 348 | ] 349 | }, 350 | { 351 | "cell_type": "code", 352 | "execution_count": 15, 353 | "metadata": { 354 | "ExecuteTime": { 355 | "end_time": "2018-01-01T10:40:37.490484Z", 356 | "start_time": "2018-01-01T10:40:37.485127Z" 357 | } 358 | }, 359 | "outputs": [ 360 | { 361 | "name": "stdout", 362 | "output_type": "stream", 363 | "text": [ 364 | "Variable containing:\n", 365 | "\n", 366 | "Columns 0 to 9 \n", 367 | " 0.3861 0.5561 1.1995 -1.6773 0.9867 0.1244 -0.3443 -1.6658 1.3332 1.1606\n", 368 | "\n", 369 | "Columns 10 to 19 \n", 370 | " 0.6898 0.3042 2.1044 -2.4588 0.0504 0.9743 1.1136 0.7872 -0.0777 1.6101\n", 371 | "[torch.cuda.FloatTensor of size 1x20 (GPU 0)]\n", 372 | "\n" 373 | ] 374 | } 375 | ], 376 | "source": [ 377 | "print(mu)" 378 | ] 379 | }, 380 | { 381 | "cell_type": "markdown", 382 | "metadata": {}, 383 | "source": [ 384 | "变分自动编码器虽然比一般的自动编码器效果要好,而且也限制了其输出的编码 (code) 的概率分布,但是它仍然是通过直接计算生成图片和原始图片的均方误差来生成 loss,这个方式并不好,在下一章生成对抗网络中,我们会讲一讲这种方式计算 loss 的局限性,然后会介绍一种新的训练办法,就是通过生成对抗的训练方式来训练网络而不是直接比较两张图片的每个像素点的均方误差" 385 | ] 386 | } 387 | ], 388 | "metadata": { 389 | "kernelspec": { 390 | "display_name": "Python 3", 391 | "language": "python", 392 | "name": "python3" 393 | }, 394 | "language_info": { 395 | "codemirror_mode": { 396 | "name": "ipython", 397 | "version": 3 398 | }, 399 | "file_extension": ".py", 400 | "mimetype": "text/x-python", 401 | "name": "python", 402 | "nbconvert_exporter": "python", 403 | "pygments_lexer": "ipython3", 404 | "version": "3.6.3" 405 | } 406 | }, 407 | "nbformat": 4, 408 | "nbformat_minor": 2 409 | } 410 | 
-------------------------------------------------------------------------------- /chapter7_RL/dqn.py: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | 3 | import torch 4 | import torch.nn as nn 5 | from torch.autograd import Variable 6 | import numpy as np 7 | import gym 8 | 9 | # 定义一些超参数 10 | 11 | 12 | batch_size = 32 13 | lr = 0.01 14 | epsilon = 0.9 15 | gamma = 0.9 16 | target_replace_iter = 100 17 | memory_capacity = 2000 18 | env = gym.make('CartPole-v0') 19 | env = env.unwrapped 20 | n_actions = env.action_space.n 21 | n_states = env.observation_space.shape[0] 22 | 23 | 24 | class q_net(nn.Module): 25 | def __init__(self, hidden=50): 26 | super(q_net, self).__init__() 27 | self.fc = nn.Sequential( 28 | nn.Linear(n_states, hidden), 29 | nn.ReLU(True), 30 | nn.Linear(hidden, n_actions) 31 | ) 32 | 33 | nn.init.normal(self.fc[0].weight, std=0.1) # 使用标准差是 0.1 的正态分布初始化 34 | nn.init.normal(self.fc[2].weight, std=0.1) # 使用标准差是 0.1 的正态分布初始化 35 | 36 | def forward(self, x): 37 | actions_value = self.fc(x) 38 | return actions_value 39 | 40 | 41 | class DQN(object): 42 | def __init__(self): 43 | self.eval_net, self.target_net = q_net(), q_net() 44 | 45 | self.learn_step_counter = 0 46 | self.memory_counter = 0 47 | self.memory = np.zeros((memory_capacity, n_states * 2 + 2)) # 当前的状态和动作,之后的状态和动作 48 | self.optimizer = torch.optim.Adam(self.eval_net.parameters(), lr=lr) 49 | self.criterion = nn.MSELoss() 50 | 51 | def choose_action(self, s): 52 | ''' 53 | 根据输入的状态得到所有可行动作的价值估计 54 | ''' 55 | s = Variable(torch.unsqueeze(torch.FloatTensor(s), 0)) 56 | # input only one sample 57 | if np.random.uniform() < epsilon: # greedy 贪婪算法 58 | actions_value = self.eval_net(s) 59 | action = torch.max(actions_value, 1)[1].data[0] 60 | else: # random 随机选择 61 | action = np.random.randint(0, n_actions) 62 | return action 63 | 64 | def store_transition(self, s, a, r, s_): 65 | transition = np.hstack((s, [a, r], s_)) 66 | # 用新的记忆替换旧的记忆 67 | index = self.memory_counter % memory_capacity 68 | self.memory[index, :] = transition 69 | self.memory_counter += 1 70 | 71 | def learn(self): 72 | # target net 的参数更新 73 | if self.learn_step_counter % target_replace_iter == 0: 74 | self.target_net.load_state_dict(self.eval_net.state_dict()) 75 | self.learn_step_counter += 1 76 | 77 | # 取样记忆中的经历 78 | sample_index = np.random.choice(memory_capacity, batch_size) 79 | b_memory = self.memory[sample_index, :] 80 | b_s = Variable(torch.FloatTensor(b_memory[:, :n_states])) 81 | b_a = Variable( 82 | torch.LongTensor(b_memory[:, n_states:n_states + 1].astype(int))) 83 | b_r = Variable( 84 | torch.FloatTensor(b_memory[:, n_states + 1:n_states + 2])) 85 | b_s_ = Variable(torch.FloatTensor(b_memory[:, -n_states:])) 86 | 87 | # q_eval net 评估状态下动作的 value 88 | q_eval = self.eval_net(b_s).gather(1, b_a) # shape (batch, 1) 选择对应 action 的动作 89 | q_next = self.target_net( 90 | b_s_).detach() # detach from graph, don't backpropagate 91 | q_target = b_r + gamma * q_next.max(1)[0].view(batch_size, 1) # shape (batch, 1) 92 | loss = self.criterion(q_eval, q_target) # mse 作为 loss 函数 93 | # 更新网络 94 | self.optimizer.zero_grad() 95 | loss.backward() 96 | self.optimizer.step() 97 | 98 | 99 | dqn_trainer = DQN() 100 | 101 | print('collecting experience ... 
') 102 | all_reward = [] 103 | for i_episode in range(300): 104 | s = env.reset() 105 | reward = 0 106 | while True: 107 | if dqn_trainer.memory_counter > memory_capacity: 108 | env.render() 109 | a = dqn_trainer.choose_action(s) 110 | 111 | # 环境采取动作得到结果 112 | s_, r, done, info = env.step(a) 113 | 114 | # 修改奖励以便更快收敛 115 | x, x_dot, theta, theta_dot = s_ 116 | r1 = (env.x_threshold - abs(x)) / env.x_threshold - 0.8 117 | r2 = (env.theta_threshold_radians - abs(theta)) / env.theta_threshold_radians - 0.5 118 | r = r1 + r2 119 | 120 | dqn_trainer.store_transition(s, a, r, s_) 121 | 122 | reward += r 123 | if dqn_trainer.memory_counter > memory_capacity: # 记忆收集够开始学习 124 | dqn_trainer.learn() 125 | if done: 126 | print('Ep: {} | reward: {:.3f}'.format(i_episode, round(reward, 3))) 127 | all_reward.append(reward) 128 | break 129 | 130 | if done: 131 | break 132 | s = s_ 133 | -------------------------------------------------------------------------------- /chapter7_RL/mount-car.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | import gym 4 | 5 | n_states = 40 # 取样 40 个状态 6 | iter_max = 10000 7 | 8 | initial_lr = 1.0 # Learning rate 9 | min_lr = 0.003 10 | gamma = 1.0 11 | t_max = 10000 12 | eps = 0.02 13 | 14 | 15 | def run_episode(env, policy=None, render=False): 16 | obs = env.reset() 17 | total_reward = 0 18 | step_idx = 0 19 | for _ in range(t_max): 20 | if render: 21 | env.render() 22 | if policy is None: # 如果没有策略,就随机取样 23 | action = env.action_space.sample() 24 | else: 25 | a, b = obs_to_state(env, obs) 26 | action = policy[a][b] 27 | obs, reward, done, _ = env.step(action) 28 | total_reward += gamma ** step_idx * reward 29 | step_idx += 1 30 | if done: 31 | break 32 | return total_reward 33 | 34 | 35 | def obs_to_state(env, obs): 36 | """ 37 | 将观察的连续环境映射到离散的输入的状态 38 | """ 39 | env_low = env.observation_space.low 40 | env_high = env.observation_space.high 41 | env_dx = (env_high - env_low) / n_states 42 | a = int((obs[0] - env_low[0]) / env_dx[0]) 43 | b = int((obs[1] - env_low[1]) / env_dx[1]) 44 | return a, b 45 | 46 | 47 | if __name__ == '__main__': 48 | env_name = 'MountainCar-v0' 49 | env = gym.make(env_name) 50 | env.seed(0) 51 | np.random.seed(0) 52 | print('----- using Q Learning -----') 53 | q_table = np.zeros((n_states, n_states, 3)) 54 | for i in range(iter_max): 55 | obs = env.reset() 56 | total_reward = 0 57 | ## eta: 每一步学习率都不断减小 58 | eta = max(min_lr, initial_lr * (0.85 ** (i // 100))) 59 | for j in range(t_max): 60 | x, y = obs_to_state(env, obs) 61 | if np.random.uniform(0, 1) < eps: # greedy 贪心算法 62 | action = np.random.choice(env.action_space.n) 63 | else: 64 | logits = q_table[x, y, :] 65 | logits_exp = np.exp(logits) 66 | probs = logits_exp / np.sum(logits_exp) # 算出三个动作的概率 67 | action = np.random.choice(env.action_space.n, p=probs) # 依概率来选择动作 68 | obs, reward, done, _ = env.step(action) 69 | total_reward += reward 70 | # 更新 q 表 71 | x_, y_ = obs_to_state(env, obs) 72 | q_table[x, y, action] = q_table[x, y, action] + eta * ( 73 | reward + gamma * np.max(q_table[x_, y_, :]) - 74 | q_table[x, y, action]) 75 | if done: 76 | break 77 | if i % 100 == 0: 78 | print('Iteration #%d -- Total reward = %d.' 
% (i + 1, 79 | total_reward)) 80 | solution_policy = np.argmax(q_table, axis=2) # 在 q 表中每个状态下都取值最大的动作 81 | solution_policy_scores = [ 82 | run_episode(env, solution_policy, False) for _ in range(100) 83 | ] 84 | print("Average score of solution = ", np.mean(solution_policy_scores)) 85 | # Animate it 86 | run_episode(env, solution_policy, True) 87 | -------------------------------------------------------------------------------- /chapter7_RL/open_ai_gym.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": { 6 | "collapsed": true 7 | }, 8 | "source": [ 9 | "# Gym 介绍\n", 10 | "前面我们简单地介绍了强化学习的例子,从这个例子可以发现,自己构建强化学习的环境非常麻烦,需要耗费大量的时间,这个时候我们可以使用一个开源的工具,叫做 gym,它由 open ai 开发。\n", 11 | "\n", 12 | "在这个库中,从简单的走格子到毁灭战士,提供了各种各样的游戏环境,可以让大家把自己的 AI 放进去玩耍。取名叫 gym 也很有意思,可以想象一群 AI 在健身房里各种锻炼,磨练技术。\n", 13 | "\n", 14 | "使用起来也非常方便,首先在终端内输入如下代码进行安装。\n", 15 | "\n", 16 | "```\n", 17 | "# Github源\n", 18 | "git clone https://github.com/openai/gym\n", 19 | "cd gym\n", 20 | "pip install -e .[all]\n", 21 | "\n", 22 | "# 直接下载gym包\n", 23 | "pip install gym[all]\n", 24 | "```\n", 25 | "\n", 26 | "我们可以访问这个页面看到 gym 所[包含的环境和介绍](https://github.com/openai/gym/wiki)。" 27 | ] 28 | }, 29 | { 30 | "cell_type": "markdown", 31 | "metadata": {}, 32 | "source": [ 33 | "在上面的环境页面可以看到,gym 内置了很多环境,我们可以使用前面讲过的 q learning 尝试一个 gym 中的小例子,[mountain car](https://github.com/openai/gym/wiki/MountainCar-v0)。在 mountain car 中,我们能够观察到环境中小车的状态,也就是位置和速度,我们能够采取的动作是向左或者向右。\n", 34 | "\n", 35 | "为了使用 q learning,我们必须要建立 q 表,而这里的状态空间是连续不可数的,所以我们需要离散化连续空间,将位置和速度这两个维度都平均分成很多份,具体的实现可以运行 `mount-car.py` 看看结果。\n", 36 | "\n", 37 | "运行完之后,可以看到 q 表的收敛非常慢,reward 一直都很难变化,我们需要很久才能将小车推到终点,这个时候我们需要一个更加强大的武器,那就是 deep q network。" 38 | ] 39 | } 40 | ], 41 | "metadata": { 42 | "kernelspec": { 43 | "display_name": "Python 3", 44 | "language": "python", 45 | "name": "python3" 46 | }, 47 | "language_info": { 48 | "codemirror_mode": { 49 | "name": "ipython", 50 | "version": 3 51 | }, 52 | "file_extension": ".py", 53 | "mimetype": "text/x-python", 54 | "name": "python", 55 | "nbconvert_exporter": "python", 56 | "pygments_lexer": "ipython3", 57 | "version": "3.6.3" 58 | } 59 | }, 60 | "nbformat": 4, 61 | "nbformat_minor": 2 62 | } 63 | -------------------------------------------------------------------------------- /chapter7_RL/q-learning-intro.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Q Learning 介绍\n", 8 | "在强化学习中,有一种很有名的算法,叫做 q-learning,我们下面会从原理入手,然后通过一个简单的小例子讲一讲 q-learning。\n", 9 | "\n", 10 | "## q-learning 的原理\n", 11 | "我们使用一个简单的例子来引入 q-learning,假设一个屋子有 5 个房间,某一些房间之间相连,我们希望能够走出这个屋子,示意图如下\n", 12 | "\n", 13 | "![](https://ws2.sinaimg.cn/large/006tNc79ly1fn70q0n91lj30h40a8aaf.jpg)" 14 | ] 15 | }, 16 | { 17 | "cell_type": "markdown", 18 | "metadata": {}, 19 | "source": [ 20 | "那么我们可以将其简化成节点和图的形式,每个房间作为一个节点,两个房间若有门相连,就在两个节点之间连接一条线,可以得到下面的图片\n", 21 | "\n", 22 | "![](https://ws4.sinaimg.cn/large/006tNc79ly1fn70r6c6koj30h60b2gm0.jpg)" 23 | ] 24 | }, 25 | { 26 | "cell_type": "markdown", 27 | "metadata": {}, 28 | "source": [ 29 | "为了模拟整个过程,我们将一个智能体放置在任意一个房间,希望它能够走出这个屋子,也就是说希望其能够走到 5 号节点。为了能够让智能体知道 5 号节点是目标房间,我们需要设置一些奖励,对于每一条边,我们都关联一个奖励值:直接连到目标房间的边的奖励值设置为 100,其他的边可以设置为 0,注意 5 号房间有一个指向自己的箭头,奖励值也设置为 100,其他直接指向 5 号房间的边也设置为 100,这样当智能体到达 5 号房间之后,它就会选择一直待在 5 号房间,这也称为吸收目标,效果如下\n", 30 | "\n", 31 | 
"![](https://ws4.sinaimg.cn/large/006tNc79ly1fn71gf4idrj30c207u74i.jpg)" 32 | ] 33 | }, 34 | { 35 | "cell_type": "markdown", 36 | "metadata": {}, 37 | "source": [ 38 | "想想一下智能体可以不断学习,每次我们将其放在其中一个房间,然后它可以不断探索,根据奖励值走到 5 号房间,也就是走出这个屋子。比如现在这个智能体在 2 号房间,我们希望其能够不断探索走到 5 号房间。\n", 39 | "\n", 40 | "### 状态和动作\n", 41 | "q-learning 中有两个重要的概念,一个是状态,一个是动作,我们将每一个房间都称为一个状态,而智能体从一个房间走到另外一个房间称为一个动作,对应于上面的图就是每个节点是一个状态,每一个箭头都是一种行动。假如智能体处在状态 4,从状态 4 其可以选择走到状态 0,或者状态 3 或者状态 5,如果其走到了状态 3,也可以选择走到状态 2 或者状态 1 或者 状态 4。\n", 42 | "\n", 43 | "我们可以根据状态和动作得到的奖励来建立一个奖励表,用 -1 表示相应节点之间没有边相连,而没有到达终点的边奖励都记为 0,如下\n", 44 | "\n", 45 | "![](https://ws2.sinaimg.cn/large/006tNc79ly1fn71o8jlinj307t055wek.jpg)" 46 | ] 47 | }, 48 | { 49 | "cell_type": "markdown", 50 | "metadata": {}, 51 | "source": [ 52 | "类似的,我们可以让智能体通过和环境的交互来不断学习环境中的知识,让智能体根据每个状态来估计每种行动可能得到的收益,这个矩阵被称为 Q 表,每一行表示状态,每一列表示不同的动作,对于状态未知的情景,我们可以随机让智能体从任何的位置出发,然后去探索新的环境来尽可能的得到所有的状态。刚开始智能体对于环境一无所知,所以数值全部初始化为 0,如下\n", 53 | "\n", 54 | "![](https://ws2.sinaimg.cn/large/006tNc79ly1fn71t3h3wnj306u053jrf.jpg)\n", 55 | "\n", 56 | "我们的智能体通过不断地学习来更新 Q 表中的结果,最后依据 Q 表中的值来做决策。" 57 | ] 58 | }, 59 | { 60 | "cell_type": "markdown", 61 | "metadata": {}, 62 | "source": [ 63 | "### Q-learning 算法\n", 64 | "有了奖励表和 Q 表,我们需要知道智能体是如何通过学习来更新 Q 表,以便最后能够根据 Q 表进行决策,这个时候就需要讲一讲 Q-learning 的算法。\n", 65 | "\n", 66 | "Q-learning 的算法特别简单,状态转移公式如下\n", 67 | "\n", 68 | "$$Q(s, a) = R(s, a) + \\gamma \\mathop{max}_{\\tilde{a}}\\{ Q(\\tilde{s}, \\tilde{a}) \\}$$\n", 69 | "\n", 70 | "其中 s, a 表示当前的状态和行动,$\\tilde{s}, \\tilde{a}$ 分别表示 s 采取 a 的动作之后的下一个状态和该状态对应所有的行动,参数 $\\gamma$ 是一个常数,$0 \\leq \\gamma \\le 1 $表示对未来奖励的一个衰减程度,形象地比喻就是一个人对于未来的远见程度。\n", 71 | "\n", 72 | "解释一下就是智能体通过经验进行自主学习,不断从一个状态转移到另外一个状态进行探索,并在这个过程中不断更新 Q 表,直到到达目标位置,Q 表就像智能体的大脑,更新越多就越强。我们称智能体的每一次探索为 episode,每个 episode 都表示智能体从任意初始状态到达目标状态,当智能体到达一个目标状态,那么当前的 episode 结束,进入下一个 episode。" 73 | ] 74 | }, 75 | { 76 | "cell_type": "markdown", 77 | "metadata": {}, 78 | "source": [ 79 | "下面给出 q-learning 的整个算法流程\n", 80 | "- step1 给定参数 $\\gamma$ 和奖励矩阵 R\n", 81 | "- step2 令 Q:= 0\n", 82 | "- step3 For each episode:\n", 83 | " - 3.1 随机选择一个初始状态 s\n", 84 | " - 3.2 若未到达目标状态,则执行以下几步\n", 85 | " - (1)在当前状态 s 的所有可能行动中选取一个行为 a\n", 86 | " - (2)利用选定的行为 a,得到下一个状态 $\\tilde{s}$\n", 87 | " - (3)按照前面的转移公式计算 Q(s, a)\n", 88 | " - (4)令 $s: = \\tilde{s}$" 89 | ] 90 | }, 91 | { 92 | "cell_type": "markdown", 93 | "metadata": {}, 94 | "source": [ 95 | "### 单步演示\n", 96 | "为了更好地理解 q-learning,我们可以示例其中一步。\n", 97 | "\n", 98 | "首先选择 $\\gamma = 0.8$,初始状态为 1,Q 初始化为零矩阵\n", 99 | "\n", 100 | "![](https://ws2.sinaimg.cn/large/006tNc79ly1fn71t3h3wnj306u053jrf.jpg)\n" 101 | ] 102 | }, 103 | { 104 | "cell_type": "markdown", 105 | "metadata": {}, 106 | "source": [ 107 | "![](https://ws2.sinaimg.cn/large/006tNc79ly1fn71o8jlinj307t055wek.jpg)\n", 108 | "\n", 109 | "因为是状态 1,所以我们观察 R 矩阵的第二行,负数表示非法行为,所以下一个状态只有两种可能,走到状态 3 或者走到状态 5,随机地,我们可以选择走到状态 5。\n", 110 | "\n", 111 | "当我们走到状态 5 之后,会发生什么事情呢?观察 R 矩阵的第 6 行可以发现,其对应于三个可能采取的动作:转至状态 1,4 或者 5,根据上面的转移公式,我们有\n", 112 | "\n", 113 | "$$Q(1, 5) = R(1, 5) + 0.8 * max\\{Q(5, 1), Q(5, 4), Q(5, 5)\\} = 100 + 0.8 * max\\{0, 0, 0\\} = 100$$\n", 114 | "\n", 115 | "所以现在 Q 矩阵进行了更新,变为了\n", 116 | "\n", 117 | "![](https://ws2.sinaimg.cn/large/006tNc79ly1fn8182u6xlj306y04mmx6.jpg)\n", 118 | "\n", 119 | "现在我们的状态由 1 变成了 5,因为 5 是最终的目标状态,所以一次 episode 便完成了,进入下一个 episode。\n", 120 | "\n", 121 | "在下一个 episode 中又随机选择一个初始状态开始,不断更新 Q 矩阵,在经过了很多个 episode 之后,矩阵 Q 接近收敛,那么我们的智能体就学会了从任意状态转移到目标状态的最优路径。" 122 | ] 123 | }, 124 | { 125 | "cell_type": "markdown", 126 | "metadata": {}, 127 | "source": [ 128 | 
"从上面的原理,我们知道了 q-learning 最重要的状态转移公式,这个公式也叫做 Bellman Equation,通过这个公式我们能够不断地进行更新 Q 矩阵,最后得到一个收敛的 Q 矩阵。\n", 129 | "\n", 130 | "下面我们通过代码来实现这个过程\n", 131 | "\n", 132 | "我们定义一个简单的走迷宫过程,也就是\n", 133 | "\n", 134 | "![](https://ws1.sinaimg.cn/large/006tNc79ly1fn82ja4dkwj308d08d3yj.jpg)" 135 | ] 136 | }, 137 | { 138 | "cell_type": "markdown", 139 | "metadata": {}, 140 | "source": [ 141 | "初始位置随机在 state 0, state 1 和 state 2 上,然后希望智能体能够走到 state 3 获得宝藏,上面可行的行动路线已经用箭头标注了" 142 | ] 143 | }, 144 | { 145 | "cell_type": "code", 146 | "execution_count": 1, 147 | "metadata": { 148 | "collapsed": true 149 | }, 150 | "outputs": [], 151 | "source": [ 152 | "import numpy as np\n", 153 | "import random" 154 | ] 155 | }, 156 | { 157 | "cell_type": "markdown", 158 | "metadata": {}, 159 | "source": [ 160 | "下面定义奖励矩阵,一共是 4 行,5 列,每一行分别表示 state 0 到 state 3 这四个状态,每一列分别表示上下左右和静止 5 种状态,奖励矩阵中的 0 表示不可行的路线,比如第一个行,上走和左走都是不可行的路线,都用 0 表示,向下走会走到陷阱,所以使用 -10 表示奖励,向右走和静止都给与 -1 的奖励,因为既没有触发陷阱,也没有到达宝藏,但是过程中浪费了时间。" 161 | ] 162 | }, 163 | { 164 | "cell_type": "code", 165 | "execution_count": 2, 166 | "metadata": { 167 | "collapsed": true 168 | }, 169 | "outputs": [], 170 | "source": [ 171 | "reward = np.array([[0, -10, 0, -1, -1],\n", 172 | " [0, 10, -1, 0, -1],\n", 173 | " [-1, 0, 0, 10, -10],\n", 174 | " [-1, 0, -10, 0, 10]])" 175 | ] 176 | }, 177 | { 178 | "cell_type": "markdown", 179 | "metadata": {}, 180 | "source": [ 181 | "接下来定义一个初始化为 0 的 q 矩阵" 182 | ] 183 | }, 184 | { 185 | "cell_type": "code", 186 | "execution_count": 3, 187 | "metadata": { 188 | "collapsed": true 189 | }, 190 | "outputs": [], 191 | "source": [ 192 | "q_matrix = np.zeros((4, 5))" 193 | ] 194 | }, 195 | { 196 | "cell_type": "markdown", 197 | "metadata": {}, 198 | "source": [ 199 | "然后定义一个转移矩阵,也就是从一个状态,采取一个可行的动作之后到达的状态,因为这里的状态和动作都是有限的,所以我们可以将他们存下来,比如第一行表示 state 0,向上和向左都是不可行的路线,所以给 -1 的值表示,向下走到达了 state 2,所以第二个值为 2,向右走到达了 state 1,所以第四个值是 1,保持不同还是在 state 0,所以最后一个标注为 0,另外几行类似。" 200 | ] 201 | }, 202 | { 203 | "cell_type": "code", 204 | "execution_count": 7, 205 | "metadata": { 206 | "collapsed": true 207 | }, 208 | "outputs": [], 209 | "source": [ 210 | "transition_matrix = np.array([[-1, 2, -1, 1, 0],\n", 211 | " [-1, 3, 0, -1, 1],\n", 212 | " [0, -1, -1, 3, 2],\n", 213 | " [1, -1, 2, -1, 3]])" 214 | ] 215 | }, 216 | { 217 | "cell_type": "markdown", 218 | "metadata": {}, 219 | "source": [ 220 | "最后定义每个状态的有效行动,比如 state 0 的有效行动就是下、右和静止,对应于 1,3 和 4" 221 | ] 222 | }, 223 | { 224 | "cell_type": "code", 225 | "execution_count": 8, 226 | "metadata": { 227 | "collapsed": true 228 | }, 229 | "outputs": [], 230 | "source": [ 231 | "valid_actions = np.array([[1, 3, 4],\n", 232 | " [1, 2, 4],\n", 233 | " [0, 3, 4],\n", 234 | " [0, 2, 4]])" 235 | ] 236 | }, 237 | { 238 | "cell_type": "code", 239 | "execution_count": 9, 240 | "metadata": { 241 | "collapsed": true 242 | }, 243 | "outputs": [], 244 | "source": [ 245 | "# 定义 bellman equation 中的 gamma\n", 246 | "gamma = 0.8" 247 | ] 248 | }, 249 | { 250 | "cell_type": "markdown", 251 | "metadata": {}, 252 | "source": [ 253 | "最后开始让智能体与环境交互,不断地使用 bellman 方程来更新 q 矩阵,我们跑 10 个 episode" 254 | ] 255 | }, 256 | { 257 | "cell_type": "code", 258 | "execution_count": 10, 259 | "metadata": {}, 260 | "outputs": [ 261 | { 262 | "name": "stdout", 263 | "output_type": "stream", 264 | "text": [ 265 | "episode: 0, q matrix: \n", 266 | "[[ 0. 0. 0. -1. -1.]\n", 267 | " [ 0. 10. -1. 0. -1.]\n", 268 | " [ 0. 0. 0. 0. 0.]\n", 269 | " [ 0. 0. 0. 0. 0.]]\n", 270 | "\n", 271 | "episode: 1, q matrix: \n", 272 | "[[ 0. 0. 0. -1. -1.]\n", 273 | " [ 0. 10. -1. 0. 
-1.]\n", 274 | " [ 0. 0. 0. 10. 0.]\n", 275 | " [ 0. 0. 0. 0. 0.]]\n", 276 | "\n", 277 | "episode: 2, q matrix: \n", 278 | "[[ 0. -2. 0. 7. 4.6]\n", 279 | " [ 0. 10. 4.6 0. 7. ]\n", 280 | " [ -1.8 0. 0. 10. -2. ]\n", 281 | " [ 0. 0. 0. 0. 0. ]]\n", 282 | "\n", 283 | "episode: 3, q matrix: \n", 284 | "[[ 0. -2. 0. 7. 4.6]\n", 285 | " [ 0. 10. 4.6 0. 7. ]\n", 286 | " [ 4.6 0. 0. 10. -2. ]\n", 287 | " [ 0. 0. 0. 0. 0. ]]\n", 288 | "\n", 289 | "episode: 4, q matrix: \n", 290 | "[[ 0. -2. 0. 7. 4.6]\n", 291 | " [ 0. 10. 4.6 0. 7. ]\n", 292 | " [ 4.6 0. 0. 10. -2. ]\n", 293 | " [ 0. 0. 0. 0. 0. ]]\n", 294 | "\n", 295 | "episode: 5, q matrix: \n", 296 | "[[ 0. -2. 0. 7. 4.6]\n", 297 | " [ 0. 10. 4.6 0. 7. ]\n", 298 | " [ 4.6 0. 0. 10. -2. ]\n", 299 | " [ 0. 0. 0. 0. 0. ]]\n", 300 | "\n", 301 | "episode: 6, q matrix: \n", 302 | "[[ 0. -2. 0. 7. 4.6]\n", 303 | " [ 0. 10. 4.6 0. 7. ]\n", 304 | " [ 4.6 0. 0. 10. -2. ]\n", 305 | " [ 0. 0. 0. 0. 0. ]]\n", 306 | "\n", 307 | "episode: 7, q matrix: \n", 308 | "[[ 0. -2. 0. 7. 4.6]\n", 309 | " [ 0. 10. 4.6 0. 7. ]\n", 310 | " [ 4.6 0. 0. 10. -2. ]\n", 311 | " [ 0. 0. 0. 0. 0. ]]\n", 312 | "\n", 313 | "episode: 8, q matrix: \n", 314 | "[[ 0. -2. 0. 7. 4.6]\n", 315 | " [ 0. 10. 4.6 0. 7. ]\n", 316 | " [ 4.6 0. 0. 10. -2. ]\n", 317 | " [ 0. 0. 0. 0. 0. ]]\n", 318 | "\n", 319 | "episode: 9, q matrix: \n", 320 | "[[ 0. -2. 0. 7. 4.6]\n", 321 | " [ 0. 10. 4.6 0. 7. ]\n", 322 | " [ 4.6 0. 0. 10. -2. ]\n", 323 | " [ 0. 0. 0. 0. 0. ]]\n", 324 | "\n" 325 | ] 326 | } 327 | ], 328 | "source": [ 329 | "for i in range(10):\n", 330 | " start_state = np.random.choice([0, 1, 2], size=1)[0] # 随机初始起点\n", 331 | " current_state = start_state\n", 332 | " while current_state != 3: # 判断是否到达终点\n", 333 | " action = random.choice(valid_actions[current_state]) # greedy 随机选择当前状态下的有效动作\n", 334 | " next_state = transition_matrix[current_state][action] # 通过选择的动作得到下一个状态\n", 335 | " future_rewards = []\n", 336 | " for action_nxt in valid_actions[next_state]:\n", 337 | " future_rewards.append(q_matrix[next_state][action_nxt]) # 得到下一个状态所有可能动作的奖励\n", 338 | " q_state = reward[current_state][action] + gamma * max(future_rewards) # bellman equation\n", 339 | " q_matrix[current_state][action] = q_state # 更新 q 矩阵\n", 340 | " current_state = next_state # 将下一个状态变成当前状态\n", 341 | " \n", 342 | " print('episode: {}, q matrix: \\n{}'.format(i, q_matrix))\n", 343 | " print()" 344 | ] 345 | }, 346 | { 347 | "cell_type": "markdown", 348 | "metadata": { 349 | "collapsed": true 350 | }, 351 | "source": [ 352 | "可以看到在第一次 episode 之后,智能体就学会了在 state 2 的时候向下走能够得到奖励,通过不断地学习,在 10 个 episode 之后,智能体知道,在 state 0,向右走能得到奖励,在 state 1 向下走能够得到奖励,在 state 3 向右 走能得到奖励,这样在这个环境中任何一个状态智能体都能够知道如何才能够最快地到达宝藏的位置\n", 353 | "\n", 354 | "从上面的例子我们简单的演示了 q-learning,可以看出自己来构建整个环境是非常麻烦的,所以我们可以通过一些第三方库来帮我们搭建强化学习的环境,其中最有名的就是 open-ai 的 gym 模块,下一章我们将介绍一下 gym。" 355 | ] 356 | } 357 | ], 358 | "metadata": { 359 | "kernelspec": { 360 | "display_name": "Python 3", 361 | "language": "python", 362 | "name": "python3" 363 | }, 364 | "language_info": { 365 | "codemirror_mode": { 366 | "name": "ipython", 367 | "version": 3 368 | }, 369 | "file_extension": ".py", 370 | "mimetype": "text/x-python", 371 | "name": "python", 372 | "nbconvert_exporter": "python", 373 | "pygments_lexer": "ipython3", 374 | "version": "3.6.3" 375 | } 376 | }, 377 | "nbformat": 4, 378 | "nbformat_minor": 2 379 | } 380 | -------------------------------------------------------------------------------- /chapter8_PyTorch-Advances/example_data/image/class_1/1.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/L1aoXingyu/code-of-learn-deep-learning-with-pytorch/ea24810dc26265e85e3eec34e1d091d21202d800/chapter8_PyTorch-Advances/example_data/image/class_1/1.png -------------------------------------------------------------------------------- /chapter8_PyTorch-Advances/example_data/image/class_1/2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/L1aoXingyu/code-of-learn-deep-learning-with-pytorch/ea24810dc26265e85e3eec34e1d091d21202d800/chapter8_PyTorch-Advances/example_data/image/class_1/2.png -------------------------------------------------------------------------------- /chapter8_PyTorch-Advances/example_data/image/class_1/3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/L1aoXingyu/code-of-learn-deep-learning-with-pytorch/ea24810dc26265e85e3eec34e1d091d21202d800/chapter8_PyTorch-Advances/example_data/image/class_1/3.png -------------------------------------------------------------------------------- /chapter8_PyTorch-Advances/example_data/image/class_2/10.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/L1aoXingyu/code-of-learn-deep-learning-with-pytorch/ea24810dc26265e85e3eec34e1d091d21202d800/chapter8_PyTorch-Advances/example_data/image/class_2/10.png -------------------------------------------------------------------------------- /chapter8_PyTorch-Advances/example_data/image/class_2/11.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/L1aoXingyu/code-of-learn-deep-learning-with-pytorch/ea24810dc26265e85e3eec34e1d091d21202d800/chapter8_PyTorch-Advances/example_data/image/class_2/11.png -------------------------------------------------------------------------------- /chapter8_PyTorch-Advances/example_data/image/class_2/12.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/L1aoXingyu/code-of-learn-deep-learning-with-pytorch/ea24810dc26265e85e3eec34e1d091d21202d800/chapter8_PyTorch-Advances/example_data/image/class_2/12.png -------------------------------------------------------------------------------- /chapter8_PyTorch-Advances/example_data/image/class_3/16.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/L1aoXingyu/code-of-learn-deep-learning-with-pytorch/ea24810dc26265e85e3eec34e1d091d21202d800/chapter8_PyTorch-Advances/example_data/image/class_3/16.png -------------------------------------------------------------------------------- /chapter8_PyTorch-Advances/example_data/image/class_3/17.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/L1aoXingyu/code-of-learn-deep-learning-with-pytorch/ea24810dc26265e85e3eec34e1d091d21202d800/chapter8_PyTorch-Advances/example_data/image/class_3/17.png -------------------------------------------------------------------------------- /chapter8_PyTorch-Advances/example_data/image/class_3/18.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/L1aoXingyu/code-of-learn-deep-learning-with-pytorch/ea24810dc26265e85e3eec34e1d091d21202d800/chapter8_PyTorch-Advances/example_data/image/class_3/18.png 
-------------------------------------------------------------------------------- /chapter8_PyTorch-Advances/tensorboard.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# TensorBoard 可视化\n", 8 | "[github](https://github.com/lanpa/tensorboard-pytorch)" 9 | ] 10 | }, 11 | { 12 | "cell_type": "code", 13 | "execution_count": 1, 14 | "metadata": { 15 | "ExecuteTime": { 16 | "end_time": "2017-12-24T09:39:39.910789Z", 17 | "start_time": "2017-12-24T09:39:39.398570Z" 18 | }, 19 | "collapsed": true 20 | }, 21 | "outputs": [], 22 | "source": [ 23 | "import sys\n", 24 | "sys.path.append('..')\n", 25 | "\n", 26 | "import numpy as np\n", 27 | "import torch\n", 28 | "from torch import nn\n", 29 | "import torch.nn.functional as F\n", 30 | "from torch.autograd import Variable\n", 31 | "from torchvision.datasets import CIFAR10\n", 32 | "from utils import resnet\n", 33 | "from torchvision import transforms as tfs\n", 34 | "from datetime import datetime\n", 35 | "from tensorboardX import SummaryWriter" 36 | ] 37 | }, 38 | { 39 | "cell_type": "code", 40 | "execution_count": 2, 41 | "metadata": { 42 | "ExecuteTime": { 43 | "end_time": "2017-12-24T09:39:41.981293Z", 44 | "start_time": "2017-12-24T09:39:40.621895Z" 45 | }, 46 | "collapsed": true 47 | }, 48 | "outputs": [], 49 | "source": [ 50 | "# 使用数据增强\n", 51 | "def train_tf(x):\n", 52 | " im_aug = tfs.Compose([\n", 53 | " tfs.Resize(120),\n", 54 | " tfs.RandomHorizontalFlip(),\n", 55 | " tfs.RandomCrop(96),\n", 56 | " tfs.ColorJitter(brightness=0.5, contrast=0.5, hue=0.5),\n", 57 | " tfs.ToTensor(),\n", 58 | " tfs.Normalize([0.5, 0.5, 0.5], [0.5, 0.5, 0.5])\n", 59 | " ])\n", 60 | " x = im_aug(x)\n", 61 | " return x\n", 62 | "\n", 63 | "def test_tf(x):\n", 64 | " im_aug = tfs.Compose([\n", 65 | " tfs.Resize(96),\n", 66 | " tfs.ToTensor(),\n", 67 | " tfs.Normalize([0.5, 0.5, 0.5], [0.5, 0.5, 0.5])\n", 68 | " ])\n", 69 | " x = im_aug(x)\n", 70 | " return x\n", 71 | "\n", 72 | "train_set = CIFAR10('./data', train=True, transform=train_tf)\n", 73 | "train_data = torch.utils.data.DataLoader(train_set, batch_size=256, shuffle=True, num_workers=4)\n", 74 | "valid_set = CIFAR10('./data', train=False, transform=test_tf)\n", 75 | "valid_data = torch.utils.data.DataLoader(valid_set, batch_size=256, shuffle=False, num_workers=4)\n", 76 | "\n", 77 | "net = resnet(3, 10)\n", 78 | "optimizer = torch.optim.SGD(net.parameters(), lr=0.1, weight_decay=1e-4)\n", 79 | "criterion = nn.CrossEntropyLoss()" 80 | ] 81 | }, 82 | { 83 | "cell_type": "code", 84 | "execution_count": 3, 85 | "metadata": { 86 | "ExecuteTime": { 87 | "end_time": "2017-12-24T09:53:40.434024Z", 88 | "start_time": "2017-12-24T09:39:41.984480Z" 89 | }, 90 | "collapsed": false 91 | }, 92 | "outputs": [ 93 | { 94 | "name": "stdout", 95 | "output_type": "stream", 96 | "text": [ 97 | "Epoch 0. Train Loss: 1.877906, Train Acc: 0.315410, Valid Loss: 2.198587, Valid Acc: 0.293164, Time 00:00:26\n", 98 | "Epoch 1. Train Loss: 1.398501, Train Acc: 0.498657, Valid Loss: 1.877540, Valid Acc: 0.400098, Time 00:00:27\n", 99 | "Epoch 2. Train Loss: 1.141419, Train Acc: 0.597628, Valid Loss: 1.872355, Valid Acc: 0.446777, Time 00:00:27\n", 100 | "Epoch 3. Train Loss: 0.980048, Train Acc: 0.658367, Valid Loss: 1.672951, Valid Acc: 0.475391, Time 00:00:27\n", 101 | "Epoch 4. Train Loss: 0.871448, Train Acc: 0.695073, Valid Loss: 1.263234, Valid Acc: 0.578613, Time 00:00:28\n", 102 | "Epoch 5. 
Train Loss: 0.794649, Train Acc: 0.723992, Valid Loss: 2.142715, Valid Acc: 0.466699, Time 00:00:27\n", 103 | "Epoch 6. Train Loss: 0.736611, Train Acc: 0.741554, Valid Loss: 1.701331, Valid Acc: 0.500391, Time 00:00:27\n", 104 | "Epoch 7. Train Loss: 0.695095, Train Acc: 0.756816, Valid Loss: 1.385478, Valid Acc: 0.597656, Time 00:00:28\n", 105 | "Epoch 8. Train Loss: 0.652659, Train Acc: 0.773796, Valid Loss: 1.029726, Valid Acc: 0.676465, Time 00:00:27\n", 106 | "Epoch 9. Train Loss: 0.623829, Train Acc: 0.784144, Valid Loss: 0.933388, Valid Acc: 0.682520, Time 00:00:27\n", 107 | "Epoch 10. Train Loss: 0.581615, Train Acc: 0.798792, Valid Loss: 1.291557, Valid Acc: 0.635938, Time 00:00:27\n", 108 | "Epoch 11. Train Loss: 0.559358, Train Acc: 0.805708, Valid Loss: 1.430408, Valid Acc: 0.586426, Time 00:00:28\n", 109 | "Epoch 12. Train Loss: 0.534197, Train Acc: 0.816853, Valid Loss: 0.960802, Valid Acc: 0.704785, Time 00:00:27\n", 110 | "Epoch 13. Train Loss: 0.512111, Train Acc: 0.822389, Valid Loss: 0.923353, Valid Acc: 0.716602, Time 00:00:27\n", 111 | "Epoch 14. Train Loss: 0.494577, Train Acc: 0.828225, Valid Loss: 1.023517, Valid Acc: 0.687207, Time 00:00:27\n", 112 | "Epoch 15. Train Loss: 0.473396, Train Acc: 0.835212, Valid Loss: 0.842679, Valid Acc: 0.727930, Time 00:00:27\n", 113 | "Epoch 16. Train Loss: 0.459708, Train Acc: 0.840290, Valid Loss: 0.826854, Valid Acc: 0.726953, Time 00:00:28\n", 114 | "Epoch 17. Train Loss: 0.433836, Train Acc: 0.847931, Valid Loss: 0.730658, Valid Acc: 0.764258, Time 00:00:27\n", 115 | "Epoch 18. Train Loss: 0.422375, Train Acc: 0.854401, Valid Loss: 0.677953, Valid Acc: 0.778125, Time 00:00:27\n", 116 | "Epoch 19. Train Loss: 0.410208, Train Acc: 0.857370, Valid Loss: 0.787286, Valid Acc: 0.754102, Time 00:00:27\n", 117 | "Epoch 20. Train Loss: 0.395556, Train Acc: 0.862923, Valid Loss: 0.859754, Valid Acc: 0.738965, Time 00:00:27\n", 118 | "Epoch 21. Train Loss: 0.382050, Train Acc: 0.866554, Valid Loss: 1.266704, Valid Acc: 0.651660, Time 00:00:27\n", 119 | "Epoch 22. Train Loss: 0.368614, Train Acc: 0.871213, Valid Loss: 0.912465, Valid Acc: 0.738672, Time 00:00:27\n", 120 | "Epoch 23. Train Loss: 0.358302, Train Acc: 0.873964, Valid Loss: 0.963238, Valid Acc: 0.706055, Time 00:00:27\n", 121 | "Epoch 24. Train Loss: 0.347568, Train Acc: 0.879620, Valid Loss: 0.777171, Valid Acc: 0.751855, Time 00:00:27\n", 122 | "Epoch 25. Train Loss: 0.339247, Train Acc: 0.882215, Valid Loss: 0.707863, Valid Acc: 0.777734, Time 00:00:27\n", 123 | "Epoch 26. Train Loss: 0.329292, Train Acc: 0.885830, Valid Loss: 0.682976, Valid Acc: 0.790527, Time 00:00:27\n", 124 | "Epoch 27. Train Loss: 0.313049, Train Acc: 0.890761, Valid Loss: 0.665912, Valid Acc: 0.795410, Time 00:00:27\n", 125 | "Epoch 28. Train Loss: 0.305482, Train Acc: 0.891944, Valid Loss: 0.880263, Valid Acc: 0.743848, Time 00:00:27\n", 126 | "Epoch 29. 
Train Loss: 0.301507, Train Acc: 0.895289, Valid Loss: 1.062325, Valid Acc: 0.708398, Time 00:00:27\n" 127 | ] 128 | } 129 | ], 130 | "source": [ 131 | "writer = SummaryWriter()\n", 132 | "\n", 133 | "def get_acc(output, label):\n", 134 | " total = output.shape[0]\n", 135 | " _, pred_label = output.max(1)\n", 136 | " num_correct = (pred_label == label).sum().data[0]\n", 137 | " return num_correct / total\n", 138 | "\n", 139 | "if torch.cuda.is_available():\n", 140 | " net = net.cuda()\n", 141 | "prev_time = datetime.now()\n", 142 | "for epoch in range(30):\n", 143 | " train_loss = 0\n", 144 | " train_acc = 0\n", 145 | " net = net.train()\n", 146 | " for im, label in train_data:\n", 147 | " if torch.cuda.is_available():\n", 148 | " im = Variable(im.cuda()) # (bs, 3, h, w)\n", 149 | " label = Variable(label.cuda()) # (bs, h, w)\n", 150 | " else:\n", 151 | " im = Variable(im)\n", 152 | " label = Variable(label)\n", 153 | " # forward\n", 154 | " output = net(im)\n", 155 | " loss = criterion(output, label)\n", 156 | " # backward\n", 157 | " optimizer.zero_grad()\n", 158 | " loss.backward()\n", 159 | " optimizer.step()\n", 160 | "\n", 161 | " train_loss += loss.data[0]\n", 162 | " train_acc += get_acc(output, label)\n", 163 | " cur_time = datetime.now()\n", 164 | " h, remainder = divmod((cur_time - prev_time).seconds, 3600)\n", 165 | " m, s = divmod(remainder, 60)\n", 166 | " time_str = \"Time %02d:%02d:%02d\" % (h, m, s)\n", 167 | " valid_loss = 0\n", 168 | " valid_acc = 0\n", 169 | " net = net.eval()\n", 170 | " for im, label in valid_data:\n", 171 | " if torch.cuda.is_available():\n", 172 | " im = Variable(im.cuda(), volatile=True)\n", 173 | " label = Variable(label.cuda(), volatile=True)\n", 174 | " else:\n", 175 | " im = Variable(im, volatile=True)\n", 176 | " label = Variable(label, volatile=True)\n", 177 | " output = net(im)\n", 178 | " loss = criterion(output, label)\n", 179 | " valid_loss += loss.data[0]\n", 180 | " valid_acc += get_acc(output, label)\n", 181 | " epoch_str = (\n", 182 | " \"Epoch %d. 
Train Loss: %f, Train Acc: %f, Valid Loss: %f, Valid Acc: %f, \"\n", 183 | " % (epoch, train_loss / len(train_data),\n", 184 | " train_acc / len(train_data), valid_loss / len(valid_data),\n", 185 | " valid_acc / len(valid_data)))\n", 186 | " prev_time = cur_time\n", 187 | " # ====================== 使用 tensorboard ==================\n", 188 | " writer.add_scalars('Loss', {'train': train_loss / len(train_data),\n", 189 | " 'valid': valid_loss / len(valid_data)}, epoch)\n", 190 | " writer.add_scalars('Acc', {'train': train_acc / len(train_data),\n", 191 | " 'valid': valid_acc / len(valid_data)}, epoch)\n", 192 | " # =========================================================\n", 193 | " print(epoch_str + time_str)" 194 | ] 195 | }, 196 | { 197 | "cell_type": "markdown", 198 | "metadata": {}, 199 | "source": [ 200 | "![](https://ws1.sinaimg.cn/large/006tNc79ly1fms31s3i4yj31gc0qimy6.jpg)" 201 | ] 202 | } 203 | ], 204 | "metadata": { 205 | "kernelspec": { 206 | "display_name": "mx", 207 | "language": "python", 208 | "name": "mx" 209 | }, 210 | "language_info": { 211 | "codemirror_mode": { 212 | "name": "ipython", 213 | "version": 3 214 | }, 215 | "file_extension": ".py", 216 | "mimetype": "text/x-python", 217 | "name": "python", 218 | "nbconvert_exporter": "python", 219 | "pygments_lexer": "ipython3", 220 | "version": "3.6.0" 221 | } 222 | }, 223 | "nbformat": 4, 224 | "nbformat_minor": 2 225 | } 226 | -------------------------------------------------------------------------------- /chapter9_Computer-Vision/Deep-Dream/README.md: -------------------------------------------------------------------------------- 1 | # Deep-Dream 2 | PyTorch implement of Google Deep Dream 3 | -------------------------------------------------------------------------------- /chapter9_Computer-Vision/Deep-Dream/backward/backward.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch.autograd import Variable 3 | 4 | # simple gradient 5 | a = Variable(torch.FloatTensor([2, 3]), requires_grad=True) 6 | b = a + 3 7 | c = b * b * 3 8 | out = c.mean() 9 | out.backward() 10 | print('*' * 10) 11 | print('=====simple gradient======') 12 | print('input') 13 | print(a.data) 14 | print('compute result is') 15 | print(out.data[0]) 16 | print('input gradients are') 17 | print(a.grad.data) 18 | 19 | # backward on non-scalar output 20 | m = Variable(torch.FloatTensor([[2, 3]]), requires_grad=True) 21 | n = Variable(torch.zeros(1, 2)) 22 | n[0, 0] = m[0, 0]**2 23 | n[0, 1] = m[0, 1]**3 24 | n.backward(torch.FloatTensor([[1, 1]])) 25 | print('*' * 10) 26 | print('=====non scalar output======') 27 | print('input') 28 | print(m.data) 29 | print('input gradients are') 30 | print(m.grad.data) 31 | 32 | # jacobian 33 | j = torch.zeros(2, 2) 34 | k = Variable(torch.zeros(1, 2)) 35 | m.grad.data.zero_() 36 | k[0, 0] = m[0, 0]**2 + 3 * m[0, 1] 37 | k[0, 1] = m[0, 1]**2 + 2 * m[0, 0] 38 | k.backward(torch.FloatTensor([[1, 0]]), retain_variables=True) 39 | j[:, 0] = m.grad.data 40 | m.grad.data.zero_() 41 | k.backward(torch.FloatTensor([[0, 1]])) 42 | j[:, 1] = m.grad.data 43 | print('jacobian matrix is') 44 | print(j) 45 | 46 | # compute jacobian matrix 47 | x = torch.FloatTensor([2, 1]).view(1, 2) 48 | x = Variable(x, requires_grad=True) 49 | y = Variable(torch.FloatTensor([[1, 2], [3, 4]])) 50 | 51 | z = torch.mm(x, y) 52 | jacobian = torch.zeros((2, 2)) 53 | z.backward( 54 | torch.FloatTensor([[1, 0]]), retain_variables=True) # dz1/dx1, dz2/dx1 55 | jacobian[:, 0] = x.grad.data 56 | 
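# 逐列构造雅可比矩阵:backward 传入的向量 v 计算的是 v^T * J,
# 传入 one-hot 向量 [1, 0] 得到 dz1/dx(存入第 0 列),再传 [0, 1] 得到 dz2/dx。
# 两次 backward 之间要先把 x.grad 清零(梯度默认是累加的),并且第一次调用
# 需要保留计算图(这里的 retain_variables 参数在之后版本的 PyTorch 中改名为 retain_graph)。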
x.grad.data.zero_() 57 | z.backward(torch.FloatTensor([[0, 1]])) # dz1/dx2, dz2/dx2 58 | jacobian[:, 1] = x.grad.data 59 | print('=========jacobian========') 60 | print('x') 61 | print(x.data) 62 | print('y') 63 | print(y.data) 64 | print('compute result') 65 | print(z.data) 66 | print('jacobian matrix is') 67 | print(jacobian) -------------------------------------------------------------------------------- /chapter9_Computer-Vision/Deep-Dream/deepdream.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | from util import showtensor 4 | import scipy.ndimage as nd 5 | from torch.autograd import Variable 6 | 7 | 8 | def objective_L2(dst, guide_features): 9 | return dst.data 10 | 11 | 12 | def make_step(img, model, control=None, distance=objective_L2): 13 | mean = np.array([0.485, 0.456, 0.406]).reshape([3, 1, 1]) 14 | std = np.array([0.229, 0.224, 0.225]).reshape([3, 1, 1]) 15 | 16 | learning_rate = 2e-2 17 | max_jitter = 32 18 | num_iterations = 20 19 | show_every = 10 20 | end_layer = 3 21 | guide_features = control 22 | 23 | for i in range(num_iterations): 24 | shift_x, shift_y = np.random.randint(-max_jitter, max_jitter + 1, 2) 25 | img = np.roll(np.roll(img, shift_x, -1), shift_y, -2) 26 | # apply jitter shift 27 | model.zero_grad() 28 | img_tensor = torch.Tensor(img) 29 | if torch.cuda.is_available(): 30 | img_variable = Variable(img_tensor.cuda(), requires_grad=True) 31 | else: 32 | img_variable = Variable(img_tensor, requires_grad=True) 33 | 34 | act_value = model.forward(img_variable, end_layer) 35 | diff_out = distance(act_value, guide_features) 36 | act_value.backward(diff_out) 37 | ratio = np.abs(img_variable.grad.data.cpu().numpy()).mean() 38 | learning_rate_use = learning_rate / ratio 39 | img_variable.data.add_(img_variable.grad.data * learning_rate_use) 40 | img = img_variable.data.cpu().numpy() # b, c, h, w 41 | img = np.roll(np.roll(img, -shift_x, -1), -shift_y, -2) 42 | img[0, :, :, :] = np.clip(img[0, :, :, :], -mean / std, 43 | (1 - mean) / std) 44 | if i == 0 or (i + 1) % show_every == 0: 45 | showtensor(img) 46 | return img 47 | 48 | 49 | def dream(model, 50 | base_img, 51 | octave_n=6, 52 | octave_scale=1.4, 53 | control=None, 54 | distance=objective_L2): 55 | octaves = [base_img] 56 | for i in range(octave_n - 1): 57 | octaves.append( 58 | nd.zoom( 59 | octaves[-1], (1, 1, 1.0 / octave_scale, 1.0 / octave_scale), 60 | order=1)) 61 | 62 | detail = np.zeros_like(octaves[-1]) 63 | for octave, octave_base in enumerate(octaves[::-1]): 64 | h, w = octave_base.shape[-2:] 65 | if octave > 0: 66 | h1, w1 = detail.shape[-2:] 67 | detail = nd.zoom( 68 | detail, (1, 1, 1.0 * h / h1, 1.0 * w / w1), order=1) 69 | 70 | input_oct = octave_base + detail 71 | print(input_oct.shape) 72 | out = make_step(input_oct, model, control, distance=distance) 73 | detail = out - octave_base 74 | -------------------------------------------------------------------------------- /chapter9_Computer-Vision/Deep-Dream/guide_image/flower.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/L1aoXingyu/code-of-learn-deep-learning-with-pytorch/ea24810dc26265e85e3eec34e1d091d21202d800/chapter9_Computer-Vision/Deep-Dream/guide_image/flower.jpg -------------------------------------------------------------------------------- /chapter9_Computer-Vision/Deep-Dream/guide_image/input.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/L1aoXingyu/code-of-learn-deep-learning-with-pytorch/ea24810dc26265e85e3eec34e1d091d21202d800/chapter9_Computer-Vision/Deep-Dream/guide_image/input.png -------------------------------------------------------------------------------- /chapter9_Computer-Vision/Deep-Dream/guide_image/kitten.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/L1aoXingyu/code-of-learn-deep-learning-with-pytorch/ea24810dc26265e85e3eec34e1d091d21202d800/chapter9_Computer-Vision/Deep-Dream/guide_image/kitten.jpg -------------------------------------------------------------------------------- /chapter9_Computer-Vision/Deep-Dream/resnet.py: -------------------------------------------------------------------------------- 1 | __author__ = 'SherlockLiao' 2 | 3 | import torch 4 | from torch import nn 5 | from torchvision import models 6 | import torch.utils.model_zoo as model_zoo 7 | 8 | 9 | model_urls = { 10 | 'resnet18': 'https://download.pytorch.org/models/resnet18-5c106cde.pth', 11 | 'resnet34': 'https://download.pytorch.org/models/resnet34-333f7ec4.pth', 12 | 'resnet50': 'https://download.pytorch.org/models/resnet50-19c8e357.pth', 13 | 'resnet101': 'https://download.pytorch.org/models/resnet101-5d3b4d8f.pth', 14 | 'resnet152': 'https://download.pytorch.org/models/resnet152-b121ed2d.pth', 15 | } 16 | 17 | 18 | class Bottleneck(nn.Module): 19 | expansion = 4 20 | 21 | def __init__(self, inplanes, planes, stride=1, downsample=None): 22 | super(Bottleneck, self).__init__() 23 | self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1, bias=False) 24 | self.bn1 = nn.BatchNorm2d(planes) 25 | self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride, 26 | padding=1, bias=False) 27 | self.bn2 = nn.BatchNorm2d(planes) 28 | self.conv3 = nn.Conv2d(planes, planes * 4, kernel_size=1, bias=False) 29 | self.bn3 = nn.BatchNorm2d(planes * 4) 30 | self.relu = nn.ReLU(inplace=True) 31 | self.downsample = downsample 32 | self.stride = stride 33 | 34 | def forward(self, x): 35 | residual = x 36 | 37 | out = self.conv1(x) 38 | out = self.bn1(out) 39 | out = self.relu(out) 40 | 41 | out = self.conv2(out) 42 | out = self.bn2(out) 43 | out = self.relu(out) 44 | 45 | out = self.conv3(out) 46 | out = self.bn3(out) 47 | 48 | if self.downsample is not None: 49 | residual = self.downsample(x) 50 | 51 | out += residual 52 | out = self.relu(out) 53 | 54 | return out 55 | 56 | 57 | class CustomResNet(models.resnet.ResNet): 58 | def forward(self, x, end_layer): 59 | """ 60 | end_layer range from 1 to 4 61 | """ 62 | x = self.conv1(x) 63 | x = self.bn1(x) 64 | x = self.relu(x) 65 | x = self.maxpool(x) 66 | 67 | layers = [self.layer1, self.layer2, self.layer3, self.layer4] 68 | for i in range(end_layer): 69 | x = layers[i](x) 70 | return x 71 | 72 | 73 | def resnet50(pretrained=False, **kwargs): 74 | model = CustomResNet(Bottleneck, [3, 4, 6, 3], **kwargs) 75 | if pretrained: 76 | model.load_state_dict(model_zoo.load_url(model_urls['resnet50'])) 77 | return model 78 | -------------------------------------------------------------------------------- /chapter9_Computer-Vision/Deep-Dream/sky.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/L1aoXingyu/code-of-learn-deep-learning-with-pytorch/ea24810dc26265e85e3eec34e1d091d21202d800/chapter9_Computer-Vision/Deep-Dream/sky.jpg -------------------------------------------------------------------------------- 
/chapter9_Computer-Vision/Deep-Dream/util.py: -------------------------------------------------------------------------------- 1 | import PIL.Image 2 | from io import BytesIO 3 | from IPython.display import clear_output, Image, display 4 | import numpy as np 5 | 6 | 7 | def showarray(a, fmt='jpeg'): 8 | a = np.uint8(np.clip(a, 0, 255)) 9 | f = BytesIO() 10 | PIL.Image.fromarray(a).save(f, fmt) 11 | display(Image(data=f.getvalue())) 12 | 13 | 14 | def showtensor(a): 15 | mean = np.array([0.485, 0.456, 0.406]).reshape([1, 1, 3]) 16 | std = np.array([0.229, 0.224, 0.225]).reshape([1, 1, 3]) 17 | inp = a[0, :, :, :] 18 | inp = inp.transpose(1, 2, 0) 19 | inp = std * inp + mean 20 | inp *= 255 21 | showarray(inp) 22 | clear_output(wait=True) 23 | -------------------------------------------------------------------------------- /chapter9_Computer-Vision/fine_tune/READMD.md: -------------------------------------------------------------------------------- 1 | ## Fine Tune 教程 2 | 3 | ### Requirements 4 | 5 | [PyTorch 0.3](http://pytorch.org/) 6 | 7 | [MxTorch](https://github.com/SherlockLiao/mxtorch) 8 | 9 | [tensorboardX](https://github.com/lanpa/tensorboard-pytorch) 10 | 11 | 按照 pytorch 官网安装 pytorch,将 mxtorch 下载下来,放到根目录,安装 tensorboardX 实现 tensorboard 可视化 12 | 13 | ```bash 14 | \fine_tune 15 | \mxtorch 16 | \hymenoptera_data 17 | \train 18 | \val 19 | \checkpoints 20 | config.py 21 | main.py 22 | get_data.sh 23 | ``` 24 | 25 | 26 | 27 | ### 下载数据 28 | 29 | 打开终端,运行 bash 脚本来获取数据 30 | 31 | ```bash 32 | bash get_data.sh 33 | ``` 34 | 35 | 36 | 37 | ### 训练模型 38 | 39 | 所有的配置文件都放在 config.py 里面,通过下面的代码来训练模型 40 | 41 | ```bash 42 | python main.py train 43 | ``` 44 | 45 | 也可以在终端修改配置,比如改变 epochs 和 batch_size 46 | 47 | ```bash 48 | python main.py train \ 49 | --max_epochs=100 \ 50 | --batch_size=16 51 | ``` 52 | 53 | -------------------------------------------------------------------------------- /chapter9_Computer-Vision/fine_tune/config.py: -------------------------------------------------------------------------------- 1 | # encoding: utf-8 2 | """ 3 | @author: xyliao 4 | @contact: xyliao1993@qq.com 5 | """ 6 | import warnings 7 | from pprint import pprint 8 | 9 | 10 | class DefaultConfig(object): 11 | model = 'resnet50' 12 | # Dataset. 13 | train_data_path = './hymenoptera_data/train/' 14 | test_data_path = './hymenoptera_data/val/' 15 | 16 | # Store result and save models. 17 | # result_file = 'result.txt' 18 | save_file = './checkpoints/' 19 | save_freq = 30 # save model every N epochs 20 | save_best = True # If save best test metric model. 21 | 22 | # Visualization results on tensorboard. 23 | # vis_dir = './vis/' 24 | plot_freq = 100 # plot in tensorboard every N iterations 25 | 26 | # Model hyperparameters. 
27 | use_gpu = True # use GPU or not 28 | ctx = 0 # running on which cuda device 29 | batch_size = 64 # batch size 30 | num_workers = 4 # how many workers for loading data 31 | max_epoch = 30 32 | lr = 1e-2 # initial learning rate 33 | momentum = 0 34 | weight_decay = 1e-4 35 | lr_decay = 0.95 36 | # lr_decay_freq = 10 37 | 38 | def _parse(self, kwargs): 39 | for k, v in kwargs.items(): 40 | if not hasattr(self, k): 41 | warnings.warn("Warning: opt has not attribut %s" % k) 42 | setattr(self, k, v) 43 | 44 | print('=========user config==========') 45 | pprint(self._state_dict()) 46 | print('============end===============') 47 | 48 | def _state_dict(self): 49 | return {k: getattr(self, k) for k, _ in DefaultConfig.__dict__.items() 50 | if not k.startswith('_')} 51 | 52 | 53 | opt = DefaultConfig() 54 | -------------------------------------------------------------------------------- /chapter9_Computer-Vision/fine_tune/get_data.sh: -------------------------------------------------------------------------------- 1 | # Download data. 2 | wget https://download.pytorch.org/tutorial/hymenoptera_data.zip 3 | 4 | unzip hymenoptera_data.zip -------------------------------------------------------------------------------- /chapter9_Computer-Vision/fine_tune/main.py: -------------------------------------------------------------------------------- 1 | # encoding: utf-8 2 | """ 3 | @author: xyliao 4 | @contact: xyliao1993@qq.com 5 | """ 6 | import copy 7 | 8 | import torch 9 | from config import opt 10 | from mxtorch import meter 11 | from mxtorch import transforms as tfs 12 | from mxtorch.trainer import * 13 | from mxtorch.vision import model_zoo 14 | from torch import nn 15 | from torch.autograd import Variable 16 | from torch.utils.data import DataLoader 17 | from torchvision.datasets import ImageFolder 18 | from tqdm import tqdm 19 | 20 | train_tf = tfs.Compose([ 21 | tfs.RandomResizedCrop(224), 22 | tfs.RandomHorizontalFlip(), 23 | tfs.ToTensor(), 24 | tfs.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]) 25 | ]) 26 | 27 | 28 | def test_tf(img): 29 | img = tfs.Resize(256)(img) 30 | img, _ = tfs.CenterCrop(224)(img) 31 | normalize = tfs.Compose([ 32 | tfs.ToTensor(), 33 | tfs.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]) 34 | ]) 35 | img = normalize(img) 36 | return img 37 | 38 | 39 | def get_train_data(): 40 | train_set = ImageFolder(opt.train_data_path, train_tf) 41 | return DataLoader( 42 | train_set, opt.batch_size, True, num_workers=opt.num_workers) 43 | 44 | 45 | def get_test_data(): 46 | test_set = ImageFolder(opt.test_data_path, test_tf) 47 | return DataLoader( 48 | test_set, opt.batch_size, True, num_workers=opt.num_workers) 49 | 50 | 51 | def get_model(): 52 | model = model_zoo.resnet50(pretrained=True) 53 | model.fc = nn.Linear(2048, 2) 54 | if opt.use_gpu: 55 | model = model.cuda(opt.ctx) 56 | return model 57 | 58 | 59 | def get_loss(score, label): 60 | return nn.CrossEntropyLoss()(score, label) 61 | 62 | 63 | def get_optimizer(model): 64 | optimizer = torch.optim.SGD( 65 | model.parameters(), 66 | lr=opt.lr, 67 | momentum=opt.momentum, 68 | weight_decay=opt.weight_decay) 69 | return ScheduledOptim(optimizer) 70 | 71 | 72 | class FineTuneTrainer(Trainer): 73 | def __init__(self): 74 | model = get_model() 75 | criterion = get_loss 76 | optimizer = get_optimizer(model) 77 | super().__init__(model, criterion, optimizer) 78 | 79 | self.metric_meter['loss'] = meter.AverageValueMeter() 80 | self.metric_meter['acc'] = meter.AverageValueMeter() 81 | 82 | def train(self, kwargs): 83 
| self.reset_meter() 84 | self.model.train() 85 | train_data = kwargs['train_data'] 86 | for data in tqdm(train_data): 87 | img, label = data 88 | if opt.use_gpu: 89 | img = img.cuda(opt.ctx) 90 | label = label.cuda(opt.ctx) 91 | img = Variable(img) 92 | label = Variable(label) 93 | 94 | # Forward. 95 | score = self.model(img) 96 | loss = self.criterion(score, label) 97 | 98 | # Backward. 99 | self.optimizer.zero_grad() 100 | loss.backward() 101 | self.optimizer.step() 102 | 103 | # Update meters. 104 | acc = (score.max(1)[1] == label).float().mean() 105 | self.metric_meter['loss'].add(loss.data[0]) 106 | self.metric_meter['acc'].add(acc.data[0]) 107 | 108 | # Update to tensorboard. 109 | if (self.n_iter + 1) % opt.plot_freq == 0: 110 | self.writer.add_scalars( 111 | 'loss', {'train': self.metric_meter['loss'].value()[0]}, 112 | self.n_plot) 113 | self.writer.add_scalars( 114 | 'acc', {'train': self.metric_meter['acc'].value()[0]}, 115 | self.n_plot) 116 | self.n_plot += 1 117 | self.n_iter += 1 118 | 119 | # Log the train metric dict to print result. 120 | self.metric_log['train loss'] = self.metric_meter['loss'].value()[0] 121 | self.metric_log['train acc'] = self.metric_meter['acc'].value()[0] 122 | 123 | def test(self, kwargs): 124 | self.reset_meter() 125 | self.model.eval() 126 | test_data = kwargs['test_data'] 127 | for data in tqdm(test_data): 128 | img, label = data 129 | if opt.use_gpu: 130 | img = img.cuda(opt.ctx) 131 | label = label.cuda(opt.ctx) 132 | img = Variable(img, volatile=True) 133 | label = Variable(label, volatile=True) 134 | 135 | score = self.model(img) 136 | loss = self.criterion(score, label) 137 | acc = (score.max(1)[1] == label).float().mean() 138 | 139 | self.metric_meter['loss'].add(loss.data[0]) 140 | self.metric_meter['acc'].add(acc.data[0]) 141 | 142 | # Update to tensorboard. 143 | self.writer.add_scalars('loss', 144 | {'test': self.metric_meter['loss'].value()[0]}, 145 | self.n_plot) 146 | self.writer.add_scalars( 147 | 'acc', {'test': self.metric_meter['acc'].value()[0]}, self.n_plot) 148 | self.n_plot += 1 149 | 150 | # Log the test metric to dict. 151 | self.metric_log['test loss'] = self.metric_meter['loss'].value()[0] 152 | self.metric_log['test acc'] = self.metric_meter['acc'].value()[0] 153 | 154 | def get_best_model(self): 155 | if self.metric_log['test loss'] < self.best_metric: 156 | self.best_model = copy.deepcopy(self.model.state_dict()) 157 | self.best_metric = self.metric_log['test loss'] 158 | 159 | 160 | def train(**kwargs): 161 | opt._parse(kwargs) 162 | 163 | train_data = get_train_data() 164 | test_data = get_test_data() 165 | 166 | fine_tune_trainer = FineTuneTrainer() 167 | fine_tune_trainer.fit(train_data=train_data, test_data=test_data) 168 | 169 | 170 | if __name__ == '__main__': 171 | import fire 172 | 173 | fire.Fire() 174 | -------------------------------------------------------------------------------- /chapter9_Computer-Vision/kaggle_dog_vs_cat/README.md: -------------------------------------------------------------------------------- 1 | # kaggle competition 2 | ## dog vs cat 3 | 4 | This is my first competition in Kaggle. 
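
The pipeline is two-stage: first extract pretrained-convnet features into hdf5 files, then train a small classifier on top of them. A typical run looks like the commands below (for reference only — the data root is hard-coded in the scripts, change it to your own path first, and `feature_train.py` assumes a CUDA GPU):

```bash
# extract features (one .hd5f file per model and phase)
python feature_extraction.py --model vgg --phase train
python feature_extraction.py --model vgg --phase val

# train the classifier on the extracted features
python feature_train.py --model vgg --epoch 20
```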
5 | -------------------------------------------------------------------------------- /chapter9_Computer-Vision/kaggle_dog_vs_cat/model/dataset.py: -------------------------------------------------------------------------------- 1 | __author__ = 'SherlockLiao' 2 | 3 | import torch 4 | from torch.utils.data import Dataset 5 | import h5py 6 | 7 | 8 | class h5Dataset(Dataset): 9 | 10 | def __init__(self, h5py_list): 11 | label_file = h5py.File(h5py_list[0], 'r') 12 | self.label = torch.from_numpy(label_file['label'].value) 13 | self.nSamples = self.label.size(0) 14 | temp_dataset = torch.FloatTensor() 15 | for file in h5py_list: 16 | h5_file = h5py.File(file, 'r') 17 | dataset = torch.from_numpy(h5_file['data'].value) 18 | temp_dataset = torch.cat((temp_dataset, dataset), 1) 19 | 20 | self.dataset = temp_dataset 21 | 22 | def __len__(self): 23 | return self.nSamples 24 | 25 | def __getitem__(self, index): 26 | assert index < len(self), 'index range error' 27 | data = self.dataset[index] 28 | label = self.label[index] 29 | return (data, label) 30 | -------------------------------------------------------------------------------- /chapter9_Computer-Vision/kaggle_dog_vs_cat/model/feature_extraction.py: -------------------------------------------------------------------------------- 1 | __author__ = 'SherlockLiao' 2 | 3 | import os 4 | from tqdm import tqdm 5 | import h5py 6 | import numpy as np 7 | import argparse 8 | 9 | import torch 10 | from torchvision import models, transforms 11 | from torch import optim, nn 12 | from torch.autograd import Variable 13 | from torchvision.datasets import ImageFolder 14 | from torch.utils.data import DataLoader 15 | from net import feature_net, classifier 16 | 17 | parse = argparse.ArgumentParser() 18 | parse.add_argument( 19 | '--model', required=True, help='vgg, inceptionv3, resnet152') 20 | parse.add_argument('--bs', type=int, default=32) 21 | parse.add_argument('--phase', required=True, help='train, val') 22 | opt = parse.parse_args() 23 | print(opt) 24 | 25 | img_transform = transforms.Compose([ 26 | transforms.Scale(320), 27 | transforms.CenterCrop(299), 28 | transforms.ToTensor(), 29 | transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)) 30 | ]) 31 | 32 | root = '/media/sherlock/Files/kaggle_dog_vs_cat/data' 33 | data_folder = { 34 | 'train': ImageFolder(os.path.join(root, 'train'), transform=img_transform), 35 | 'val': ImageFolder(os.path.join(root, 'val'), transform=img_transform) 36 | } 37 | 38 | # define dataloader to load images 39 | batch_size = opt.bs 40 | dataloader = { 41 | 'train': 42 | DataLoader( 43 | data_folder['train'], 44 | batch_size=batch_size, 45 | shuffle=False, 46 | num_workers=4), 47 | 'val': 48 | DataLoader( 49 | data_folder['val'], 50 | batch_size=batch_size, 51 | shuffle=False, 52 | num_workers=4) 53 | } 54 | 55 | # get train data size and validation data size 56 | data_size = { 57 | 'train': len(dataloader['train'].dataset), 58 | 'val': len(dataloader['val'].dataset) 59 | } 60 | 61 | # get numbers of classes 62 | img_classes = len(dataloader['train'].dataset.classes) 63 | 64 | # test if using GPU 65 | use_gpu = torch.cuda.is_available() 66 | 67 | 68 | def CreateFeature(model, phase, outputPath='.'): 69 | """ 70 | Create h5py dataset for feature extraction. 
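Saves '<phase>_feature_<model>.hd5f' under outputPath, containing two datasets: 'data' (extracted features) and 'label' (ground-truth labels).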
71 | 72 | ARGS: 73 | outputPath : h5py output path 74 | model : used model 75 | labelList : list of corresponding groundtruth texts 76 | """ 77 | featurenet = feature_net(model) 78 | if use_gpu: 79 | featurenet.cuda() 80 | feature_map = torch.FloatTensor() 81 | label_map = torch.LongTensor() 82 | for data in tqdm(dataloader[phase]): 83 | img, label = data 84 | if use_gpu: 85 | img = Variable(img, volatile=True).cuda() 86 | else: 87 | img = Variable(img, volatile=True) 88 | out = featurenet(img) 89 | feature_map = torch.cat((feature_map, out.cpu().data), 0) 90 | label_map = torch.cat((label_map, label), 0) 91 | feature_map = feature_map.numpy() 92 | label_map = label_map.numpy() 93 | file_name = '_feature_{}.hd5f'.format(model) 94 | h5_path = os.path.join(outputPath, phase) + file_name 95 | with h5py.File(h5_path, 'w') as h: 96 | h.create_dataset('data', data=feature_map) 97 | h.create_dataset('label', data=label_map) 98 | 99 | 100 | CreateFeature(opt.model, opt.phase) 101 | -------------------------------------------------------------------------------- /chapter9_Computer-Vision/kaggle_dog_vs_cat/model/feature_train.py: -------------------------------------------------------------------------------- 1 | __author__ = 'SherlockLiao' 2 | 3 | import argparse 4 | import time 5 | import os 6 | 7 | import torch 8 | from torch import nn 9 | from torch.autograd import Variable 10 | from torch import optim 11 | from torch.utils.data import DataLoader 12 | 13 | from dataset import h5Dataset 14 | from net import classifier 15 | 16 | parse = argparse.ArgumentParser() 17 | parse.add_argument( 18 | '--model', 19 | nargs='+', 20 | help='inceptionv3, vgg, resnet152', 21 | default=['vgg', 'inceptionv3', 'resnet152']) 22 | parse.add_argument('--batch_size', type=int, default=64) 23 | parse.add_argument('--epoch', type=int, default=20) 24 | parse.add_argument('--n_classes', default=2, type=int) 25 | parse.add_argument('--num_workers', type=int, default=8) 26 | opt = parse.parse_args() 27 | print(opt) 28 | 29 | root = '/media/sherlock/Files/kaggle_dog_vs_cat/' 30 | train_list = ['train_feature_{}.hd5f'.format(i) for i in opt.model] 31 | val_list = ['val_feature_{}.hd5f'.format(i) for i in opt.model] 32 | 33 | dataset = {'train': h5Dataset(train_list), 'val': h5Dataset(val_list)} 34 | 35 | datasize = { 36 | 'train': dataset['train'].dataset.size(0), 37 | 'val': dataset['val'].dataset.size(0) 38 | } 39 | 40 | batch_size = opt.batch_size 41 | epoches = opt.epoch 42 | 43 | dataloader = { 44 | 'train': 45 | DataLoader( 46 | dataset['train'], 47 | batch_size=batch_size, 48 | shuffle=True, 49 | num_workers=opt.num_workers), 50 | 'val': 51 | DataLoader( 52 | dataset['val'], 53 | batch_size=batch_size, 54 | shuffle=False, 55 | num_workers=opt.num_workers) 56 | } 57 | 58 | dimension = dataset['train'].dataset.size(1) 59 | 60 | mynet = classifier(dimension, opt.n_classes) 61 | mynet.cuda() 62 | 63 | criterion = nn.CrossEntropyLoss() 64 | optimizer = optim.SGD(mynet.parameters(), lr=1e-3) 65 | # train 66 | for epoch in range(epoches): 67 | print('{}'.format(epoch + 1)) 68 | print('*' * 10) 69 | print('Train') 70 | mynet.train() 71 | since = time.time() 72 | 73 | running_loss = 0.0 74 | running_acc = 0.0 75 | for i, data in enumerate(dataloader['train'], 1): 76 | feature, label = data 77 | feature = Variable(feature).cuda() 78 | label = Variable(label).cuda() 79 | 80 | # forward 81 | out = mynet(feature) 82 | loss = criterion(out, label) 83 | # backward 84 | optimizer.zero_grad() 85 | loss.backward() 86 | optimizer.step() 
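# 累计整个 epoch 的损失和预测正确数,循环结束后再除以样本总数得到平均指标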
87 | 88 | running_loss += loss.data[0] * label.size(0) 89 | _, pred = torch.max(out, 1) 90 | num_correct = torch.sum(pred == label) 91 | running_acc += num_correct.data[0] 92 | if i % 50 == 0: 93 | print('Loss: {:.6f}, Acc: {:.6f}'.format(running_loss / ( 94 | i * batch_size), running_acc / (i * batch_size))) 95 | 96 | running_loss /= datasize['train'] 97 | running_acc /= datasize['train'] 98 | eplise_time = time.time() - since 99 | print('Loss: {:.6f}, Acc: {:.6f}, Time: {:.0f}s'.format( 100 | running_loss, running_acc, eplise_time)) 101 | print('Validation') 102 | mynet.eval() 103 | num_correct = 0.0 104 | eval_loss = 0.0 105 | for data in dataloader['val']: 106 | feature, label = data 107 | feature = Variable(feature, volatile=True).cuda() 108 | label = Variable(label, volatile=True).cuda() 109 | # forward 110 | out = mynet(feature) 111 | loss = criterion(out, label) 112 | 113 | _, pred = torch.max(out, 1) 114 | correct = torch.sum(pred == label) 115 | num_correct += correct.data[0] 116 | eval_loss += loss.data[0] * label.size(0) 117 | 118 | print('Loss: {:.6f}, Acc: {:.6f}'.format(eval_loss / datasize['val'], 119 | num_correct / datasize['val'])) 120 | print('Finish Training!') 121 | 122 | save_path = os.path.join(root, 'model_save') 123 | if not os.path.exists(save_path): 124 | os.mkdir(save_path) 125 | 126 | torch.save(mynet.state_dict(), save_path + '/feature_model.pth') 127 | -------------------------------------------------------------------------------- /chapter9_Computer-Vision/kaggle_dog_vs_cat/model/fix_train.py: -------------------------------------------------------------------------------- 1 | __author__ = 'SherlockLiao' 2 | 3 | import os 4 | import time 5 | 6 | import torch 7 | from torchvision import models, transforms 8 | from torch import optim, nn 9 | from torch.autograd import Variable 10 | from torchvision.datasets import ImageFolder 11 | from torch.utils.data import DataLoader 12 | 13 | # define image transforms to do data augumentation 14 | data_transforms = { 15 | 'train': 16 | transforms.Compose([ 17 | transforms.RandomSizedCrop(299), 18 | transforms.RandomHorizontalFlip(), 19 | transforms.ToTensor(), 20 | transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)) 21 | ]), 22 | 'val': 23 | transforms.Compose([ 24 | transforms.Scale(320), 25 | transforms.CenterCrop(299), 26 | transforms.ToTensor(), 27 | transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)) 28 | ]) 29 | } 30 | 31 | # define data folder using ImageFolder to get images and classes from folder 32 | root = '/media/sherlock/Files/kaggle_dog_vs_cat/' 33 | data_folder = { 34 | 'train': 35 | ImageFolder( 36 | os.path.join(root, 'data/train'), transform=data_transforms['train']), 37 | 'val': 38 | ImageFolder( 39 | os.path.join(root, 'data/val'), transform=data_transforms['val']) 40 | } 41 | 42 | # define dataloader to load images 43 | batch_size = 32 44 | dataloader = { 45 | 'train': 46 | DataLoader( 47 | data_folder['train'], 48 | batch_size=batch_size, 49 | shuffle=True, 50 | num_workers=4), 51 | 'val': 52 | DataLoader(data_folder['val'], batch_size=batch_size, num_workers=4) 53 | } 54 | 55 | # get train data size and validation data size 56 | data_size = { 57 | 'train': len(dataloader['train'].dataset), 58 | 'val': len(dataloader['val'].dataset) 59 | } 60 | 61 | # get numbers of classes 62 | img_classes = len(dataloader['train'].dataset.classes) 63 | 64 | # test if using GPU 65 | use_gpu = torch.cuda.is_available() 66 | fix_param = True 67 | # define model 68 | transfer_model = 
69 | if fix_param:
70 |     for param in transfer_model.parameters():
71 |         param.requires_grad = False
72 | dim_in = transfer_model.fc.in_features
73 | transfer_model.fc = nn.Linear(dim_in, 2)
74 | if use_gpu:
75 |     transfer_model = transfer_model.cuda()
76 | 
77 | # define optimize function and loss function
78 | if fix_param:
79 |     optimizer = optim.Adam(transfer_model.fc.parameters(), lr=1e-3)
80 | else:
81 |     optimizer = optim.Adam(transfer_model.parameters(), lr=1e-3)
82 | criterion = nn.CrossEntropyLoss()
83 | 
84 | # train
85 | num_epoch = 10
86 | 
87 | for epoch in range(num_epoch):
88 |     print('{}/{}'.format(epoch + 1, num_epoch))
89 |     print('*' * 10)
90 |     print('Train')
91 |     transfer_model.train()
92 |     running_loss = 0.0
93 |     running_acc = 0.0
94 |     since = time.time()
95 |     for i, data in enumerate(dataloader['train'], 1):
96 |         img, label = data
97 |         if use_gpu:
98 |             img = img.cuda()
99 |             label = label.cuda()
100 |         img = Variable(img)
101 |         label = Variable(label)
102 | 
103 |         # forward
104 |         out = transfer_model(img)
105 |         loss = criterion(out, label)
106 |         _, pred = torch.max(out, 1)
107 | 
108 |         # backward
109 |         optimizer.zero_grad()
110 |         loss.backward()
111 |         optimizer.step()
112 | 
113 |         running_loss += loss.data[0] * label.size(0)
114 |         num_correct = torch.sum(pred == label)
115 |         running_acc += num_correct.data[0]
116 |         if i % 100 == 0:
117 |             print('Loss: {:.6f}, Acc: {:.4f}'.format(running_loss / (
118 |                 i * batch_size), running_acc / (i * batch_size)))
119 |     running_loss /= data_size['train']
120 |     running_acc /= data_size['train']
121 |     elapsed_time = time.time() - since
122 |     print('Loss: {:.6f}, Acc: {:.4f}, Time: {:.0f}s'.format(
123 |         running_loss, running_acc, elapsed_time))
124 |     print('Validation')
125 |     transfer_model.eval()
126 |     num_correct = 0.0
127 |     total = 0.0
128 |     eval_loss = 0.0
129 |     for data in dataloader['val']:
130 |         img, label = data
131 |         img = Variable(img.cuda() if use_gpu else img, volatile=True)
132 |         label = Variable(label.cuda() if use_gpu else label, volatile=True)
133 |         out = transfer_model(img)
134 |         _, pred = torch.max(out.data, 1)
135 |         loss = criterion(out, label)
136 |         eval_loss += loss.data[0] * label.size(0)
137 |         num_correct += (pred.cpu() == label.data.cpu()).sum()
138 |         total += label.size(0)
139 |     print('Loss: {:.6f} Acc: {:.4f}'.format(eval_loss / total, num_correct /
140 |                                             total))
141 |     print()
142 | print('Finish Training!')
143 | print()
144 | save_path = os.path.join(root, 'model_save')
145 | if not os.path.exists(save_path):
146 |     os.mkdir(save_path)
147 | torch.save(transfer_model.state_dict(), save_path + '/resnet18.pth')
148 | 
-------------------------------------------------------------------------------- /chapter9_Computer-Vision/kaggle_dog_vs_cat/model/net.py: --------------------------------------------------------------------------------
1 | __author__ = 'SherlockLiao'
2 | 
3 | import torch
4 | from torchvision import models
5 | from torch import nn
6 | 
7 | 
8 | class feature_net(nn.Module):
9 |     def __init__(self, model):
10 |         super(feature_net, self).__init__()
11 | 
12 |         if model == 'vgg':
13 |             vgg = models.vgg19(pretrained=True)
14 |             self.feature = nn.Sequential(*list(vgg.children())[:-1])
15 |             self.feature.add_module('global average', nn.AvgPool2d(9))
16 |         elif model == 'inceptionv3':
17 |             inception = models.inception_v3(pretrained=True)
18 |             self.feature = nn.Sequential(*list(inception.children())[:-1])
19 |             self.feature._modules.pop('13')
20 |             self.feature.add_module('global average', nn.AvgPool2d(35))
21 |         elif model == 'resnet152':
22 |             resnet = models.resnet152(pretrained=True)
23 |             self.feature = nn.Sequential(*list(resnet.children())[:-1])
24 | 
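#     Whichever branch is taken, the pretrained network's classifier head is
#     dropped and the output ends in an average pool, so forward() below
#     returns one flattened feature vector per image, ready for the small
#     classifier defined further down.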
25 |     def forward(self, x):
26 |         """
27 |         model includes vgg19, inceptionv3, resnet152
28 |         """
29 |         x = self.feature(x)
30 |         x = x.view(x.size(0), -1)
31 |         return x
32 | 
33 | 
34 | class classifier(nn.Module):
35 |     def __init__(self, dim, n_classes):
36 |         super(classifier, self).__init__()
37 |         self.fc = nn.Sequential(
38 |             nn.Linear(dim, 1000),
39 |             nn.ReLU(True),
40 |             nn.Dropout(0.5),
41 |             nn.Linear(1000, n_classes)
42 |         )
43 | 
44 |     def forward(self, x):
45 |         x = self.fc(x)
46 |         return x
47 | 
-------------------------------------------------------------------------------- /chapter9_Computer-Vision/neural-transfer/README.md: --------------------------------------------------------------------------------
1 | # neural-transfer
2 | This is my implementation of neural style transfer, following http://pytorch.org/tutorials/advanced/neural_style_tutorial.html#sphx-glr-advanced-neural-style-tutorial-py
-------------------------------------------------------------------------------- /chapter9_Computer-Vision/neural-transfer/build_model.py: --------------------------------------------------------------------------------
1 | import torch
2 | import torch.nn as nn
3 | import torchvision.models as models
4 | 
5 | import loss
6 | 
7 | vgg = models.vgg19(pretrained=True).features
8 | if torch.cuda.is_available():
9 |     vgg = vgg.cuda()
10 | 
11 | content_layers_default = ['conv_4']
12 | style_layers_default = ['conv_1', 'conv_2', 'conv_3', 'conv_4', 'conv_5']
13 | 
14 | 
15 | def get_style_model_and_loss(style_img,
16 |                              content_img,
17 |                              cnn=vgg,
18 |                              style_weight=1000,
19 |                              content_weight=1,
20 |                              content_layers=content_layers_default,
21 |                              style_layers=style_layers_default):
22 | 
23 |     content_loss_list = []
24 |     style_loss_list = []
25 | 
26 |     model = nn.Sequential()
27 |     if torch.cuda.is_available():
28 |         model = model.cuda()
29 |     gram = loss.Gram()
30 |     if torch.cuda.is_available():
31 |         gram = gram.cuda()
32 | 
33 |     i = 1
34 |     for layer in cnn:
35 |         if isinstance(layer, nn.Conv2d):
36 |             name = 'conv_' + str(i)
37 |             model.add_module(name, layer)
38 | 
39 |             if name in content_layers:
40 |                 target = model(content_img)
41 |                 content_loss = loss.Content_Loss(target, content_weight)
42 |                 model.add_module('content_loss_' + str(i), content_loss)
43 |                 content_loss_list.append(content_loss)
44 | 
45 |             if name in style_layers:
46 |                 target = model(style_img)
47 |                 target = gram(target)
48 |                 style_loss = loss.Style_Loss(target, style_weight)
49 |                 model.add_module('style_loss_' + str(i), style_loss)
50 |                 style_loss_list.append(style_loss)
51 | 
52 |             i += 1
53 |         if isinstance(layer, nn.MaxPool2d):
54 |             name = 'pool_' + str(i)
55 |             model.add_module(name, layer)
56 | 
57 |         if isinstance(layer, nn.ReLU):
58 |             name = 'relu_' + str(i)
59 |             model.add_module(name, layer)
60 | 
61 |     return model, style_loss_list, content_loss_list
62 | 
-------------------------------------------------------------------------------- /chapter9_Computer-Vision/neural-transfer/load_img.py: --------------------------------------------------------------------------------
1 | import PIL.Image as Image
2 | import torchvision.transforms as transforms
3 | 
4 | img_size = 512
5 | 
6 | 
7 | def load_img(img_path):
8 |     img = Image.open(img_path).convert('RGB')
9 |     img = img.resize((img_size, img_size))
10 |     img = transforms.ToTensor()(img)
11 |     img = img.unsqueeze(0)
12 |     return img
13 | 
14 | 
15 | def show_img(img):
16 |     img = img.squeeze(0)
17 |     img = transforms.ToPILImage()(img)
18 |     img.show()
19 | 
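A ready-made demo lives in demo.ipynb; purely for orientation, a minimal driver wiring load_img, run_style_transfer and show_img together might look like the sketch below. It is not a file in the repo — it assumes the bundled picture/ directory and moves data to the GPU only when one is available (build_model.py already puts the VGG features on CUDA in that case).

```python
# Hypothetical driver for this folder, not part of the repo.
import torch
from torch.autograd import Variable

from load_img import load_img, show_img
from run_code import run_style_transfer

content = Variable(load_img('picture/content.png'))
style = Variable(load_img('picture/style.png'))
if torch.cuda.is_available():
    content = content.cuda()
    style = style.cuda()

input_img = content.clone()  # start the optimization from the content image
out = run_style_transfer(content, style, input_img, num_epoches=300)
show_img(out.cpu())
```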
-------------------------------------------------------------------------------- /chapter9_Computer-Vision/neural-transfer/loss.py: --------------------------------------------------------------------------------
1 | import torch.nn as nn
2 | import torch
3 | 
4 | 
5 | class Content_Loss(nn.Module):
6 |     def __init__(self, target, weight):
7 |         super(Content_Loss, self).__init__()
8 |         self.weight = weight
9 |         self.target = target.detach() * self.weight
10 |         # target must be detached so it is treated as a fixed constant rather than a graph node; otherwise autograd would try to backpropagate through it and the forward pass would fail
11 |         self.criterion = nn.MSELoss()
12 | 
13 |     def forward(self, input):
14 |         self.loss = self.criterion(input * self.weight, self.target)
15 |         out = input.clone()
16 |         return out
17 | 
18 |     def backward(self, retain_graph=True):
19 |         self.loss.backward(retain_graph=retain_graph)
20 |         return self.loss
21 | 
22 | 
23 | class Gram(nn.Module):
24 |     def __init__(self):
25 |         super(Gram, self).__init__()
26 | 
27 |     def forward(self, input):
28 |         a, b, c, d = input.size()
29 |         feature = input.view(a * b, c * d)
30 |         gram = torch.mm(feature, feature.t())
31 |         gram /= (a * b * c * d)
32 |         return gram
33 | 
34 | 
35 | class Style_Loss(nn.Module):
36 |     def __init__(self, target, weight):
37 |         super(Style_Loss, self).__init__()
38 |         self.weight = weight
39 |         self.target = target.detach() * self.weight
40 |         self.gram = Gram()
41 |         self.criterion = nn.MSELoss()
42 | 
43 |     def forward(self, input):
44 |         G = self.gram(input) * self.weight
45 |         self.loss = self.criterion(G, self.target)
46 |         out = input.clone()
47 |         return out
48 | 
49 |     def backward(self, retain_graph=True):
50 |         self.loss.backward(retain_graph=retain_graph)
51 |         return self.loss
52 | 
-------------------------------------------------------------------------------- /chapter9_Computer-Vision/neural-transfer/picture/content.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/L1aoXingyu/code-of-learn-deep-learning-with-pytorch/ea24810dc26265e85e3eec34e1d091d21202d800/chapter9_Computer-Vision/neural-transfer/picture/content.png
-------------------------------------------------------------------------------- /chapter9_Computer-Vision/neural-transfer/picture/saved_picture.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/L1aoXingyu/code-of-learn-deep-learning-with-pytorch/ea24810dc26265e85e3eec34e1d091d21202d800/chapter9_Computer-Vision/neural-transfer/picture/saved_picture.png
-------------------------------------------------------------------------------- /chapter9_Computer-Vision/neural-transfer/picture/style.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/L1aoXingyu/code-of-learn-deep-learning-with-pytorch/ea24810dc26265e85e3eec34e1d091d21202d800/chapter9_Computer-Vision/neural-transfer/picture/style.png
-------------------------------------------------------------------------------- /chapter9_Computer-Vision/neural-transfer/run_code.py: --------------------------------------------------------------------------------
1 | import torch.nn as nn
2 | import torch.optim as optim
3 | 
4 | from build_model import get_style_model_and_loss
5 | 
6 | 
7 | def get_input_param_optimizer(input_img):
8 |     """
9 |     input_img is a Variable
10 |     """
11 |     input_param = nn.Parameter(input_img.data)
12 |     optimizer = optim.LBFGS([input_param])
13 |     return input_param, optimizer
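# Note: optim.LBFGS evaluates the objective several times per parameter
# update, so it must be given a function that recomputes the losses; that is
# why run_style_transfer below wraps the forward/backward pass in closure()
# and hands it to optimizer.step(closure).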
14 | 
15 | 
16 | def run_style_transfer(content_img, style_img, input_img, num_epoches=300):
17 |     print('Building the style transfer model...')
18 |     model, style_loss_list, content_loss_list = get_style_model_and_loss(
19 |         style_img, content_img)
20 |     input_param, optimizer = get_input_param_optimizer(input_img)
21 | 
22 |     print('Optimizing...')
23 |     epoch = [0]
24 |     while epoch[0] < num_epoches:
25 | 
26 |         def closure():
27 |             input_param.data.clamp_(0, 1)
28 | 
29 |             model(input_param)
30 |             style_score = 0
31 |             content_score = 0
32 | 
33 |             optimizer.zero_grad()
34 |             for sl in style_loss_list:
35 |                 style_score += sl.backward()
36 |             for cl in content_loss_list:
37 |                 content_score += cl.backward()
38 | 
39 |             epoch[0] += 1
40 |             if epoch[0] % 50 == 0:
41 |                 print('run {}'.format(epoch[0]))
42 |                 print('Style Loss: {:.4f} Content Loss: {:.4f}'.format(
43 |                     style_score.data[0], content_score.data[0]))
44 |                 print()
45 | 
46 |             return style_score + content_score
47 | 
48 |         optimizer.step(closure)
49 | 
50 |     input_param.data.clamp_(0, 1)
51 | 
52 |     return input_param.data
53 | 
-------------------------------------------------------------------------------- /chapter9_Computer-Vision/segmentation/README.md: --------------------------------------------------------------------------------
1 | ## Semantic Segmentation Tutorial
2 | 
3 | ### Requirements
4 | 
5 | [PyTorch 0.3](http://pytorch.org/)
6 | 
7 | [MxTorch](https://github.com/SherlockLiao/mxtorch)
8 | 
9 | [tensorboardX](https://github.com/lanpa/tensorboard-pytorch)
10 | 
11 | Install PyTorch following the official site, download mxtorch and put it in the project root, and install tensorboardX for tensorboard visualization. The project layout should look like this:
12 | 
13 | ```bash
14 | \segmentation
15 | \mxtorch
16 | \data
17 | \models
18 | \dataset
19 | \checkpoints
20 | config.py
21 | main.py
22 | ```
23 | 
24 | 
25 | 
26 | ### Download the data
27 | 
28 | Open a terminal and run the bash script to fetch the data
29 | 
30 | ```bash
31 | bash get_data.sh
32 | ```
33 | 
34 | 
35 | 
36 | ### Train the model
37 | 
38 | All configuration lives in config.py; train the model with
39 | 
40 | ```bash
41 | python main.py train
42 | ```
43 | 
44 | Any option can also be overridden from the terminal, for example max_epoch and batch_size
45 | 
46 | ```bash
47 | python main.py train \
48 | --max_epoch=100 \
49 | --batch_size=16
50 | ```
51 | 
52 | 
53 | 
54 | ### Results
55 | 
56 | #### Accuracy, IoU and loss
57 | 
58 | ![](https://ws3.sinaimg.cn/large/006tNc79gy1fojg2ye52uj30td07sgm6.jpg)
59 | 
60 | #### Segmentation examples
61 | 
62 | ![](https://ws1.sinaimg.cn/large/006tNc79gy1fojg42xvvaj30us0haq4o.jpg)
63 | 
64 | 
65 | 
66 | ![](https://ws3.sinaimg.cn/large/006tNc79gy1fojiid8vpbj30hk0fvq3l.jpg)
-------------------------------------------------------------------------------- /chapter9_Computer-Vision/segmentation/config.py: --------------------------------------------------------------------------------
1 | # encoding: utf-8
2 | """
3 | @author: xyliao
4 | @contact: xyliao1993@qq.com
5 | """
6 | import warnings
7 | from pprint import pprint
8 | 
9 | 
10 | class DefaultConfig(object):
11 |     model = 'FcnResNet'
12 | 
13 |     # Dataset.
14 |     voc_root = './dataset/VOCdevkit/VOC2012/'
15 |     crop_size = (320, 480)
16 |     num_classes = 21
17 | 
18 |     # Store result and save models.
19 |     result_file = 'result.txt'
20 |     save_file = './checkpoints/'
21 |     save_freq = 20  # save model every N epochs
22 |     save_best = True  # whether to save the model with the best test metric
23 | 
24 |     # Visualization parameters.
25 |     vis_dir = './vis/'
26 |     plot_freq = 30  # plot in tensorboard every N iterations
27 | 
28 |     # Model hyperparameters.
29 |     use_gpu = True  # use GPU or not
30 |     ctx = 0  # running on which cuda device
31 |     batch_size = 32  # batch size
32 |     num_workers = 4  # how many workers for loading data
33 |     max_epoch = 80
34 |     lr = 1e-2  # initial learning rate
35 |     lr_decay = 0.1
36 |     lr_decay_freq = 50
37 |     weight_decay = 1e-4
38 | 
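#     The attributes above are only defaults: main.py's train() forwards its
#     command-line kwargs to _parse below, so any of them can be overridden
#     per run, e.g. `python main.py train --batch_size=16`.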
39 |     def _parse(self, kwargs):
40 |         for k, v in kwargs.items():
41 |             if not hasattr(self, k):
42 |                 warnings.warn("Warning: opt has no attribute %s" % k)
43 |             setattr(self, k, v)
44 | 
45 |         print('=========user config==========')
46 |         pprint(self._state_dict())
47 |         print('============end===============')
48 | 
49 |     def _state_dict(self):
50 |         return {
51 |             k: getattr(self, k)
52 |             for k, _ in DefaultConfig.__dict__.items() if not k.startswith('_')
53 |         }
54 | 
55 | 
56 | opt = DefaultConfig()
57 | 
-------------------------------------------------------------------------------- /chapter9_Computer-Vision/segmentation/data/__init__.py: --------------------------------------------------------------------------------
1 | # encoding: utf-8
2 | """
3 | @author: xyliao
4 | @contact: xyliao1993@qq.com
5 | """
6 | from .voc import VocSegDataset, img_transforms, COLORMAP, CLASSES, inverse_normalization
7 | 
-------------------------------------------------------------------------------- /chapter9_Computer-Vision/segmentation/data/voc.py: --------------------------------------------------------------------------------
1 | # encoding: utf-8
2 | """
3 | @author: xyliao
4 | @contact: xyliao1993@qq.com
5 | """
6 | import os
7 | 
8 | import numpy as np
9 | import torch
10 | from PIL import Image
11 | from mxtorch import transforms as tfs
12 | 
13 | 
14 | def read_images(root, train):
15 |     txt_fname = os.path.join(root, 'ImageSets/Segmentation/') + ('train.txt' if train else 'val.txt')
16 |     with open(txt_fname, 'r') as f:
17 |         images = f.read().split()
18 |     data = [os.path.join(root, 'JPEGImages', i + '.jpg') for i in images]
19 |     label = [os.path.join(root, 'SegmentationClass', i + '.png') for i in images]
20 |     return data, label
21 | 
22 | 
23 | def random_crop(data, label, crop_size):
24 |     height, width = crop_size
25 |     data, rect = tfs.RandomCrop((height, width))(data)
26 |     label = tfs.FixedCrop(*rect)(label)
27 |     return data, label
28 | 
29 | 
30 | def image2label(img):
31 |     cm2lbl = np.zeros(256 ** 3)
32 |     for i, cm in enumerate(COLORMAP):
33 |         cm2lbl[(cm[0] * 256 + cm[1]) * 256 + cm[2]] = i
34 | 
35 |     data = np.array(img, dtype=np.int32)
36 |     idx = (data[:, :, 0] * 256 + data[:, :, 1]) * 256 + data[:, :, 2]
37 |     return np.array(cm2lbl[idx], dtype=np.int64)
38 | 
39 | 
40 | def img_transforms(img, label, crop_size):
41 |     img, label = random_crop(img, label, crop_size)
42 |     img_tfs = tfs.Compose([
43 |         tfs.ToTensor(),
44 |         tfs.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
45 |     ])
46 | 
47 |     img = img_tfs(img)
48 |     label = image2label(label)
49 |     label = torch.from_numpy(label)
50 |     return img, label
51 | 
52 | 
53 | def inverse_normalization(img):
54 |     """Convert a normalized image back to the original image.
55 | 
56 |     :param img: (~torch.FloatTensor) normalized image, (C, H, W)
57 |     :return:
58 |         Original image.
59 | """ 60 | img = img * torch.FloatTensor([0.229, 0.224, 0.225])[:, None, None] \ 61 | + torch.FloatTensor([0.485, 0.456, 0.406])[:, None, None] 62 | origin_img = torch.clamp(img, min=0, max=1) * 255 63 | origin_img = origin_img.permute(1, 2, 0).numpy() 64 | return origin_img.astype(np.uint8) 65 | 66 | 67 | class VocSegDataset(object): 68 | def __init__(self, voc_root, train, crop_size, transforms): 69 | self.crop_size = crop_size 70 | self.transforms = transforms 71 | data_list, label_list = read_images(voc_root, train) 72 | self.data_list = self._filter(data_list) 73 | self.label_list = self._filter(label_list) 74 | 75 | def _filter(self, images): 76 | return [img for img in images if (Image.open(img).size[1] >= self.crop_size[0] and 77 | Image.open(img).size[0] >= self.crop_size[1])] 78 | 79 | def __getitem__(self, item): 80 | img = self.data_list[item] 81 | label = self.label_list[item] 82 | img = Image.open(img) 83 | label = Image.open(label).convert('RGB') 84 | img, label = self.transforms(img, label, self.crop_size) 85 | return img, label 86 | 87 | def __len__(self): 88 | return len(self.data_list) 89 | 90 | 91 | CLASSES = ['background', 'aeroplane', 'bicycle', 'bird', 'boat', 92 | 'bottle', 'bus', 'car', 'cat', 'chair', 'cow', 'diningtable', 93 | 'dog', 'horse', 'motorbike', 'person', 'potted plant', 94 | 'sheep', 'sofa', 'train', 'tv/monitor'] 95 | 96 | # RGB color for each class. 97 | COLORMAP = [[0, 0, 0], [128, 0, 0], [0, 128, 0], [128, 128, 0], [0, 0, 128], 98 | [128, 0, 128], [0, 128, 128], [128, 128, 128], [64, 0, 0], [192, 0, 0], 99 | [64, 128, 0], [192, 128, 0], [64, 0, 128], [192, 0, 128], 100 | [64, 128, 128], [192, 128, 128], [0, 64, 0], [128, 64, 0], 101 | [0, 192, 0], [128, 192, 0], [0, 64, 128]] 102 | -------------------------------------------------------------------------------- /chapter9_Computer-Vision/segmentation/get_data.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | wget http://host.robots.ox.ac.uk/pascal/VOC/voc2012/VOCtrainval_11-May-2012.tar 4 | 5 | if [ ! 
6 |     mkdir ./dataset
7 | fi
8 | 
9 | tar -xf VOCtrainval_11-May-2012.tar -C ./dataset
10 | rm VOCtrainval_11-May-2012.tar
11 | 
-------------------------------------------------------------------------------- /chapter9_Computer-Vision/segmentation/main.py: --------------------------------------------------------------------------------
1 | # encoding: utf-8
2 | """
3 | @author: xyliao
4 | @contact: xyliao1993@qq.com
5 | """
6 | import warnings
7 | from copy import deepcopy
8 | 
9 | import numpy as np
10 | import torch
11 | import torch.nn.functional as F
12 | from mxtorch import meter
13 | from mxtorch.trainer import Trainer, ScheduledOptim
14 | from mxtorch.vision.eval_tools import eval_semantic_segmentation
15 | from torch.autograd import Variable
16 | from torch.utils.data import DataLoader
17 | from tqdm import tqdm
18 | 
19 | import models
20 | from config import opt
21 | from data import VocSegDataset, img_transforms, COLORMAP, inverse_normalization
22 | 
23 | warnings.filterwarnings('ignore')
24 | 
25 | cm = np.array(COLORMAP, dtype=np.uint8)
26 | 
27 | 
28 | def get_data(is_train):
29 |     voc_data = VocSegDataset(opt.voc_root, is_train, opt.crop_size,
30 |                              img_transforms)
31 |     return DataLoader(
32 |         voc_data, opt.batch_size, True, num_workers=opt.num_workers)
33 | 
34 | 
35 | def get_model(num_classes):
36 |     model = getattr(models, opt.model)(num_classes)
37 |     if opt.use_gpu:
38 |         model.cuda()
39 |     return model
40 | 
41 | 
42 | def get_optimizer(model):
43 |     optimizer = torch.optim.SGD(
44 |         model.parameters(), lr=opt.lr, weight_decay=opt.weight_decay)
45 |     return ScheduledOptim(optimizer)
46 | 
47 | 
48 | def get_loss(scores, labels):
49 |     scores = F.log_softmax(scores, dim=1)
50 |     return torch.nn.NLLLoss2d()(scores, labels)
51 | 
52 | 
53 | all_metrics = ['loss', 'acc', 'iou']
54 | 
55 | 
56 | class FcnTrainer(Trainer):
57 |     def __init__(self):
58 |         model = get_model(opt.num_classes)
59 |         criterion = get_loss
60 |         optimizer = get_optimizer(model)
61 | 
62 |         super().__init__(model=model, criterion=criterion, optimizer=optimizer)
63 | 
64 |         self.config += ('Crop size: ' + str(opt.crop_size) + '\n')
65 |         self.best_metric = 0
66 |         for m in all_metrics:
67 |             self.metric_meter[m] = meter.AverageValueMeter()
68 | 
69 |     def train(self, kwargs):
70 |         self.reset_meter()
71 |         self.model.train()
72 |         train_data = kwargs['train_data']
73 |         for data in tqdm(train_data):
74 |             imgs, labels = data
75 |             if opt.use_gpu:
76 |                 imgs = imgs.cuda()
77 |                 labels = labels.cuda()
78 |             imgs = Variable(imgs)
79 |             labels = Variable(labels)
80 | 
81 |             # Forward.
82 |             scores = self.model(imgs)
83 |             loss = self.criterion(scores, labels)
84 | 
85 |             # Backward.
86 |             self.optimizer.zero_grad()
87 |             loss.backward()
88 |             self.optimizer.step()
89 | 
90 |             # Update to metrics.
91 |             pred_labels = scores.max(dim=1)[1].data.cpu().numpy()
92 |             pred_labels = [i for i in pred_labels]
93 | 
94 |             true_labels = labels.data.cpu().numpy()
95 |             true_labels = [i for i in true_labels]
96 | 
97 |             eval_metrics = eval_semantic_segmentation(pred_labels, true_labels)
98 |             self.metric_meter['loss'].add(loss.data[0])
99 |             self.metric_meter['acc'].add(eval_metrics['mean_class_accuracy'])
100 |             self.metric_meter['iou'].add(eval_metrics['miou'])
101 | 
102 |             if (self.n_iter + 1) % opt.plot_freq == 0:
103 |                 # Plot metrics curve in tensorboard.
104 |                 self.writer.add_scalars(
105 |                     'loss', {'train': self.metric_meter['loss'].value()[0]},
106 |                     self.n_plot)
107 |                 self.writer.add_scalars(
108 |                     'acc', {'train': self.metric_meter['acc'].value()[0]},
109 |                     self.n_plot)
110 |                 self.writer.add_scalars(
111 |                     'iou', {'train': self.metric_meter['iou'].value()[0]},
112 |                     self.n_plot)
113 | 
114 |                 # Show segmentation images.
115 |                 # Get prediction segmentation and ground truth segmentation.
116 |                 origin_image = inverse_normalization(imgs[0].cpu().data)
117 |                 pred_seg = cm[pred_labels[0]]
118 |                 gt_seg = cm[true_labels[0]]
119 | 
120 |                 self.writer.add_image('train ori_img', origin_image,
121 |                                       self.n_plot)
122 |                 self.writer.add_image('train gt', gt_seg, self.n_plot)
123 |                 self.writer.add_image('train pred', pred_seg, self.n_plot)
124 |                 self.n_plot += 1
125 | 
126 |             self.n_iter += 1
127 | 
128 |         self.metric_log['Train Loss'] = self.metric_meter['loss'].value()[0]
129 |         self.metric_log['Train Mean Class Accuracy'] = self.metric_meter[
130 |             'acc'].value()[0]
131 |         self.metric_log['Train Mean IoU'] = self.metric_meter['iou'].value()[0]
132 | 
133 |     def test(self, kwargs):
134 |         self.reset_meter()
135 |         self.model.eval()
136 |         test_data = kwargs['test_data']
137 |         for data in tqdm(test_data):
138 |             imgs, labels = data
139 |             if opt.use_gpu:
140 |                 imgs = imgs.cuda()
141 |                 labels = labels.cuda()
142 |             imgs = Variable(imgs, volatile=True)
143 |             labels = Variable(labels, volatile=True)
144 | 
145 |             # Forward.
146 |             scores = self.model(imgs)
147 |             loss = self.criterion(scores, labels)
148 | 
149 |             # Update to metrics.
150 |             pred_labels = scores.max(dim=1)[1].data.cpu().numpy()
151 |             pred_labels = [i for i in pred_labels]
152 | 
153 |             true_labels = labels.data.cpu().numpy()
154 |             true_labels = [i for i in true_labels]
155 | 
156 |             eval_metrics = eval_semantic_segmentation(pred_labels, true_labels)
157 |             self.metric_meter['loss'].add(loss.data[0])
158 |             self.metric_meter['acc'].add(eval_metrics['mean_class_accuracy'])
159 |             self.metric_meter['iou'].add(eval_metrics['miou'])
160 | 
161 |         # Plot metrics curve in tensorboard.
162 |         self.writer.add_scalars('loss',
163 |                                 {'test': self.metric_meter['loss'].value()[0]},
164 |                                 self.n_plot)
165 |         self.writer.add_scalars(
166 |             'acc', {'test': self.metric_meter['acc'].value()[0]}, self.n_plot)
167 |         self.writer.add_scalars(
168 |             'iou', {'test': self.metric_meter['iou'].value()[0]}, self.n_plot)
169 | 
170 |         origin_img = inverse_normalization(imgs[0].cpu().data)
171 |         pred_seg = cm[pred_labels[0]]
172 |         gt_seg = cm[true_labels[0]]
173 |         self.writer.add_image('test ori_img', origin_img, self.n_plot)
174 |         self.writer.add_image('test gt', gt_seg, self.n_plot)
175 |         self.writer.add_image('test pred', pred_seg, self.n_plot)
176 | 
177 |         self.n_plot += 1
178 | 
179 |         self.metric_log['Test Loss'] = self.metric_meter['loss'].value()[0]
180 |         self.metric_log['Test Mean Class Accuracy'] = self.metric_meter[
181 |             'acc'].value()[0]
182 |         self.metric_log['Test Mean IoU'] = self.metric_meter['iou'].value()[0]
183 | 
184 |     def get_best_model(self):
185 |         if self.metric_log['Test Mean IoU'] > self.best_metric:
186 |             self.best_model = deepcopy(self.model.state_dict())
187 |             self.best_metric = self.metric_log['Test Mean IoU']
188 | 
189 | 
190 | def train(**kwargs):
191 |     opt._parse(kwargs)
192 | 
193 |     # Set default cuda device.
194 |     torch.cuda.set_device(opt.ctx)
195 | 
196 |     fcn_trainer = FcnTrainer()
197 |     train_data = get_data(is_train=True)
198 |     test_data = get_data(is_train=False)
199 |     fcn_trainer.fit(
200 |         train_data=train_data, test_data=test_data, epochs=opt.max_epoch)
201 | 
202 | 
203 | if __name__ == '__main__':
204 |     import fire
205 | 
206 |     fire.Fire()
207 | 
-------------------------------------------------------------------------------- /chapter9_Computer-Vision/segmentation/models/__init__.py: --------------------------------------------------------------------------------
1 | # encoding: utf-8
2 | """
3 | @author: xyliao
4 | @contact: xyliao1993@qq.com
5 | """
6 | 
7 | from .fcn import FcnResNet
8 | 
-------------------------------------------------------------------------------- /chapter9_Computer-Vision/segmentation/models/fcn.py: --------------------------------------------------------------------------------
1 | # encoding: utf-8
2 | """
3 | @author: xyliao
4 | @contact: xyliao1993@qq.com
5 | 
6 | This file contains an FCN model built on ResNet-34, following the paper "Fully Convolutional Networks for Semantic Segmentation"
7 | """
8 | 
9 | import numpy as np
10 | import torch
11 | from mxtorch.vision import model_zoo
12 | from torch import nn
13 | 
14 | 
15 | def bilinear_kernel(in_channels, out_channels, kernel_size):
16 |     """Define a bilinear kernel according to in channels and out channels.
17 | 
18 |     Returns:
19 |         return a bilinear filter tensor
20 |     """
21 |     factor = (kernel_size + 1) // 2
22 |     if kernel_size % 2 == 1:
23 |         center = factor - 1
24 |     else:
25 |         center = factor - 0.5
26 |     og = np.ogrid[:kernel_size, :kernel_size]
27 |     bilinear_filter = (1 - abs(og[0] - center) / factor) * (1 - abs(og[1] - center) / factor)
28 |     weight = np.zeros((in_channels, out_channels, kernel_size, kernel_size), dtype=np.float32)
29 |     weight[range(in_channels), range(out_channels), :, :] = bilinear_filter
30 |     return torch.from_numpy(weight)
31 | 
32 | 
33 | pretrained_net = model_zoo.resnet34(pretrained=True)
34 | 
35 | 
36 | class FcnResNet(nn.Module):
37 |     def __init__(self, num_classes):
38 |         super().__init__()
39 | 
40 |         self.stage1 = nn.Sequential(*list(pretrained_net.children())[:-4])
41 |         self.stage2 = list(pretrained_net.children())[-4]
42 |         self.stage3 = list(pretrained_net.children())[-3]
43 | 
44 |         self.scores1 = nn.Conv2d(512, num_classes, 1)
45 |         self.scores2 = nn.Conv2d(256, num_classes, 1)
46 |         self.scores3 = nn.Conv2d(128, num_classes, 1)
47 | 
48 |         self.upsample_8x = nn.ConvTranspose2d(num_classes, num_classes, 16, 8, 4, bias=False)
49 |         self.upsample_8x.weight.data = bilinear_kernel(num_classes, num_classes, 16)
50 | 
51 |         self.upsample_4x = nn.ConvTranspose2d(num_classes, num_classes, 4, 2, 1, bias=False)
52 |         self.upsample_4x.weight.data = bilinear_kernel(num_classes, num_classes, 4)
53 | 
54 |         self.upsample_2x = nn.ConvTranspose2d(num_classes, num_classes, 4, 2, 1, bias=False)
55 |         self.upsample_2x.weight.data = bilinear_kernel(num_classes, num_classes, 4)
56 | 
57 |     def forward(self, x):
58 |         x = self.stage1(x)
59 |         s1 = x
60 | 
61 |         x = self.stage2(x)
62 |         s2 = x
63 | 
64 |         x = self.stage3(x)
65 |         s3 = x
66 | 
67 |         s3 = self.scores1(s3)
68 |         s3 = self.upsample_2x(s3)
69 |         s2 = self.scores2(s2)
70 |         s2 = s2 + s3
71 | 
72 |         s1 = self.scores3(s1)
73 |         s2 = self.upsample_4x(s2)
74 |         s = s1 + s2
75 | 
76 |         s = self.upsample_8x(s)
77 |         return s
78 | 
79 | 
80 | 
-------------------------------------------------------------------------------- /utils.py: --------------------------------------------------------------------------------
1 | from datetime import datetime
2 | 
3 | import torch
4 | import torch.nn.functional as F
5 | from torch import nn
6 | from torch.autograd import Variable
7 | 
8 | 
9 | def get_acc(output, label):
10 |     total = output.shape[0]
11 |     _, pred_label = output.max(1)
12 |     num_correct = (pred_label == label).sum().data[0]
13 |     return num_correct / total
14 | 
15 | 
16 | def train(net, train_data, valid_data, num_epochs, optimizer, criterion):
17 |     if torch.cuda.is_available():
18 |         net = net.cuda()
19 |     prev_time = datetime.now()
20 |     for epoch in range(num_epochs):
21 |         train_loss = 0
22 |         train_acc = 0
23 |         net = net.train()
24 |         for im, label in train_data:
25 |             if torch.cuda.is_available():
26 |                 im = Variable(im.cuda())  # (bs, 3, h, w)
27 |                 label = Variable(label.cuda())  # (bs, h, w)
28 |             else:
29 |                 im = Variable(im)
30 |                 label = Variable(label)
31 |             # forward
32 |             output = net(im)
33 |             loss = criterion(output, label)
34 |             # backward
35 |             optimizer.zero_grad()
36 |             loss.backward()
37 |             optimizer.step()
38 | 
39 |             train_loss += loss.data[0]
40 |             train_acc += get_acc(output, label)
41 | 
42 |         cur_time = datetime.now()
43 |         h, remainder = divmod((cur_time - prev_time).seconds, 3600)
44 |         m, s = divmod(remainder, 60)
45 |         time_str = "Time %02d:%02d:%02d" % (h, m, s)
46 |         if valid_data is not None:
47 |             valid_loss = 0
48 |             valid_acc = 0
49 |             net = net.eval()
50 |             for im, label in valid_data:
51 |                 if torch.cuda.is_available():
52 |                     im = Variable(im.cuda(), volatile=True)
53 |                     label = Variable(label.cuda(), volatile=True)
54 |                 else:
55 |                     im = Variable(im, volatile=True)
56 |                     label = Variable(label, volatile=True)
57 |                 output = net(im)
58 |                 loss = criterion(output, label)
59 |                 valid_loss += loss.data[0]
60 |                 valid_acc += get_acc(output, label)
61 |             epoch_str = (
62 |                 "Epoch %d. Train Loss: %f, Train Acc: %f, Valid Loss: %f, Valid Acc: %f, "
63 |                 % (epoch, train_loss / len(train_data),
64 |                    train_acc / len(train_data), valid_loss / len(valid_data),
65 |                    valid_acc / len(valid_data)))
66 |         else:
67 |             epoch_str = ("Epoch %d. Train Loss: %f, Train Acc: %f, " %
68 |                          (epoch, train_loss / len(train_data),
69 |                           train_acc / len(train_data)))
70 |         prev_time = cur_time
71 |         print(epoch_str + time_str)
72 | 
73 | 
74 | def conv3x3(in_channel, out_channel, stride=1):
75 |     return nn.Conv2d(
76 |         in_channel, out_channel, 3, stride=stride, padding=1, bias=False)
77 | 
78 | 
79 | class residual_block(nn.Module):
80 |     def __init__(self, in_channel, out_channel, same_shape=True):
81 |         super(residual_block, self).__init__()
82 |         self.same_shape = same_shape
83 |         stride = 1 if self.same_shape else 2
84 | 
85 |         self.conv1 = conv3x3(in_channel, out_channel, stride=stride)
86 |         self.bn1 = nn.BatchNorm2d(out_channel)
87 | 
88 |         self.conv2 = conv3x3(out_channel, out_channel)
89 |         self.bn2 = nn.BatchNorm2d(out_channel)
90 |         if not self.same_shape:
91 |             self.conv3 = nn.Conv2d(in_channel, out_channel, 1, stride=stride)
92 | 
93 |     def forward(self, x):
94 |         out = self.conv1(x)
95 |         out = F.relu(self.bn1(out), True)
96 |         out = self.conv2(out)
97 |         out = F.relu(self.bn2(out), True)
98 | 
99 |         if not self.same_shape:
100 |             x = self.conv3(x)
101 |         return F.relu(x + out, True)
102 | 
103 | 
104 | class resnet(nn.Module):
105 |     def __init__(self, in_channel, num_classes, verbose=False):
106 |         super(resnet, self).__init__()
107 |         self.verbose = verbose
108 | 
109 |         self.block1 = nn.Conv2d(in_channel, 64, 7, 2)
110 | 
111 |         self.block2 = nn.Sequential(
112 |             nn.MaxPool2d(3, 2), residual_block(64, 64), residual_block(64, 64))
113 | 
114 |         self.block3 = nn.Sequential(
115 |             residual_block(64, 128, False), residual_block(128, 128))
116 | 
117 |         self.block4 = nn.Sequential(
118 |             residual_block(128, 256, False), residual_block(256, 256))
119 | 
120 |         self.block5 = nn.Sequential(
121 |             residual_block(256, 512, False),
122 |             residual_block(512, 512), nn.AvgPool2d(3))
123 | 
124 |         self.classifier = nn.Linear(512, num_classes)
125 | 
126 |     def forward(self, x):
127 |         x = self.block1(x)
128 |         if self.verbose:
129 |             print('block 1 output: {}'.format(x.shape))
130 |         x = self.block2(x)
131 |         if self.verbose:
132 |             print('block 2 output: {}'.format(x.shape))
133 |         x = self.block3(x)
134 |         if self.verbose:
135 |             print('block 3 output: {}'.format(x.shape))
136 |         x = self.block4(x)
137 |         if self.verbose:
138 |             print('block 4 output: {}'.format(x.shape))
139 |         x = self.block5(x)
140 |         if self.verbose:
141 |             print('block 5 output: {}'.format(x.shape))
142 |         x = x.view(x.shape[0], -1)
143 |         x = self.classifier(x)
144 |         return x
145 | 
--------------------------------------------------------------------------------
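For orientation, here is a hypothetical smoke test (not part of the repo) showing how the `train` helper and the small `resnet` above fit together. It assumes torchvision is available and rescales CIFAR-10 images to 96x96 so that the `nn.AvgPool2d(3)` in block5 receives a 3x3 feature map; the dataset choice and hyperparameters are illustrative only.

```python
# Hypothetical usage sketch for utils.py, not a file in the repo.
import torch
from torch import nn
from torch.utils.data import DataLoader
from torchvision import datasets, transforms

from utils import resnet, train

# Scale (the pre-0.4 name of Resize) brings 32x32 CIFAR images up to 96x96,
# so the feature map reaching block5 is still 3x3 before the average pool.
tf = transforms.Compose([transforms.Scale(96), transforms.ToTensor()])
train_set = datasets.CIFAR10('./data', train=True, transform=tf, download=True)
valid_set = datasets.CIFAR10('./data', train=False, transform=tf)

net = resnet(3, 10)
optimizer = torch.optim.SGD(net.parameters(), lr=0.01)
criterion = nn.CrossEntropyLoss()
train(net, DataLoader(train_set, 64, shuffle=True),
      DataLoader(valid_set, 64), 1, optimizer, criterion)
```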