├── .gitignore
├── README.md
├── aws.md
├── chapter10_Natural-Language-Process
    ├── char_rnn
    │   ├── README.md
    │   ├── char_rnn.ipynb
    │   ├── config.py
    │   ├── data
    │   │   ├── __init__.py
    │   │   └── dataset.py
    │   ├── dataset
    │   │   ├── jay.txt
    │   │   └── poetry.txt
    │   ├── main.py
    │   └── models
    │   │   ├── __init__.py
    │   │   └── char_rnn.py
    └── seq2seq-translation
    │   ├── README.md
    │   ├── dataset.py
    │   ├── evaluate.py
    │   ├── model
    │       ├── __init__.py
    │       └── seq2seq.py
    │   └── train.py
├── chapter2_PyTorch-Basics
    ├── PyTorch-introduction.ipynb
    ├── Tensor-and-Variable.ipynb
    ├── autograd.ipynb
    └── dynamic-graph.ipynb
├── chapter3_NN
    ├── bp.ipynb
    ├── deep-nn.ipynb
    ├── linear-regression-gradient-descend.ipynb
    ├── logistic-regression
    │   ├── data.txt
    │   └── logistic-regression.ipynb
    ├── nn-sequential-module.ipynb
    ├── optimizer
    │   ├── adadelta.ipynb
    │   ├── adagrad.ipynb
    │   ├── adam.ipynb
    │   ├── momentum.ipynb
    │   ├── rmsprop.ipynb
    │   └── sgd.ipynb
    └── param_initialize.ipynb
├── chapter4_CNN
    ├── basic_conv.ipynb
    ├── batch-normalization.ipynb
    ├── cat.png
    ├── data-augumentation.ipynb
    ├── densenet.ipynb
    ├── googlenet.ipynb
    ├── lr-decay.ipynb
    ├── regularization.ipynb
    ├── resnet.ipynb
    ├── utils.py
    └── vgg.ipynb
├── chapter5_RNN
    ├── nlp
    │   ├── n-gram.ipynb
    │   ├── seq-lstm.ipynb
    │   └── word-embedding.ipynb
    ├── pytorch-rnn.ipynb
    ├── rnn-for-image.ipynb
    ├── time-series
    │   ├── data.csv
    │   └── lstm-time-series.ipynb
    └── utils.py
├── chapter6_GAN
    ├── autoencoder.ipynb
    ├── gan.ipynb
    └── vae.ipynb
├── chapter7_RL
    ├── dqn.ipynb
    ├── dqn.py
    ├── mount-car.py
    ├── open_ai_gym.ipynb
    └── q-learning-intro.ipynb
├── chapter8_PyTorch-Advances
    ├── data-io.ipynb
    ├── example_data
    │   ├── image
    │   │   ├── class_1
    │   │   │   ├── 1.png
    │   │   │   ├── 2.png
    │   │   │   └── 3.png
    │   │   ├── class_2
    │   │   │   ├── 10.png
    │   │   │   ├── 11.png
    │   │   │   └── 12.png
    │   │   └── class_3
    │   │   │   ├── 16.png
    │   │   │   ├── 17.png
    │   │   │   └── 18.png
    │   └── train.txt
    └── tensorboard.ipynb
├── chapter9_Computer-Vision
    ├── Deep-Dream
    │   ├── README.md
    │   ├── backward
    │   │   └── backward.py
    │   ├── deepdream.py
    │   ├── guide_image
    │   │   ├── flower.jpg
    │   │   ├── input.png
    │   │   └── kitten.jpg
    │   ├── resnet.py
    │   ├── show_image.ipynb
    │   ├── sky.jpg
    │   └── util.py
    ├── fine_tune
    │   ├── READMD.md
    │   ├── config.py
    │   ├── fine-tune.ipynb
    │   ├── get_data.sh
    │   └── main.py
    ├── kaggle_dog_vs_cat
    │   ├── README.md
    │   └── model
    │   │   ├── dataset.py
    │   │   ├── feature_extraction.py
    │   │   ├── feature_train.py
    │   │   ├── fix_train.py
    │   │   ├── net.py
    │   │   └── process data.ipynb
    ├── neural-transfer
    │   ├── README.md
    │   ├── build_model.py
    │   ├── demo.ipynb
    │   ├── load_img.py
    │   ├── loss.py
    │   ├── picture
    │   │   ├── content.png
    │   │   ├── saved_picture.png
    │   │   └── style.png
    │   └── run_code.py
    └── segmentation
    │   ├── README.md
    │   ├── config.py
    │   ├── data
    │       ├── __init__.py
    │       └── voc.py
    │   ├── fcn.ipynb
    │   ├── get_data.sh
    │   ├── main.py
    │   └── models
    │       ├── __init__.py
    │       └── fcn.py
└── utils.py


/.gitignore:
--------------------------------------------------------------------------------
1 | .ipynb_checkpoints
2 | .idea


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
  1 | # 深度学习入门之PyTorch 
  2 | 
  3 | Learn Deep Learning with PyTorch
  4 | 
  5 | 非常感谢您能够购买此书，这个github repository包含有[深度学习入门之PyTorch](https://item.jd.com/17915495606.html)的实例代码。由于本人水平有限，在写此书的时候参考了一些网上的资料，在这里对他们表示敬意。由于深度学习的技术在飞速的发展，同时PyTorch也在不断更新，且本人在完成此书的时候也有诸多领域没有涉及，所以这个repository会不断更新作为购买次书的一个后续服务，希望我能够在您深度学习的入门道路上提供绵薄之力。
  6 | 
  7 | **注意：由于PyTorch版本更迭，书中的代码可能会出现bug，所以一切代码以该github中的为主。**
  8 | 
  9 | ![image.png](http://upload-images.jianshu.io/upload_images/3623720-7cc3a383f486d157.png?imageMogr2/auto-orient/strip%7CimageView2/2/w/1240)
 10 | 
 11 | ## 配置环境
 12 | 
 13 | 书中已经详细给出了如何基于Anaconda配置python环境，以及PyTorch的安装，如果你使用自己的电脑，并且有Nvidia的显卡，那么你可以愉快地进入深度学习的世界了，如果你没有Nvidia的显卡，那么我们需要一个云计算的平台来帮助我们学习深度学习之旅。[如何配置aws计算平台](https://github.com/SherlockLiao/code-of-learn-deep-learning-with-pytorch/blob/master/aws.md)
 14 | 
 15 | 
 16 | **以下的课程目录和书中目录有出入，因为内容正在更新到第二版，第二版即将上线！！**
 17 | ## 课程目录
 18 | ### part1: 深度学习基础
 19 | - Chapter 2: PyTorch基础
 20 |     - [Tensor和Variable](https://github.com/SherlockLiao/code-of-learn-deep-learning-with-pytorch/blob/master/chapter2_PyTorch-Basics/Tensor-and-Variable.ipynb)    
 21 |     - [自动求导机制](https://github.com/SherlockLiao/code-of-learn-deep-learning-with-pytorch/blob/master/chapter2_PyTorch-Basics/autograd.ipynb)
 22 |     - [动态图与静态图](https://github.com/SherlockLiao/code-of-learn-deep-learning-with-pytorch/blob/master/chapter2_PyTorch-Basics/dynamic-graph.ipynb)
 23 | 
 24 | 
 25 | - Chapter 3: 神经网络
 26 |     - [线性模型与梯度下降](https://github.com/SherlockLiao/code-of-learn-deep-learning-with-pytorch/blob/master/chapter3_NN/linear-regression-gradient-descend.ipynb)
 27 |     - [Logistic 回归与优化器](https://github.com/SherlockLiao/code-of-learn-deep-learning-with-pytorch/blob/master/chapter3_NN/logistic-regression/logistic-regression.ipynb)
 28 |     - [多层神经网络，Sequential 和 Module](https://github.com/SherlockLiao/code-of-learn-deep-learning-with-pytorch/blob/master/chapter3_NN/nn-sequential-module.ipynb)
 29 |     - [深层神经网络](https://github.com/SherlockLiao/code-of-learn-deep-learning-with-pytorch/blob/master/chapter3_NN/deep-nn.ipynb)
 30 |     - [参数初始化方法](https://github.com/SherlockLiao/code-of-learn-deep-learning-with-pytorch/blob/master/chapter3_NN/param_initialize.ipynb)
 31 |     - 优化算法
 32 |         - [SGD](https://github.com/SherlockLiao/code-of-learn-deep-learning-with-pytorch/blob/master/chapter3_NN/optimizer/sgd.ipynb)
 33 |         - [动量法](https://github.com/SherlockLiao/code-of-learn-deep-learning-with-pytorch/blob/master/chapter3_NN/optimizer/momentum.ipynb)
 34 |         - [Adagrad](https://github.com/SherlockLiao/code-of-learn-deep-learning-with-pytorch/blob/master/chapter3_NN/optimizer/adagrad.ipynb)
 35 |         - [RMSProp](https://github.com/SherlockLiao/code-of-learn-deep-learning-with-pytorch/blob/master/chapter3_NN/optimizer/rmsprop.ipynb)
 36 |         - [Adadelta](https://github.com/SherlockLiao/code-of-learn-deep-learning-with-pytorch/blob/master/chapter3_NN/optimizer/adadelta.ipynb)
 37 |         - [Adam](https://github.com/SherlockLiao/code-of-learn-deep-learning-with-pytorch/blob/master/chapter3_NN/optimizer/adam.ipynb)
 38 | - Chapter 4: 卷积神经网络
 39 |     - [PyTorch 中的卷积模块](https://github.com/SherlockLiao/code-of-learn-deep-learning-with-pytorch/blob/master/chapter4_CNN/basic_conv.ipynb)
 40 |     - [批标准化，batch normalization](https://github.com/SherlockLiao/code-of-learn-deep-learning-with-pytorch/blob/master/chapter4_CNN/batch-normalization.ipynb)
 41 |     - [使用重复元素的深度网络，VGG](https://github.com/SherlockLiao/code-of-learn-deep-learning-with-pytorch/blob/master/chapter4_CNN/vgg.ipynb)
 42 |     - [更加丰富化结构的网络，GoogLeNet](https://github.com/SherlockLiao/code-of-learn-deep-learning-with-pytorch/blob/master/chapter4_CNN/googlenet.ipynb)
 43 |     - [深度残差网络，ResNet](https://github.com/SherlockLiao/code-of-learn-deep-learning-with-pytorch/blob/master/chapter4_CNN/resnet.ipynb)
 44 |     - [稠密连接的卷积网络，DenseNet](https://github.com/SherlockLiao/code-of-learn-deep-learning-with-pytorch/blob/master/chapter4_CNN/densenet.ipynb)
 45 |     - 更好的训练卷积网络
 46 |         - [数据增强](https://github.com/SherlockLiao/code-of-learn-deep-learning-with-pytorch/blob/master/chapter4_CNN/data-augumentation.ipynb)
 47 |         - [正则化](https://github.com/SherlockLiao/code-of-learn-deep-learning-with-pytorch/blob/master/chapter4_CNN/regularization.ipynb)
 48 |         - [学习率衰减](https://github.com/SherlockLiao/code-of-learn-deep-learning-with-pytorch/blob/master/chapter4_CNN/lr-decay.ipynb)
 49 | - Chapter 5: 循环神经网络
 50 |     - [循环神经网络模块：LSTM 和 GRU](https://github.com/SherlockLiao/code-of-learn-deep-learning-with-pytorch/blob/master/chapter5_RNN/pytorch-rnn.ipynb)
 51 |     - [使用 RNN 进行图像分类](https://github.com/SherlockLiao/code-of-learn-deep-learning-with-pytorch/blob/master/chapter5_RNN/rnn-for-image.ipynb)
 52 |     - [使用 RNN 进行时间序列分析](https://github.com/SherlockLiao/code-of-learn-deep-learning-with-pytorch/blob/master/chapter5_RNN/time-series/lstm-time-series.ipynb)
 53 |     - 自然语言处理的应用：
 54 |         - [Word Embedding](https://github.com/SherlockLiao/code-of-learn-deep-learning-with-pytorch/blob/master/chapter5_RNN/nlp/word-embedding.ipynb)
 55 |         - [N-Gram 模型](https://github.com/SherlockLiao/code-of-learn-deep-learning-with-pytorch/blob/master/chapter5_RNN/nlp/n-gram.ipynb)
 56 |         - [Seq-LSTM 做词性预测](https://github.com/SherlockLiao/code-of-learn-deep-learning-with-pytorch/blob/master/chapter5_RNN/nlp/seq-lstm.ipynb)
 57 | - Chapter 6: 生成对抗网络
 58 |     - [自动编码器](https://github.com/SherlockLiao/code-of-learn-deep-learning-with-pytorch/blob/master/chapter6_GAN/autoencoder.ipynb)
 59 |     - [变分自动编码器](https://github.com/SherlockLiao/code-of-learn-deep-learning-with-pytorch/blob/master/chapter6_GAN/vae.ipynb)
 60 |     - [生成对抗网络](https://github.com/SherlockLiao/code-of-learn-deep-learning-with-pytorch/blob/master/chapter6_GAN/gan.ipynb)
 61 |     - 深度卷积对抗网络 (DCGANs) 生成人脸
 62 | - Chapter 7: 深度强化学习
 63 |     - [Q Learning](https://github.com/SherlockLiao/code-of-learn-deep-learning-with-pytorch/blob/master/chapter7_RL/q-learning-intro.ipynb)
 64 |     - [Open AI gym](https://github.com/SherlockLiao/code-of-learn-deep-learning-with-pytorch/blob/master/chapter7_RL/open_ai_gym.ipynb)
 65 |     - [Deep Q-networks](https://github.com/SherlockLiao/code-of-learn-deep-learning-with-pytorch/blob/master/chapter7_RL/dqn.ipynb)
 66 | - Chapter 8: PyTorch高级
 67 |     - [tensorboard 可视化](https://github.com/SherlockLiao/code-of-learn-deep-learning-with-pytorch/blob/master/chapter8_PyTorch-Advances/tensorboard.ipynb)
 68 |    - [灵活的数据读取介绍](https://github.com/SherlockLiao/code-of-learn-deep-learning-with-pytorch/blob/master/chapter8_PyTorch-Advances/data-io.ipynb)
 69 |     - autograd.function 的介绍
 70 |     - 数据并行和多 GPU
 71 |     - 使用 ONNX 转化为 Caffe2 模型
 72 |     - 如何部署训练好的神经网络
 73 |     - 打造属于自己的 PyTorch 的使用习惯
 74 | 
 75 | ### part2: 深度学习的应用
 76 | - Chapter 9: 计算机视觉
 77 |     - [Fine-tuning: 通过微调进行迁移学习](https://github.com/SherlockLiao/code-of-learn-deep-learning-with-pytorch/blob/master/chapter9_Computer-Vision/fine_tune/)
 78 |     - kaggle初体验:猫狗大战
 79 |     - [语义分割: 通过 FCN 实现像素级别的分类](https://github.com/SherlockLiao/code-of-learn-deep-learning-with-pytorch/tree/master/chapter9_Computer-Vision/segmentation)
 80 |     - Pixel to Pixel 生成对抗网络
 81 |     - Neural Transfer: 通过卷积网络实现风格迁移
 82 |     - Deep Dream: 探索卷积网络眼中的世界
 83 | 
 84 | - Chapter 10: 自然语言处理
 85 |     - [Char RNN 实现文本生成](https://github.com/SherlockLiao/code-of-learn-deep-learning-with-pytorch/blob/master/chapter10_Natural-Language-Process/char_rnn/) 
 86 |     - Image Caption: 实现图片字幕生成
 87 |     - seq2seq 实现机器翻译
 88 |     - cnn + rnn + attention 实现文本识别
 89 | 
 90 | ## 一些别的资源
 91 | 
 92 | 关于深度学习的一些公开课程以及学习资源，可以参考我的这个[repository](https://github.com/SherlockLiao/Roadmap-of-DL-and-ML)
 93 | 
 94 | 可以关注我的[知乎专栏](https://zhuanlan.zhihu.com/c_94953554)和[博客](https://sherlockliao.github.io/)，会经常分享一些深度学习的文章
 95 | 
 96 | 关于PyTorch的资源
 97 | 
 98 | 我的github repo [pytorch-beginner](https://github.com/SherlockLiao/pytorch-beginner)
 99 | 
100 | [pytorch-tutorial](https://github.com/yunjey/pytorch-tutorial)
101 | 
102 | [the-incredible-pytorch](https://github.com/ritchieng/the-incredible-pytorch)
103 | 
104 | [practical-pytorch](https://github.com/spro/practical-pytorch)
105 | 
106 | [PyTorchZeroToAll](https://github.com/hunkim/PyTorchZeroToAll)
107 | 
108 | [Awesome-pytorch-list](https://github.com/bharathgs/Awesome-pytorch-list)
109 | 
110 | 
111 | 
112 | ## Acknowledgement
113 | 
114 | 本书的第二版内容其中一些部分参考了 mxnet gluon 的中文教程，[通过MXNet/Gluon来动手学习深度学习](https://zh.gluon.ai/)。
115 | 
116 | Gluon 是一个和 PyTorch 非常相似的框架，非常简单、易上手，推荐大家去学习一下，也安利一下 gluon 的中文课程，全中文授课，有视频，有代码练习，可以说是最全面的中文深度学习教程。


--------------------------------------------------------------------------------
/aws.md:
--------------------------------------------------------------------------------
  1 | ## 配置AWS云计算平台
  2 | 
  3 | 这是一个帮助文档，我们会一步一步讲解如何从0开始在AWS上申请CPU或者GPU机器进行使用。
  4 | 
  5 | 
  6 | 
  7 | ### 申请账号并登陆
  8 | 
  9 | 首先我们需要在[aws官网](https://aws.amazon.com/)上面注册账号，这里需要绑定一张信用卡，所以没有master或者VISA卡的同学可以开通一张，实在不熟悉的同学可以搜索一下"如何注册aws账号"。
 10 | 
 11 | 然后我们进入到控制面板，可以看到下面的图片，点击"EC2"
 12 | 
 13 | ![](https://ws1.sinaimg.cn/large/006tNc79gy1fo7xn33e4cj31kw0wo11u.jpg)
 14 | 
 15 | 
 16 | 然后我们就能够进入到下一个界面
 17 | 
 18 | 
 19 | ![](https://ws1.sinaimg.cn/large/006tNc79gy1fo7xoznbz3j31kw0j7dmu.jpg)
 20 | 
 21 | 
 22 | 这个界面只需要注意三个地方，一个是右上角的地区，需要选择一个离你比较近的地区，整个亚太地区可以选择韩国，日本，新加坡和孟买，需要注意的是不同的地区实例价格是不同的，如果你有vpn，那么推荐选择俄勒冈，因为这个地区最便宜，比亚太地区便宜了4到5倍。然后是左边的一个方框"限制"，如果你申请CPU的计算实例，那么不用管，如果你要申请GPU计算实例，就需要点击"限制"进行申请，因为GPU实例会产生费用，亚马逊需要和你确认这个事情，一般需要两到三个工作日。
 23 | 
 24 | 接下面就可以开始启动实例了，点击中间的红框即可开始。
 25 | 
 26 | 
 27 | ### 申请实例并启动
 28 | 
 29 | 
 30 | ![](https://ws2.sinaimg.cn/large/006tNc79gy1fo7xpjsxwlj31kw0q8dp2.jpg)
 31 | 
 32 | 
 33 | 进入上面的界面之后，需要选择操作系统，这里我们一般都选择linux系统，当然还有很多社区AMI，也就是别人配置好的系统，这里先暂时不用管，我们一般就在上面两个红框中选择一个，第一个是一个空的系统，什么都没有，第二个是一个深度学习的系统，装好了CUDA以及很多框架，可以选择这一个，如果选择这个，那么需要的磁盘空间可能更大。
 34 | 
 35 | 
 36 | 
 37 | 点击选择之后便可以进入下面的界面。
 38 | 
 39 | ![](https://ws4.sinaimg.cn/large/006tNc79gy1fo7xqq958cj31kw0ki112.jpg)
 40 | 
 41 | 
 42 | 这里需要选择实例类型，如果新注册的用户可以免费使用一年的t2.mirco实例，这个实例是没有GPU的，如果要使用GPU的实例，那么从上面的实例类型中选择GPU计算，便可以快速跳转到下面这里。
 43 | 
 44 | 
 45 | ![](https://ws4.sinaimg.cn/large/006tNc79gy1fo7xr45wmkj31kw0nktgt.jpg)
 46 | 
 47 | 这里有很多个实例，一般我们就选第一个p2.xlarge，这个实例包含一个Nvidia k40GPU，后面有8块GPU和16块GPU的版本，当然费用也更高。除此之外，下面还有 p3.2xlarge，这里面包含更新的 GPU，速度也会快很多，当然价格也会贵一些，有一点需要注意，选择 p2.xlarge 只能安装 cuda8，而选择 p3.2xlarge 则可以安装 cuda9。选择完成之后我们可以进入下一步配置实例信息。
 48 | 
 49 | 
 50 | ![](https://ws2.sinaimg.cn/large/006tNc79gy1fo7xrl5bi9j31kw08j77v.jpg)
 51 | 
 52 | 这里我们只需要关注根目录的大小，也就是云端计算平台的硬盘大小，因为我们需要存放数据集，需要安装框架，所以需要大一点，新注册的用户可以免费试用30G的存储，我们可以设置为40G，一般费用比较便宜。然后点击审核和启动实例。
 53 | 
 54 | 
 55 | 
 56 | 接着进入到下面这个界面，我们可以点击右下角的启动来启动实例了。
 57 | 
 58 | 
 59 | ![](https://ws1.sinaimg.cn/large/006tNc79gy1fo7xs8wl8hj31kw0sp13n.jpg)
 60 | 
 61 | 
 62 | 
 63 | 
 64 | 接着会跳出一个对话框如下。
 65 | 
 66 | 
 67 | ![9.png](http://upload-images.jianshu.io/upload_images/3623720-4a6cd6ff1321e5fb.png?imageMogr2/auto-orient/strip%7CimageView2/2/w/1240)
 68 | 
 69 | 
 70 | 这里需要你创建一个密钥对，因为现在aws不支持密码登录，所以需要密钥登录，你在名称那一栏取一个名字，然后点击**下载密钥对**就可以了。
 71 | 
 72 | 
 73 | 
 74 | 然后你就可以看到你的实例正在启动，点击下图红框的地方进入具体的实例位置。
 75 | 
 76 | 
 77 | ![](https://ws3.sinaimg.cn/large/006tNc79gy1fo7xtcjn2fj31kw0c177o.jpg)
 78 | 
 79 | 
 80 | 
 81 | 然后可以进入到下面的界面，可以看到实例正在启动，右键点击实例这一栏，然后点击连接。
 82 | 
 83 | ![](https://ws1.sinaimg.cn/large/006tNc79gy1fo7xtys9mej31kw0iu422.jpg)
 84 | 
 85 | 
 86 | 
 87 | 
 88 | 接着便会出来下面的窗口，按着这个窗口的操作，如果使用windows系统，需要PuTTY连接，因为我的电脑是mac，所以这个部分没有尝试。在mac下打开终端，先进入刚才存放密钥的位置，然后输出`chmod 400 yourkey.pem`，这里我的密钥是'liao.pem'，这个命令只需要第一次连接的时候输入，后面连接就不用管了，然后通过下面的命令连到你的远程linux服务器。
 89 | 
 90 | 
 91 | ![12.png](http://upload-images.jianshu.io/upload_images/3623720-1c476e3770c0eb63.png?imageMogr2/auto-orient/strip%7CimageView2/2/w/1240)
 92 | 
 93 | 
 94 | 
 95 | 
 96 | 比如，这是我在终端中进行的输入，第一次连接会弹出一个问题，输入yes即可。
 97 | 
 98 | ![13.png](http://upload-images.jianshu.io/upload_images/3623720-825156b98dba8b84.png?imageMogr2/auto-orient/strip%7CimageView2/2/w/1240)
 99 | 
100 | 
101 | 
102 | 然后我们便进入到了系统，可以看到，红框就表示连接的远程服务器。
103 | 
104 | ![14.png](http://upload-images.jianshu.io/upload_images/3623720-8a19f59377d88055.png?imageMogr2/auto-orient/strip%7CimageView2/2/w/1240)
105 | 
106 | 
107 | 
108 | 
109 | ### 安装Anaconda
110 | 
111 | 下面简单演示一下如何在远程环境下安装anaconda，这里需要一点bash命令的基础。首先使用自己的电脑进入到[Anaconda的官网](https://www.anaconda.com/download/#linux)，然后右键点击Download，保存链接地址。
112 | 
113 | ![15.png](http://upload-images.jianshu.io/upload_images/3623720-54ba5def9981eb27.png?imageMogr2/auto-orient/strip%7CimageView2/2/w/1240)
114 | 
115 | 
116 | 
117 | 
118 | 然后在刚刚连接的远程服务器上面输入
119 | 
120 | ```bash
121 | wget https://repo.continuum.io/archive/Anaconda3-5.0.1-Linux-x86_64.sh
122 | ```
123 | 
124 | 后面是刚刚复制的地址，然后输入回车，就开始下载了，下载完成之后是一个后缀为.sh的文件，输入`sudo sh 文件名.sh`就可以开始安装了。
125 | 
126 | 
127 | ![16.png](http://upload-images.jianshu.io/upload_images/3623720-709e1ab46eb204a2.png?imageMogr2/auto-orient/strip%7CimageView2/2/w/1240)
128 | 
129 | 
130 | 
131 | 
132 | 安装完成之后，通过下面的命令配置环境变量。
133 | 
134 | ```bash
135 | echo 'export PATH="~/anaconda3/bin:$PATH"' >> ~/.bashrc
136 | 
137 | source ~/.bashrc
138 | ```
139 | 
140 | 这样便完成了远程Anaconda的安装。
141 | 
142 | 
143 | 
144 | ### 安装CUDA
145 | 
146 | 【注意】只有CPU的实例可以跳过步骤。
147 | 
148 | 我们去Nvidia官网下载CUDA并安装。选择正确的版本并获取下载地址。
149 | 
150 | 【注意】目前官方默认是 cuda9，如果选择的是 p2.xlarge，则需要安装 cuda8，可以使用下面的命令来下载并安装 cuda8
151 | 
152 | ```bash
153 | wget https://developer.nvidia.com/compute/cuda/8.0/Prod2/local_installers/cuda_8.0.61_375.26_linux-run
154 | sudo sh cuda_8.0.61_375.26_linux-run
155 | ```
156 | 
157 | 
158 | 
159 | ![](https://github.com/mli/gluon-tutorials-zh/blob/master/img/cuda.png?raw=true)
160 | 
161 | 然后使用`wget`下载并且安装 cuda9
162 | 
163 | ```bash
164 | wget https://developer.nvidia.com/compute/cuda/9.1/Prod/local_installers/cuda_9.1.85_387.26_linux
165 | ```
166 | 
167 | 这里需要回答几个问题。
168 | 
169 | ```
170 | accept/decline/quit: accept
171 | Install NVIDIA Accelerated Graphics Driver for Linux-x86_64 375.26?
172 | (y)es/(n)o/(q)uit: y
173 | Do you want to install the OpenGL libraries?
174 | (y)es/(n)o/(q)uit [ default is yes ]: y
175 | Do you want to run nvidia-xconfig?
176 | (y)es/(n)o/(q)uit [ default is no ]: n
177 | Install the CUDA 8.0 Toolkit?
178 | (y)es/(n)o/(q)uit: y
179 | Enter Toolkit Location
180 |  [ default is /usr/local/cuda-8.0 ]:
181 | Do you want to install a symbolic link at /usr/local/cuda?
182 | (y)es/(n)o/(q)uit: y
183 | Install the CUDA 8.0 Samples?
184 | (y)es/(n)o/(q)uit: n
185 | ```
186 | 
187 | 安装完成后运行
188 | 
189 | ```bash
190 | nvidia-smi
191 | ```
192 | 
193 | 就可以看到这个实例的GPU了。最后将CUDA加入到library path方便之后安装的库找到它。
194 | 
195 | cuda 8
196 | 
197 | ```bash
198 | echo "export LD_LIBRARY_PATH=\${LD_LIBRARY_PATH}:/usr/local/cuda-8.0/lib64" >>.bashrc
199 | ```
200 | 
201 | cuda 9
202 | 
203 | ```bash
204 | echo "export LD_LIBRARY_PATH=\${LD_LIBRARY_PATH}:/usr/local/cuda-9.1/lib64" >>.bashrc
205 | ```
206 | 
207 | 
208 | 
209 | ### 运行Jupyter notebook
210 | 
211 | 接下来在远程终端运行Jupyter notebook。
212 | 
213 | ```bash
214 | jupyter notebook
215 | ```
216 | 
217 | 如果成功的话会看到类似的输出
218 | 
219 | ![](https://github.com/mli/gluon-tutorials-zh/blob/master/img/jupyter.png?raw=true)
220 | 
221 | 因为我们的实例没有暴露8888端口，所以我们可以通过ssh映射到本地
222 | 
223 | ```bash
224 | ssh -L8888:locallhost:8888 ubuntu@your-ip.amazonaws.com
225 | ```
226 | 
227 |  然后把jupyter log里的URL复制到本地浏览器就行了。
228 | 
229 | 【注意】如果本地运行了Jupyter notebook，那么8888端口就可能被占用了。要么关掉本地jupyter，要么把端口映射改成别的。例如，假设aws使用默认8888端口，我们可以通过ssh映射到本地8889端口：
230 | 
231 | ```bash
232 | ssh -N -f -L localhost:8889:localhost:8888 ubuntu@your-ip.amazonaws.com
233 | ```
234 | 
235 | 然后在本地浏览器打开localhost:8889，这时会提示需要token值。接下来，我们将aws上jupyter log里的token值（例如上图里：...localhost:8888/?token=`token值`）复制粘贴即可。
236 | 
237 | 
238 | 
239 | ### 后续
240 | 
241 | 因为云服务按时间计费，通常我们不用时需要把样例关掉，到下次要用时再开。
242 | 
243 | 
244 | 
245 | ![17.png](http://upload-images.jianshu.io/upload_images/3623720-6e4fb6cb2d39d66f.png?imageMogr2/auto-orient/strip%7CimageView2/2/w/1240)
246 | 
247 | 
248 | 如果是停掉（Stop)，下次可以直接继续用，但硬盘空间会计费。如果是终结(Termination)，我们一般会先把操作系统做镜像，下次开始时直接使用镜像（AMI）（上面的教程使用了Ubuntu 16.06 AMI）就行了，不需要再把上面流程走一次。
249 | 
250 | 
251 | ![18.png](http://upload-images.jianshu.io/upload_images/3623720-e4aac81d991e1a28.png?imageMogr2/auto-orient/strip%7CimageView2/2/w/1240)
252 | 
253 | 
254 | 
255 | 
256 | **云虽然很方便，但是不便宜，所以在使用完GPU实例之后一定要记得关掉。**
257 | 
258 | 
259 | 
260 | 上面就是整个的配置流程，有问题欢迎提出issue。


--------------------------------------------------------------------------------
/chapter10_Natural-Language-Process/char_rnn/README.md:
--------------------------------------------------------------------------------
 1 | # Char-RNN-PyTorch
 2 | 使用字符级别的RNN进行文本生成，使用PyTorch框架。[Gluon实现](https://github.com/SherlockLiao/Char-RNN-Gluon)
 3 | 
 4 | ## Requirements
 5 | [PyTorch 0.3](http://pytorch.org/)
 6 | 
 7 | [MxTorch](https://github.com/SherlockLiao/mxtorch)
 8 | 
 9 | [tensorboardX](https://github.com/lanpa/tensorboard-pytorch)
10 | 
11 | 按照 pytorch 官网安装 pytorch，将 mxtorch 下载下来，放到根目录，安装 tensorboardX 实现 tensorboard 可视化
12 | 
13 | ```bash
14 | \Char-RNN-PyTorch
15 | 	\mxtorch
16 | 	\data
17 | 	\dataset
18 | 	\models
19 | 	config.py
20 | 	main.py
21 | ```
22 | 
23 | 
24 | 
25 | ### 训练模型
26 | 
27 | 所有的配置文件都放在 config.py 里面，通过下面的代码来训练模型
28 | 
29 | ```bash
30 | python main.py train
31 | ```
32 | 
33 | 也可以在终端修改配置
34 | 
35 | ```bash
36 | python main.py train \
37 | 	--txt='./dataset/poetry.txt' \ # 训练用的txt文本
38 | 	--batch=128  \ # batch_size
39 | 	--max_epoch=300 \ 
40 | 	--len=30 \ # 输入RNN的序列长度
41 | 	--max_vocab=5000 \ # 最大的字符数量
42 | 	--embed_dim=512 \ # 词向量的维度
43 | 	--hidden_size=512 \ # 网络的输出维度
44 | 	--num_layers=2 \ # RNN的层数
45 | 	--dropout=0.5
46 | ```
47 | 
48 | 如果希望使用训练好的网络进行文本生成，使用下面的代码
49 | 
50 | ```bash
51 | python main.py predict \
52 | 	--begin='天青色等烟雨' \ # 生成文本的开始，可以是一个字符，也可以一段话
53 | 	--predict_len=100 \ # 希望生成文本的长度
54 | 	--load_model='./checkpoints/CharRNN_best_model.pth' # 读取训练模型的位置
55 | ```
56 | 
57 | ## Result
58 | 如果使用古诗的数据集进行训练，可以得到下面的结果
59 | 
60 | ```bash
61 | 天青色等烟雨翩 黄望堪魄弦夜 逐奏文明际天月辉 豪天明月天趣 天外何山重满 遥天明上天  心空游无拂天外空寂室叨
62 | ```
63 | 
64 | 如果使用周杰伦的歌词作为训练集，可以得到下面的结果
65 | 
66 | ```bash
67 | 这感觉得可能 我这玻童来 城堡药比生对这些年风天　脚剧飘逐在尘里里步的路 麦缘日下一经经 听觉得远回白择
68 | ```
69 | 


--------------------------------------------------------------------------------
/chapter10_Natural-Language-Process/char_rnn/config.py:
--------------------------------------------------------------------------------
 1 | # encoding: utf-8
 2 | """
 3 | @author: xyliao
 4 | @contact: xyliao1993@qq.com
 5 | """
 6 | import warnings
 7 | from pprint import pprint
 8 | 
 9 | 
10 | class DefaultConfig(object):
11 |     model = 'CharRNN'
12 | 
13 |     # Dataset.
14 |     txt = './dataset/poetry.txt'
15 |     len = 20
16 |     max_vocab = 8000
17 |     begin = '天青色等烟雨'  # begin word of text
18 |     predict_len = 50  # predict length
19 | 
20 |     # Store result and save models.
21 |     result_file = 'result.txt'
22 |     save_file = './checkpoints/'
23 |     save_freq = 30  # save model every N epochs
24 |     save_best = True
25 | 
26 |     # Predict mode and generate contexts
27 |     load_model = './checkpoints/CharRNN_best_model.pth'
28 |     write_file = './write_context.txt'
29 | 
30 |     # Visualization parameters.
31 |     vis_dir = './vis/'
32 |     plot_freq = 100  # plot in tensorboard every N iterations
33 | 
34 |     # Model parameters.
35 |     embed_dim = 512
36 |     hidden_size = 512
37 |     num_layers = 2
38 |     dropout = 0.5
39 | 
40 |     # Model hyperparameters.
41 |     use_gpu = True  # use GPU or not
42 |     ctx = 0  # running on which cuda device
43 |     batch_size = 128  # batch size
44 |     num_workers = 4  # how many workers for loading data
45 |     max_epoch = 200
46 |     lr = 1e-3  # initial learning rate
47 |     weight_decay = 1e-4
48 | 
49 |     def _parse(self, kwargs):
50 |         for k, v in kwargs.items():
51 |             if not hasattr(self, k):
52 |                 warnings.warn("Warning: opt has not attribut %s" % k)
53 |             setattr(self, k, v)
54 | 
55 |         print('=========user config==========')
56 |         pprint(self._state_dict())
57 |         print('============end===============')
58 | 
59 |     def _state_dict(self):
60 |         return {k: getattr(self, k) for k, _ in DefaultConfig.__dict__.items()
61 |                 if not k.startswith('_')}
62 | 
63 | 
64 | opt = DefaultConfig()
65 | 


--------------------------------------------------------------------------------
/chapter10_Natural-Language-Process/char_rnn/data/__init__.py:
--------------------------------------------------------------------------------
1 | # encoding: utf-8
2 | """
3 | @author: xyliao
4 | @contact: xyliao1993@qq.com
5 | """
6 | from .dataset import TextConverter, TextDataset
7 | 


--------------------------------------------------------------------------------
/chapter10_Natural-Language-Process/char_rnn/data/dataset.py:
--------------------------------------------------------------------------------
 1 | # encoding: utf-8
 2 | """
 3 | @author: xyliao
 4 | @contact: xyliao1993@qq.com
 5 | 
 6 | This file is utils to convert text to index and create dataset to PyTorch training model.
 7 | """
 8 | 
 9 | import numpy as np
10 | import torch
11 | 
12 | 
13 | class TextConverter(object):
14 |     def __init__(self, text_path, max_vocab=5000):
15 |         """Construct a text index converter.
16 | 
17 |         Args:
18 |             text_path: txt file path.
19 |             max_vocab: maximum number of words.
20 |         """
21 | 
22 |         with open(text_path, 'r') as f:
23 |             text = f.read()
24 |         text = text.replace('\n', ' ').replace('\r', ' ').replace('，', ' ').replace('。', ' ')
25 |         vocab = set(text)
26 |         # If the number of words is larger than limit, clip the words with minimum frequency.
27 |         vocab_count = {}
28 |         for word in vocab:
29 |             vocab_count[word] = 0
30 |         for word in text:
31 |             vocab_count[word] += 1
32 |         vocab_count_list = []
33 |         for word in vocab_count:
34 |             vocab_count_list.append((word, vocab_count[word]))
35 |         vocab_count_list.sort(key=lambda x: x[1], reverse=True)
36 |         if len(vocab_count_list) > max_vocab:
37 |             vocab_count_list = vocab_count_list[:max_vocab]
38 |         vocab = [x[0] for x in vocab_count_list]
39 |         self.vocab = vocab
40 | 
41 |         self.word_to_int_table = {c: i for i, c in enumerate(self.vocab)}
42 |         self.int_to_word_table = dict(enumerate(self.vocab))
43 | 
44 |     @property
45 |     def vocab_size(self):
46 |         return len(self.vocab) + 1
47 | 
48 |     def word_to_int(self, word):
49 |         if word in self.word_to_int_table:
50 |             return self.word_to_int_table[word]
51 |         else:
52 |             return len(self.vocab)
53 | 
54 |     def int_to_word(self, index):
55 |         if index == len(self.vocab):
56 |             return '<unk>'
57 |         elif index < len(self.vocab):
58 |             return self.int_to_word_table[index]
59 |         else:
60 |             raise Exception('Unknown index!')
61 | 
62 |     def text_to_arr(self, text):
63 |         arr = []
64 |         for word in text:
65 |             arr.append(self.word_to_int(word))
66 |         return np.array(arr)
67 | 
68 |     def arr_to_text(self, arr):
69 |         words = []
70 |         for index in arr:
71 |             words.append(self.int_to_word(index))
72 |         return "".join(words)
73 | 
74 | 
75 | class TextDataset(object):
76 |     def __init__(self, text_path, n_step, arr_to_idx):
77 | 
78 |         with open(text_path, 'r') as f:
79 |             text = f.read()
80 |         text = text.replace('\n', ' ').replace('\r', ' ').replace('，', ' ').replace('。', ' ')
81 |         num_seq = int(len(text) / n_step)
82 |         self.num_seq = num_seq
83 |         self.n_step = n_step
84 |         # Clip more than maximum length.
85 |         text = text[:num_seq * n_step]
86 |         arr = arr_to_idx(text)
87 |         arr = arr.reshape((num_seq, -1))
88 |         self.arr = torch.from_numpy(arr)
89 | 
90 |     def __getitem__(self, item):
91 |         x = self.arr[item, :]
92 |         y = torch.zeros(x.shape)
93 |         y[:-1], y[-1] = x[1:], x[0]
94 |         return x, y
95 | 
96 |     def __len__(self):
97 |         return self.num_seq
98 | 


--------------------------------------------------------------------------------
/chapter10_Natural-Language-Process/char_rnn/main.py:
--------------------------------------------------------------------------------
  1 | # encoding: utf-8
  2 | """
  3 | @author: xyliao
  4 | @contact: xyliao1993@qq.com
  5 | """
  6 | from copy import deepcopy
  7 | 
  8 | import numpy as np
  9 | import torch
 10 | from mxtorch import meter
 11 | from mxtorch.trainer import Trainer, ScheduledOptim
 12 | from torch import nn
 13 | from torch.autograd import Variable
 14 | from torch.utils.data import DataLoader
 15 | from tqdm import tqdm
 16 | 
 17 | import models
 18 | from config import opt
 19 | from data import TextDataset, TextConverter
 20 | 
 21 | 
 22 | def get_data(convert):
 23 |     dataset = TextDataset(opt.txt, opt.len, convert.text_to_arr)
 24 |     return DataLoader(dataset, opt.batch_size, shuffle=True, num_workers=opt.num_workers)
 25 | 
 26 | 
 27 | def get_model(convert):
 28 |     model = getattr(models, opt.model)(convert.vocab_size,
 29 |                                        opt.embed_dim,
 30 |                                        opt.hidden_size,
 31 |                                        opt.num_layers,
 32 |                                        opt.dropout)
 33 |     if opt.use_gpu:
 34 |         model = model.cuda()
 35 |     return model
 36 | 
 37 | 
 38 | def get_loss(score, label):
 39 |     return nn.CrossEntropyLoss()(score, label.view(-1))
 40 | 
 41 | 
 42 | def get_optimizer(model):
 43 |     optimizer = torch.optim.Adam(model.parameters(), lr=opt.lr)
 44 |     return ScheduledOptim(optimizer)
 45 | 
 46 | 
 47 | def pick_top_n(preds, top_n=5):
 48 |     top_pred_prob, top_pred_label = torch.topk(preds, top_n, 1)
 49 |     top_pred_prob /= torch.sum(top_pred_prob)
 50 |     top_pred_prob = top_pred_prob.squeeze(0).cpu().numpy()
 51 |     top_pred_label = top_pred_label.squeeze(0).cpu().numpy()
 52 |     c = np.random.choice(top_pred_label, size=1, p=top_pred_prob)
 53 |     return c
 54 | 
 55 | 
 56 | class CharRNNTrainer(Trainer):
 57 |     def __init__(self, convert):
 58 |         self.convert = convert
 59 | 
 60 |         model = get_model(convert)
 61 |         criterion = get_loss
 62 |         optimizer = get_optimizer(model)
 63 |         super().__init__(model, criterion, optimizer)
 64 |         self.config += ('text: ' + opt.txt + '\n' + 'train text length: ' + str(opt.len) + '\n')
 65 |         self.config += ('predict text length: ' + str(opt.predict_len) + '\n')
 66 | 
 67 |         self.metric_meter['loss'] = meter.AverageValueMeter()
 68 | 
 69 |     def train(self, kwargs):
 70 |         self.reset_meter()
 71 |         self.model.train()
 72 |         train_data = kwargs['train_data']
 73 |         for data in tqdm(train_data):
 74 |             x, y = data
 75 |             y = y.long()
 76 |             if opt.use_gpu:
 77 |                 x = x.cuda()
 78 |                 y = y.cuda()
 79 |             x, y = Variable(x), Variable(y)
 80 | 
 81 |             # Forward.
 82 |             score, _ = self.model(x)
 83 |             loss = self.criterion(score, y)
 84 | 
 85 |             # Backward.
 86 |             self.optimizer.zero_grad()
 87 |             loss.backward()
 88 |             # Clip gradient.
 89 |             nn.utils.clip_grad_norm(self.model.parameters(), 5)
 90 |             self.optimizer.step()
 91 | 
 92 |             self.metric_meter['loss'].add(loss.data[0])
 93 | 
 94 |             # Update to tensorboard.
 95 |             if (self.n_iter + 1) % opt.plot_freq == 0:
 96 |                 self.writer.add_scalar('perplexity', np.exp(self.metric_meter['loss'].value()[0]), self.n_plot)
 97 |                 self.n_plot += 1
 98 | 
 99 |             self.n_iter += 1
100 | 
101 |         # Log the train metrics to dict.
102 |         self.metric_log['perplexity'] = np.exp(self.metric_meter['loss'].value()[0])
103 | 
104 |     def test(self, kwargs):
105 |         """Set beginning words and predicted length, using model to generate texts.
106 | 
107 |         Returns:
108 |             predicted generating text
109 |         """
110 |         self.model.eval()
111 |         begin = np.array([i for i in kwargs['begin']])
112 |         begin = np.random.choice(begin, size=1)
113 |         text_len = kwargs['predict_len']
114 |         samples = [self.convert.word_to_int(c) for c in begin]
115 |         input_txt = torch.LongTensor(samples)[None]
116 |         if opt.use_gpu:
117 |             input_txt = input_txt.cuda()
118 |         input_txt = Variable(input_txt)
119 |         _, init_state = self.model(input_txt)
120 |         result = samples
121 |         model_input = input_txt[:, -1][:, None]
122 |         for i in range(text_len):
123 |             out, init_state = self.model(model_input, init_state)
124 |             pred = pick_top_n(out.data)
125 |             model_input = Variable(torch.LongTensor(pred))[None]
126 |             if opt.use_gpu:
127 |                 model_input = model_input.cuda()
128 |             result.append(pred[0])
129 | 
130 |         # Update generating txt to tensorboard.
131 |         self.writer.add_text('text', self.convert.arr_to_text(result), self.n_plot)
132 |         self.n_plot += 1
133 |         print(self.convert.arr_to_text(result))
134 | 
135 |     def predict(self, begin, predict_len):
136 |         self.model.eval()
137 |         samples = [self.convert.word_to_int(c) for c in begin]
138 |         input_txt = torch.LongTensor(samples)[None]
139 |         if opt.use_gpu:
140 |             input_txt = input_txt.cuda()
141 |         input_txt = Variable(input_txt)
142 |         _, init_state = self.model(input_txt)
143 |         result = samples
144 |         model_input = input_txt[:, -1][:, None]
145 |         for i in range(predict_len):
146 |             out, init_state = self.model(model_input, init_state)
147 |             pred = pick_top_n(out.data)
148 |             model_input = Variable(torch.LongTensor(pred))[None]
149 |             if opt.use_gpu:
150 |                 model_input = model_input.cuda()
151 |             result.append(pred[0])
152 |         text = self.convert.arr_to_text(result)
153 |         print('Generate text is: {}'.format(text))
154 |         with open(opt.write_file, 'a') as f:
155 |             f.write(text)
156 | 
157 |     def load_state_dict(self, checkpoints):
158 |         self.model.load_state_dict(torch.load(checkpoints))
159 | 
160 |     def get_best_model(self):
161 |         if self.metric_log['perplexity'] < self.best_metric:
162 |             self.best_model = deepcopy(self.model.state_dict())
163 |             self.best_metric = self.metric_log['perplexity']
164 | 
165 | 
166 | def train(**kwargs):
167 |     opt._parse(kwargs)
168 |     torch.cuda.set_device(opt.ctx)
169 |     convert = TextConverter(opt.txt, max_vocab=opt.max_vocab)
170 |     train_data = get_data(convert)
171 |     char_rnn_trainer = CharRNNTrainer(convert)
172 |     char_rnn_trainer.fit(train_data=train_data,
173 |                          epochs=opt.max_epoch,
174 |                          begin=opt.begin,
175 |                          predict_len=opt.predict_len)
176 | 
177 | 
178 | def predict(**kwargs):
179 |     opt._parse(kwargs)
180 |     torch.cuda.set_device(opt.ctx)
181 |     convert = TextConverter(opt.txt, max_vocab=opt.max_vocab)
182 |     char_rnn_trainer = CharRNNTrainer(convert)
183 |     char_rnn_trainer.load_state_dict(opt.load_model)
184 |     char_rnn_trainer.predict(opt.begin, opt.predict_len)
185 | 
186 | 
187 | if __name__ == '__main__':
188 |     import fire
189 | 
190 |     fire.Fire()
191 | 


--------------------------------------------------------------------------------
/chapter10_Natural-Language-Process/char_rnn/models/__init__.py:
--------------------------------------------------------------------------------
1 | # encoding: utf-8
2 | """
3 | @author: xyliao
4 | @contact: xyliao1993@qq.com
5 | """
6 | from .char_rnn import CharRNN
7 | 


--------------------------------------------------------------------------------
/chapter10_Natural-Language-Process/char_rnn/models/char_rnn.py:
--------------------------------------------------------------------------------
 1 | # encoding: utf-8
 2 | """
 3 | @author: xyliao
 4 | @contact: xyliao1993@qq.com
 5 | """
 6 | import torch
 7 | from torch import nn
 8 | from torch.autograd import Variable
 9 | 
10 | from config import opt
11 | 
12 | 
13 | class CharRNN(nn.Module):
14 |     def __init__(self, num_classes, embed_dim, hidden_size, num_layers,
15 |                  dropout):
16 |         super().__init__()
17 |         self.num_layers = num_layers
18 |         self.hidden_size = hidden_size
19 | 
20 |         self.word_to_vec = nn.Embedding(num_classes, embed_dim)
21 |         self.rnn = nn.GRU(embed_dim, hidden_size, num_layers, dropout)
22 |         self.project = nn.Linear(hidden_size, num_classes)
23 | 
24 |     def forward(self, x, hs=None):
25 |         batch = x.shape[0]
26 |         if hs is None:
27 |             hs = Variable(
28 |                 torch.zeros(self.num_layers, batch, self.hidden_size))
29 |             if opt.use_gpu:
30 |                 hs = hs.cuda()
31 |         word_embed = self.word_to_vec(x)  # (batch, len, embed)
32 |         word_embed = word_embed.permute(1, 0, 2)  # (len, batch, embed)
33 |         out, h0 = self.rnn(word_embed, hs)  # (len, batch, hidden)
34 |         le, mb, hd = out.shape
35 |         out = out.view(le * mb, hd)
36 |         out = self.project(out)
37 |         out = out.view(le, mb, -1)
38 |         out = out.permute(1, 0, 2).contiguous()  # (batch, len, hidden)
39 |         return out.view(-1, out.shape[2]), h0
40 | 


--------------------------------------------------------------------------------
/chapter10_Natural-Language-Process/seq2seq-translation/README.md:
--------------------------------------------------------------------------------
1 | # seq2seq-translation
2 | PyTorch implement of neural machine translation
3 | 


--------------------------------------------------------------------------------
/chapter10_Natural-Language-Process/seq2seq-translation/dataset.py:
--------------------------------------------------------------------------------
  1 | import random
  2 | import re
  3 | import string
  4 | import unicodedata
  5 | 
  6 | import torch
  7 | from torch.utils.data import Dataset
  8 | 
  9 | SOS_token = 0
 10 | EOS_token = 1
 11 | MAX_LENGTH = 10
 12 | 
 13 | 
 14 | class Lang(object):
 15 |     def __init__(self, name):
 16 |         self.name = name
 17 |         self.word2index = {}
 18 |         self.word2count = {}
 19 |         self.index2word = {0: "SOS", 1: "EOS"}
 20 |         self.n_words = 2  # Count SOS and EOS
 21 | 
 22 |     def addSentence(self, sentence):
 23 |         for word in sentence.split(' '):
 24 |             self.addWord(word)
 25 | 
 26 |     def addWord(self, word):
 27 |         if word not in self.word2index:
 28 |             self.word2index[word] = self.n_words
 29 |             self.word2count[word] = 1
 30 |             self.index2word[self.n_words] = word
 31 |             self.n_words += 1
 32 |         else:
 33 |             self.word2count[word] += 1
 34 | 
 35 | 
 36 | def unicodeToAscii(s):
 37 |     return ''.join(
 38 |         c for c in unicodedata.normalize('NFD', s)
 39 |         if unicodedata.category(c) != 'Mn')
 40 | 
 41 | 
 42 | # Lowercase, trim, and remove non-letter characters
 43 | 
 44 | 
 45 | def normalizeString(s):
 46 |     s = unicodeToAscii(s.lower().strip())
 47 |     s = re.sub(r"([.!?])", r" \1", s)
 48 |     s = re.sub(r"[^a-zA-Z.!?]+", r" ", s)
 49 |     return s
 50 | 
 51 | 
 52 | def readLangs(lang1, lang2, reverse=False):
 53 |     print("Reading lines...")
 54 | 
 55 |     # Read the file and split into lines
 56 |     lines = open('data/%s-%s.txt' % (lang1, lang2), encoding='utf-8').\
 57 |         read().strip().split('\n')
 58 | 
 59 |     # Split every line into pairs and normalize
 60 |     pairs = [[normalizeString(s) for s in l.split('\t')] for l in lines]
 61 | 
 62 |     # Reverse pairs, make Lang instances
 63 |     if reverse:
 64 |         pairs = [list(reversed(p)) for p in pairs]
 65 |         input_lang = Lang(lang2)
 66 |         output_lang = Lang(lang1)
 67 |     else:
 68 |         input_lang = Lang(lang1)
 69 |         output_lang = Lang(lang2)
 70 | 
 71 |     return input_lang, output_lang, pairs
 72 | 
 73 | 
 74 | eng_prefixes = ("i am ", "i m ", "he is", "he s ", "she is", "she s",
 75 |                 "you are", "you re ", "we are", "we re ", "they are",
 76 |                 "they re ")
 77 | 
 78 | 
 79 | def filterPair(p):
 80 |     return len(p[0].split(' ')) < MAX_LENGTH and \
 81 |         len(p[1].split(' ')) < MAX_LENGTH and \
 82 |         p[1].startswith(eng_prefixes)
 83 | 
 84 | 
 85 | def filterPairs(pairs):
 86 |     return [pair for pair in pairs if filterPair(pair)]
 87 | 
 88 | 
 89 | def prepareData(lang1, lang2, reverse=False):
 90 |     input_lang, output_lang, pairs = readLangs(lang1, lang2, reverse)
 91 |     print("Read %s sentence pairs" % len(pairs))
 92 |     pairs = filterPairs(pairs)
 93 |     print("Trimmed to %s sentence pairs" % len(pairs))
 94 |     print("Counting words...")
 95 |     for pair in pairs:
 96 |         input_lang.addSentence(pair[0])
 97 |         output_lang.addSentence(pair[1])
 98 |     print("Counted words:")
 99 |     print(input_lang.name, input_lang.n_words)
100 |     print(output_lang.name, output_lang.n_words)
101 |     print(random.choice(pairs))
102 |     return input_lang, output_lang, pairs
103 | 
104 | 
105 | def indexesFromSentence(lang, sentence):
106 |     return [lang.word2index[word] for word in sentence.split(' ')]
107 | 
108 | 
109 | def tensorFromSentence(lang, sentence):
110 |     indexes = indexesFromSentence(lang, sentence)
111 |     indexes.append(EOS_token)
112 |     result = torch.LongTensor(indexes)
113 |     return result
114 | 
115 | 
116 | def tensorFromPair(input_lang, output_lang, pair):
117 |     input_tensor = tensorFromSentence(input_lang, pair[0])
118 |     target_tensor = tensorFromSentence(output_lang, pair[1])
119 |     return input_tensor, target_tensor
120 | 
121 | 
122 | class TextDataset(Dataset):
123 |     def __init__(self, dataload=prepareData, lang=['eng', 'fra']):
124 |         self.input_lang, self.output_lang, self.pairs = dataload(
125 |             lang[0], lang[1], reverse=True)
126 |         self.input_lang_words = self.input_lang.n_words
127 |         self.output_lang_words = self.output_lang.n_words
128 | 
129 |     def __getitem__(self, index):
130 |         return tensorFromPair(self.input_lang, self.output_lang,
131 |                               self.pairs[index])
132 | 
133 |     def __len__(self):
134 |         return len(self.pairs)
135 | 


--------------------------------------------------------------------------------
/chapter10_Natural-Language-Process/seq2seq-translation/evaluate.py:
--------------------------------------------------------------------------------
  1 | import random
  2 | 
  3 | import torch
  4 | from torch.autograd import Variable
  5 | 
  6 | from dataset import TextDataset
  7 | from model.seq2seq import AttnDecoderRNN, DecoderRNN, EncoderRNN
  8 | import matplotlib.pyplot as plt
  9 | SOS_token = 0
 10 | EOS_token = 1
 11 | MAX_LENGTH = 10
 12 | use_attn = True
 13 | use_cuda = torch.cuda.is_available()
 14 | lang_dataset = TextDataset()
 15 | print('*' * 10)
 16 | 
 17 | 
 18 | def evaluate(encoder, decoder, in_lang, max_length=MAX_LENGTH):
 19 |     if use_cuda:
 20 |         in_lang = in_lang.cuda()
 21 |     input_variable = Variable(in_lang)
 22 |     input_variable = input_variable.unsqueeze(0)
 23 |     input_length = input_variable.size(1)
 24 |     encoder_hidden = encoder.initHidden()
 25 | 
 26 |     encoder_outputs = Variable(torch.zeros(max_length, encoder.hidden_size))
 27 |     encoder_outputs = encoder_outputs.cuda() if use_cuda else encoder_outputs
 28 | 
 29 |     for ei in range(input_length):
 30 |         encoder_output, encoder_hidden = encoder(input_variable[:, ei],
 31 |                                                  encoder_hidden)
 32 |         encoder_outputs[ei] = encoder_output[0][0]
 33 | 
 34 |     decoder_input = Variable(torch.LongTensor([[SOS_token]]))  # SOS
 35 |     decoder_input = decoder_input.cuda() if use_cuda else decoder_input
 36 | 
 37 |     decoder_hidden = encoder_hidden
 38 | 
 39 |     decoded_words = []
 40 |     decoder_attentions = torch.zeros(max_length, max_length)
 41 | 
 42 |     if use_attn:
 43 |         for di in range(max_length):
 44 |             decoder_output, decoder_hidden, decoder_attention = decoder(
 45 |                 decoder_input, decoder_hidden, encoder_outputs)
 46 |             decoder_attentions[di] = decoder_attention.data
 47 |             topv, topi = decoder_output.data.topk(1)
 48 |             ni = topi[0][0]
 49 |             if ni == EOS_token:
 50 |                 decoded_words.append('<EOS>')
 51 |                 break
 52 |             else:
 53 |                 decoded_words.append(lang_dataset.output_lang.index2word[ni])
 54 | 
 55 |             decoder_input = Variable(torch.LongTensor([[ni]]))
 56 |             decoder_input = decoder_input.cuda() if use_cuda else decoder_input
 57 |     else:
 58 |         for di in range(max_length):
 59 |             decoder_output, decoder_hidden = decoder(decoder_input,
 60 |                                                      decoder_hidden)
 61 |             topv, topi = decoder_output.data.topk(1)
 62 |             ni = topi[0][0]
 63 |             if ni == EOS_token:
 64 |                 decoded_words.append('<EOS>')
 65 |                 break
 66 |             else:
 67 |                 decoded_words.append(lang_dataset.output_lang.index2word[ni])
 68 | 
 69 |             decoder_input = Variable(torch.LongTensor([[ni]]))
 70 |             decoder_input = decoder_input.cuda() if use_cuda else decoder_input
 71 |     if use_attn:
 72 |         return decoded_words, decoder_attentions[:di + 1]
 73 |     else:
 74 |         return decoded_words
 75 | 
 76 | 
 77 | def evaluateRandomly(encoder, decoder, n=10):
 78 |     for i in range(n):
 79 |         pair_idx = random.choice(list(range(len(lang_dataset))))
 80 |         pair = lang_dataset.pairs[pair_idx]
 81 |         in_lang, out_lang = lang_dataset[pair_idx]
 82 |         print('>', pair[0])
 83 |         print('=', pair[1])
 84 |         if use_attn:
 85 |             output_words, attentions = evaluate(encoder, decoder, in_lang)
 86 |         else:
 87 |             output_words = evaluate(encoder, decoder, in_lang)
 88 |         output_sentence = ' '.join(output_words)
 89 |         print('<', output_sentence)
 90 |         print('')
 91 | 
 92 | 
 93 | input_size = lang_dataset.input_lang_words
 94 | hidden_size = 256
 95 | output_size = lang_dataset.output_lang_words
 96 | 
 97 | encoder = EncoderRNN(input_size, hidden_size)
 98 | encoder.load_state_dict(torch.load('./encoder.pth'))
 99 | if use_attn:
100 |     decoder = AttnDecoderRNN(hidden_size, output_size, n_layers=2)
101 |     decoder.load_state_dict(torch.load('./attn_decoder.pth'))
102 | else:
103 |     decoder = DecoderRNN(hidden_size, output_size, n_layers=2)
104 |     decoder.load_state_dict(torch.load('./decoder.pth'))
105 | 
106 | if use_cuda:
107 |     encoder = encoder.cuda()
108 |     decoder = decoder.cuda()
109 | 
110 | evaluateRandomly(encoder, decoder)
111 | 
112 | if use_attn:
113 |     pair_idx = random.choice(list(range(len(lang_dataset))))
114 |     pairs = lang_dataset.pairs[pair_idx]
115 |     print('>')
116 |     print(pairs[0])
117 |     in_lang, out_lang = lang_dataset[pair_idx]
118 |     output_words, attentions = evaluate(encoder, decoder, in_lang)
119 |     plt.matshow(attentions.cpu().numpy())
120 |     plt.show()


--------------------------------------------------------------------------------
/chapter10_Natural-Language-Process/seq2seq-translation/model/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/L1aoXingyu/code-of-learn-deep-learning-with-pytorch/ea24810dc26265e85e3eec34e1d091d21202d800/chapter10_Natural-Language-Process/seq2seq-translation/model/__init__.py


--------------------------------------------------------------------------------
/chapter10_Natural-Language-Process/seq2seq-translation/model/seq2seq.py:
--------------------------------------------------------------------------------
  1 | import torch
  2 | import torch.nn.functional as F
  3 | from torch import nn
  4 | from torch.autograd import Variable
  5 | 
  6 | MAX_LENGTH = 10
  7 | use_cuda = torch.cuda.is_available()
  8 | 
  9 | 
 10 | class EncoderRNN(nn.Module):
 11 |     def __init__(self, input_size, hidden_size, n_layers=1):
 12 |         super(EncoderRNN, self).__init__()
 13 |         self.n_layers = n_layers
 14 |         self.hidden_size = hidden_size
 15 | 
 16 |         self.embedding = nn.Embedding(input_size, hidden_size)
 17 |         self.gru = nn.GRU(hidden_size, hidden_size)
 18 | 
 19 |     def forward(self, input, hidden):
 20 |         input = input.unsqueeze(1)
 21 |         embedded = self.embedding(input)  # batch, hidden
 22 |         output = embedded.permute(1, 0, 2)
 23 |         for i in range(self.n_layers):
 24 |             output, hidden = self.gru(output, hidden)
 25 |         return output, hidden
 26 | 
 27 |     def initHidden(self):
 28 |         result = Variable(torch.zeros(1, 1, self.hidden_size))
 29 |         if use_cuda:
 30 |             return result.cuda()
 31 |         else:
 32 |             return result
 33 | 
 34 | 
 35 | class DecoderRNN(nn.Module):
 36 |     def __init__(self, hidden_size, output_size, n_layers=1):
 37 |         super(DecoderRNN, self).__init__()
 38 |         self.n_layers = n_layers
 39 |         self.hidden_size = hidden_size
 40 | 
 41 |         self.embedding = nn.Embedding(output_size, hidden_size)
 42 |         self.gru = nn.GRU(hidden_size, hidden_size)
 43 |         self.out = nn.Linear(hidden_size, output_size)
 44 |         self.softmax = nn.LogSoftmax()
 45 | 
 46 |     def forward(self, input, hidden):
 47 |         output = self.embedding(input)  # batch, 1, hidden
 48 |         output = output.permute(1, 0, 2)  # 1, batch, hidden
 49 |         for i in range(self.n_layers):
 50 |             output = F.relu(output)
 51 |             output, hidden = self.gru(output, hidden)
 52 |         output = self.softmax(self.out(output[0]))
 53 |         return output, hidden
 54 | 
 55 |     def initHidden(self):
 56 |         result = Variable(torch.zeros(1, 1, self.hidden_size))
 57 |         if use_cuda:
 58 |             return result.cuda()
 59 |         else:
 60 |             return result
 61 | 
 62 | 
 63 | class AttnDecoderRNN(nn.Module):
 64 |     def __init__(self,
 65 |                  hidden_size,
 66 |                  output_size,
 67 |                  n_layers=1,
 68 |                  dropout_p=0.1,
 69 |                  max_length=MAX_LENGTH):
 70 |         super(AttnDecoderRNN, self).__init__()
 71 |         self.hidden_size = hidden_size
 72 |         self.output_size = output_size
 73 |         self.n_layers = n_layers
 74 |         self.dropout_p = dropout_p
 75 |         self.max_length = max_length
 76 | 
 77 |         self.embedding = nn.Embedding(self.output_size, self.hidden_size)
 78 |         self.attn = nn.Linear(self.hidden_size * 2, self.max_length)
 79 |         self.attn_combine = nn.Linear(self.hidden_size * 2, self.hidden_size)
 80 |         self.dropout = nn.Dropout(self.dropout_p)
 81 |         self.gru = nn.GRU(self.hidden_size, self.hidden_size)
 82 |         self.out = nn.Linear(self.hidden_size, self.output_size)
 83 | 
 84 |     def forward(self, input, hidden, encoder_outputs):
 85 |         '''
 86 |         input: batch, 1
 87 |         hidden: 1, batch, hidden
 88 |         encoder_outputs: length, hidden
 89 |         '''
 90 |         embedded = self.embedding(input)  # batch, 1, hidden
 91 |         embedded = self.dropout(embedded)
 92 |         embedded = embedded.squeeze(1)  # batch, hidden
 93 | 
 94 |         attn_weights = F.softmax(
 95 |             self.attn(torch.cat((embedded, hidden[0]), 1)))
 96 |         # batch, max_length
 97 |         encoder_outputs = encoder_outputs.unsqueeze(0)
 98 |         # batch, max_length, hidden
 99 |         attn_applied = torch.bmm(attn_weights.unsqueeze(1), encoder_outputs)
100 |         # batch, 1, hidden
101 |         output = torch.cat((embedded, attn_applied.squeeze(1)), 1)
102 |         # batch, 2xhidden
103 |         output = self.attn_combine(output).unsqueeze(0)
104 |         #1, batch, hidden
105 | 
106 |         for i in range(self.n_layers):
107 |             output = F.relu(output)
108 |             output, hidden = self.gru(output, hidden)
109 | 
110 |         output = F.log_softmax(self.out(output.squeeze(0)))
111 |         return output, hidden, attn_weights
112 | 
113 |     def initHidden(self):
114 |         result = Variable(torch.zeros(1, 1, self.hidden_size))
115 |         if use_cuda:
116 |             return result.cuda()
117 |         else:
118 |             return result
119 | 


--------------------------------------------------------------------------------
/chapter10_Natural-Language-Process/seq2seq-translation/train.py:
--------------------------------------------------------------------------------
  1 | import time
  2 | 
  3 | import matplotlib.pyplot as plt
  4 | import numpy as np
  5 | import torch
  6 | from torch import nn, optim
  7 | from torch.autograd import Variable
  8 | from torch.utils.data import DataLoader
  9 | 
 10 | from dataset import TextDataset
 11 | from model.seq2seq import AttnDecoderRNN, DecoderRNN, EncoderRNN
 12 | 
 13 | SOS_token = 0
 14 | EOS_token = 1
 15 | MAX_LENGTH = 10
 16 | lang_dataset = TextDataset()
 17 | lang_dataloader = DataLoader(lang_dataset, shuffle=True)
 18 | print()
 19 | 
 20 | input_size = lang_dataset.input_lang_words
 21 | hidden_size = 256
 22 | output_size = lang_dataset.output_lang_words
 23 | total_epoch = 20
 24 | 
 25 | encoder = EncoderRNN(input_size, hidden_size)
 26 | decoder = DecoderRNN(hidden_size, output_size, n_layers=2)
 27 | attn_decoder = AttnDecoderRNN(hidden_size, output_size, n_layers=2)
 28 | use_attn = True
 29 | 
 30 | if torch.cuda.is_available():
 31 |     encoder = encoder.cuda()
 32 |     decoder = decoder.cuda()
 33 |     attn_decoder = attn_decoder.cuda()
 34 | 
 35 | 
 36 | def showPlot(points):
 37 |     plt.figure()
 38 |     x = np.arange(len(points))
 39 |     plt.plot(x, points)
 40 |     plt.show()
 41 | 
 42 | 
 43 | def train(encoder, decoder, total_epoch, use_attn):
 44 | 
 45 |     param = list(encoder.parameters()) + list(decoder.parameters())
 46 |     optimizer = optim.Adam(param, lr=1e-3)
 47 |     criterion = nn.NLLLoss()
 48 |     plot_losses = []
 49 |     for epoch in range(total_epoch):
 50 |         since = time.time()
 51 |         running_loss = 0
 52 |         print_loss_total = 0
 53 |         total_loss = 0
 54 |         for i, data in enumerate(lang_dataloader):
 55 |             in_lang, out_lang = data
 56 |             if torch.cuda.is_available():
 57 |                 in_lang = in_lang.cuda()
 58 |                 out_lang = out_lang.cuda()
 59 |             in_lang = Variable(in_lang)  # batch=1, length
 60 |             out_lang = Variable(out_lang)
 61 | 
 62 |             encoder_outputs = Variable(
 63 |                 torch.zeros(MAX_LENGTH, encoder.hidden_size))
 64 |             if torch.cuda.is_available():
 65 |                 encoder_outputs = encoder_outputs.cuda()
 66 |             encoder_hidden = encoder.initHidden()
 67 |             for ei in range(in_lang.size(1)):
 68 |                 encoder_output, encoder_hidden = encoder(
 69 |                     in_lang[:, ei], encoder_hidden)
 70 |                 encoder_outputs[ei] = encoder_output[0][0]
 71 | 
 72 |             decoder_input = Variable(torch.LongTensor([[SOS_token]]))
 73 |             if torch.cuda.is_available():
 74 |                 decoder_input = decoder_input.cuda()
 75 |             decoder_hidden = encoder_hidden
 76 |             loss = 0
 77 |             if use_attn:
 78 |                 for di in range(out_lang.size(1)):
 79 |                     decoder_output, decoder_hidden, decoder_attention = attn_decoder(
 80 |                         decoder_input, decoder_hidden, encoder_outputs)
 81 |                     loss += criterion(decoder_output, out_lang[:, di])
 82 |                     topv, topi = decoder_output.data.topk(1)
 83 |                     ni = topi[0][0]
 84 | 
 85 |                     decoder_input = Variable(torch.LongTensor([[ni]]))
 86 |                     if torch.cuda.is_available():
 87 |                         decoder_input = decoder_input.cuda()
 88 |                     if ni == EOS_token:
 89 |                         break
 90 |             else:
 91 |                 for di in range(out_lang.size(1)):
 92 |                     decoder_output, decoder_hidden = decoder(
 93 |                         decoder_input, decoder_hidden)
 94 |                     loss += criterion(decoder_output, out_lang[:, di])
 95 |                     topv, topi = decoder_output.data.topk(1)
 96 |                     ni = topi[0][0]
 97 | 
 98 |                     decoder_input = Variable(torch.LongTensor([[ni]]))
 99 |                     if torch.cuda.is_available():
100 |                         decoder_input = decoder_input.cuda()
101 |                     if ni == EOS_token:
102 |                         break
103 |             optimizer.zero_grad()
104 |             loss.backward()
105 |             optimizer.step()
106 |             running_loss += loss.data[0]
107 |             print_loss_total += loss.data[0]
108 |             total_loss += loss.data[0]
109 |             if (i + 1) % 5000 == 0:
110 |                 print('{}/{}, Loss:{:.6f}'.format(
111 |                     i + 1, len(lang_dataloader), running_loss / 5000))
112 |                 running_loss = 0
113 |             if (i + 1) % 100 == 0:
114 |                 plot_loss = print_loss_total / 100
115 |                 plot_losses.append(plot_loss)
116 |                 print_loss_total = 0
117 |         during = time.time() - since
118 |         print('Finish {}/{} , Loss:{:.6f}, Time:{:.0f}s'.format(
119 |             epoch + 1, total_epoch, total_loss / len(lang_dataset), during))
120 |         print()
121 |     showPlot(plot_losses)
122 | 
123 | 
124 | if use_attn:
125 |     train(encoder, attn_decoder, total_epoch, use_attn=True)
126 | else:
127 |     train(encoder, decoder, total_epoch, use_attn=False)
128 | 
129 | print('finish training!')
130 | if use_attn:
131 |     torch.save(encoder.state_dict(), './encoder.pth')
132 |     torch.save(attn_decoder.state_dict(), './attn_decoder.pth')
133 | else:
134 |     torch.save(encoder.state_dict(), './encoder.pth')
135 |     torch.save(decoder.state_dict(), './decoder.pth')
136 | 


--------------------------------------------------------------------------------
/chapter2_PyTorch-Basics/PyTorch-introduction.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "markdown",
  5 |    "metadata": {},
  6 |    "source": [
  7 |     "![](https://ws2.sinaimg.cn/large/006tNc79ly1fmebdrkuawj30b3032a9w.jpg)"
  8 |    ]
  9 |   },
 10 |   {
 11 |    "cell_type": "markdown",
 12 |    "metadata": {},
 13 |    "source": [
 14 |     "# PyTorch 介绍"
 15 |    ]
 16 |   },
 17 |   {
 18 |    "cell_type": "markdown",
 19 |    "metadata": {},
 20 |    "source": [
 21 |     "PyTorch 是由 Torch7 团队开源的，这也是Facebook 的 AI 研究团队发布了一个 Python 工具包，据该项目官网介绍，是一个 Python 优先的深度学习框架，能够在强大的 GPU 加速基础上实现张量和动态神经网络。\n",
 22 |     "\n",
 23 |     "- [官网](http://pytorch.org/)\n",
 24 |     "- [Github](https://github.com/pytorch/pytorch)\n",
 25 |     "\n",
 26 |     "目前除了 Facebook 之外，也有大量的机构正在使用 PyTorch\n",
 27 |     "\n",
 28 |     "![](https://ws2.sinaimg.cn/large/006tNc79ly1fmebl3ayfij30kk0c2aac.jpg)"
 29 |    ]
 30 |   },
 31 |   {
 32 |    "cell_type": "markdown",
 33 |    "metadata": {},
 34 |    "source": [
 35 |     "PyTorch 的前身是 Torch，其是一个十分老牌、对多维矩阵数据进行操作的张量（tensor ）库，在机器学习和其他数学密集型应用有广泛应用，但由于其语言采用 Lua，导致在国内一直很小众，如今使用 Python 语言强势归来，快速的赢得了大量使用者。\n",
 36 |     "\n",
 37 |     "PyTorch 提供了两种高层面的功能：\n",
 38 |     "- 使用强大的 GPU 加速的 Tensor 计算（类似 numpy）\n",
 39 |     "- 构建于基于 autograd 系统的深度神经网络\n",
 40 |     "\n",
 41 |     "所以使用 PyTorch 的原因通常有两个：\n",
 42 |     "- 作为 numpy 的替代，以便使用强大的 GPU 加速；\n",
 43 |     "- 将其作为一个能提供最大灵活性和速度的深度学习研究平台"
 44 |    ]
 45 |   },
 46 |   {
 47 |    "cell_type": "markdown",
 48 |    "metadata": {},
 49 |    "source": [
 50 |     "PyTorch 作为一个 Python 优先的动态图框架，有下面几个特点"
 51 |    ]
 52 |   },
 53 |   {
 54 |    "cell_type": "markdown",
 55 |    "metadata": {},
 56 |    "source": [
 57 |     "### Python 优先\n",
 58 |     "PyTorch 不是简单地在整体 C++ 框架上绑定 Python，他深入构建在 Python 之上，你可以像使用 numpy/scipy/scikit-learn 那样轻松地使用 PyTorch，也可以用你喜欢的库和包在 PyTorch 中编写新的神经网络层，尽量让你不用重新发明轮子。\n",
 59 |     "\n",
 60 |     "### 命令式体验\n",
 61 |     "PyTorch 的设计思路是线性、直观且易于使用。当你需要执行一行代码时，它会忠实执行。PyTorch 没有异步的世界观。当你打开调试器，或接收到错误代码和 stack trace 时，你会发现理解这些信息是非常轻松的。Stack-trace 点将会直接指向代码定义的确切位置。我们不希望你在 debug 时会因为错误的指向或异步和不透明的引擎而浪费时间。\n",
 62 |     "\n",
 63 |     "### 快速精益\n",
 64 |     "PyTorch 具有轻巧的框架，集成了各种加速库，如 Intel MKL、英伟达的 CuDNN 和 NCCL 来优化速度。在其核心，它的 CPU 和 GPU Tensor 与神经网络后端（TH、THC、THNN、THCUNN）被编写成了独立的库，带有 C99 API。"
 65 |    ]
 66 |   },
 67 |   {
 68 |    "cell_type": "markdown",
 69 |    "metadata": {},
 70 |    "source": [
 71 |     "## 安装\n",
 72 |     "PyTorch 的安装非常方便，可以使用 Anaconda 进行安装，也可以使用 pip 进行安装，比如\n",
 73 |     "\n",
 74 |     "使用 conda 进行安装   \n",
 75 |     "`conda install pytorch torchvision -c pytorch`\n",
 76 |     "\n",
 77 |     "或者使用 pip   \n",
 78 |     "`pip install http://download.pytorch.org/whl/cu80/torch-0.3.0.post4-cp36-cp36m-linux_x86_64.whl \n",
 79 |     "pip install torchvision`\n",
 80 |     "\n",
 81 |     "目前只支持 Mac OSX 和 Linux 系统，Windows 系统在不久之后也会支持，更多详细信息可以访问[官网](http://pytorch.org/)"
 82 |    ]
 83 |   }
 84 |  ],
 85 |  "metadata": {
 86 |   "kernelspec": {
 87 |    "display_name": "mx",
 88 |    "language": "python",
 89 |    "name": "mx"
 90 |   },
 91 |   "language_info": {
 92 |    "codemirror_mode": {
 93 |     "name": "ipython",
 94 |     "version": 3
 95 |    },
 96 |    "file_extension": ".py",
 97 |    "mimetype": "text/x-python",
 98 |    "name": "python",
 99 |    "nbconvert_exporter": "python",
100 |    "pygments_lexer": "ipython3",
101 |    "version": "3.6.0"
102 |   }
103 |  },
104 |  "nbformat": 4,
105 |  "nbformat_minor": 2
106 | }
107 | 


--------------------------------------------------------------------------------
/chapter2_PyTorch-Basics/dynamic-graph.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "markdown",
  5 |    "metadata": {},
  6 |    "source": [
  7 |     "# 动态图和静态图\n",
  8 |     "目前神经网络框架分为静态图框架和动态图框架，PyTorch 和 TensorFlow、Caffe 等框架最大的区别就是他们拥有不同的计算图表现形式。 TensorFlow 使用静态图，这意味着我们先定义计算图，然后不断使用它，而在 PyTorch 中，每次都会重新构建一个新的计算图。通过这次课程，我们会了解静态图和动态图之间的优缺点。\n",
  9 |     "\n",
 10 |     "对于使用者来说，两种形式的计算图有着非常大的区别，同时静态图和动态图都有他们各自的优点，比如动态图比较方便debug，使用者能够用任何他们喜欢的方式进行debug，同时非常直观，而静态图是通过先定义后运行的方式，之后再次运行的时候就不再需要重新构建计算图，所以速度会比动态图更快。"
 11 |    ]
 12 |   },
 13 |   {
 14 |    "cell_type": "markdown",
 15 |    "metadata": {},
 16 |    "source": [
 17 |     "![](https://ws3.sinaimg.cn/large/006tNc79ly1fmai482qumg30rs0fmq6e.gif)"
 18 |    ]
 19 |   },
 20 |   {
 21 |    "cell_type": "markdown",
 22 |    "metadata": {},
 23 |    "source": [
 24 |     "下面我们比较 while 循环语句在 TensorFlow 和 PyTorch 中的定义"
 25 |    ]
 26 |   },
 27 |   {
 28 |    "cell_type": "markdown",
 29 |    "metadata": {},
 30 |    "source": [
 31 |     "## TensorFlow"
 32 |    ]
 33 |   },
 34 |   {
 35 |    "cell_type": "code",
 36 |    "execution_count": 1,
 37 |    "metadata": {
 38 |     "collapsed": true
 39 |    },
 40 |    "outputs": [],
 41 |    "source": [
 42 |     "# tensorflow\n",
 43 |     "import tensorflow as tf\n",
 44 |     "\n",
 45 |     "first_counter = tf.constant(0)\n",
 46 |     "second_counter = tf.constant(10)"
 47 |    ]
 48 |   },
 49 |   {
 50 |    "cell_type": "code",
 51 |    "execution_count": 2,
 52 |    "metadata": {
 53 |     "collapsed": true
 54 |    },
 55 |    "outputs": [],
 56 |    "source": [
 57 |     "def cond(first_counter, second_counter, *args):\n",
 58 |     "    return first_counter < second_counter\n",
 59 |     "\n",
 60 |     "def body(first_counter, second_counter):\n",
 61 |     "    first_counter = tf.add(first_counter, 2)\n",
 62 |     "    second_counter = tf.add(second_counter, 1)\n",
 63 |     "    return first_counter, second_counter"
 64 |    ]
 65 |   },
 66 |   {
 67 |    "cell_type": "code",
 68 |    "execution_count": 3,
 69 |    "metadata": {
 70 |     "collapsed": false
 71 |    },
 72 |    "outputs": [],
 73 |    "source": [
 74 |     "c1, c2 = tf.while_loop(cond, body, [first_counter, second_counter])"
 75 |    ]
 76 |   },
 77 |   {
 78 |    "cell_type": "code",
 79 |    "execution_count": 4,
 80 |    "metadata": {
 81 |     "collapsed": true
 82 |    },
 83 |    "outputs": [],
 84 |    "source": [
 85 |     "with tf.Session() as sess:\n",
 86 |     "    counter_1_res, counter_2_res = sess.run([c1, c2])"
 87 |    ]
 88 |   },
 89 |   {
 90 |    "cell_type": "code",
 91 |    "execution_count": 5,
 92 |    "metadata": {
 93 |     "collapsed": false
 94 |    },
 95 |    "outputs": [
 96 |     {
 97 |      "name": "stdout",
 98 |      "output_type": "stream",
 99 |      "text": [
100 |       "20\n",
101 |       "20\n"
102 |      ]
103 |     }
104 |    ],
105 |    "source": [
106 |     "print(counter_1_res)\n",
107 |     "print(counter_2_res)"
108 |    ]
109 |   },
110 |   {
111 |    "cell_type": "markdown",
112 |    "metadata": {},
113 |    "source": [
114 |     "可以看到 TensorFlow 需要将整个图构建成静态的，换句话说，每次运行的时候图都是一样的，是不能够改变的，所以不能直接使用 Python 的 while 循环语句，需要使用辅助函数 `tf.while_loop` 写成 TensorFlow 内部的形式\n",
115 |     "\n",
116 |     "这是非常反直觉的，学习成本也是比较高的\n",
117 |     "\n",
118 |     "下面我们来看看 PyTorch 的动态图机制，这使得我们能够使用 Python 的 while 写循环，非常方便"
119 |    ]
120 |   },
121 |   {
122 |    "cell_type": "markdown",
123 |    "metadata": {},
124 |    "source": [
125 |     "## PyTorch"
126 |    ]
127 |   },
128 |   {
129 |    "cell_type": "code",
130 |    "execution_count": 6,
131 |    "metadata": {
132 |     "collapsed": true
133 |    },
134 |    "outputs": [],
135 |    "source": [
136 |     "# pytorch\n",
137 |     "import torch\n",
138 |     "first_counter = torch.Tensor([0])\n",
139 |     "second_counter = torch.Tensor([10])"
140 |    ]
141 |   },
142 |   {
143 |    "cell_type": "code",
144 |    "execution_count": 11,
145 |    "metadata": {
146 |     "collapsed": false
147 |    },
148 |    "outputs": [],
149 |    "source": [
150 |     "while (first_counter < second_counter)[0]:\n",
151 |     "    first_counter += 2\n",
152 |     "    second_counter += 1"
153 |    ]
154 |   },
155 |   {
156 |    "cell_type": "code",
157 |    "execution_count": 12,
158 |    "metadata": {
159 |     "collapsed": false
160 |    },
161 |    "outputs": [
162 |     {
163 |      "name": "stdout",
164 |      "output_type": "stream",
165 |      "text": [
166 |       "\n",
167 |       " 20\n",
168 |       "[torch.FloatTensor of size 1]\n",
169 |       "\n",
170 |       "\n",
171 |       " 20\n",
172 |       "[torch.FloatTensor of size 1]\n",
173 |       "\n"
174 |      ]
175 |     }
176 |    ],
177 |    "source": [
178 |     "print(first_counter)\n",
179 |     "print(second_counter)"
180 |    ]
181 |   },
182 |   {
183 |    "cell_type": "markdown",
184 |    "metadata": {},
185 |    "source": [
186 |     "可以看到 PyTorch 的写法跟 Python 的写法是完全一致的，没有任何额外的学习成本\n",
187 |     "\n",
188 |     "上面的例子展示如何使用静态图和动态图构建 while 循环，看起来动态图的方式更加简单且直观，你觉得呢？"
189 |    ]
190 |   }
191 |  ],
192 |  "metadata": {
193 |   "kernelspec": {
194 |    "display_name": "mx",
195 |    "language": "python",
196 |    "name": "mx"
197 |   },
198 |   "language_info": {
199 |    "codemirror_mode": {
200 |     "name": "ipython",
201 |     "version": 3
202 |    },
203 |    "file_extension": ".py",
204 |    "mimetype": "text/x-python",
205 |    "name": "python",
206 |    "nbconvert_exporter": "python",
207 |    "pygments_lexer": "ipython3",
208 |    "version": "3.6.0"
209 |   }
210 |  },
211 |  "nbformat": 4,
212 |  "nbformat_minor": 2
213 | }
214 | 


--------------------------------------------------------------------------------
/chapter3_NN/bp.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "markdown",
  5 |    "metadata": {},
  6 |    "source": [
  7 |     "# 反向传播算法\n",
  8 |     "\n",
  9 |     "前面我们介绍了三个模型，整个处理的基本流程都是定义模型，读入数据，给出损失函数$f$，通过梯度下降法更新参数。PyTorch 提供了非常简单的自动求导帮助我们求解导数，对于比较简单的模型，我们也能手动求出参数的梯度，但是对于非常复杂的模型，比如一个 100 层的网络，我们如何能够有效地手动求出这个梯度呢？这里就需要引入反向传播算法，自动求导本质是就是一个反向传播算法。\n",
 10 |     "\n",
 11 |     "反向传播算法是一个有效地求解梯度的算法，本质上其实就是一个链式求导法则的应用，然而这个如此简单而且显而易见的方法却是在 Roseblatt 提出感知机算法后将近 30 年才被发明和普及的，对此 Bengio 这样说道：“很多看似显而易见的想法只有在事后才变得的显而易见。”\n",
 12 |     "\n",
 13 |     "下面我们就来详细将一讲什么是反向传播算法。"
 14 |    ]
 15 |   },
 16 |   {
 17 |    "cell_type": "markdown",
 18 |    "metadata": {},
 19 |    "source": [
 20 |     "## 链式法则\n",
 21 |     "\n",
 22 |     "首先来简单地介绍一下链式法则，考虑一个简单的函数，比如\n",
 23 |     "$$f(x, y, z) = (x + y)z$$\n",
 24 |     "\n",
 25 |     "我们当然可以直接求出这个函数的微分，但是这里我们要使用链式法则，令\n",
 26 |     "$$q=x+y$$\n",
 27 |     "\n",
 28 |     "那么\n",
 29 |     "\n",
 30 |     "$$f = qz$$\n",
 31 |     "\n",
 32 |     "对于这两个式子，我们可以分别求出他们的微分 \n",
 33 |     "\n",
 34 |     "$$\\frac{\\partial f}{\\partial q} = z, \\frac{\\partial f}{\\partial z}=q$$\n",
 35 |     "\n",
 36 |     "同时$q$是$x$和$y$的求和，所以我们能够得到\n",
 37 |     "\n",
 38 |     "$$\\frac{\\partial q}{x} = 1, \\frac{\\partial q}{y} = 1$$\n",
 39 |     "\n",
 40 |     "我们关心的问题是\n",
 41 |     "\n",
 42 |     "$$\\frac{\\partial f}{\\partial x}, \\frac{\\partial f}{\\partial y}, \\frac{\\partial f}{\\partial z}$$\n",
 43 |     "\n",
 44 |     "链式法则告诉我们如何来计算出他们的值\n",
 45 |     "\n",
 46 |     "$$\n",
 47 |     "\\frac{\\partial f}{\\partial x} = \\frac{\\partial f}{\\partial q}\\frac{\\partial q}{\\partial x}\n",
 48 |     "$$\n",
 49 |     "$$\n",
 50 |     "\\frac{\\partial f}{\\partial y} = \\frac{\\partial f}{\\partial q}\\frac{\\partial q}{\\partial y}\n",
 51 |     "$$\n",
 52 |     "$$\n",
 53 |     "\\frac{\\partial f}{\\partial z} = q\n",
 54 |     "$$\n",
 55 |     "\n",
 56 |     "通过链式法则我们知道如果我们需要对其中的元素求导，那么我们可以一层一层求导然后将结果乘起来，这就是链式法则的核心，也是反向传播算法的核心，更多关于链式法则的算法，可以访问这个[文档](https://zh.wikipedia.org/wiki/%E9%93%BE%E5%BC%8F%E6%B3%95%E5%88%99)"
 57 |    ]
 58 |   },
 59 |   {
 60 |    "cell_type": "markdown",
 61 |    "metadata": {},
 62 |    "source": [
 63 |     "## 反向传播算法\n",
 64 |     "\n",
 65 |     "了解了链式法则，我们就可以开始介绍反向传播算法了，本质上反向传播算法只是链式法则的一个应用。我们还是使用之前那个相同的例子$q=x+y, f=qz$，通过计算图可以将这个计算过程表达出来\n",
 66 |     "\n",
 67 |     "![](https://ws1.sinaimg.cn/large/006tNc79ly1fmiozcinyzj30c806vglk.jpg)\n",
 68 |     "\n",
 69 |     "上面绿色的数字表示其数值，下面红色的数字表示求出的梯度，我们可以一步一步看看反向传播算法的实现。首先从最后开始，梯度当然是1，然后计算\n",
 70 |     "\n",
 71 |     "$$\\frac{\\partial f}{\\partial q} = z = -4,\\ \\frac{\\partial f}{\\partial z} = q = 3$$\n",
 72 |     "\n",
 73 |     "接着我们计算\n",
 74 |     "$$\\frac{\\partial f}{\\partial x} = \\frac{\\partial f}{\\partial q} \\frac{\\partial q}{\\partial x} = -4 \\times 1 = -4,\\ \\frac{\\partial f}{\\partial y} = \\frac{\\partial f}{\\partial q} \\frac{\\partial q}{\\partial y} = -4 \\times 1 = -4$$\n",
 75 |     "\n",
 76 |     "这样一步一步我们就求出了$\\nabla f(x, y, z)$。\n",
 77 |     "\n",
 78 |     "直观上看反向传播算法是一个优雅的局部过程，每次求导只是对当前的运算求导，求解每层网络的参数都是通过链式法则将前面的结果求出不断迭代到这一层，所以说这是一个传播过程\n",
 79 |     "\n",
 80 |     "### Sigmoid函数举例\n",
 81 |     "\n",
 82 |     "下面我们通过Sigmoid函数来演示反向传播过程在一个复杂的函数上是如何进行的。\n",
 83 |     "\n",
 84 |     "$$\n",
 85 |     "f(w, x) = \\frac{1}{1+e^{-(w_0 x_0 + w_1 x_1 + w_2)}}\n",
 86 |     "$$\n",
 87 |     "\n",
 88 |     "我们需要求解出\n",
 89 |     "$$\\frac{\\partial f}{\\partial w_0}, \\frac{\\partial f}{\\partial w_1}, \\frac{\\partial f}{\\partial w_2}$$\n",
 90 |     "\n",
 91 |     "首先我们将这个函数抽象成一个计算图来表示，即\n",
 92 |     "$$\n",
 93 |     "   f(x) = \\frac{1}{x} \\\\\n",
 94 |     "   f_c(x) = 1 + x \\\\\n",
 95 |     "   f_e(x) = e^x \\\\\n",
 96 |     "   f_w(x) = -(w_0 x_0 + w_1 x_1 + w_2)\n",
 97 |     "$$\n",
 98 |     "\n",
 99 |     "这样我们就能够画出下面的计算图\n",
100 |     "\n",
101 |     "![](https://ws1.sinaimg.cn/large/006tNc79ly1fmip1va5qjj30lb08e0t0.jpg)\n",
102 |     "\n",
103 |     "同样上面绿色的数子表示数值，下面红色的数字表示梯度，我们从后往前计算一下各个参数的梯度。首先最后面的梯度是1,，然后经过$\\frac{1}{x}$这个函数，这个函数的梯度是$-\\frac{1}{x^2}$，所以往前传播的梯度是$1 \\times -\\frac{1}{1.37^2} = -0.53$，然后是$+1$这个操作，梯度不变，接着是$e^x$这个运算，它的梯度就是$-0.53 \\times e^{-1} = -0.2$，这样不断往后传播就能够求得每个参数的梯度。"
104 |    ]
105 |   }
106 |  ],
107 |  "metadata": {
108 |   "kernelspec": {
109 |    "display_name": "mx",
110 |    "language": "python",
111 |    "name": "mx"
112 |   },
113 |   "language_info": {
114 |    "codemirror_mode": {
115 |     "name": "ipython",
116 |     "version": 3
117 |    },
118 |    "file_extension": ".py",
119 |    "mimetype": "text/x-python",
120 |    "name": "python",
121 |    "nbconvert_exporter": "python",
122 |    "pygments_lexer": "ipython3",
123 |    "version": "3.6.0"
124 |   }
125 |  },
126 |  "nbformat": 4,
127 |  "nbformat_minor": 2
128 | }
129 | 


--------------------------------------------------------------------------------
/chapter3_NN/logistic-regression/data.txt:
--------------------------------------------------------------------------------
  1 | 34.62365962451697,78.0246928153624,0
  2 | 30.28671076822607,43.89499752400101,0
  3 | 35.84740876993872,72.90219802708364,0
  4 | 60.18259938620976,86.30855209546826,1
  5 | 79.0327360507101,75.3443764369103,1
  6 | 45.08327747668339,56.3163717815305,0
  7 | 61.10666453684766,96.51142588489624,1
  8 | 75.02474556738889,46.55401354116538,1
  9 | 76.09878670226257,87.42056971926803,1
 10 | 84.43281996120035,43.53339331072109,1
 11 | 95.86155507093572,38.22527805795094,0
 12 | 75.01365838958247,30.60326323428011,0
 13 | 82.30705337399482,76.48196330235604,1
 14 | 69.36458875970939,97.71869196188608,1
 15 | 39.53833914367223,76.03681085115882,0
 16 | 53.9710521485623,89.20735013750205,1
 17 | 69.07014406283025,52.74046973016765,1
 18 | 67.94685547711617,46.67857410673128,0
 19 | 70.66150955499435,92.92713789364831,1
 20 | 76.97878372747498,47.57596364975532,1
 21 | 67.37202754570876,42.83843832029179,0
 22 | 89.67677575072079,65.79936592745237,1
 23 | 50.534788289883,48.85581152764205,0
 24 | 34.21206097786789,44.20952859866288,0
 25 | 77.9240914545704,68.9723599933059,1
 26 | 62.27101367004632,69.95445795447587,1
 27 | 80.1901807509566,44.82162893218353,1
 28 | 93.114388797442,38.80067033713209,0
 29 | 61.83020602312595,50.25610789244621,0
 30 | 38.78580379679423,64.99568095539578,0
 31 | 61.379289447425,72.80788731317097,1
 32 | 85.40451939411645,57.05198397627122,1
 33 | 52.10797973193984,63.12762376881715,0
 34 | 52.04540476831827,69.43286012045222,1
 35 | 40.23689373545111,71.16774802184875,0
 36 | 54.63510555424817,52.21388588061123,0
 37 | 33.91550010906887,98.86943574220611,0
 38 | 64.17698887494485,80.90806058670817,1
 39 | 74.78925295941542,41.57341522824434,0
 40 | 34.1836400264419,75.2377203360134,0
 41 | 83.90239366249155,56.30804621605327,1
 42 | 51.54772026906181,46.85629026349976,0
 43 | 94.44336776917852,65.56892160559052,1
 44 | 82.36875375713919,40.61825515970618,0
 45 | 51.04775177128865,45.82270145776001,0
 46 | 62.22267576120188,52.06099194836679,0
 47 | 77.19303492601364,70.45820000180959,1
 48 | 97.77159928000232,86.7278223300282,1
 49 | 62.07306379667647,96.76882412413983,1
 50 | 91.56497449807442,88.69629254546599,1
 51 | 79.94481794066932,74.16311935043758,1
 52 | 99.2725269292572,60.99903099844988,1
 53 | 90.54671411399852,43.39060180650027,1
 54 | 34.52451385320009,60.39634245837173,0
 55 | 50.2864961189907,49.80453881323059,0
 56 | 49.58667721632031,59.80895099453265,0
 57 | 97.64563396007767,68.86157272420604,1
 58 | 32.57720016809309,95.59854761387875,0
 59 | 74.24869136721598,69.82457122657193,1
 60 | 71.79646205863379,78.45356224515052,1
 61 | 75.3956114656803,85.75993667331619,1
 62 | 35.28611281526193,47.02051394723416,0
 63 | 56.25381749711624,39.26147251058019,0
 64 | 30.05882244669796,49.59297386723685,0
 65 | 44.66826172480893,66.45008614558913,0
 66 | 66.56089447242954,41.09209807936973,0
 67 | 40.45755098375164,97.53518548909936,1
 68 | 49.07256321908844,51.88321182073966,0
 69 | 80.27957401466998,92.11606081344084,1
 70 | 66.74671856944039,60.99139402740988,1
 71 | 32.72283304060323,43.30717306430063,0
 72 | 64.0393204150601,78.03168802018232,1
 73 | 72.34649422579923,96.22759296761404,1
 74 | 60.45788573918959,73.09499809758037,1
 75 | 58.84095621726802,75.85844831279042,1
 76 | 99.82785779692128,72.36925193383885,1
 77 | 47.26426910848174,88.47586499559782,1
 78 | 50.45815980285988,75.80985952982456,1
 79 | 60.45555629271532,42.50840943572217,0
 80 | 82.22666157785568,42.71987853716458,0
 81 | 88.9138964166533,69.80378889835472,1
 82 | 94.83450672430196,45.69430680250754,1
 83 | 67.31925746917527,66.58935317747915,1
 84 | 57.23870631569862,59.51428198012956,1
 85 | 80.36675600171273,90.96014789746954,1
 86 | 68.46852178591112,85.59430710452014,1
 87 | 42.0754545384731,78.84478600148043,0
 88 | 75.47770200533905,90.42453899753964,1
 89 | 78.63542434898018,96.64742716885644,1
 90 | 52.34800398794107,60.76950525602592,0
 91 | 94.09433112516793,77.15910509073893,1
 92 | 90.44855097096364,87.50879176484702,1
 93 | 55.48216114069585,35.57070347228866,0
 94 | 74.49269241843041,84.84513684930135,1
 95 | 89.84580670720979,45.35828361091658,1
 96 | 83.48916274498238,48.38028579728175,1
 97 | 42.2617008099817,87.10385094025457,1
 98 | 99.31500880510394,68.77540947206617,1
 99 | 55.34001756003703,64.9319380069486,1
100 | 74.77589300092767,89.52981289513276,1
101 | 


--------------------------------------------------------------------------------
/chapter4_CNN/cat.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/L1aoXingyu/code-of-learn-deep-learning-with-pytorch/ea24810dc26265e85e3eec34e1d091d21202d800/chapter4_CNN/cat.png


--------------------------------------------------------------------------------
/chapter4_CNN/regularization.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "markdown",
  5 |    "metadata": {},
  6 |    "source": [
  7 |     "# 正则化\n",
  8 |     "前面我们讲了数据增强和 dropout，而在实际使用中，现在的网络往往不使用 dropout，而是用另外一个技术，叫正则化。\n",
  9 |     "\n",
 10 |     "正则化是机器学习中提出来的一种方法，有 L1 和 L2 正则化，目前使用较多的是 L2 正则化，引入正则化相当于在 loss 函数上面加上一项，比如\n",
 11 |     "\n",
 12 |     "$$\n",
 13 |     "f = loss + \\lambda \\sum_{p \\in params} ||p||_2^2\n",
 14 |     "$$\n",
 15 |     "\n",
 16 |     "就是在 loss 的基础上加上了参数的二范数作为一个正则化，我们在训练网络的时候，不仅要最小化 loss 函数，同时还要最小化参数的二范数，也就是说我们会对参数做一些限制，不让它变得太大。"
 17 |    ]
 18 |   },
 19 |   {
 20 |    "cell_type": "markdown",
 21 |    "metadata": {},
 22 |    "source": [
 23 |     "如果我们对新的损失函数 f 求导进行梯度下降，就有\n",
 24 |     "\n",
 25 |     "$$\n",
 26 |     "\\frac{\\partial f}{\\partial p_j} = \\frac{\\partial loss}{\\partial p_j} + 2 \\lambda p_j\n",
 27 |     "$$\n",
 28 |     "\n",
 29 |     "那么在更新参数的时候就有\n",
 30 |     "\n",
 31 |     "$$\n",
 32 |     "p_j \\rightarrow p_j - \\eta (\\frac{\\partial loss}{\\partial p_j} + 2 \\lambda p_j) = p_j - \\eta \\frac{\\partial loss}{\\partial p_j} - 2 \\eta \\lambda p_j \n",
 33 |     "$$\n"
 34 |    ]
 35 |   },
 36 |   {
 37 |    "cell_type": "markdown",
 38 |    "metadata": {},
 39 |    "source": [
 40 |     "可以看到 $p_j - \\eta \\frac{\\partial loss}{\\partial p_j}$ 和没加正则项要更新的部分一样，而后面的 $2\\eta \\lambda p_j$ 就是正则项的影响，可以看到加完正则项之后会对参数做更大程度的更新，这也被称为权重衰减(weight decay)，在 pytorch 中正则项就是通过这种方式来加入的，比如想在随机梯度下降法中使用正则项，或者说权重衰减，`torch.optim.SGD(net.parameters(), lr=0.1, weight_decay=1e-4)` 就可以了，这个 `weight_decay` 系数就是上面公式中的 $\\lambda$，非常方便\n",
 41 |     "\n",
 42 |     "注意正则项的系数的大小非常重要，如果太大，会极大的抑制参数的更新，导致欠拟合，如果太小，那么正则项这个部分基本没有贡献，所以选择一个合适的权重衰减系数非常重要，这个需要根据具体的情况去尝试，初步尝试可以使用 `1e-4` 或者 `1e-3` \n",
 43 |     "\n",
 44 |     "下面我们在训练 cifar 10 中添加正则项"
 45 |    ]
 46 |   },
 47 |   {
 48 |    "cell_type": "code",
 49 |    "execution_count": 1,
 50 |    "metadata": {
 51 |     "ExecuteTime": {
 52 |      "end_time": "2017-12-24T08:02:11.903459Z",
 53 |      "start_time": "2017-12-24T08:02:11.383170Z"
 54 |     },
 55 |     "collapsed": true
 56 |    },
 57 |    "outputs": [],
 58 |    "source": [
 59 |     "import sys\n",
 60 |     "sys.path.append('..')\n",
 61 |     "\n",
 62 |     "import numpy as np\n",
 63 |     "import torch\n",
 64 |     "from torch import nn\n",
 65 |     "import torch.nn.functional as F\n",
 66 |     "from torch.autograd import Variable\n",
 67 |     "from torchvision.datasets import CIFAR10\n",
 68 |     "from utils import train, resnet\n",
 69 |     "from torchvision import transforms as tfs"
 70 |    ]
 71 |   },
 72 |   {
 73 |    "cell_type": "code",
 74 |    "execution_count": 2,
 75 |    "metadata": {
 76 |     "ExecuteTime": {
 77 |      "end_time": "2017-12-24T08:02:13.120502Z",
 78 |      "start_time": "2017-12-24T08:02:11.905617Z"
 79 |     },
 80 |     "collapsed": true
 81 |    },
 82 |    "outputs": [],
 83 |    "source": [
 84 |     "def data_tf(x):\n",
 85 |     "    im_aug = tfs.Compose([\n",
 86 |     "        tfs.Resize(96),\n",
 87 |     "        tfs.ToTensor(),\n",
 88 |     "        tfs.Normalize([0.5, 0.5, 0.5], [0.5, 0.5, 0.5])\n",
 89 |     "    ])\n",
 90 |     "    x = im_aug(x)\n",
 91 |     "    return x\n",
 92 |     "\n",
 93 |     "train_set = CIFAR10('./data', train=True, transform=data_tf)\n",
 94 |     "train_data = torch.utils.data.DataLoader(train_set, batch_size=64, shuffle=True, num_workers=4)\n",
 95 |     "test_set = CIFAR10('./data', train=False, transform=data_tf)\n",
 96 |     "test_data = torch.utils.data.DataLoader(test_set, batch_size=128, shuffle=False, num_workers=4)\n",
 97 |     "\n",
 98 |     "net = resnet(3, 10)\n",
 99 |     "optimizer = torch.optim.SGD(net.parameters(), lr=0.01, weight_decay=1e-4) # 增加正则项\n",
100 |     "criterion = nn.CrossEntropyLoss()"
101 |    ]
102 |   },
103 |   {
104 |    "cell_type": "code",
105 |    "execution_count": 3,
106 |    "metadata": {
107 |     "ExecuteTime": {
108 |      "end_time": "2017-12-24T08:11:36.106177Z",
109 |      "start_time": "2017-12-24T08:02:13.122785Z"
110 |     }
111 |    },
112 |    "outputs": [
113 |     {
114 |      "name": "stdout",
115 |      "output_type": "stream",
116 |      "text": [
117 |       "Epoch 0. Train Loss: 1.429834, Train Acc: 0.476982, Valid Loss: 1.261334, Valid Acc: 0.546776, Time 00:00:26\n",
118 |       "Epoch 1. Train Loss: 0.994539, Train Acc: 0.645400, Valid Loss: 1.310620, Valid Acc: 0.554688, Time 00:00:27\n",
119 |       "Epoch 2. Train Loss: 0.788570, Train Acc: 0.723585, Valid Loss: 1.256101, Valid Acc: 0.577433, Time 00:00:28\n",
120 |       "Epoch 3. Train Loss: 0.629832, Train Acc: 0.780411, Valid Loss: 1.222015, Valid Acc: 0.609474, Time 00:00:27\n",
121 |       "Epoch 4. Train Loss: 0.500406, Train Acc: 0.825288, Valid Loss: 0.831702, Valid Acc: 0.720332, Time 00:00:27\n",
122 |       "Epoch 5. Train Loss: 0.388376, Train Acc: 0.868646, Valid Loss: 0.829582, Valid Acc: 0.726760, Time 00:00:27\n",
123 |       "Epoch 6. Train Loss: 0.291237, Train Acc: 0.902094, Valid Loss: 1.499777, Valid Acc: 0.623714, Time 00:00:28\n",
124 |       "Epoch 7. Train Loss: 0.222401, Train Acc: 0.925072, Valid Loss: 1.832660, Valid Acc: 0.558643, Time 00:00:28\n",
125 |       "Epoch 8. Train Loss: 0.157753, Train Acc: 0.947990, Valid Loss: 1.255313, Valid Acc: 0.668117, Time 00:00:28\n",
126 |       "Epoch 9. Train Loss: 0.111407, Train Acc: 0.963595, Valid Loss: 1.004693, Valid Acc: 0.724782, Time 00:00:27\n",
127 |       "Epoch 10. Train Loss: 0.084960, Train Acc: 0.972926, Valid Loss: 0.867961, Valid Acc: 0.775119, Time 00:00:27\n",
128 |       "Epoch 11. Train Loss: 0.066854, Train Acc: 0.979280, Valid Loss: 1.011263, Valid Acc: 0.749604, Time 00:00:28\n",
129 |       "Epoch 12. Train Loss: 0.048280, Train Acc: 0.985534, Valid Loss: 2.438345, Valid Acc: 0.576938, Time 00:00:27\n",
130 |       "Epoch 13. Train Loss: 0.046176, Train Acc: 0.985614, Valid Loss: 1.008425, Valid Acc: 0.756527, Time 00:00:27\n",
131 |       "Epoch 14. Train Loss: 0.039515, Train Acc: 0.988411, Valid Loss: 0.945017, Valid Acc: 0.766317, Time 00:00:27\n",
132 |       "Epoch 15. Train Loss: 0.025882, Train Acc: 0.992667, Valid Loss: 0.918691, Valid Acc: 0.784217, Time 00:00:27\n",
133 |       "Epoch 16. Train Loss: 0.018592, Train Acc: 0.994985, Valid Loss: 1.507427, Valid Acc: 0.680281, Time 00:00:27\n",
134 |       "Epoch 17. Train Loss: 0.021062, Train Acc: 0.994246, Valid Loss: 2.976452, Valid Acc: 0.558940, Time 00:00:27\n",
135 |       "Epoch 18. Train Loss: 0.021458, Train Acc: 0.993926, Valid Loss: 0.927871, Valid Acc: 0.785898, Time 00:00:27\n",
136 |       "Epoch 19. Train Loss: 0.015656, Train Acc: 0.995824, Valid Loss: 0.962502, Valid Acc: 0.782832, Time 00:00:27\n"
137 |      ]
138 |     }
139 |    ],
140 |    "source": [
141 |     "from utils import train\n",
142 |     "train(net, train_data, test_data, 20, optimizer, criterion)"
143 |    ]
144 |   }
145 |  ],
146 |  "metadata": {
147 |   "kernelspec": {
148 |    "display_name": "Python 3",
149 |    "language": "python",
150 |    "name": "python3"
151 |   },
152 |   "language_info": {
153 |    "codemirror_mode": {
154 |     "name": "ipython",
155 |     "version": 3
156 |    },
157 |    "file_extension": ".py",
158 |    "mimetype": "text/x-python",
159 |    "name": "python",
160 |    "nbconvert_exporter": "python",
161 |    "pygments_lexer": "ipython3",
162 |    "version": "3.6.3"
163 |   }
164 |  },
165 |  "nbformat": 4,
166 |  "nbformat_minor": 2
167 | }
168 | 


--------------------------------------------------------------------------------
/chapter4_CNN/utils.py:
--------------------------------------------------------------------------------
  1 | from datetime import datetime
  2 | 
  3 | import torch
  4 | import torch.nn.functional as F
  5 | from torch import nn
  6 | from torch.autograd import Variable
  7 | 
  8 | 
  9 | def get_acc(output, label):
 10 |     total = output.shape[0]
 11 |     _, pred_label = output.max(1)
 12 |     num_correct = (pred_label == label).sum().data[0]
 13 |     return num_correct / total
 14 | 
 15 | 
 16 | def train(net, train_data, valid_data, num_epochs, optimizer, criterion):
 17 |     if torch.cuda.is_available():
 18 |         net = net.cuda()
 19 |     prev_time = datetime.now()
 20 |     for epoch in range(num_epochs):
 21 |         train_loss = 0
 22 |         train_acc = 0
 23 |         net = net.train()
 24 |         for im, label in train_data:
 25 |             if torch.cuda.is_available():
 26 |                 im = Variable(im.cuda())  # (bs, 3, h, w)
 27 |                 label = Variable(label.cuda())  # (bs, h, w)
 28 |             else:
 29 |                 im = Variable(im)
 30 |                 label = Variable(label)
 31 |             # forward
 32 |             output = net(im)
 33 |             loss = criterion(output, label)
 34 |             # backward
 35 |             optimizer.zero_grad()
 36 |             loss.backward()
 37 |             optimizer.step()
 38 | 
 39 |             train_loss += loss.data[0]
 40 |             train_acc += get_acc(output, label)
 41 | 
 42 |         cur_time = datetime.now()
 43 |         h, remainder = divmod((cur_time - prev_time).seconds, 3600)
 44 |         m, s = divmod(remainder, 60)
 45 |         time_str = "Time %02d:%02d:%02d" % (h, m, s)
 46 |         if valid_data is not None:
 47 |             valid_loss = 0
 48 |             valid_acc = 0
 49 |             net = net.eval()
 50 |             for im, label in valid_data:
 51 |                 if torch.cuda.is_available():
 52 |                     im = Variable(im.cuda(), volatile=True)
 53 |                     label = Variable(label.cuda(), volatile=True)
 54 |                 else:
 55 |                     im = Variable(im, volatile=True)
 56 |                     label = Variable(label, volatile=True)
 57 |                 output = net(im)
 58 |                 loss = criterion(output, label)
 59 |                 valid_loss += loss.data[0]
 60 |                 valid_acc += get_acc(output, label)
 61 |             epoch_str = (
 62 |                 "Epoch %d. Train Loss: %f, Train Acc: %f, Valid Loss: %f, Valid Acc: %f, "
 63 |                 % (epoch, train_loss / len(train_data),
 64 |                    train_acc / len(train_data), valid_loss / len(valid_data),
 65 |                    valid_acc / len(valid_data)))
 66 |         else:
 67 |             epoch_str = ("Epoch %d. Train Loss: %f, Train Acc: %f, " %
 68 |                          (epoch, train_loss / len(train_data),
 69 |                           train_acc / len(train_data)))
 70 |         prev_time = cur_time
 71 |         print(epoch_str + time_str)
 72 | 
 73 | 
 74 | def conv3x3(in_channel, out_channel, stride=1):
 75 |     return nn.Conv2d(
 76 |         in_channel, out_channel, 3, stride=stride, padding=1, bias=False)
 77 | 
 78 | 
 79 | class residual_block(nn.Module):
 80 |     def __init__(self, in_channel, out_channel, same_shape=True):
 81 |         super(residual_block, self).__init__()
 82 |         self.same_shape = same_shape
 83 |         stride = 1 if self.same_shape else 2
 84 | 
 85 |         self.conv1 = conv3x3(in_channel, out_channel, stride=stride)
 86 |         self.bn1 = nn.BatchNorm2d(out_channel)
 87 | 
 88 |         self.conv2 = conv3x3(out_channel, out_channel)
 89 |         self.bn2 = nn.BatchNorm2d(out_channel)
 90 |         if not self.same_shape:
 91 |             self.conv3 = nn.Conv2d(in_channel, out_channel, 1, stride=stride)
 92 | 
 93 |     def forward(self, x):
 94 |         out = self.conv1(x)
 95 |         out = F.relu(self.bn1(out), True)
 96 |         out = self.conv2(out)
 97 |         out = F.relu(self.bn2(out), True)
 98 | 
 99 |         if not self.same_shape:
100 |             x = self.conv3(x)
101 |         return F.relu(x + out, True)
102 | 
103 | 
104 | class resnet(nn.Module):
105 |     def __init__(self, in_channel, num_classes, verbose=False):
106 |         super(resnet, self).__init__()
107 |         self.verbose = verbose
108 | 
109 |         self.block1 = nn.Conv2d(in_channel, 64, 7, 2)
110 | 
111 |         self.block2 = nn.Sequential(
112 |             nn.MaxPool2d(3, 2), residual_block(64, 64), residual_block(64, 64))
113 | 
114 |         self.block3 = nn.Sequential(
115 |             residual_block(64, 128, False), residual_block(128, 128))
116 | 
117 |         self.block4 = nn.Sequential(
118 |             residual_block(128, 256, False), residual_block(256, 256))
119 | 
120 |         self.block5 = nn.Sequential(
121 |             residual_block(256, 512, False),
122 |             residual_block(512, 512), nn.AvgPool2d(3))
123 | 
124 |         self.classifier = nn.Linear(512, num_classes)
125 | 
126 |     def forward(self, x):
127 |         x = self.block1(x)
128 |         if self.verbose:
129 |             print('block 1 output: {}'.format(x.shape))
130 |         x = self.block2(x)
131 |         if self.verbose:
132 |             print('block 2 output: {}'.format(x.shape))
133 |         x = self.block3(x)
134 |         if self.verbose:
135 |             print('block 3 output: {}'.format(x.shape))
136 |         x = self.block4(x)
137 |         if self.verbose:
138 |             print('block 4 output: {}'.format(x.shape))
139 |         x = self.block5(x)
140 |         if self.verbose:
141 |             print('block 5 output: {}'.format(x.shape))
142 |         x = x.view(x.shape[0], -1)
143 |         x = self.classifier(x)
144 |         return x
145 | 


--------------------------------------------------------------------------------
/chapter5_RNN/nlp/word-embedding.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "markdown",
  5 |    "metadata": {},
  6 |    "source": [
  7 |     "# 词嵌入\n",
  8 |     "前面讲了循环神经网络做简单的图像分类问题和飞机流量时序预测，但是现在循环神经网络最火热的应用是自然语言处理，下面我们介绍一下自然语言处理中如果运用循环神经网络，首先我们介绍一下第一个概念，词嵌入。"
  9 |    ]
 10 |   },
 11 |   {
 12 |    "cell_type": "markdown",
 13 |    "metadata": {},
 14 |    "source": [
 15 |     "对于图像分类问题，我们可以使用 one-hot 的类型去编码，比如一共有 5 类，那么属于第二类就可以用 (0, 1, 0, 0, 0) 去表示，对于分类问题，这样当然忒别简单，但是在自然语言处理中，因为单词的数目过多，这样做就行不通了，比如有 10000 个不同的词，那么使用 one-hot 不仅效率低，同时还没有办法表达出单词的特点，这个时候就引入了词嵌入去表达每一个单词。"
 16 |    ]
 17 |   },
 18 |   {
 19 |    "cell_type": "markdown",
 20 |    "metadata": {},
 21 |    "source": [
 22 |     "词向量简单来说就是用一个向量去表示一个词语，但是这个向量并不是随机的，因为这样并没有任何意义，所以我们需要对每个词有一个特定的向量去表示他们，而有一些词的词性是相近的，比如”(love)喜欢”和”(like)爱”，对于这种词性相近的词，我们需要他们的向量表示也能够相近，如何去度量和定义向量之间的相近呢？非常简单，就是使用两个向量的夹角，夹角越小，越相近，这样就有了一个完备的定义。"
 23 |    ]
 24 |   },
 25 |   {
 26 |    "cell_type": "markdown",
 27 |    "metadata": {},
 28 |    "source": [
 29 |     "我们举一个例子，下面有 4 段话\n",
 30 |     "\n",
 31 |     "1. The cat likes playing wool.\n",
 32 |     "\n",
 33 |     "2. The kitty likes playing wool.\n",
 34 |     "\n",
 35 |     "3. The dog likes playing ball.\n",
 36 |     "\n",
 37 |     "4. The boy does not like playing ball or wool.\n",
 38 |     "\n",
 39 |     "这里面有 4 个词，分别是 cat, kitty, dog 和 boy。下面我们使用一个二维的词向量 (a, b) 来表示每一个词，其中 a，b 分别代表着这个词的一种属性，比如 a 代表是否喜欢玩球，b 代表是否喜欢玩毛线，数值越大表示越喜欢，那么我们就能够用数值来定义每一个单词。\n",
 40 |     "\n",
 41 |     "对于 cat，我们可以定义它的词嵌入为 (-1, 4)，因为他不喜欢玩球，喜欢玩毛线，同时可以定义 kitty 为 (-2, 5)，dog 为 (3, 2) 以及 boy 为 (-2, -3)，那么把这四个向量在坐标系中表示出来，就是\n",
 42 |     "\n",
 43 |     "<img src=\"https://ws1.sinaimg.cn/large/006tNc79gy1fmwf2jxhbzj30g40b2my2.jpg\" width=\"350\">"
 44 |    ]
 45 |   },
 46 |   {
 47 |    "cell_type": "markdown",
 48 |    "metadata": {},
 49 |    "source": [
 50 |     "可以看到，上面这张图就显示了不同词嵌入之间的夹角，kitty 和 cat 之间的夹角比较小，所以他们更相似，dog 和 boy 之间的夹角很大，所以他们是不相似的。\n",
 51 |     "\n",
 52 |     "下面我们看看 pytorch 中如何调用词向量"
 53 |    ]
 54 |   },
 55 |   {
 56 |    "cell_type": "markdown",
 57 |    "metadata": {},
 58 |    "source": [
 59 |     "## PyTorch 实现\n",
 60 |     "词嵌入在 pytorch 中非常简单，只需要调用 `torch.nn.Embedding(m, n)` 就可以了，m 表示单词的总数目，n 表示词嵌入的维度，其实词嵌入就相当于是一个大矩阵，矩阵的每一行表示一个单词"
 61 |    ]
 62 |   },
 63 |   {
 64 |    "cell_type": "code",
 65 |    "execution_count": 9,
 66 |    "metadata": {
 67 |     "collapsed": true
 68 |    },
 69 |    "outputs": [],
 70 |    "source": [
 71 |     "import torch\n",
 72 |     "from torch import nn\n",
 73 |     "from torch.autograd import Variable"
 74 |    ]
 75 |   },
 76 |   {
 77 |    "cell_type": "code",
 78 |    "execution_count": 2,
 79 |    "metadata": {
 80 |     "collapsed": true
 81 |    },
 82 |    "outputs": [],
 83 |    "source": [
 84 |     "# 定义词嵌入\n",
 85 |     "embeds = nn.Embedding(2, 5) # 2 个单词，维度 5"
 86 |    ]
 87 |   },
 88 |   {
 89 |    "cell_type": "code",
 90 |    "execution_count": 5,
 91 |    "metadata": {},
 92 |    "outputs": [
 93 |     {
 94 |      "data": {
 95 |       "text/plain": [
 96 |        "Parameter containing:\n",
 97 |        "-1.3426  0.7316 -0.2437  0.4925 -0.0191\n",
 98 |        "-0.8326  0.3367  0.2135  0.5059  0.8326\n",
 99 |        "[torch.FloatTensor of size 2x5]"
100 |       ]
101 |      },
102 |      "execution_count": 5,
103 |      "metadata": {},
104 |      "output_type": "execute_result"
105 |     }
106 |    ],
107 |    "source": [
108 |     "# 得到词嵌入矩阵\n",
109 |     "embeds.weight"
110 |    ]
111 |   },
112 |   {
113 |    "cell_type": "markdown",
114 |    "metadata": {},
115 |    "source": [
116 |     "我们通过 `weight` 得到了整个词嵌入的矩阵，注意，这个矩阵是一个可以改变的 parameter，在网络的训练中会不断更新，同时词嵌入的数值可以直接进行修改，比如我们可以读入一个预训练好的词嵌入等等"
117 |    ]
118 |   },
119 |   {
120 |    "cell_type": "code",
121 |    "execution_count": 8,
122 |    "metadata": {},
123 |    "outputs": [
124 |     {
125 |      "data": {
126 |       "text/plain": [
127 |        "Parameter containing:\n",
128 |        " 1  1  1  1  1\n",
129 |        " 1  1  1  1  1\n",
130 |        "[torch.FloatTensor of size 2x5]"
131 |       ]
132 |      },
133 |      "execution_count": 8,
134 |      "metadata": {},
135 |      "output_type": "execute_result"
136 |     }
137 |    ],
138 |    "source": [
139 |     "# 直接手动修改词嵌入的值\n",
140 |     "embeds.weight.data = torch.ones(2, 5)\n",
141 |     "embeds.weight"
142 |    ]
143 |   },
144 |   {
145 |    "cell_type": "code",
146 |    "execution_count": 11,
147 |    "metadata": {
148 |     "collapsed": true
149 |    },
150 |    "outputs": [],
151 |    "source": [
152 |     "# 访问第 50 个词的词向量\n",
153 |     "embeds = nn.Embedding(100, 10)\n",
154 |     "single_word_embed = embeds(Variable(torch.LongTensor([50])))"
155 |    ]
156 |   },
157 |   {
158 |    "cell_type": "code",
159 |    "execution_count": 12,
160 |    "metadata": {},
161 |    "outputs": [
162 |     {
163 |      "data": {
164 |       "text/plain": [
165 |        "Variable containing:\n",
166 |        "-1.4954 -1.8475  0.2913 -0.9674 -2.1250 -0.5783 -0.6717  0.5638  0.7038  0.4437\n",
167 |        "[torch.FloatTensor of size 1x10]"
168 |       ]
169 |      },
170 |      "execution_count": 12,
171 |      "metadata": {},
172 |      "output_type": "execute_result"
173 |     }
174 |    ],
175 |    "source": [
176 |     "single_word_embed"
177 |    ]
178 |   },
179 |   {
180 |    "cell_type": "markdown",
181 |    "metadata": {},
182 |    "source": [
183 |     "可以看到如果我们要访问其中一个单词的词向量，我们可以直接调用定义好的词嵌入，但是输入必须传入一个 Variable，且类型是 LongTensor"
184 |    ]
185 |   },
186 |   {
187 |    "cell_type": "markdown",
188 |    "metadata": {},
189 |    "source": [
190 |     "虽然我们知道了如何定义词向量的相似性，但是我们仍然不知道如何得到词嵌入，因为如果一个词嵌入式 100 维，这显然不可能人为去赋值，所以为了得到词向量，需要介绍 skip-gram 模型。"
191 |    ]
192 |   },
193 |   {
194 |    "cell_type": "markdown",
195 |    "metadata": {},
196 |    "source": [
197 |     "## Skip-Gram 模型\n",
198 |     "Skip Gram 模型是 [Word2Vec](https://arxiv.org/pdf/1301.3781.pdf) 这篇论文的网络架构，下面我们来讲一讲这个模型。\n",
199 |     "\n",
200 |     "## 模型结构\n",
201 |     "skip-gram 模型非常简单，我们在一段文本中训练一个简单的网络，这个网络的任务是通过一个词周围的词来预测这个词，然而我们实际上要做的就是训练我们的词嵌入。\n",
202 |     "\n",
203 |     "比如我们给定一句话中的一个词，看看它周围的词，然后随机挑选一个，我们希望网络能够输出一个概率值，这个概率值能够告诉我们到底这个词离我们选择的词的远近程度，比如这么一句话 'A dog is playing with a ball'，如果我们选的词是 'ball'，那么 'playing' 就要比 'dog' 离我们选择的词更近。\n",
204 |     "\n",
205 |     "对于一段话，我们可以按照顺序选择不同的词，然后构建训练样本和 label，比如\n",
206 |     "\n",
207 |     "![](https://ws2.sinaimg.cn/large/006tNc79gy1fmwlpfp3loj30hh0ah75l.jpg)"
208 |    ]
209 |   },
210 |   {
211 |    "cell_type": "markdown",
212 |    "metadata": {},
213 |    "source": [
214 |     "对于这个例子，我们依次取一个词以及其周围的词构成一个训练样本，比如第一次选择的词是 'the'，那么我们取其前后两个词作为训练样本，这个也可以被称为一个滑动窗口，对于第一个词，其左边没有单词，所以训练集就是三个词，然后我们在这三个词中选择 'the' 作为输入，另外两个词都是他的输出，就构成了两个训练样本，又比如选择 'fox' 这个词，那么加上其左边两个词，右边两个词，一共是 5 个词，然后选择 'fox' 作为输入，那么输出就是其周围的四个词，一共可以构成 4 个训练样本，通过这个办法，我们就能够训练出需要的词嵌入。\n",
215 |     "\n",
216 |     "下次课，我们会讲一讲词嵌入到底有什么用。"
217 |    ]
218 |   }
219 |  ],
220 |  "metadata": {
221 |   "kernelspec": {
222 |    "display_name": "Python 3",
223 |    "language": "python",
224 |    "name": "python3"
225 |   },
226 |   "language_info": {
227 |    "codemirror_mode": {
228 |     "name": "ipython",
229 |     "version": 3
230 |    },
231 |    "file_extension": ".py",
232 |    "mimetype": "text/x-python",
233 |    "name": "python",
234 |    "nbconvert_exporter": "python",
235 |    "pygments_lexer": "ipython3",
236 |    "version": "3.6.3"
237 |   }
238 |  },
239 |  "nbformat": 4,
240 |  "nbformat_minor": 2
241 | }
242 | 


--------------------------------------------------------------------------------
/chapter5_RNN/rnn-for-image.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "markdown",
  5 |    "metadata": {},
  6 |    "source": [
  7 |     "# RNN 做图像分类\n",
  8 |     "前面我们讲了 RNN 特别适合做序列类型的数据，那么 RNN 能不能想 CNN 一样用来做图像分类呢？下面我们用 mnist 手写字体的例子来展示一下如何用 RNN 做图像分类，但是这种方法并不是主流，这里我们只是作为举例。"
  9 |    ]
 10 |   },
 11 |   {
 12 |    "cell_type": "markdown",
 13 |    "metadata": {},
 14 |    "source": [
 15 |     "对于一张手写字体的图片，其大小是 28 * 28，我们可以将其看做是一个长为 28 的序列，每个序列的特征都是 28，也就是\n",
 16 |     "\n",
 17 |     "![](https://ws4.sinaimg.cn/large/006tKfTcly1fmu7d0byfkj30n60djdg5.jpg)"
 18 |    ]
 19 |   },
 20 |   {
 21 |    "cell_type": "markdown",
 22 |    "metadata": {},
 23 |    "source": [
 24 |     "这样我们解决了输入序列的问题，对于输出序列怎么办呢？其实非常简单，虽然我们的输出是一个序列，但是我们只需要保留其中一个作为输出结果就可以了，这样的话肯定保留最后一个结果是最好的，因为最后一个结果有前面所有序列的信息，就像下面这样\n",
 25 |     "\n",
 26 |     "![](https://ws3.sinaimg.cn/large/006tKfTcly1fmu7fpqri0j30c407yjr8.jpg)\n",
 27 |     "\n",
 28 |     "下面我们直接通过例子展示"
 29 |    ]
 30 |   },
 31 |   {
 32 |    "cell_type": "code",
 33 |    "execution_count": 1,
 34 |    "metadata": {
 35 |     "ExecuteTime": {
 36 |      "end_time": "2017-12-26T08:01:44.502896Z",
 37 |      "start_time": "2017-12-26T08:01:44.062542Z"
 38 |     },
 39 |     "collapsed": true
 40 |    },
 41 |    "outputs": [],
 42 |    "source": [
 43 |     "import sys\n",
 44 |     "sys.path.append('..')\n",
 45 |     "\n",
 46 |     "import torch\n",
 47 |     "from torch.autograd import Variable\n",
 48 |     "from torch import nn\n",
 49 |     "from torch.utils.data import DataLoader\n",
 50 |     "\n",
 51 |     "from torchvision import transforms as tfs\n",
 52 |     "from torchvision.datasets import MNIST"
 53 |    ]
 54 |   },
 55 |   {
 56 |    "cell_type": "code",
 57 |    "execution_count": 2,
 58 |    "metadata": {
 59 |     "ExecuteTime": {
 60 |      "end_time": "2017-12-26T08:01:50.714439Z",
 61 |      "start_time": "2017-12-26T08:01:50.650872Z"
 62 |     },
 63 |     "collapsed": true
 64 |    },
 65 |    "outputs": [],
 66 |    "source": [
 67 |     "# 定义数据\n",
 68 |     "data_tf = tfs.Compose([\n",
 69 |     "    tfs.ToTensor(),\n",
 70 |     "    tfs.Normalize([0.5], [0.5]) # 标准化\n",
 71 |     "])\n",
 72 |     "\n",
 73 |     "train_set = MNIST('./data', train=True, transform=data_tf)\n",
 74 |     "test_set = MNIST('./data', train=False, transform=data_tf)\n",
 75 |     "\n",
 76 |     "train_data = DataLoader(train_set, 64, True, num_workers=4)\n",
 77 |     "test_data = DataLoader(test_set, 128, False, num_workers=4)"
 78 |    ]
 79 |   },
 80 |   {
 81 |    "cell_type": "code",
 82 |    "execution_count": 3,
 83 |    "metadata": {
 84 |     "ExecuteTime": {
 85 |      "end_time": "2017-12-26T08:01:51.165144Z",
 86 |      "start_time": "2017-12-26T08:01:51.115807Z"
 87 |     },
 88 |     "collapsed": true
 89 |    },
 90 |    "outputs": [],
 91 |    "source": [
 92 |     "# 定义模型\n",
 93 |     "class rnn_classify(nn.Module):\n",
 94 |     "    def __init__(self, in_feature=28, hidden_feature=100, num_class=10, num_layers=2):\n",
 95 |     "        super(rnn_classify, self).__init__()\n",
 96 |     "        self.rnn = nn.LSTM(in_feature, hidden_feature, num_layers) # 使用两层 lstm\n",
 97 |     "        self.classifier = nn.Linear(hidden_feature, num_class) # 将最后一个 rnn 的输出使用全连接得到最后的分类结果\n",
 98 |     "        \n",
 99 |     "    def forward(self, x):\n",
100 |     "        '''\n",
101 |     "        x 大小为 (batch, 1, 28, 28)，所以我们需要将其转换成 RNN 的输入形式，即 (28, batch, 28)\n",
102 |     "        '''\n",
103 |     "        x = x.squeeze() # 去掉 (batch, 1, 28, 28) 中的 1，变成 (batch, 28, 28)\n",
104 |     "        x = x.permute(2, 0, 1) # 将最后一维放到第一维，变成 (28, batch, 28)\n",
105 |     "        out, _ = self.rnn(x) # 使用默认的隐藏状态，得到的 out 是 (28, batch, hidden_feature)\n",
106 |     "        out = out[-1, :, :] # 取序列中的最后一个，大小是 (batch, hidden_feature)\n",
107 |     "        out = self.classifier(out) # 得到分类结果\n",
108 |     "        return out"
109 |    ]
110 |   },
111 |   {
112 |    "cell_type": "code",
113 |    "execution_count": 4,
114 |    "metadata": {
115 |     "ExecuteTime": {
116 |      "end_time": "2017-12-26T08:01:51.252533Z",
117 |      "start_time": "2017-12-26T08:01:51.244612Z"
118 |     },
119 |     "collapsed": true
120 |    },
121 |    "outputs": [],
122 |    "source": [
123 |     "net = rnn_classify()\n",
124 |     "criterion = nn.CrossEntropyLoss()\n",
125 |     "\n",
126 |     "optimzier = torch.optim.Adadelta(net.parameters(), 1e-1)"
127 |    ]
128 |   },
129 |   {
130 |    "cell_type": "code",
131 |    "execution_count": 5,
132 |    "metadata": {
133 |     "ExecuteTime": {
134 |      "end_time": "2017-12-26T08:03:36.739732Z",
135 |      "start_time": "2017-12-26T08:01:51.607967Z"
136 |     }
137 |    },
138 |    "outputs": [
139 |     {
140 |      "name": "stdout",
141 |      "output_type": "stream",
142 |      "text": [
143 |       "Epoch 0. Train Loss: 1.858605, Train Acc: 0.318347, Valid Loss: 1.147508, Valid Acc: 0.578125, Time 00:00:09\n",
144 |       "Epoch 1. Train Loss: 0.503072, Train Acc: 0.848514, Valid Loss: 0.300552, Valid Acc: 0.912579, Time 00:00:09\n",
145 |       "Epoch 2. Train Loss: 0.224762, Train Acc: 0.934785, Valid Loss: 0.176321, Valid Acc: 0.946499, Time 00:00:09\n",
146 |       "Epoch 3. Train Loss: 0.157010, Train Acc: 0.953392, Valid Loss: 0.155280, Valid Acc: 0.954015, Time 00:00:09\n",
147 |       "Epoch 4. Train Loss: 0.125926, Train Acc: 0.962137, Valid Loss: 0.105295, Valid Acc: 0.969640, Time 00:00:09\n",
148 |       "Epoch 5. Train Loss: 0.104938, Train Acc: 0.968450, Valid Loss: 0.091477, Valid Acc: 0.972805, Time 00:00:10\n",
149 |       "Epoch 6. Train Loss: 0.089124, Train Acc: 0.973481, Valid Loss: 0.104799, Valid Acc: 0.969343, Time 00:00:09\n",
150 |       "Epoch 7. Train Loss: 0.077920, Train Acc: 0.976679, Valid Loss: 0.084242, Valid Acc: 0.976661, Time 00:00:10\n",
151 |       "Epoch 8. Train Loss: 0.070259, Train Acc: 0.978795, Valid Loss: 0.078536, Valid Acc: 0.977749, Time 00:00:09\n",
152 |       "Epoch 9. Train Loss: 0.063089, Train Acc: 0.981093, Valid Loss: 0.066984, Valid Acc: 0.980716, Time 00:00:09\n"
153 |      ]
154 |     }
155 |    ],
156 |    "source": [
157 |     "# 开始训练\n",
158 |     "from utils import train\n",
159 |     "train(net, train_data, test_data, 10, optimzier, criterion)"
160 |    ]
161 |   },
162 |   {
163 |    "cell_type": "markdown",
164 |    "metadata": {},
165 |    "source": [
166 |     "可以看到，训练 10 次在简单的 mnist 数据集上也取得的了 98% 的准确率，所以说 RNN 也可以做做简单的图像分类，但是这并不是他的主战场，下次课我们会讲到 RNN 的一个使用场景，时间序列预测。"
167 |    ]
168 |   }
169 |  ],
170 |  "metadata": {
171 |   "kernelspec": {
172 |    "display_name": "Python 3",
173 |    "language": "python",
174 |    "name": "python3"
175 |   },
176 |   "language_info": {
177 |    "codemirror_mode": {
178 |     "name": "ipython",
179 |     "version": 3
180 |    },
181 |    "file_extension": ".py",
182 |    "mimetype": "text/x-python",
183 |    "name": "python",
184 |    "nbconvert_exporter": "python",
185 |    "pygments_lexer": "ipython3",
186 |    "version": "3.6.3"
187 |   }
188 |  },
189 |  "nbformat": 4,
190 |  "nbformat_minor": 2
191 | }
192 | 


--------------------------------------------------------------------------------
/chapter5_RNN/time-series/data.csv:
--------------------------------------------------------------------------------
  1 | "Month","International airline passengers: monthly totals in thousands. Jan 49 ? Dec 60"
  2 | "1949-01",112
  3 | "1949-02",118
  4 | "1949-03",132
  5 | "1949-04",129
  6 | "1949-05",121
  7 | "1949-06",135
  8 | "1949-07",148
  9 | "1949-08",148
 10 | "1949-09",136
 11 | "1949-10",119
 12 | "1949-11",104
 13 | "1949-12",118
 14 | "1950-01",115
 15 | "1950-02",126
 16 | "1950-03",141
 17 | "1950-04",135
 18 | "1950-05",125
 19 | "1950-06",149
 20 | "1950-07",170
 21 | "1950-08",170
 22 | "1950-09",158
 23 | "1950-10",133
 24 | "1950-11",114
 25 | "1950-12",140
 26 | "1951-01",145
 27 | "1951-02",150
 28 | "1951-03",178
 29 | "1951-04",163
 30 | "1951-05",172
 31 | "1951-06",178
 32 | "1951-07",199
 33 | "1951-08",199
 34 | "1951-09",184
 35 | "1951-10",162
 36 | "1951-11",146
 37 | "1951-12",166
 38 | "1952-01",171
 39 | "1952-02",180
 40 | "1952-03",193
 41 | "1952-04",181
 42 | "1952-05",183
 43 | "1952-06",218
 44 | "1952-07",230
 45 | "1952-08",242
 46 | "1952-09",209
 47 | "1952-10",191
 48 | "1952-11",172
 49 | "1952-12",194
 50 | "1953-01",196
 51 | "1953-02",196
 52 | "1953-03",236
 53 | "1953-04",235
 54 | "1953-05",229
 55 | "1953-06",243
 56 | "1953-07",264
 57 | "1953-08",272
 58 | "1953-09",237
 59 | "1953-10",211
 60 | "1953-11",180
 61 | "1953-12",201
 62 | "1954-01",204
 63 | "1954-02",188
 64 | "1954-03",235
 65 | "1954-04",227
 66 | "1954-05",234
 67 | "1954-06",264
 68 | "1954-07",302
 69 | "1954-08",293
 70 | "1954-09",259
 71 | "1954-10",229
 72 | "1954-11",203
 73 | "1954-12",229
 74 | "1955-01",242
 75 | "1955-02",233
 76 | "1955-03",267
 77 | "1955-04",269
 78 | "1955-05",270
 79 | "1955-06",315
 80 | "1955-07",364
 81 | "1955-08",347
 82 | "1955-09",312
 83 | "1955-10",274
 84 | "1955-11",237
 85 | "1955-12",278
 86 | "1956-01",284
 87 | "1956-02",277
 88 | "1956-03",317
 89 | "1956-04",313
 90 | "1956-05",318
 91 | "1956-06",374
 92 | "1956-07",413
 93 | "1956-08",405
 94 | "1956-09",355
 95 | "1956-10",306
 96 | "1956-11",271
 97 | "1956-12",306
 98 | "1957-01",315
 99 | "1957-02",301
100 | "1957-03",356
101 | "1957-04",348
102 | "1957-05",355
103 | "1957-06",422
104 | "1957-07",465
105 | "1957-08",467
106 | "1957-09",404
107 | "1957-10",347
108 | "1957-11",305
109 | "1957-12",336
110 | "1958-01",340
111 | "1958-02",318
112 | "1958-03",362
113 | "1958-04",348
114 | "1958-05",363
115 | "1958-06",435
116 | "1958-07",491
117 | "1958-08",505
118 | "1958-09",404
119 | "1958-10",359
120 | "1958-11",310
121 | "1958-12",337
122 | "1959-01",360
123 | "1959-02",342
124 | "1959-03",406
125 | "1959-04",396
126 | "1959-05",420
127 | "1959-06",472
128 | "1959-07",548
129 | "1959-08",559
130 | "1959-09",463
131 | "1959-10",407
132 | "1959-11",362
133 | "1959-12",405
134 | "1960-01",417
135 | "1960-02",391
136 | "1960-03",419
137 | "1960-04",461
138 | "1960-05",472
139 | "1960-06",535
140 | "1960-07",622
141 | "1960-08",606
142 | "1960-09",508
143 | "1960-10",461
144 | "1960-11",390
145 | "1960-12",432
146 | 
147 | International airline passengers: monthly totals in thousands. Jan 49 ? Dec 60
148 | 
149 | 


--------------------------------------------------------------------------------
/chapter5_RNN/utils.py:
--------------------------------------------------------------------------------
  1 | from datetime import datetime
  2 | 
  3 | import torch
  4 | import torch.nn.functional as F
  5 | from torch import nn
  6 | from torch.autograd import Variable
  7 | 
  8 | 
  9 | def get_acc(output, label):
 10 |     total = output.shape[0]
 11 |     _, pred_label = output.max(1)
 12 |     num_correct = (pred_label == label).sum().data[0]
 13 |     return num_correct / total
 14 | 
 15 | 
 16 | def train(net, train_data, valid_data, num_epochs, optimizer, criterion):
 17 |     if torch.cuda.is_available():
 18 |         net = net.cuda()
 19 |     prev_time = datetime.now()
 20 |     for epoch in range(num_epochs):
 21 |         train_loss = 0
 22 |         train_acc = 0
 23 |         net = net.train()
 24 |         for im, label in train_data:
 25 |             if torch.cuda.is_available():
 26 |                 im = Variable(im.cuda())  # (bs, 3, h, w)
 27 |                 label = Variable(label.cuda())  # (bs, h, w)
 28 |             else:
 29 |                 im = Variable(im)
 30 |                 label = Variable(label)
 31 |             # forward
 32 |             output = net(im)
 33 |             loss = criterion(output, label)
 34 |             # backward
 35 |             optimizer.zero_grad()
 36 |             loss.backward()
 37 |             optimizer.step()
 38 | 
 39 |             train_loss += loss.data[0]
 40 |             train_acc += get_acc(output, label)
 41 | 
 42 |         cur_time = datetime.now()
 43 |         h, remainder = divmod((cur_time - prev_time).seconds, 3600)
 44 |         m, s = divmod(remainder, 60)
 45 |         time_str = "Time %02d:%02d:%02d" % (h, m, s)
 46 |         if valid_data is not None:
 47 |             valid_loss = 0
 48 |             valid_acc = 0
 49 |             net = net.eval()
 50 |             for im, label in valid_data:
 51 |                 if torch.cuda.is_available():
 52 |                     im = Variable(im.cuda(), volatile=True)
 53 |                     label = Variable(label.cuda(), volatile=True)
 54 |                 else:
 55 |                     im = Variable(im, volatile=True)
 56 |                     label = Variable(label, volatile=True)
 57 |                 output = net(im)
 58 |                 loss = criterion(output, label)
 59 |                 valid_loss += loss.data[0]
 60 |                 valid_acc += get_acc(output, label)
 61 |             epoch_str = (
 62 |                 "Epoch %d. Train Loss: %f, Train Acc: %f, Valid Loss: %f, Valid Acc: %f, "
 63 |                 % (epoch, train_loss / len(train_data),
 64 |                    train_acc / len(train_data), valid_loss / len(valid_data),
 65 |                    valid_acc / len(valid_data)))
 66 |         else:
 67 |             epoch_str = ("Epoch %d. Train Loss: %f, Train Acc: %f, " %
 68 |                          (epoch, train_loss / len(train_data),
 69 |                           train_acc / len(train_data)))
 70 |         prev_time = cur_time
 71 |         print(epoch_str + time_str)
 72 | 
 73 | 
 74 | def conv3x3(in_channel, out_channel, stride=1):
 75 |     return nn.Conv2d(
 76 |         in_channel, out_channel, 3, stride=stride, padding=1, bias=False)
 77 | 
 78 | 
 79 | class residual_block(nn.Module):
 80 |     def __init__(self, in_channel, out_channel, same_shape=True):
 81 |         super(residual_block, self).__init__()
 82 |         self.same_shape = same_shape
 83 |         stride = 1 if self.same_shape else 2
 84 | 
 85 |         self.conv1 = conv3x3(in_channel, out_channel, stride=stride)
 86 |         self.bn1 = nn.BatchNorm2d(out_channel)
 87 | 
 88 |         self.conv2 = conv3x3(out_channel, out_channel)
 89 |         self.bn2 = nn.BatchNorm2d(out_channel)
 90 |         if not self.same_shape:
 91 |             self.conv3 = nn.Conv2d(in_channel, out_channel, 1, stride=stride)
 92 | 
 93 |     def forward(self, x):
 94 |         out = self.conv1(x)
 95 |         out = F.relu(self.bn1(out), True)
 96 |         out = self.conv2(out)
 97 |         out = F.relu(self.bn2(out), True)
 98 | 
 99 |         if not self.same_shape:
100 |             x = self.conv3(x)
101 |         return F.relu(x + out, True)
102 | 
103 | 
104 | class resnet(nn.Module):
105 |     def __init__(self, in_channel, num_classes, verbose=False):
106 |         super(resnet, self).__init__()
107 |         self.verbose = verbose
108 | 
109 |         self.block1 = nn.Conv2d(in_channel, 64, 7, 2)
110 | 
111 |         self.block2 = nn.Sequential(
112 |             nn.MaxPool2d(3, 2), residual_block(64, 64), residual_block(64, 64))
113 | 
114 |         self.block3 = nn.Sequential(
115 |             residual_block(64, 128, False), residual_block(128, 128))
116 | 
117 |         self.block4 = nn.Sequential(
118 |             residual_block(128, 256, False), residual_block(256, 256))
119 | 
120 |         self.block5 = nn.Sequential(
121 |             residual_block(256, 512, False),
122 |             residual_block(512, 512), nn.AvgPool2d(3))
123 | 
124 |         self.classifier = nn.Linear(512, num_classes)
125 | 
126 |     def forward(self, x):
127 |         x = self.block1(x)
128 |         if self.verbose:
129 |             print('block 1 output: {}'.format(x.shape))
130 |         x = self.block2(x)
131 |         if self.verbose:
132 |             print('block 2 output: {}'.format(x.shape))
133 |         x = self.block3(x)
134 |         if self.verbose:
135 |             print('block 3 output: {}'.format(x.shape))
136 |         x = self.block4(x)
137 |         if self.verbose:
138 |             print('block 4 output: {}'.format(x.shape))
139 |         x = self.block5(x)
140 |         if self.verbose:
141 |             print('block 5 output: {}'.format(x.shape))
142 |         x = x.view(x.shape[0], -1)
143 |         x = self.classifier(x)
144 |         return x
145 | 


--------------------------------------------------------------------------------
/chapter6_GAN/vae.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "markdown",
  5 |    "metadata": {
  6 |     "collapsed": true
  7 |    },
  8 |    "source": [
  9 |     "# 变分自动编码器\n",
 10 |     "变分编码器是自动编码器的升级版本，其结构跟自动编码器是类似的，也由编码器和解码器构成。\n",
 11 |     "\n",
 12 |     "回忆一下，自动编码器有个问题，就是并不能任意生成图片，因为我们没有办法自己去构造隐藏向量，需要通过一张图片输入编码我们才知道得到的隐含向量是什么，这时我们就可以通过变分自动编码器来解决这个问题。\n",
 13 |     "\n",
 14 |     "其实原理特别简单，只需要在编码过程给它增加一些限制，迫使其生成的隐含向量能够粗略的遵循一个标准正态分布，这就是其与一般的自动编码器最大的不同。\n",
 15 |     "\n",
 16 |     "这样我们生成一张新图片就很简单了，我们只需要给它一个标准正态分布的随机隐含向量，这样通过解码器就能够生成我们想要的图片，而不需要给它一张原始图片先编码。\n",
 17 |     "\n",
 18 |     "一般来讲，我们通过 encoder 得到的隐含向量并不是一个标准的正态分布，为了衡量两种分布的相似程度，我们使用 KL divergence，利用其来表示隐含向量与标准正态分布之间差异的 loss，另外一个 loss 仍然使用生成图片与原图片的均方误差来表示。\n",
 19 |     "\n",
 20 |     "KL divergence 的公式如下\n",
 21 |     "\n",
 22 |     "$$\n",
 23 |     "D{KL} (P || Q) =  \\int_{-\\infty}^{\\infty} p(x) \\log \\frac{p(x)}{q(x)} dx\n",
 24 |     "$$"
 25 |    ]
 26 |   },
 27 |   {
 28 |    "cell_type": "markdown",
 29 |    "metadata": {},
 30 |    "source": [
 31 |     "## 重参数\n",
 32 |     "为了避免计算 KL divergence 中的积分，我们使用重参数的技巧，不是每次产生一个隐含向量，而是生成两个向量，一个表示均值，一个表示标准差，这里我们默认编码之后的隐含向量服从一个正态分布的之后，就可以用一个标准正态分布先乘上标准差再加上均值来合成这个正态分布，最后 loss 就是希望这个生成的正态分布能够符合一个标准正态分布，也就是希望均值为 0，方差为 1\n",
 33 |     "\n",
 34 |     "所以标准的变分自动编码器如下\n",
 35 |     "\n",
 36 |     "![](https://ws4.sinaimg.cn/large/006tKfTcgy1fn15cq6n7pj30k007t0sv.jpg)"
 37 |    ]
 38 |   },
 39 |   {
 40 |    "cell_type": "markdown",
 41 |    "metadata": {},
 42 |    "source": [
 43 |     "所以最后我们可以将我们的 loss 定义为下面的函数，由均方误差和 KL divergence 求和得到一个总的 loss\n",
 44 |     "\n",
 45 |     "```\n",
 46 |     "def loss_function(recon_x, x, mu, logvar):\n",
 47 |     "    \"\"\"\n",
 48 |     "    recon_x: generating images\n",
 49 |     "    x: origin images\n",
 50 |     "    mu: latent mean\n",
 51 |     "    logvar: latent log variance\n",
 52 |     "    \"\"\"\n",
 53 |     "    MSE = reconstruction_function(recon_x, x)\n",
 54 |     "    # loss = 0.5 * sum(1 + log(sigma^2) - mu^2 - sigma^2)\n",
 55 |     "    KLD_element = mu.pow(2).add_(logvar.exp()).mul_(-1).add_(1).add_(logvar)\n",
 56 |     "    KLD = torch.sum(KLD_element).mul_(-0.5)\n",
 57 |     "    # KL divergence\n",
 58 |     "    return MSE + KLD\n",
 59 |     "```"
 60 |    ]
 61 |   },
 62 |   {
 63 |    "cell_type": "markdown",
 64 |    "metadata": {},
 65 |    "source": [
 66 |     "下面我们用 mnist 数据集来简单说明一下变分自动编码器"
 67 |    ]
 68 |   },
 69 |   {
 70 |    "cell_type": "code",
 71 |    "execution_count": 1,
 72 |    "metadata": {
 73 |     "ExecuteTime": {
 74 |      "end_time": "2018-01-01T10:41:05.738797Z",
 75 |      "start_time": "2018-01-01T10:41:05.215490Z"
 76 |     },
 77 |     "collapsed": true
 78 |    },
 79 |    "outputs": [],
 80 |    "source": [
 81 |     "import os\n",
 82 |     "\n",
 83 |     "import torch\n",
 84 |     "from torch.autograd import Variable\n",
 85 |     "import torch.nn.functional as F\n",
 86 |     "from torch import nn\n",
 87 |     "from torch.utils.data import DataLoader\n",
 88 |     "\n",
 89 |     "from torchvision.datasets import MNIST\n",
 90 |     "from torchvision import transforms as tfs\n",
 91 |     "from torchvision.utils import save_image"
 92 |    ]
 93 |   },
 94 |   {
 95 |    "cell_type": "code",
 96 |    "execution_count": 2,
 97 |    "metadata": {
 98 |     "ExecuteTime": {
 99 |      "end_time": "2018-01-01T10:41:05.769643Z",
100 |      "start_time": "2018-01-01T10:41:05.741302Z"
101 |     },
102 |     "collapsed": true
103 |    },
104 |    "outputs": [],
105 |    "source": [
106 |     "im_tfs = tfs.Compose([\n",
107 |     "    tfs.ToTensor(),\n",
108 |     "    tfs.Normalize([0.5, 0.5, 0.5], [0.5, 0.5, 0.5]) # 标准化\n",
109 |     "])\n",
110 |     "\n",
111 |     "train_set = MNIST('./mnist', transform=im_tfs)\n",
112 |     "train_data = DataLoader(train_set, batch_size=128, shuffle=True)"
113 |    ]
114 |   },
115 |   {
116 |    "cell_type": "code",
117 |    "execution_count": 3,
118 |    "metadata": {
119 |     "ExecuteTime": {
120 |      "end_time": "2018-01-01T10:41:06.397118Z",
121 |      "start_time": "2018-01-01T10:41:06.306479Z"
122 |     },
123 |     "collapsed": true
124 |    },
125 |    "outputs": [],
126 |    "source": [
127 |     "class VAE(nn.Module):\n",
128 |     "    def __init__(self):\n",
129 |     "        super(VAE, self).__init__()\n",
130 |     "\n",
131 |     "        self.fc1 = nn.Linear(784, 400)\n",
132 |     "        self.fc21 = nn.Linear(400, 20) # mean\n",
133 |     "        self.fc22 = nn.Linear(400, 20) # var\n",
134 |     "        self.fc3 = nn.Linear(20, 400)\n",
135 |     "        self.fc4 = nn.Linear(400, 784)\n",
136 |     "\n",
137 |     "    def encode(self, x):\n",
138 |     "        h1 = F.relu(self.fc1(x))\n",
139 |     "        return self.fc21(h1), self.fc22(h1)\n",
140 |     "\n",
141 |     "    def reparametrize(self, mu, logvar):\n",
142 |     "        std = logvar.mul(0.5).exp_()\n",
143 |     "        eps = torch.FloatTensor(std.size()).normal_()\n",
144 |     "        if torch.cuda.is_available():\n",
145 |     "            eps = Variable(eps.cuda())\n",
146 |     "        else:\n",
147 |     "            eps = Variable(eps)\n",
148 |     "        return eps.mul(std).add_(mu)\n",
149 |     "\n",
150 |     "    def decode(self, z):\n",
151 |     "        h3 = F.relu(self.fc3(z))\n",
152 |     "        return F.tanh(self.fc4(h3))\n",
153 |     "\n",
154 |     "    def forward(self, x):\n",
155 |     "        mu, logvar = self.encode(x) # 编码\n",
156 |     "        z = self.reparametrize(mu, logvar) # 重新参数化成正态分布\n",
157 |     "        return self.decode(z), mu, logvar # 解码，同时输出均值方差"
158 |    ]
159 |   },
160 |   {
161 |    "cell_type": "code",
162 |    "execution_count": 4,
163 |    "metadata": {
164 |     "ExecuteTime": {
165 |      "end_time": "2018-01-01T10:41:10.056600Z",
166 |      "start_time": "2018-01-01T10:41:06.430817Z"
167 |     },
168 |     "collapsed": true
169 |    },
170 |    "outputs": [],
171 |    "source": [
172 |     "net = VAE() # 实例化网络\n",
173 |     "if torch.cuda.is_available():\n",
174 |     "    net = net.cuda()"
175 |    ]
176 |   },
177 |   {
178 |    "cell_type": "code",
179 |    "execution_count": 5,
180 |    "metadata": {
181 |     "ExecuteTime": {
182 |      "end_time": "2018-01-01T10:41:10.409900Z",
183 |      "start_time": "2018-01-01T10:41:10.059597Z"
184 |     },
185 |     "collapsed": true
186 |    },
187 |    "outputs": [],
188 |    "source": [
189 |     "x, _ = train_set[0]\n",
190 |     "x = x.view(x.shape[0], -1)\n",
191 |     "if torch.cuda.is_available():\n",
192 |     "    x = x.cuda()\n",
193 |     "x = Variable(x)\n",
194 |     "_, mu, var = net(x)"
195 |    ]
196 |   },
197 |   {
198 |    "cell_type": "code",
199 |    "execution_count": 8,
200 |    "metadata": {
201 |     "ExecuteTime": {
202 |      "end_time": "2018-01-01T10:41:29.753678Z",
203 |      "start_time": "2018-01-01T10:41:29.749178Z"
204 |     }
205 |    },
206 |    "outputs": [
207 |     {
208 |      "name": "stdout",
209 |      "output_type": "stream",
210 |      "text": [
211 |       "Variable containing:\n",
212 |       "\n",
213 |       "Columns 0 to 9 \n",
214 |       "-0.0307 -0.1439 -0.0435  0.3472  0.0368 -0.0339  0.0274 -0.5608  0.0280  0.2742\n",
215 |       "\n",
216 |       "Columns 10 to 19 \n",
217 |       "-0.6221 -0.0894 -0.0933  0.4241  0.1611  0.3267  0.5755 -0.0237  0.2714 -0.2806\n",
218 |       "[torch.cuda.FloatTensor of size 1x20 (GPU 0)]\n",
219 |       "\n"
220 |      ]
221 |     }
222 |    ],
223 |    "source": [
224 |     "print(mu)"
225 |    ]
226 |   },
227 |   {
228 |    "cell_type": "markdown",
229 |    "metadata": {},
230 |    "source": [
231 |     "可以看到，对于输入，网络可以输出隐含变量的均值和方差，这里的均值方差还没有训练\n",
232 |     "\n",
233 |     "下面开始训练"
234 |    ]
235 |   },
236 |   {
237 |    "cell_type": "code",
238 |    "execution_count": 6,
239 |    "metadata": {
240 |     "ExecuteTime": {
241 |      "end_time": "2018-01-01T10:13:54.560436Z",
242 |      "start_time": "2018-01-01T10:13:54.530108Z"
243 |     },
244 |     "collapsed": true
245 |    },
246 |    "outputs": [],
247 |    "source": [
248 |     "reconstruction_function = nn.MSELoss(size_average=False)\n",
249 |     "\n",
250 |     "def loss_function(recon_x, x, mu, logvar):\n",
251 |     "    \"\"\"\n",
252 |     "    recon_x: generating images\n",
253 |     "    x: origin images\n",
254 |     "    mu: latent mean\n",
255 |     "    logvar: latent log variance\n",
256 |     "    \"\"\"\n",
257 |     "    MSE = reconstruction_function(recon_x, x)\n",
258 |     "    # loss = 0.5 * sum(1 + log(sigma^2) - mu^2 - sigma^2)\n",
259 |     "    KLD_element = mu.pow(2).add_(logvar.exp()).mul_(-1).add_(1).add_(logvar)\n",
260 |     "    KLD = torch.sum(KLD_element).mul_(-0.5)\n",
261 |     "    # KL divergence\n",
262 |     "    return MSE + KLD\n",
263 |     "\n",
264 |     "optimizer = torch.optim.Adam(net.parameters(), lr=1e-3)\n",
265 |     "\n",
266 |     "def to_img(x):\n",
267 |     "    '''\n",
268 |     "    定义一个函数将最后的结果转换回图片\n",
269 |     "    '''\n",
270 |     "    x = 0.5 * (x + 1.)\n",
271 |     "    x = x.clamp(0, 1)\n",
272 |     "    x = x.view(x.shape[0], 1, 28, 28)\n",
273 |     "    return x"
274 |    ]
275 |   },
276 |   {
277 |    "cell_type": "code",
278 |    "execution_count": 7,
279 |    "metadata": {
280 |     "ExecuteTime": {
281 |      "end_time": "2018-01-01T10:35:01.115877Z",
282 |      "start_time": "2018-01-01T10:13:54.562533Z"
283 |     }
284 |    },
285 |    "outputs": [
286 |     {
287 |      "name": "stdout",
288 |      "output_type": "stream",
289 |      "text": [
290 |       "epoch: 20, Loss: 61.5803\n",
291 |       "epoch: 40, Loss: 62.9573\n",
292 |       "epoch: 60, Loss: 63.4285\n",
293 |       "epoch: 80, Loss: 64.7138\n",
294 |       "epoch: 100, Loss: 63.3343\n"
295 |      ]
296 |     }
297 |    ],
298 |    "source": [
299 |     "for e in range(100):\n",
300 |     "    for im, _ in train_data:\n",
301 |     "        im = im.view(im.shape[0], -1)\n",
302 |     "        im = Variable(im)\n",
303 |     "        if torch.cuda.is_available():\n",
304 |     "            im = im.cuda()\n",
305 |     "        recon_im, mu, logvar = net(im)\n",
306 |     "        loss = loss_function(recon_im, im, mu, logvar) / im.shape[0] # 将 loss 平均\n",
307 |     "        optimizer.zero_grad()\n",
308 |     "        loss.backward()\n",
309 |     "        optimizer.step()\n",
310 |     "\n",
311 |     "    if (e + 1) % 20 == 0:\n",
312 |     "        print('epoch: {}, Loss: {:.4f}'.format(e + 1, loss.data[0]))\n",
313 |     "        save = to_img(recon_im.cpu().data)\n",
314 |     "        if not os.path.exists('./vae_img'):\n",
315 |     "            os.mkdir('./vae_img')\n",
316 |     "        save_image(save, './vae_img/image_{}.png'.format(e + 1))"
317 |    ]
318 |   },
319 |   {
320 |    "cell_type": "markdown",
321 |    "metadata": {},
322 |    "source": [
323 |     "可以看看使用变分自动编码器得到的结果，可以发现效果比一般的编码器要好很多\n",
324 |     "\n",
325 |     "![](https://ws1.sinaimg.cn/large/006tKfTcgy1fn1ag8832zj306q0a2gmz.jpg)\n",
326 |     "\n",
327 |     "我们可以输出其中的均值看看"
328 |    ]
329 |   },
330 |   {
331 |    "cell_type": "code",
332 |    "execution_count": 14,
333 |    "metadata": {
334 |     "ExecuteTime": {
335 |      "end_time": "2018-01-01T10:40:36.481622Z",
336 |      "start_time": "2018-01-01T10:40:36.463332Z"
337 |     },
338 |     "collapsed": true
339 |    },
340 |    "outputs": [],
341 |    "source": [
342 |     "x, _ = train_set[0]\n",
343 |     "x = x.view(x.shape[0], -1)\n",
344 |     "if torch.cuda.is_available():\n",
345 |     "    x = x.cuda()\n",
346 |     "x = Variable(x)\n",
347 |     "_, mu, _ = net(x)"
348 |    ]
349 |   },
350 |   {
351 |    "cell_type": "code",
352 |    "execution_count": 15,
353 |    "metadata": {
354 |     "ExecuteTime": {
355 |      "end_time": "2018-01-01T10:40:37.490484Z",
356 |      "start_time": "2018-01-01T10:40:37.485127Z"
357 |     }
358 |    },
359 |    "outputs": [
360 |     {
361 |      "name": "stdout",
362 |      "output_type": "stream",
363 |      "text": [
364 |       "Variable containing:\n",
365 |       "\n",
366 |       "Columns 0 to 9 \n",
367 |       " 0.3861  0.5561  1.1995 -1.6773  0.9867  0.1244 -0.3443 -1.6658  1.3332  1.1606\n",
368 |       "\n",
369 |       "Columns 10 to 19 \n",
370 |       " 0.6898  0.3042  2.1044 -2.4588  0.0504  0.9743  1.1136  0.7872 -0.0777  1.6101\n",
371 |       "[torch.cuda.FloatTensor of size 1x20 (GPU 0)]\n",
372 |       "\n"
373 |      ]
374 |     }
375 |    ],
376 |    "source": [
377 |     "print(mu)"
378 |    ]
379 |   },
380 |   {
381 |    "cell_type": "markdown",
382 |    "metadata": {},
383 |    "source": [
384 |     "变分自动编码器虽然比一般的自动编码器效果要好，而且也限制了其输出的编码 (code) 的概率分布，但是它仍然是通过直接计算生成图片和原始图片的均方误差来生成 loss，这个方式并不好，在下一章生成对抗网络中，我们会讲一讲这种方式计算 loss 的局限性，然后会介绍一种新的训练办法，就是通过生成对抗的训练方式来训练网络而不是直接比较两张图片的每个像素点的均方误差"
385 |    ]
386 |   }
387 |  ],
388 |  "metadata": {
389 |   "kernelspec": {
390 |    "display_name": "Python 3",
391 |    "language": "python",
392 |    "name": "python3"
393 |   },
394 |   "language_info": {
395 |    "codemirror_mode": {
396 |     "name": "ipython",
397 |     "version": 3
398 |    },
399 |    "file_extension": ".py",
400 |    "mimetype": "text/x-python",
401 |    "name": "python",
402 |    "nbconvert_exporter": "python",
403 |    "pygments_lexer": "ipython3",
404 |    "version": "3.6.3"
405 |   }
406 |  },
407 |  "nbformat": 4,
408 |  "nbformat_minor": 2
409 | }
410 | 


--------------------------------------------------------------------------------
/chapter7_RL/dqn.py:
--------------------------------------------------------------------------------
  1 | # coding: utf-8
  2 | 
  3 | import torch
  4 | import torch.nn as nn
  5 | from torch.autograd import Variable
  6 | import numpy as np
  7 | import gym
  8 | 
  9 | # 定义一些超参数
 10 | 
 11 | 
 12 | batch_size = 32
 13 | lr = 0.01
 14 | epsilon = 0.9
 15 | gamma = 0.9
 16 | target_replace_iter = 100
 17 | memory_capacity = 2000
 18 | env = gym.make('CartPole-v0')
 19 | env = env.unwrapped
 20 | n_actions = env.action_space.n
 21 | n_states = env.observation_space.shape[0]
 22 | 
 23 | 
 24 | class q_net(nn.Module):
 25 |     def __init__(self, hidden=50):
 26 |         super(q_net, self).__init__()
 27 |         self.fc = nn.Sequential(
 28 |             nn.Linear(n_states, hidden),
 29 |             nn.ReLU(True),
 30 |             nn.Linear(hidden, n_actions)
 31 |         )
 32 | 
 33 |         nn.init.normal(self.fc[0].weight, std=0.1)  # 使用标准差是 0.1 的正态分布初始化
 34 |         nn.init.normal(self.fc[2].weight, std=0.1)  # 使用标准差是 0.1 的正态分布初始化
 35 | 
 36 |     def forward(self, x):
 37 |         actions_value = self.fc(x)
 38 |         return actions_value
 39 | 
 40 | 
 41 | class DQN(object):
 42 |     def __init__(self):
 43 |         self.eval_net, self.target_net = q_net(), q_net()
 44 | 
 45 |         self.learn_step_counter = 0
 46 |         self.memory_counter = 0
 47 |         self.memory = np.zeros((memory_capacity, n_states * 2 + 2))  # 当前的状态和动作，之后的状态和动作
 48 |         self.optimizer = torch.optim.Adam(self.eval_net.parameters(), lr=lr)
 49 |         self.criterion = nn.MSELoss()
 50 | 
 51 |     def choose_action(self, s):
 52 |         '''
 53 |         根据输入的状态得到所有可行动作的价值估计
 54 |         '''
 55 |         s = Variable(torch.unsqueeze(torch.FloatTensor(s), 0))
 56 |         # input only one sample
 57 |         if np.random.uniform() < epsilon:  # greedy 贪婪算法
 58 |             actions_value = self.eval_net(s)
 59 |             action = torch.max(actions_value, 1)[1].data[0]
 60 |         else:  # random 随机选择
 61 |             action = np.random.randint(0, n_actions)
 62 |         return action
 63 | 
 64 |     def store_transition(self, s, a, r, s_):
 65 |         transition = np.hstack((s, [a, r], s_))
 66 |         # 用新的记忆替换旧的记忆
 67 |         index = self.memory_counter % memory_capacity
 68 |         self.memory[index, :] = transition
 69 |         self.memory_counter += 1
 70 | 
 71 |     def learn(self):
 72 |         # target net 的参数更新
 73 |         if self.learn_step_counter % target_replace_iter == 0:
 74 |             self.target_net.load_state_dict(self.eval_net.state_dict())
 75 |         self.learn_step_counter += 1
 76 | 
 77 |         # 取样记忆中的经历
 78 |         sample_index = np.random.choice(memory_capacity, batch_size)
 79 |         b_memory = self.memory[sample_index, :]
 80 |         b_s = Variable(torch.FloatTensor(b_memory[:, :n_states]))
 81 |         b_a = Variable(
 82 |             torch.LongTensor(b_memory[:, n_states:n_states + 1].astype(int)))
 83 |         b_r = Variable(
 84 |             torch.FloatTensor(b_memory[:, n_states + 1:n_states + 2]))
 85 |         b_s_ = Variable(torch.FloatTensor(b_memory[:, -n_states:]))
 86 | 
 87 |         # q_eval net 评估状态下动作的 value
 88 |         q_eval = self.eval_net(b_s).gather(1, b_a)  # shape (batch, 1) 选择对应 action 的动作
 89 |         q_next = self.target_net(
 90 |             b_s_).detach()  # detach from graph, don't backpropagate
 91 |         q_target = b_r + gamma * q_next.max(1)[0].view(batch_size, 1)  # shape (batch, 1)
 92 |         loss = self.criterion(q_eval, q_target)  # mse 作为 loss 函数
 93 |         # 更新网络
 94 |         self.optimizer.zero_grad()
 95 |         loss.backward()
 96 |         self.optimizer.step()
 97 | 
 98 | 
 99 | dqn_trainer = DQN()
100 | 
101 | print('collecting experience ... ')
102 | all_reward = []
103 | for i_episode in range(300):
104 |     s = env.reset()
105 |     reward = 0
106 |     while True:
107 |         if dqn_trainer.memory_counter > memory_capacity:
108 |             env.render()
109 |         a = dqn_trainer.choose_action(s)
110 | 
111 |         # 环境采取动作得到结果
112 |         s_, r, done, info = env.step(a)
113 | 
114 |         # 修改奖励以便更快收敛
115 |         x, x_dot, theta, theta_dot = s_
116 |         r1 = (env.x_threshold - abs(x)) / env.x_threshold - 0.8
117 |         r2 = (env.theta_threshold_radians - abs(theta)) / env.theta_threshold_radians - 0.5
118 |         r = r1 + r2
119 | 
120 |         dqn_trainer.store_transition(s, a, r, s_)
121 | 
122 |         reward += r
123 |         if dqn_trainer.memory_counter > memory_capacity:  # 记忆收集够开始学习
124 |             dqn_trainer.learn()
125 |             if done:
126 |                 print('Ep: {} | reward: {:.3f}'.format(i_episode, round(reward, 3)))
127 |                 all_reward.append(reward)
128 |                 break
129 | 
130 |         if done:
131 |             break
132 |         s = s_
133 | 


--------------------------------------------------------------------------------
/chapter7_RL/mount-car.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | 
 3 | import gym
 4 | 
 5 | n_states = 40  # 取样 40 个状态
 6 | iter_max = 10000
 7 | 
 8 | initial_lr = 1.0  # Learning rate
 9 | min_lr = 0.003
10 | gamma = 1.0
11 | t_max = 10000
12 | eps = 0.02
13 | 
14 | 
15 | def run_episode(env, policy=None, render=False):
16 |     obs = env.reset()
17 |     total_reward = 0
18 |     step_idx = 0
19 |     for _ in range(t_max):
20 |         if render:
21 |             env.render()
22 |         if policy is None:  # 如果没有策略，就随机取样
23 |             action = env.action_space.sample()
24 |         else:
25 |             a, b = obs_to_state(env, obs)
26 |             action = policy[a][b]
27 |         obs, reward, done, _ = env.step(action)
28 |         total_reward += gamma ** step_idx * reward
29 |         step_idx += 1
30 |         if done:
31 |             break
32 |     return total_reward
33 | 
34 | 
35 | def obs_to_state(env, obs):
36 |     """
37 |     将观察的连续环境映射到离散的输入的状态
38 |     """
39 |     env_low = env.observation_space.low
40 |     env_high = env.observation_space.high
41 |     env_dx = (env_high - env_low) / n_states
42 |     a = int((obs[0] - env_low[0]) / env_dx[0])
43 |     b = int((obs[1] - env_low[1]) / env_dx[1])
44 |     return a, b
45 | 
46 | 
47 | if __name__ == '__main__':
48 |     env_name = 'MountainCar-v0'
49 |     env = gym.make(env_name)
50 |     env.seed(0)
51 |     np.random.seed(0)
52 |     print('----- using Q Learning -----')
53 |     q_table = np.zeros((n_states, n_states, 3))
54 |     for i in range(iter_max):
55 |         obs = env.reset()
56 |         total_reward = 0
57 |         ## eta: 每一步学习率都不断减小
58 |         eta = max(min_lr, initial_lr * (0.85 ** (i // 100)))
59 |         for j in range(t_max):
60 |             x, y = obs_to_state(env, obs)
61 |             if np.random.uniform(0, 1) < eps:  # greedy 贪心算法
62 |                 action = np.random.choice(env.action_space.n)
63 |             else:
64 |                 logits = q_table[x, y, :]
65 |                 logits_exp = np.exp(logits)
66 |                 probs = logits_exp / np.sum(logits_exp)  # 算出三个动作的概率
67 |                 action = np.random.choice(env.action_space.n, p=probs)  # 依概率来选择动作
68 |             obs, reward, done, _ = env.step(action)
69 |             total_reward += reward
70 |             # 更新 q 表
71 |             x_, y_ = obs_to_state(env, obs)
72 |             q_table[x, y, action] = q_table[x, y, action] + eta * (
73 |                     reward + gamma * np.max(q_table[x_, y_, :]) -
74 |                     q_table[x, y, action])
75 |             if done:
76 |                 break
77 |         if i % 100 == 0:
78 |             print('Iteration #%d -- Total reward = %d.' % (i + 1,
79 |                                                            total_reward))
80 |     solution_policy = np.argmax(q_table, axis=2)  # 在 q 表中每个状态下都取最大的值得动作
81 |     solution_policy_scores = [
82 |         run_episode(env, solution_policy, False) for _ in range(100)
83 |     ]
84 |     print("Average score of solution = ", np.mean(solution_policy_scores))
85 |     # Animate it
86 |     run_episode(env, solution_policy, True)
87 | 


--------------------------------------------------------------------------------
/chapter7_RL/open_ai_gym.ipynb:
--------------------------------------------------------------------------------
 1 | {
 2 |  "cells": [
 3 |   {
 4 |    "cell_type": "markdown",
 5 |    "metadata": {
 6 |     "collapsed": true
 7 |    },
 8 |    "source": [
 9 |     "# Gym 介绍\n",
10 |     "前面我们简单的介绍了强化学习的例子，从这个例子可以发现，构建强化学习的环境非常麻烦，需要耗费我们大量的时间，这个时候我们可以使用一个开源的工具，叫做 gym，是由 open ai 开发的。\n",
11 |     "\n",
12 |     "在这个库中从简单的走格子到毁灭战士，提供了各种各样的游戏环境可以让大家放自己的 AI 进去玩耍。取名叫 gym 也很有意思，可以想象一群 AI 在健身房里各种锻炼，磨练技术。\n",
13 |     "\n",
14 |     "使用起来也非常方便，首先在终端内输入如下代码进行安装。\n",
15 |     "\n",
16 |     "```\n",
17 |     "# Github源\n",
18 |     "git clone https://github.com/openai/gym\n",
19 |     "cd gym\n",
20 |     "pip install -e .[all]\n",
21 |     "\n",
22 |     "# 直接下载gym包\n",
23 |     "pip install gym[all]\n",
24 |     "```\n",
25 |     "\n",
26 |     "我们可以访问这个页面看到 gym 所[包含的环境和介绍](https://github.com/openai/gym/wiki)。"
27 |    ]
28 |   },
29 |   {
30 |    "cell_type": "markdown",
31 |    "metadata": {},
32 |    "source": [
33 |     "在上面的环境页面，可以 gym 内置了很多环境，我们可以使用前面讲过的 q learning 尝试一个 gym 中的小例子，[mountain car](https://github.com/openai/gym/wiki/MountainCar-v0)。在 mounttain car，我们能够观察到环境中小车的位置，也就是坐标，我们能够采取的动作是向左或者向右。\n",
34 |     "\n",
35 |     "为了使用 q learning，我们必须要建立 q 表，而这里的状态空间是连续不可数的，所以我们需要离散化连续空间，将 x 坐标和 y 坐标都平均分成很多份，具体的实现可以运行 `mount-car.py` 看看结果。\n",
36 |     "\n",
37 |     "如果运行完之后，可以看到 q 表的收敛非常慢，reward 一直都很难变化，我们需要很久才能将小车推到终点，这个时候我们需要一个更加强大的武器，那就 deep q network。"
38 |    ]
39 |   }
40 |  ],
41 |  "metadata": {
42 |   "kernelspec": {
43 |    "display_name": "Python 3",
44 |    "language": "python",
45 |    "name": "python3"
46 |   },
47 |   "language_info": {
48 |    "codemirror_mode": {
49 |     "name": "ipython",
50 |     "version": 3
51 |    },
52 |    "file_extension": ".py",
53 |    "mimetype": "text/x-python",
54 |    "name": "python",
55 |    "nbconvert_exporter": "python",
56 |    "pygments_lexer": "ipython3",
57 |    "version": "3.6.3"
58 |   }
59 |  },
60 |  "nbformat": 4,
61 |  "nbformat_minor": 2
62 | }
63 | 


--------------------------------------------------------------------------------
/chapter7_RL/q-learning-intro.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "markdown",
  5 |    "metadata": {},
  6 |    "source": [
  7 |     "# Q Learning 介绍\n",
  8 |     "在增强学习中，有一种很有名的算法，叫做 q-learning，我们下面会从原理入手，然后通过一个简单的小例子讲一讲 q-learning。\n",
  9 |     "\n",
 10 |     "## q-learning 的原理\n",
 11 |     "我们使用一个简单的例子来导入 q-learning，假设一个屋子有 5 个房间，某一些房间之间相连，我们希望能够走出这个房间，示意图如下\n",
 12 |     "\n",
 13 |     "![](https://ws2.sinaimg.cn/large/006tNc79ly1fn70q0n91lj30h40a8aaf.jpg)"
 14 |    ]
 15 |   },
 16 |   {
 17 |    "cell_type": "markdown",
 18 |    "metadata": {},
 19 |    "source": [
 20 |     "那么我们可以将其简化成一些节点和图的形式，每个房间作为一个节点，两个房间有门相连，就在两个节点之间连接一条线，可以得到下面的图片\n",
 21 |     "\n",
 22 |     "![](https://ws4.sinaimg.cn/large/006tNc79ly1fn70r6c6koj30h60b2gm0.jpg)"
 23 |    ]
 24 |   },
 25 |   {
 26 |    "cell_type": "markdown",
 27 |    "metadata": {},
 28 |    "source": [
 29 |     "为了模拟整个过程，我们放置一个智能体在任何一个房间，希望它能够走出这个房间，也就是说希望其能够走到了 5 号节点。为了能够让智能体知道 5 号节点是目标房间，我们需要设置一些奖励，对于每一条边，我们都关联一个奖励值：直接连到目标房间的边的奖励值设置为 100，其他的边可以设置为 0，注意 5 号房间有一个指向自己的箭头，奖励值也设置为 100，其他直接指向 5 号房间的也设置为 100，这样当智能体到达 5 号房间之后，他就会选择一只待在 5 号房间，这也称为吸收目标，效果如下\n",
 30 |     "\n",
 31 |     "![](https://ws4.sinaimg.cn/large/006tNc79ly1fn71gf4idrj30c207u74i.jpg)"
 32 |    ]
 33 |   },
 34 |   {
 35 |    "cell_type": "markdown",
 36 |    "metadata": {},
 37 |    "source": [
 38 |     "想想一下智能体可以不断学习，每次我们将其放在其中一个房间，然后它可以不断探索，根据奖励值走到 5 号房间，也就是走出这个屋子。比如现在这个智能体在 2 号房间，我们希望其能够不断探索走到 5 号房间。\n",
 39 |     "\n",
 40 |     "### 状态和动作\n",
 41 |     "q-learning 中有两个重要的概念，一个是状态，一个是动作，我们将每一个房间都称为一个状态，而智能体从一个房间走到另外一个房间称为一个动作，对应于上面的图就是每个节点是一个状态，每一个箭头都是一种行动。假如智能体处在状态 4，从状态 4 其可以选择走到状态 0，或者状态 3 或者状态 5，如果其走到了状态 3，也可以选择走到状态 2 或者状态 1 或者 状态 4。\n",
 42 |     "\n",
 43 |     "我们可以根据状态和动作得到的奖励来建立一个奖励表，用 -1 表示相应节点之间没有边相连，而没有到达终点的边奖励都记为 0，如下\n",
 44 |     "\n",
 45 |     "![](https://ws2.sinaimg.cn/large/006tNc79ly1fn71o8jlinj307t055wek.jpg)"
 46 |    ]
 47 |   },
 48 |   {
 49 |    "cell_type": "markdown",
 50 |    "metadata": {},
 51 |    "source": [
 52 |     "类似的，我们可以让智能体通过和环境的交互来不断学习环境中的知识，让智能体根据每个状态来估计每种行动可能得到的收益，这个矩阵被称为 Q 表，每一行表示状态，每一列表示不同的动作，对于状态未知的情景，我们可以随机让智能体从任何的位置出发，然后去探索新的环境来尽可能的得到所有的状态。刚开始智能体对于环境一无所知，所以数值全部初始化为 0，如下\n",
 53 |     "\n",
 54 |     "![](https://ws2.sinaimg.cn/large/006tNc79ly1fn71t3h3wnj306u053jrf.jpg)\n",
 55 |     "\n",
 56 |     "我们的智能体通过不断地学习来更新 Q 表中的结果，最后依据 Q 表中的值来做决策。"
 57 |    ]
 58 |   },
 59 |   {
 60 |    "cell_type": "markdown",
 61 |    "metadata": {},
 62 |    "source": [
 63 |     "### Q-learning 算法\n",
 64 |     "有了奖励表和 Q 表，我们需要知道智能体是如何通过学习来更新 Q 表，以便最后能够根据 Q 表进行决策，这个时候就需要讲一讲 Q-learning 的算法。\n",
 65 |     "\n",
 66 |     "Q-learning 的算法特别简单，状态转移公式如下\n",
 67 |     "\n",
 68 |     "$$Q(s, a) = R(s, a) + \\gamma \\mathop{max}_{\\tilde{a}}\\{ Q(\\tilde{s}, \\tilde{a}) \\}$$\n",
 69 |     "\n",
 70 |     "其中 s, a 表示当前的状态和行动，$\\tilde{s}, \\tilde{a}$ 分别表示 s 采取 a 的动作之后的下一个状态和该状态对应所有的行动，参数 $\\gamma$ 是一个常数，$0 \\leq \\gamma \\le 1 $表示对未来奖励的一个衰减程度，形象地比喻就是一个人对于未来的远见程度。\n",
 71 |     "\n",
 72 |     "解释一下就是智能体通过经验进行自主学习，不断从一个状态转移到另外一个状态进行探索，并在这个过程中不断更新 Q 表，直到到达目标位置，Q 表就像智能体的大脑，更新越多就越强。我们称智能体的每一次探索为 episode，每个 episode 都表示智能体从任意初始状态到达目标状态，当智能体到达一个目标状态，那么当前的 episode 结束，进入下一个 episode。"
 73 |    ]
 74 |   },
 75 |   {
 76 |    "cell_type": "markdown",
 77 |    "metadata": {},
 78 |    "source": [
 79 |     "下面给出 q-learning 的整个算法流程\n",
 80 |     "- step1 给定参数 $\\gamma$ 和奖励矩阵 R\n",
 81 |     "- step2 令 Q:= 0\n",
 82 |     "- step3 For each episode:\n",
 83 |     "    - 3.1 随机选择一个初始状态 s\n",
 84 |     "    - 3.2 若未到达目标状态，则执行以下几步\n",
 85 |     "        - （1）在当前状态 s 的所有可能行动中选取一个行为 a\n",
 86 |     "        - （2）利用选定的行为 a，得到下一个状态 $\\tilde{s}$\n",
 87 |     "        - （3）按照前面的转移公式计算 Q(s, a)\n",
 88 |     "        - （4）令 $s: = \\tilde{s}$"
 89 |    ]
 90 |   },
 91 |   {
 92 |    "cell_type": "markdown",
 93 |    "metadata": {},
 94 |    "source": [
 95 |     "### 单步演示\n",
 96 |     "为了更好地理解 q-learning，我们可以示例其中一步。\n",
 97 |     "\n",
 98 |     "首先选择 $\\gamma = 0.8$，初始状态为 1，Q 初始化为零矩阵\n",
 99 |     "\n",
100 |     "![](https://ws2.sinaimg.cn/large/006tNc79ly1fn71t3h3wnj306u053jrf.jpg)\n"
101 |    ]
102 |   },
103 |   {
104 |    "cell_type": "markdown",
105 |    "metadata": {},
106 |    "source": [
107 |     "![](https://ws2.sinaimg.cn/large/006tNc79ly1fn71o8jlinj307t055wek.jpg)\n",
108 |     "\n",
109 |     "因为是状态 1，所以我们观察 R 矩阵的第二行，负数表示非法行为，所以下一个状态只有两种可能，走到状态 3 或者走到状态 5，随机地，我们可以选择走到状态 5。\n",
110 |     "\n",
111 |     "当我们走到状态 5 之后，会发生什么事情呢？观察 R 矩阵的第 6 行可以发现，其对应于三个可能采取的动作：转至状态 1，4 或者 5，根据上面的转移公式，我们有\n",
112 |     "\n",
113 |     "$$Q(1, 5) = R(1, 5) + 0.8 * max\\{Q(5, 1), Q(5, 4), Q(5, 5)\\} = 100 + 0.8 * max\\{0, 0, 0\\} = 100$$\n",
114 |     "\n",
115 |     "所以现在 Q 矩阵进行了更新，变为了\n",
116 |     "\n",
117 |     "![](https://ws2.sinaimg.cn/large/006tNc79ly1fn8182u6xlj306y04mmx6.jpg)\n",
118 |     "\n",
119 |     "现在我们的状态由 1 变成了 5，因为 5 是最终的目标状态，所以一次 episode 便完成了，进入下一个 episode。\n",
120 |     "\n",
121 |     "在下一个 episode 中又随机选择一个初始状态开始，不断更新 Q 矩阵，在经过了很多个 episode 之后，矩阵 Q 接近收敛，那么我们的智能体就学会了从任意状态转移到目标状态的最优路径。"
122 |    ]
123 |   },
124 |   {
125 |    "cell_type": "markdown",
126 |    "metadata": {},
127 |    "source": [
128 |     "从上面的原理，我们知道了 q-learning 最重要的状态转移公式，这个公式也叫做 Bellman Equation，通过这个公式我们能够不断地进行更新 Q 矩阵，最后得到一个收敛的 Q 矩阵。\n",
129 |     "\n",
130 |     "下面我们通过代码来实现这个过程\n",
131 |     "\n",
132 |     "我们定义一个简单的走迷宫过程，也就是\n",
133 |     "\n",
134 |     "![](https://ws1.sinaimg.cn/large/006tNc79ly1fn82ja4dkwj308d08d3yj.jpg)"
135 |    ]
136 |   },
137 |   {
138 |    "cell_type": "markdown",
139 |    "metadata": {},
140 |    "source": [
141 |     "初始位置随机在 state 0， state 1 和 state 2 上，然后希望智能体能够走到 state 3 获得宝藏，上面可行的行动路线已经用箭头标注了"
142 |    ]
143 |   },
144 |   {
145 |    "cell_type": "code",
146 |    "execution_count": 1,
147 |    "metadata": {
148 |     "collapsed": true
149 |    },
150 |    "outputs": [],
151 |    "source": [
152 |     "import numpy as np\n",
153 |     "import random"
154 |    ]
155 |   },
156 |   {
157 |    "cell_type": "markdown",
158 |    "metadata": {},
159 |    "source": [
160 |     "下面定义奖励矩阵，一共是 4 行，5 列，每一行分别表示 state 0 到 state 3 这四个状态，每一列分别表示上下左右和静止 5 种状态，奖励矩阵中的 0 表示不可行的路线，比如第一个行，上走和左走都是不可行的路线，都用 0 表示，向下走会走到陷阱，所以使用 -10 表示奖励，向右走和静止都给与 -1 的奖励，因为既没有触发陷阱，也没有到达宝藏，但是过程中浪费了时间。"
161 |    ]
162 |   },
163 |   {
164 |    "cell_type": "code",
165 |    "execution_count": 2,
166 |    "metadata": {
167 |     "collapsed": true
168 |    },
169 |    "outputs": [],
170 |    "source": [
171 |     "reward = np.array([[0, -10, 0, -1, -1],\n",
172 |     "                   [0, 10, -1, 0, -1],\n",
173 |     "                   [-1, 0, 0, 10, -10],\n",
174 |     "                   [-1, 0, -10, 0, 10]])"
175 |    ]
176 |   },
177 |   {
178 |    "cell_type": "markdown",
179 |    "metadata": {},
180 |    "source": [
181 |     "接下来定义一个初始化为 0 的 q 矩阵"
182 |    ]
183 |   },
184 |   {
185 |    "cell_type": "code",
186 |    "execution_count": 3,
187 |    "metadata": {
188 |     "collapsed": true
189 |    },
190 |    "outputs": [],
191 |    "source": [
192 |     "q_matrix = np.zeros((4, 5))"
193 |    ]
194 |   },
195 |   {
196 |    "cell_type": "markdown",
197 |    "metadata": {},
198 |    "source": [
199 |     "然后定义一个转移矩阵，也就是从一个状态，采取一个可行的动作之后到达的状态，因为这里的状态和动作都是有限的，所以我们可以将他们存下来，比如第一行表示 state 0，向上和向左都是不可行的路线，所以给 -1 的值表示，向下走到达了 state 2，所以第二个值为 2，向右走到达了 state 1，所以第四个值是 1，保持不同还是在 state 0，所以最后一个标注为 0，另外几行类似。"
200 |    ]
201 |   },
202 |   {
203 |    "cell_type": "code",
204 |    "execution_count": 7,
205 |    "metadata": {
206 |     "collapsed": true
207 |    },
208 |    "outputs": [],
209 |    "source": [
210 |     "transition_matrix = np.array([[-1, 2, -1, 1, 0],\n",
211 |     "                              [-1, 3, 0, -1, 1],\n",
212 |     "                              [0, -1, -1, 3, 2],\n",
213 |     "                              [1, -1, 2, -1, 3]])"
214 |    ]
215 |   },
216 |   {
217 |    "cell_type": "markdown",
218 |    "metadata": {},
219 |    "source": [
220 |     "最后定义每个状态的有效行动，比如 state 0 的有效行动就是下、右和静止，对应于 1，3 和 4"
221 |    ]
222 |   },
223 |   {
224 |    "cell_type": "code",
225 |    "execution_count": 8,
226 |    "metadata": {
227 |     "collapsed": true
228 |    },
229 |    "outputs": [],
230 |    "source": [
231 |     "valid_actions = np.array([[1, 3, 4],\n",
232 |     "                          [1, 2, 4],\n",
233 |     "                          [0, 3, 4],\n",
234 |     "                          [0, 2, 4]])"
235 |    ]
236 |   },
237 |   {
238 |    "cell_type": "code",
239 |    "execution_count": 9,
240 |    "metadata": {
241 |     "collapsed": true
242 |    },
243 |    "outputs": [],
244 |    "source": [
245 |     "# 定义 bellman equation 中的 gamma\n",
246 |     "gamma = 0.8"
247 |    ]
248 |   },
249 |   {
250 |    "cell_type": "markdown",
251 |    "metadata": {},
252 |    "source": [
253 |     "最后开始让智能体与环境交互，不断地使用 bellman 方程来更新 q 矩阵，我们跑 10 个 episode"
254 |    ]
255 |   },
256 |   {
257 |    "cell_type": "code",
258 |    "execution_count": 10,
259 |    "metadata": {},
260 |    "outputs": [
261 |     {
262 |      "name": "stdout",
263 |      "output_type": "stream",
264 |      "text": [
265 |       "episode: 0, q matrix: \n",
266 |       "[[  0.   0.   0.  -1.  -1.]\n",
267 |       " [  0.  10.  -1.   0.  -1.]\n",
268 |       " [  0.   0.   0.   0.   0.]\n",
269 |       " [  0.   0.   0.   0.   0.]]\n",
270 |       "\n",
271 |       "episode: 1, q matrix: \n",
272 |       "[[  0.   0.   0.  -1.  -1.]\n",
273 |       " [  0.  10.  -1.   0.  -1.]\n",
274 |       " [  0.   0.   0.  10.   0.]\n",
275 |       " [  0.   0.   0.   0.   0.]]\n",
276 |       "\n",
277 |       "episode: 2, q matrix: \n",
278 |       "[[  0.   -2.    0.    7.    4.6]\n",
279 |       " [  0.   10.    4.6   0.    7. ]\n",
280 |       " [ -1.8   0.    0.   10.   -2. ]\n",
281 |       " [  0.    0.    0.    0.    0. ]]\n",
282 |       "\n",
283 |       "episode: 3, q matrix: \n",
284 |       "[[  0.   -2.    0.    7.    4.6]\n",
285 |       " [  0.   10.    4.6   0.    7. ]\n",
286 |       " [  4.6   0.    0.   10.   -2. ]\n",
287 |       " [  0.    0.    0.    0.    0. ]]\n",
288 |       "\n",
289 |       "episode: 4, q matrix: \n",
290 |       "[[  0.   -2.    0.    7.    4.6]\n",
291 |       " [  0.   10.    4.6   0.    7. ]\n",
292 |       " [  4.6   0.    0.   10.   -2. ]\n",
293 |       " [  0.    0.    0.    0.    0. ]]\n",
294 |       "\n",
295 |       "episode: 5, q matrix: \n",
296 |       "[[  0.   -2.    0.    7.    4.6]\n",
297 |       " [  0.   10.    4.6   0.    7. ]\n",
298 |       " [  4.6   0.    0.   10.   -2. ]\n",
299 |       " [  0.    0.    0.    0.    0. ]]\n",
300 |       "\n",
301 |       "episode: 6, q matrix: \n",
302 |       "[[  0.   -2.    0.    7.    4.6]\n",
303 |       " [  0.   10.    4.6   0.    7. ]\n",
304 |       " [  4.6   0.    0.   10.   -2. ]\n",
305 |       " [  0.    0.    0.    0.    0. ]]\n",
306 |       "\n",
307 |       "episode: 7, q matrix: \n",
308 |       "[[  0.   -2.    0.    7.    4.6]\n",
309 |       " [  0.   10.    4.6   0.    7. ]\n",
310 |       " [  4.6   0.    0.   10.   -2. ]\n",
311 |       " [  0.    0.    0.    0.    0. ]]\n",
312 |       "\n",
313 |       "episode: 8, q matrix: \n",
314 |       "[[  0.   -2.    0.    7.    4.6]\n",
315 |       " [  0.   10.    4.6   0.    7. ]\n",
316 |       " [  4.6   0.    0.   10.   -2. ]\n",
317 |       " [  0.    0.    0.    0.    0. ]]\n",
318 |       "\n",
319 |       "episode: 9, q matrix: \n",
320 |       "[[  0.   -2.    0.    7.    4.6]\n",
321 |       " [  0.   10.    4.6   0.    7. ]\n",
322 |       " [  4.6   0.    0.   10.   -2. ]\n",
323 |       " [  0.    0.    0.    0.    0. ]]\n",
324 |       "\n"
325 |      ]
326 |     }
327 |    ],
328 |    "source": [
329 |     "for i in range(10):\n",
330 |     "    start_state = np.random.choice([0, 1, 2], size=1)[0] # 随机初始起点\n",
331 |     "    current_state = start_state\n",
332 |     "    while current_state != 3: # 判断是否到达终点\n",
333 |     "        action = random.choice(valid_actions[current_state]) # greedy 随机选择当前状态下的有效动作\n",
334 |     "        next_state = transition_matrix[current_state][action] # 通过选择的动作得到下一个状态\n",
335 |     "        future_rewards = []\n",
336 |     "        for action_nxt in valid_actions[next_state]:\n",
337 |     "            future_rewards.append(q_matrix[next_state][action_nxt]) # 得到下一个状态所有可能动作的奖励\n",
338 |     "        q_state = reward[current_state][action] + gamma * max(future_rewards) # bellman equation\n",
339 |     "        q_matrix[current_state][action] = q_state # 更新 q 矩阵\n",
340 |     "        current_state = next_state # 将下一个状态变成当前状态\n",
341 |     "        \n",
342 |     "    print('episode: {}, q matrix: \\n{}'.format(i, q_matrix))\n",
343 |     "    print()"
344 |    ]
345 |   },
346 |   {
347 |    "cell_type": "markdown",
348 |    "metadata": {
349 |     "collapsed": true
350 |    },
351 |    "source": [
352 |     "可以看到在第一次 episode 之后，智能体就学会了在 state 2 的时候向下走能够得到奖励，通过不断地学习，在 10 个 episode 之后，智能体知道，在 state 0，向右走能得到奖励，在 state 1 向下走能够得到奖励，在 state 3 向右 走能得到奖励，这样在这个环境中任何一个状态智能体都能够知道如何才能够最快地到达宝藏的位置\n",
353 |     "\n",
354 |     "从上面的例子我们简单的演示了 q-learning，可以看出自己来构建整个环境是非常麻烦的，所以我们可以通过一些第三方库来帮我们搭建强化学习的环境，其中最有名的就是 open-ai 的 gym 模块，下一章我们将介绍一下 gym。"
355 |    ]
356 |   }
357 |  ],
358 |  "metadata": {
359 |   "kernelspec": {
360 |    "display_name": "Python 3",
361 |    "language": "python",
362 |    "name": "python3"
363 |   },
364 |   "language_info": {
365 |    "codemirror_mode": {
366 |     "name": "ipython",
367 |     "version": 3
368 |    },
369 |    "file_extension": ".py",
370 |    "mimetype": "text/x-python",
371 |    "name": "python",
372 |    "nbconvert_exporter": "python",
373 |    "pygments_lexer": "ipython3",
374 |    "version": "3.6.3"
375 |   }
376 |  },
377 |  "nbformat": 4,
378 |  "nbformat_minor": 2
379 | }
380 | 


--------------------------------------------------------------------------------
/chapter8_PyTorch-Advances/example_data/image/class_1/1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/L1aoXingyu/code-of-learn-deep-learning-with-pytorch/ea24810dc26265e85e3eec34e1d091d21202d800/chapter8_PyTorch-Advances/example_data/image/class_1/1.png


--------------------------------------------------------------------------------
/chapter8_PyTorch-Advances/example_data/image/class_1/2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/L1aoXingyu/code-of-learn-deep-learning-with-pytorch/ea24810dc26265e85e3eec34e1d091d21202d800/chapter8_PyTorch-Advances/example_data/image/class_1/2.png


--------------------------------------------------------------------------------
/chapter8_PyTorch-Advances/example_data/image/class_1/3.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/L1aoXingyu/code-of-learn-deep-learning-with-pytorch/ea24810dc26265e85e3eec34e1d091d21202d800/chapter8_PyTorch-Advances/example_data/image/class_1/3.png


--------------------------------------------------------------------------------
/chapter8_PyTorch-Advances/example_data/image/class_2/10.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/L1aoXingyu/code-of-learn-deep-learning-with-pytorch/ea24810dc26265e85e3eec34e1d091d21202d800/chapter8_PyTorch-Advances/example_data/image/class_2/10.png


--------------------------------------------------------------------------------
/chapter8_PyTorch-Advances/example_data/image/class_2/11.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/L1aoXingyu/code-of-learn-deep-learning-with-pytorch/ea24810dc26265e85e3eec34e1d091d21202d800/chapter8_PyTorch-Advances/example_data/image/class_2/11.png


--------------------------------------------------------------------------------
/chapter8_PyTorch-Advances/example_data/image/class_2/12.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/L1aoXingyu/code-of-learn-deep-learning-with-pytorch/ea24810dc26265e85e3eec34e1d091d21202d800/chapter8_PyTorch-Advances/example_data/image/class_2/12.png


--------------------------------------------------------------------------------
/chapter8_PyTorch-Advances/example_data/image/class_3/16.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/L1aoXingyu/code-of-learn-deep-learning-with-pytorch/ea24810dc26265e85e3eec34e1d091d21202d800/chapter8_PyTorch-Advances/example_data/image/class_3/16.png


--------------------------------------------------------------------------------
/chapter8_PyTorch-Advances/example_data/image/class_3/17.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/L1aoXingyu/code-of-learn-deep-learning-with-pytorch/ea24810dc26265e85e3eec34e1d091d21202d800/chapter8_PyTorch-Advances/example_data/image/class_3/17.png


--------------------------------------------------------------------------------
/chapter8_PyTorch-Advances/example_data/image/class_3/18.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/L1aoXingyu/code-of-learn-deep-learning-with-pytorch/ea24810dc26265e85e3eec34e1d091d21202d800/chapter8_PyTorch-Advances/example_data/image/class_3/18.png


--------------------------------------------------------------------------------
/chapter8_PyTorch-Advances/tensorboard.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "markdown",
  5 |    "metadata": {},
  6 |    "source": [
  7 |     "# TensorBoard 可视化\n",
  8 |     "[github](https://github.com/lanpa/tensorboard-pytorch)"
  9 |    ]
 10 |   },
 11 |   {
 12 |    "cell_type": "code",
 13 |    "execution_count": 1,
 14 |    "metadata": {
 15 |     "ExecuteTime": {
 16 |      "end_time": "2017-12-24T09:39:39.910789Z",
 17 |      "start_time": "2017-12-24T09:39:39.398570Z"
 18 |     },
 19 |     "collapsed": true
 20 |    },
 21 |    "outputs": [],
 22 |    "source": [
 23 |     "import sys\n",
 24 |     "sys.path.append('..')\n",
 25 |     "\n",
 26 |     "import numpy as np\n",
 27 |     "import torch\n",
 28 |     "from torch import nn\n",
 29 |     "import torch.nn.functional as F\n",
 30 |     "from torch.autograd import Variable\n",
 31 |     "from torchvision.datasets import CIFAR10\n",
 32 |     "from utils import resnet\n",
 33 |     "from torchvision import transforms as tfs\n",
 34 |     "from datetime import datetime\n",
 35 |     "from tensorboardX import SummaryWriter"
 36 |    ]
 37 |   },
 38 |   {
 39 |    "cell_type": "code",
 40 |    "execution_count": 2,
 41 |    "metadata": {
 42 |     "ExecuteTime": {
 43 |      "end_time": "2017-12-24T09:39:41.981293Z",
 44 |      "start_time": "2017-12-24T09:39:40.621895Z"
 45 |     },
 46 |     "collapsed": true
 47 |    },
 48 |    "outputs": [],
 49 |    "source": [
 50 |     "# 使用数据增强\n",
 51 |     "def train_tf(x):\n",
 52 |     "    im_aug = tfs.Compose([\n",
 53 |     "        tfs.Resize(120),\n",
 54 |     "        tfs.RandomHorizontalFlip(),\n",
 55 |     "        tfs.RandomCrop(96),\n",
 56 |     "        tfs.ColorJitter(brightness=0.5, contrast=0.5, hue=0.5),\n",
 57 |     "        tfs.ToTensor(),\n",
 58 |     "        tfs.Normalize([0.5, 0.5, 0.5], [0.5, 0.5, 0.5])\n",
 59 |     "    ])\n",
 60 |     "    x = im_aug(x)\n",
 61 |     "    return x\n",
 62 |     "\n",
 63 |     "def test_tf(x):\n",
 64 |     "    im_aug = tfs.Compose([\n",
 65 |     "        tfs.Resize(96),\n",
 66 |     "        tfs.ToTensor(),\n",
 67 |     "        tfs.Normalize([0.5, 0.5, 0.5], [0.5, 0.5, 0.5])\n",
 68 |     "    ])\n",
 69 |     "    x = im_aug(x)\n",
 70 |     "    return x\n",
 71 |     "\n",
 72 |     "train_set = CIFAR10('./data', train=True, transform=train_tf)\n",
 73 |     "train_data = torch.utils.data.DataLoader(train_set, batch_size=256, shuffle=True, num_workers=4)\n",
 74 |     "valid_set = CIFAR10('./data', train=False, transform=test_tf)\n",
 75 |     "valid_data = torch.utils.data.DataLoader(valid_set, batch_size=256, shuffle=False, num_workers=4)\n",
 76 |     "\n",
 77 |     "net = resnet(3, 10)\n",
 78 |     "optimizer = torch.optim.SGD(net.parameters(), lr=0.1, weight_decay=1e-4)\n",
 79 |     "criterion = nn.CrossEntropyLoss()"
 80 |    ]
 81 |   },
 82 |   {
 83 |    "cell_type": "code",
 84 |    "execution_count": 3,
 85 |    "metadata": {
 86 |     "ExecuteTime": {
 87 |      "end_time": "2017-12-24T09:53:40.434024Z",
 88 |      "start_time": "2017-12-24T09:39:41.984480Z"
 89 |     },
 90 |     "collapsed": false
 91 |    },
 92 |    "outputs": [
 93 |     {
 94 |      "name": "stdout",
 95 |      "output_type": "stream",
 96 |      "text": [
 97 |       "Epoch 0. Train Loss: 1.877906, Train Acc: 0.315410, Valid Loss: 2.198587, Valid Acc: 0.293164, Time 00:00:26\n",
 98 |       "Epoch 1. Train Loss: 1.398501, Train Acc: 0.498657, Valid Loss: 1.877540, Valid Acc: 0.400098, Time 00:00:27\n",
 99 |       "Epoch 2. Train Loss: 1.141419, Train Acc: 0.597628, Valid Loss: 1.872355, Valid Acc: 0.446777, Time 00:00:27\n",
100 |       "Epoch 3. Train Loss: 0.980048, Train Acc: 0.658367, Valid Loss: 1.672951, Valid Acc: 0.475391, Time 00:00:27\n",
101 |       "Epoch 4. Train Loss: 0.871448, Train Acc: 0.695073, Valid Loss: 1.263234, Valid Acc: 0.578613, Time 00:00:28\n",
102 |       "Epoch 5. Train Loss: 0.794649, Train Acc: 0.723992, Valid Loss: 2.142715, Valid Acc: 0.466699, Time 00:00:27\n",
103 |       "Epoch 6. Train Loss: 0.736611, Train Acc: 0.741554, Valid Loss: 1.701331, Valid Acc: 0.500391, Time 00:00:27\n",
104 |       "Epoch 7. Train Loss: 0.695095, Train Acc: 0.756816, Valid Loss: 1.385478, Valid Acc: 0.597656, Time 00:00:28\n",
105 |       "Epoch 8. Train Loss: 0.652659, Train Acc: 0.773796, Valid Loss: 1.029726, Valid Acc: 0.676465, Time 00:00:27\n",
106 |       "Epoch 9. Train Loss: 0.623829, Train Acc: 0.784144, Valid Loss: 0.933388, Valid Acc: 0.682520, Time 00:00:27\n",
107 |       "Epoch 10. Train Loss: 0.581615, Train Acc: 0.798792, Valid Loss: 1.291557, Valid Acc: 0.635938, Time 00:00:27\n",
108 |       "Epoch 11. Train Loss: 0.559358, Train Acc: 0.805708, Valid Loss: 1.430408, Valid Acc: 0.586426, Time 00:00:28\n",
109 |       "Epoch 12. Train Loss: 0.534197, Train Acc: 0.816853, Valid Loss: 0.960802, Valid Acc: 0.704785, Time 00:00:27\n",
110 |       "Epoch 13. Train Loss: 0.512111, Train Acc: 0.822389, Valid Loss: 0.923353, Valid Acc: 0.716602, Time 00:00:27\n",
111 |       "Epoch 14. Train Loss: 0.494577, Train Acc: 0.828225, Valid Loss: 1.023517, Valid Acc: 0.687207, Time 00:00:27\n",
112 |       "Epoch 15. Train Loss: 0.473396, Train Acc: 0.835212, Valid Loss: 0.842679, Valid Acc: 0.727930, Time 00:00:27\n",
113 |       "Epoch 16. Train Loss: 0.459708, Train Acc: 0.840290, Valid Loss: 0.826854, Valid Acc: 0.726953, Time 00:00:28\n",
114 |       "Epoch 17. Train Loss: 0.433836, Train Acc: 0.847931, Valid Loss: 0.730658, Valid Acc: 0.764258, Time 00:00:27\n",
115 |       "Epoch 18. Train Loss: 0.422375, Train Acc: 0.854401, Valid Loss: 0.677953, Valid Acc: 0.778125, Time 00:00:27\n",
116 |       "Epoch 19. Train Loss: 0.410208, Train Acc: 0.857370, Valid Loss: 0.787286, Valid Acc: 0.754102, Time 00:00:27\n",
117 |       "Epoch 20. Train Loss: 0.395556, Train Acc: 0.862923, Valid Loss: 0.859754, Valid Acc: 0.738965, Time 00:00:27\n",
118 |       "Epoch 21. Train Loss: 0.382050, Train Acc: 0.866554, Valid Loss: 1.266704, Valid Acc: 0.651660, Time 00:00:27\n",
119 |       "Epoch 22. Train Loss: 0.368614, Train Acc: 0.871213, Valid Loss: 0.912465, Valid Acc: 0.738672, Time 00:00:27\n",
120 |       "Epoch 23. Train Loss: 0.358302, Train Acc: 0.873964, Valid Loss: 0.963238, Valid Acc: 0.706055, Time 00:00:27\n",
121 |       "Epoch 24. Train Loss: 0.347568, Train Acc: 0.879620, Valid Loss: 0.777171, Valid Acc: 0.751855, Time 00:00:27\n",
122 |       "Epoch 25. Train Loss: 0.339247, Train Acc: 0.882215, Valid Loss: 0.707863, Valid Acc: 0.777734, Time 00:00:27\n",
123 |       "Epoch 26. Train Loss: 0.329292, Train Acc: 0.885830, Valid Loss: 0.682976, Valid Acc: 0.790527, Time 00:00:27\n",
124 |       "Epoch 27. Train Loss: 0.313049, Train Acc: 0.890761, Valid Loss: 0.665912, Valid Acc: 0.795410, Time 00:00:27\n",
125 |       "Epoch 28. Train Loss: 0.305482, Train Acc: 0.891944, Valid Loss: 0.880263, Valid Acc: 0.743848, Time 00:00:27\n",
126 |       "Epoch 29. Train Loss: 0.301507, Train Acc: 0.895289, Valid Loss: 1.062325, Valid Acc: 0.708398, Time 00:00:27\n"
127 |      ]
128 |     }
129 |    ],
130 |    "source": [
131 |     "writer = SummaryWriter()\n",
132 |     "\n",
133 |     "def get_acc(output, label):\n",
134 |     "    total = output.shape[0]\n",
135 |     "    _, pred_label = output.max(1)\n",
136 |     "    num_correct = (pred_label == label).sum().data[0]\n",
137 |     "    return num_correct / total\n",
138 |     "\n",
139 |     "if torch.cuda.is_available():\n",
140 |     "    net = net.cuda()\n",
141 |     "prev_time = datetime.now()\n",
142 |     "for epoch in range(30):\n",
143 |     "    train_loss = 0\n",
144 |     "    train_acc = 0\n",
145 |     "    net = net.train()\n",
146 |     "    for im, label in train_data:\n",
147 |     "        if torch.cuda.is_available():\n",
148 |     "            im = Variable(im.cuda())  # (bs, 3, h, w)\n",
149 |     "            label = Variable(label.cuda())  # (bs, h, w)\n",
150 |     "        else:\n",
151 |     "            im = Variable(im)\n",
152 |     "            label = Variable(label)\n",
153 |     "        # forward\n",
154 |     "        output = net(im)\n",
155 |     "        loss = criterion(output, label)\n",
156 |     "        # backward\n",
157 |     "        optimizer.zero_grad()\n",
158 |     "        loss.backward()\n",
159 |     "        optimizer.step()\n",
160 |     "\n",
161 |     "        train_loss += loss.data[0]\n",
162 |     "        train_acc += get_acc(output, label)\n",
163 |     "    cur_time = datetime.now()\n",
164 |     "    h, remainder = divmod((cur_time - prev_time).seconds, 3600)\n",
165 |     "    m, s = divmod(remainder, 60)\n",
166 |     "    time_str = \"Time %02d:%02d:%02d\" % (h, m, s)\n",
167 |     "    valid_loss = 0\n",
168 |     "    valid_acc = 0\n",
169 |     "    net = net.eval()\n",
170 |     "    for im, label in valid_data:\n",
171 |     "        if torch.cuda.is_available():\n",
172 |     "            im = Variable(im.cuda(), volatile=True)\n",
173 |     "            label = Variable(label.cuda(), volatile=True)\n",
174 |     "        else:\n",
175 |     "            im = Variable(im, volatile=True)\n",
176 |     "            label = Variable(label, volatile=True)\n",
177 |     "        output = net(im)\n",
178 |     "        loss = criterion(output, label)\n",
179 |     "        valid_loss += loss.data[0]\n",
180 |     "        valid_acc += get_acc(output, label)\n",
181 |     "    epoch_str = (\n",
182 |     "                \"Epoch %d. Train Loss: %f, Train Acc: %f, Valid Loss: %f, Valid Acc: %f, \"\n",
183 |     "                % (epoch, train_loss / len(train_data),\n",
184 |     "                   train_acc / len(train_data), valid_loss / len(valid_data),\n",
185 |     "                   valid_acc / len(valid_data)))\n",
186 |     "    prev_time = cur_time\n",
187 |     "    # ====================== 使用 tensorboard ==================\n",
188 |     "    writer.add_scalars('Loss', {'train': train_loss / len(train_data),\n",
189 |     "                                'valid': valid_loss / len(valid_data)}, epoch)\n",
190 |     "    writer.add_scalars('Acc', {'train': train_acc / len(train_data),\n",
191 |     "                               'valid': valid_acc / len(valid_data)}, epoch)\n",
192 |     "    # =========================================================\n",
193 |     "    print(epoch_str + time_str)"
194 |    ]
195 |   },
196 |   {
197 |    "cell_type": "markdown",
198 |    "metadata": {},
199 |    "source": [
200 |     "![](https://ws1.sinaimg.cn/large/006tNc79ly1fms31s3i4yj31gc0qimy6.jpg)"
201 |    ]
202 |   }
203 |  ],
204 |  "metadata": {
205 |   "kernelspec": {
206 |    "display_name": "mx",
207 |    "language": "python",
208 |    "name": "mx"
209 |   },
210 |   "language_info": {
211 |    "codemirror_mode": {
212 |     "name": "ipython",
213 |     "version": 3
214 |    },
215 |    "file_extension": ".py",
216 |    "mimetype": "text/x-python",
217 |    "name": "python",
218 |    "nbconvert_exporter": "python",
219 |    "pygments_lexer": "ipython3",
220 |    "version": "3.6.0"
221 |   }
222 |  },
223 |  "nbformat": 4,
224 |  "nbformat_minor": 2
225 | }
226 | 


--------------------------------------------------------------------------------
/chapter9_Computer-Vision/Deep-Dream/README.md:
--------------------------------------------------------------------------------
1 | # Deep-Dream
2 | PyTorch implement of Google Deep Dream
3 | 


--------------------------------------------------------------------------------
/chapter9_Computer-Vision/Deep-Dream/backward/backward.py:
--------------------------------------------------------------------------------
 1 | import torch
 2 | from torch.autograd import Variable
 3 | 
 4 | # simple gradient
 5 | a = Variable(torch.FloatTensor([2, 3]), requires_grad=True)
 6 | b = a + 3
 7 | c = b * b * 3
 8 | out = c.mean()
 9 | out.backward()
10 | print('*' * 10)
11 | print('=====simple gradient======')
12 | print('input')
13 | print(a.data)
14 | print('compute result is')
15 | print(out.data[0])
16 | print('input gradients are')
17 | print(a.grad.data)
18 | 
19 | # backward on non-scalar output
20 | m = Variable(torch.FloatTensor([[2, 3]]), requires_grad=True)
21 | n = Variable(torch.zeros(1, 2))
22 | n[0, 0] = m[0, 0]**2
23 | n[0, 1] = m[0, 1]**3
24 | n.backward(torch.FloatTensor([[1, 1]]))
25 | print('*' * 10)
26 | print('=====non scalar output======')
27 | print('input')
28 | print(m.data)
29 | print('input gradients are')
30 | print(m.grad.data)
31 | 
32 | # jacobian
33 | j = torch.zeros(2, 2)
34 | k = Variable(torch.zeros(1, 2))
35 | m.grad.data.zero_()
36 | k[0, 0] = m[0, 0]**2 + 3 * m[0, 1]
37 | k[0, 1] = m[0, 1]**2 + 2 * m[0, 0]
38 | k.backward(torch.FloatTensor([[1, 0]]), retain_variables=True)
39 | j[:, 0] = m.grad.data
40 | m.grad.data.zero_()
41 | k.backward(torch.FloatTensor([[0, 1]]))
42 | j[:, 1] = m.grad.data
43 | print('jacobian matrix is')
44 | print(j)
45 | 
46 | # compute jacobian matrix
47 | x = torch.FloatTensor([2, 1]).view(1, 2)
48 | x = Variable(x, requires_grad=True)
49 | y = Variable(torch.FloatTensor([[1, 2], [3, 4]]))
50 | 
51 | z = torch.mm(x, y)
52 | jacobian = torch.zeros((2, 2))
53 | z.backward(
54 |     torch.FloatTensor([[1, 0]]), retain_variables=True)  # dz1/dx1, dz2/dx1
55 | jacobian[:, 0] = x.grad.data
56 | x.grad.data.zero_()
57 | z.backward(torch.FloatTensor([[0, 1]]))  # dz1/dx2, dz2/dx2
58 | jacobian[:, 1] = x.grad.data
59 | print('=========jacobian========')
60 | print('x')
61 | print(x.data)
62 | print('y')
63 | print(y.data)
64 | print('compute result')
65 | print(z.data)
66 | print('jacobian matrix is')
67 | print(jacobian)


--------------------------------------------------------------------------------
/chapter9_Computer-Vision/Deep-Dream/deepdream.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | import torch
 3 | from util import showtensor
 4 | import scipy.ndimage as nd
 5 | from torch.autograd import Variable
 6 | 
 7 | 
 8 | def objective_L2(dst, guide_features):
 9 |     return dst.data
10 | 
11 | 
12 | def make_step(img, model, control=None, distance=objective_L2):
13 |     mean = np.array([0.485, 0.456, 0.406]).reshape([3, 1, 1])
14 |     std = np.array([0.229, 0.224, 0.225]).reshape([3, 1, 1])
15 | 
16 |     learning_rate = 2e-2
17 |     max_jitter = 32
18 |     num_iterations = 20
19 |     show_every = 10
20 |     end_layer = 3
21 |     guide_features = control
22 | 
23 |     for i in range(num_iterations):
24 |         shift_x, shift_y = np.random.randint(-max_jitter, max_jitter + 1, 2)
25 |         img = np.roll(np.roll(img, shift_x, -1), shift_y, -2)
26 |         # apply jitter shift
27 |         model.zero_grad()
28 |         img_tensor = torch.Tensor(img)
29 |         if torch.cuda.is_available():
30 |             img_variable = Variable(img_tensor.cuda(), requires_grad=True)
31 |         else:
32 |             img_variable = Variable(img_tensor, requires_grad=True)
33 | 
34 |         act_value = model.forward(img_variable, end_layer)
35 |         diff_out = distance(act_value, guide_features)
36 |         act_value.backward(diff_out)
37 |         ratio = np.abs(img_variable.grad.data.cpu().numpy()).mean()
38 |         learning_rate_use = learning_rate / ratio
39 |         img_variable.data.add_(img_variable.grad.data * learning_rate_use)
40 |         img = img_variable.data.cpu().numpy()  # b, c, h, w
41 |         img = np.roll(np.roll(img, -shift_x, -1), -shift_y, -2)
42 |         img[0, :, :, :] = np.clip(img[0, :, :, :], -mean / std,
43 |                                   (1 - mean) / std)
44 |         if i == 0 or (i + 1) % show_every == 0:
45 |             showtensor(img)
46 |     return img
47 | 
48 | 
49 | def dream(model,
50 |           base_img,
51 |           octave_n=6,
52 |           octave_scale=1.4,
53 |           control=None,
54 |           distance=objective_L2):
55 |     octaves = [base_img]
56 |     for i in range(octave_n - 1):
57 |         octaves.append(
58 |             nd.zoom(
59 |                 octaves[-1], (1, 1, 1.0 / octave_scale, 1.0 / octave_scale),
60 |                 order=1))
61 | 
62 |     detail = np.zeros_like(octaves[-1])
63 |     for octave, octave_base in enumerate(octaves[::-1]):
64 |         h, w = octave_base.shape[-2:]
65 |         if octave > 0:
66 |             h1, w1 = detail.shape[-2:]
67 |             detail = nd.zoom(
68 |                 detail, (1, 1, 1.0 * h / h1, 1.0 * w / w1), order=1)
69 | 
70 |         input_oct = octave_base + detail
71 |         print(input_oct.shape)
72 |         out = make_step(input_oct, model, control, distance=distance)
73 |         detail = out - octave_base
74 | 


--------------------------------------------------------------------------------
/chapter9_Computer-Vision/Deep-Dream/guide_image/flower.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/L1aoXingyu/code-of-learn-deep-learning-with-pytorch/ea24810dc26265e85e3eec34e1d091d21202d800/chapter9_Computer-Vision/Deep-Dream/guide_image/flower.jpg


--------------------------------------------------------------------------------
/chapter9_Computer-Vision/Deep-Dream/guide_image/input.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/L1aoXingyu/code-of-learn-deep-learning-with-pytorch/ea24810dc26265e85e3eec34e1d091d21202d800/chapter9_Computer-Vision/Deep-Dream/guide_image/input.png


--------------------------------------------------------------------------------
/chapter9_Computer-Vision/Deep-Dream/guide_image/kitten.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/L1aoXingyu/code-of-learn-deep-learning-with-pytorch/ea24810dc26265e85e3eec34e1d091d21202d800/chapter9_Computer-Vision/Deep-Dream/guide_image/kitten.jpg


--------------------------------------------------------------------------------
/chapter9_Computer-Vision/Deep-Dream/resnet.py:
--------------------------------------------------------------------------------
 1 | __author__ = 'SherlockLiao'
 2 | 
 3 | import torch
 4 | from torch import nn
 5 | from torchvision import models
 6 | import torch.utils.model_zoo as model_zoo
 7 | 
 8 | 
 9 | model_urls = {
10 |     'resnet18': 'https://download.pytorch.org/models/resnet18-5c106cde.pth',
11 |     'resnet34': 'https://download.pytorch.org/models/resnet34-333f7ec4.pth',
12 |     'resnet50': 'https://download.pytorch.org/models/resnet50-19c8e357.pth',
13 |     'resnet101': 'https://download.pytorch.org/models/resnet101-5d3b4d8f.pth',
14 |     'resnet152': 'https://download.pytorch.org/models/resnet152-b121ed2d.pth',
15 | }
16 | 
17 | 
18 | class Bottleneck(nn.Module):
19 |     expansion = 4
20 | 
21 |     def __init__(self, inplanes, planes, stride=1, downsample=None):
22 |         super(Bottleneck, self).__init__()
23 |         self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1, bias=False)
24 |         self.bn1 = nn.BatchNorm2d(planes)
25 |         self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride,
26 |                                padding=1, bias=False)
27 |         self.bn2 = nn.BatchNorm2d(planes)
28 |         self.conv3 = nn.Conv2d(planes, planes * 4, kernel_size=1, bias=False)
29 |         self.bn3 = nn.BatchNorm2d(planes * 4)
30 |         self.relu = nn.ReLU(inplace=True)
31 |         self.downsample = downsample
32 |         self.stride = stride
33 | 
34 |     def forward(self, x):
35 |         residual = x
36 | 
37 |         out = self.conv1(x)
38 |         out = self.bn1(out)
39 |         out = self.relu(out)
40 | 
41 |         out = self.conv2(out)
42 |         out = self.bn2(out)
43 |         out = self.relu(out)
44 | 
45 |         out = self.conv3(out)
46 |         out = self.bn3(out)
47 | 
48 |         if self.downsample is not None:
49 |             residual = self.downsample(x)
50 | 
51 |         out += residual
52 |         out = self.relu(out)
53 | 
54 |         return out
55 | 
56 | 
57 | class CustomResNet(models.resnet.ResNet):
58 |     def forward(self, x, end_layer):
59 |         """
60 |         end_layer range from 1 to 4
61 |         """
62 |         x = self.conv1(x)
63 |         x = self.bn1(x)
64 |         x = self.relu(x)
65 |         x = self.maxpool(x)
66 | 
67 |         layers = [self.layer1, self.layer2, self.layer3, self.layer4]
68 |         for i in range(end_layer):
69 |             x = layers[i](x)
70 |         return x
71 | 
72 | 
73 | def resnet50(pretrained=False, **kwargs):
74 |     model = CustomResNet(Bottleneck, [3, 4, 6, 3], **kwargs)
75 |     if pretrained:
76 |         model.load_state_dict(model_zoo.load_url(model_urls['resnet50']))
77 |     return model
78 | 


--------------------------------------------------------------------------------
/chapter9_Computer-Vision/Deep-Dream/sky.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/L1aoXingyu/code-of-learn-deep-learning-with-pytorch/ea24810dc26265e85e3eec34e1d091d21202d800/chapter9_Computer-Vision/Deep-Dream/sky.jpg


--------------------------------------------------------------------------------
/chapter9_Computer-Vision/Deep-Dream/util.py:
--------------------------------------------------------------------------------
 1 | import PIL.Image
 2 | from io import BytesIO
 3 | from IPython.display import clear_output, Image, display
 4 | import numpy as np
 5 | 
 6 | 
 7 | def showarray(a, fmt='jpeg'):
 8 |     a = np.uint8(np.clip(a, 0, 255))
 9 |     f = BytesIO()
10 |     PIL.Image.fromarray(a).save(f, fmt)
11 |     display(Image(data=f.getvalue()))
12 | 
13 | 
14 | def showtensor(a):
15 |     mean = np.array([0.485, 0.456, 0.406]).reshape([1, 1, 3])
16 |     std = np.array([0.229, 0.224, 0.225]).reshape([1, 1, 3])
17 |     inp = a[0, :, :, :]
18 |     inp = inp.transpose(1, 2, 0)
19 |     inp = std * inp + mean
20 |     inp *= 255
21 |     showarray(inp)
22 |     clear_output(wait=True)
23 | 


--------------------------------------------------------------------------------
/chapter9_Computer-Vision/fine_tune/READMD.md:
--------------------------------------------------------------------------------
 1 | ## Fine Tune 教程
 2 | 
 3 | ### Requirements
 4 | 
 5 | [PyTorch 0.3](http://pytorch.org/)
 6 | 
 7 | [MxTorch](https://github.com/SherlockLiao/mxtorch)
 8 | 
 9 | [tensorboardX](https://github.com/lanpa/tensorboard-pytorch)
10 | 
11 | 按照 pytorch 官网安装 pytorch，将 mxtorch 下载下来，放到根目录，安装 tensorboardX 实现 tensorboard 可视化
12 | 
13 | ```bash
14 | \fine_tune
15 | 	\mxtorch
16 | 	\hymenoptera_data
17 | 		\train
18 | 		\val
19 | 	\checkpoints
20 | 	config.py
21 | 	main.py
22 | 	get_data.sh
23 | ```
24 | 
25 | 
26 | 
27 | ### 下载数据
28 | 
29 | 打开终端，运行 bash 脚本来获取数据
30 | 
31 | ```bash
32 | bash get_data.sh
33 | ```
34 | 
35 | 
36 | 
37 | ### 训练模型
38 | 
39 | 所有的配置文件都放在 config.py 里面，通过下面的代码来训练模型
40 | 
41 | ```bash
42 | python main.py train
43 | ```
44 | 
45 | 也可以在终端修改配置，比如改变 epochs 和 batch_size
46 | 
47 | ```bash
48 | python main.py train \ 
49 | 	--max_epochs=100 \
50 | 	--batch_size=16
51 | ```
52 | 
53 | 


--------------------------------------------------------------------------------
/chapter9_Computer-Vision/fine_tune/config.py:
--------------------------------------------------------------------------------
 1 | # encoding: utf-8
 2 | """
 3 | @author: xyliao
 4 | @contact: xyliao1993@qq.com
 5 | """
 6 | import warnings
 7 | from pprint import pprint
 8 | 
 9 | 
10 | class DefaultConfig(object):
11 |     model = 'resnet50'
12 |     # Dataset.
13 |     train_data_path = './hymenoptera_data/train/'
14 |     test_data_path = './hymenoptera_data/val/'
15 | 
16 |     # Store result and save models.
17 |     # result_file = 'result.txt'
18 |     save_file = './checkpoints/'
19 |     save_freq = 30  # save model every N epochs
20 |     save_best = True  # If save best test metric model.
21 | 
22 |     # Visualization results on tensorboard.
23 |     # vis_dir = './vis/'
24 |     plot_freq = 100  # plot in tensorboard every N iterations
25 | 
26 |     # Model hyperparameters.
27 |     use_gpu = True  # use GPU or not
28 |     ctx = 0  # running on which cuda device
29 |     batch_size = 64  # batch size
30 |     num_workers = 4  # how many workers for loading data
31 |     max_epoch = 30
32 |     lr = 1e-2  # initial learning rate
33 |     momentum = 0
34 |     weight_decay = 1e-4
35 |     lr_decay = 0.95
36 |     # lr_decay_freq = 10
37 | 
38 |     def _parse(self, kwargs):
39 |         for k, v in kwargs.items():
40 |             if not hasattr(self, k):
41 |                 warnings.warn("Warning: opt has not attribut %s" % k)
42 |             setattr(self, k, v)
43 | 
44 |         print('=========user config==========')
45 |         pprint(self._state_dict())
46 |         print('============end===============')
47 | 
48 |     def _state_dict(self):
49 |         return {k: getattr(self, k) for k, _ in DefaultConfig.__dict__.items()
50 |                 if not k.startswith('_')}
51 | 
52 | 
53 | opt = DefaultConfig()
54 | 


--------------------------------------------------------------------------------
/chapter9_Computer-Vision/fine_tune/get_data.sh:
--------------------------------------------------------------------------------
1 | # Download data.
2 | wget https://download.pytorch.org/tutorial/hymenoptera_data.zip
3 | 
4 | unzip hymenoptera_data.zip


--------------------------------------------------------------------------------
/chapter9_Computer-Vision/fine_tune/main.py:
--------------------------------------------------------------------------------
  1 | # encoding: utf-8
  2 | """
  3 | @author: xyliao
  4 | @contact: xyliao1993@qq.com
  5 | """
  6 | import copy
  7 | 
  8 | import torch
  9 | from config import opt
 10 | from mxtorch import meter
 11 | from mxtorch import transforms as tfs
 12 | from mxtorch.trainer import *
 13 | from mxtorch.vision import model_zoo
 14 | from torch import nn
 15 | from torch.autograd import Variable
 16 | from torch.utils.data import DataLoader
 17 | from torchvision.datasets import ImageFolder
 18 | from tqdm import tqdm
 19 | 
 20 | train_tf = tfs.Compose([
 21 |     tfs.RandomResizedCrop(224),
 22 |     tfs.RandomHorizontalFlip(),
 23 |     tfs.ToTensor(),
 24 |     tfs.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
 25 | ])
 26 | 
 27 | 
 28 | def test_tf(img):
 29 |     img = tfs.Resize(256)(img)
 30 |     img, _ = tfs.CenterCrop(224)(img)
 31 |     normalize = tfs.Compose([
 32 |         tfs.ToTensor(),
 33 |         tfs.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
 34 |     ])
 35 |     img = normalize(img)
 36 |     return img
 37 | 
 38 | 
 39 | def get_train_data():
 40 |     train_set = ImageFolder(opt.train_data_path, train_tf)
 41 |     return DataLoader(
 42 |         train_set, opt.batch_size, True, num_workers=opt.num_workers)
 43 | 
 44 | 
 45 | def get_test_data():
 46 |     test_set = ImageFolder(opt.test_data_path, test_tf)
 47 |     return DataLoader(
 48 |         test_set, opt.batch_size, True, num_workers=opt.num_workers)
 49 | 
 50 | 
 51 | def get_model():
 52 |     model = model_zoo.resnet50(pretrained=True)
 53 |     model.fc = nn.Linear(2048, 2)
 54 |     if opt.use_gpu:
 55 |         model = model.cuda(opt.ctx)
 56 |     return model
 57 | 
 58 | 
 59 | def get_loss(score, label):
 60 |     return nn.CrossEntropyLoss()(score, label)
 61 | 
 62 | 
 63 | def get_optimizer(model):
 64 |     optimizer = torch.optim.SGD(
 65 |         model.parameters(),
 66 |         lr=opt.lr,
 67 |         momentum=opt.momentum,
 68 |         weight_decay=opt.weight_decay)
 69 |     return ScheduledOptim(optimizer)
 70 | 
 71 | 
 72 | class FineTuneTrainer(Trainer):
 73 |     def __init__(self):
 74 |         model = get_model()
 75 |         criterion = get_loss
 76 |         optimizer = get_optimizer(model)
 77 |         super().__init__(model, criterion, optimizer)
 78 | 
 79 |         self.metric_meter['loss'] = meter.AverageValueMeter()
 80 |         self.metric_meter['acc'] = meter.AverageValueMeter()
 81 | 
 82 |     def train(self, kwargs):
 83 |         self.reset_meter()
 84 |         self.model.train()
 85 |         train_data = kwargs['train_data']
 86 |         for data in tqdm(train_data):
 87 |             img, label = data
 88 |             if opt.use_gpu:
 89 |                 img = img.cuda(opt.ctx)
 90 |                 label = label.cuda(opt.ctx)
 91 |             img = Variable(img)
 92 |             label = Variable(label)
 93 | 
 94 |             # Forward.
 95 |             score = self.model(img)
 96 |             loss = self.criterion(score, label)
 97 | 
 98 |             # Backward.
 99 |             self.optimizer.zero_grad()
100 |             loss.backward()
101 |             self.optimizer.step()
102 | 
103 |             # Update meters.
104 |             acc = (score.max(1)[1] == label).float().mean()
105 |             self.metric_meter['loss'].add(loss.data[0])
106 |             self.metric_meter['acc'].add(acc.data[0])
107 | 
108 |             # Update to tensorboard.
109 |             if (self.n_iter + 1) % opt.plot_freq == 0:
110 |                 self.writer.add_scalars(
111 |                     'loss', {'train': self.metric_meter['loss'].value()[0]},
112 |                     self.n_plot)
113 |                 self.writer.add_scalars(
114 |                     'acc', {'train': self.metric_meter['acc'].value()[0]},
115 |                     self.n_plot)
116 |                 self.n_plot += 1
117 |             self.n_iter += 1
118 | 
119 |         # Log the train metric dict to print result.
120 |         self.metric_log['train loss'] = self.metric_meter['loss'].value()[0]
121 |         self.metric_log['train acc'] = self.metric_meter['acc'].value()[0]
122 | 
123 |     def test(self, kwargs):
124 |         self.reset_meter()
125 |         self.model.eval()
126 |         test_data = kwargs['test_data']
127 |         for data in tqdm(test_data):
128 |             img, label = data
129 |             if opt.use_gpu:
130 |                 img = img.cuda(opt.ctx)
131 |                 label = label.cuda(opt.ctx)
132 |             img = Variable(img, volatile=True)
133 |             label = Variable(label, volatile=True)
134 | 
135 |             score = self.model(img)
136 |             loss = self.criterion(score, label)
137 |             acc = (score.max(1)[1] == label).float().mean()
138 | 
139 |             self.metric_meter['loss'].add(loss.data[0])
140 |             self.metric_meter['acc'].add(acc.data[0])
141 | 
142 |         # Update to tensorboard.
143 |         self.writer.add_scalars('loss',
144 |                                 {'test': self.metric_meter['loss'].value()[0]},
145 |                                 self.n_plot)
146 |         self.writer.add_scalars(
147 |             'acc', {'test': self.metric_meter['acc'].value()[0]}, self.n_plot)
148 |         self.n_plot += 1
149 | 
150 |         # Log the test metric to dict.
151 |         self.metric_log['test loss'] = self.metric_meter['loss'].value()[0]
152 |         self.metric_log['test acc'] = self.metric_meter['acc'].value()[0]
153 | 
154 |     def get_best_model(self):
155 |         if self.metric_log['test loss'] < self.best_metric:
156 |             self.best_model = copy.deepcopy(self.model.state_dict())
157 |             self.best_metric = self.metric_log['test loss']
158 | 
159 | 
160 | def train(**kwargs):
161 |     opt._parse(kwargs)
162 | 
163 |     train_data = get_train_data()
164 |     test_data = get_test_data()
165 | 
166 |     fine_tune_trainer = FineTuneTrainer()
167 |     fine_tune_trainer.fit(train_data=train_data, test_data=test_data)
168 | 
169 | 
170 | if __name__ == '__main__':
171 |     import fire
172 | 
173 |     fire.Fire()
174 | 


--------------------------------------------------------------------------------
/chapter9_Computer-Vision/kaggle_dog_vs_cat/README.md:
--------------------------------------------------------------------------------
1 | # kaggle competition
2 | ## dog vs cat
3 | 
4 | This is my first competition in Kaggle.
5 | 


--------------------------------------------------------------------------------
/chapter9_Computer-Vision/kaggle_dog_vs_cat/model/dataset.py:
--------------------------------------------------------------------------------
 1 | __author__ = 'SherlockLiao'
 2 | 
 3 | import torch
 4 | from torch.utils.data import Dataset
 5 | import h5py
 6 | 
 7 | 
 8 | class h5Dataset(Dataset):
 9 | 
10 |     def __init__(self, h5py_list):
11 |         label_file = h5py.File(h5py_list[0], 'r')
12 |         self.label = torch.from_numpy(label_file['label'].value)
13 |         self.nSamples = self.label.size(0)
14 |         temp_dataset = torch.FloatTensor()
15 |         for file in h5py_list:
16 |             h5_file = h5py.File(file, 'r')
17 |             dataset = torch.from_numpy(h5_file['data'].value)
18 |             temp_dataset = torch.cat((temp_dataset, dataset), 1)
19 | 
20 |         self.dataset = temp_dataset
21 | 
22 |     def __len__(self):
23 |         return self.nSamples
24 | 
25 |     def __getitem__(self, index):
26 |         assert index < len(self), 'index range error'
27 |         data = self.dataset[index]
28 |         label = self.label[index]
29 |         return (data, label)
30 | 


--------------------------------------------------------------------------------
/chapter9_Computer-Vision/kaggle_dog_vs_cat/model/feature_extraction.py:
--------------------------------------------------------------------------------
  1 | __author__ = 'SherlockLiao'
  2 | 
  3 | import os
  4 | from tqdm import tqdm
  5 | import h5py
  6 | import numpy as np
  7 | import argparse
  8 | 
  9 | import torch
 10 | from torchvision import models, transforms
 11 | from torch import optim, nn
 12 | from torch.autograd import Variable
 13 | from torchvision.datasets import ImageFolder
 14 | from torch.utils.data import DataLoader
 15 | from net import feature_net, classifier
 16 | 
 17 | parse = argparse.ArgumentParser()
 18 | parse.add_argument(
 19 |     '--model', required=True, help='vgg, inceptionv3, resnet152')
 20 | parse.add_argument('--bs', type=int, default=32)
 21 | parse.add_argument('--phase', required=True, help='train, val')
 22 | opt = parse.parse_args()
 23 | print(opt)
 24 | 
 25 | img_transform = transforms.Compose([
 26 |     transforms.Scale(320),
 27 |     transforms.CenterCrop(299),
 28 |     transforms.ToTensor(),
 29 |     transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
 30 | ])
 31 | 
 32 | root = '/media/sherlock/Files/kaggle_dog_vs_cat/data'
 33 | data_folder = {
 34 |     'train': ImageFolder(os.path.join(root, 'train'), transform=img_transform),
 35 |     'val': ImageFolder(os.path.join(root, 'val'), transform=img_transform)
 36 | }
 37 | 
 38 | # define dataloader to load images
 39 | batch_size = opt.bs
 40 | dataloader = {
 41 |     'train':
 42 |     DataLoader(
 43 |         data_folder['train'],
 44 |         batch_size=batch_size,
 45 |         shuffle=False,
 46 |         num_workers=4),
 47 |     'val':
 48 |     DataLoader(
 49 |         data_folder['val'],
 50 |         batch_size=batch_size,
 51 |         shuffle=False,
 52 |         num_workers=4)
 53 | }
 54 | 
 55 | # get train data size and validation data size
 56 | data_size = {
 57 |     'train': len(dataloader['train'].dataset),
 58 |     'val': len(dataloader['val'].dataset)
 59 | }
 60 | 
 61 | # get numbers of classes
 62 | img_classes = len(dataloader['train'].dataset.classes)
 63 | 
 64 | # test if using GPU
 65 | use_gpu = torch.cuda.is_available()
 66 | 
 67 | 
 68 | def CreateFeature(model, phase, outputPath='.'):
 69 |     """
 70 |     Create h5py dataset for feature extraction.
 71 | 
 72 |     ARGS:
 73 |         outputPath    : h5py output path
 74 |         model         : used model
 75 |         labelList     : list of corresponding groundtruth texts
 76 |     """
 77 |     featurenet = feature_net(model)
 78 |     if use_gpu:
 79 |         featurenet.cuda()
 80 |     feature_map = torch.FloatTensor()
 81 |     label_map = torch.LongTensor()
 82 |     for data in tqdm(dataloader[phase]):
 83 |         img, label = data
 84 |         if use_gpu:
 85 |             img = Variable(img, volatile=True).cuda()
 86 |         else:
 87 |             img = Variable(img, volatile=True)
 88 |         out = featurenet(img)
 89 |         feature_map = torch.cat((feature_map, out.cpu().data), 0)
 90 |         label_map = torch.cat((label_map, label), 0)
 91 |     feature_map = feature_map.numpy()
 92 |     label_map = label_map.numpy()
 93 |     file_name = '_feature_{}.hd5f'.format(model)
 94 |     h5_path = os.path.join(outputPath, phase) + file_name
 95 |     with h5py.File(h5_path, 'w') as h:
 96 |         h.create_dataset('data', data=feature_map)
 97 |         h.create_dataset('label', data=label_map)
 98 | 
 99 | 
100 | CreateFeature(opt.model, opt.phase)
101 | 


--------------------------------------------------------------------------------
/chapter9_Computer-Vision/kaggle_dog_vs_cat/model/feature_train.py:
--------------------------------------------------------------------------------
  1 | __author__ = 'SherlockLiao'
  2 | 
  3 | import argparse
  4 | import time
  5 | import os
  6 | 
  7 | import torch
  8 | from torch import nn
  9 | from torch.autograd import Variable
 10 | from torch import optim
 11 | from torch.utils.data import DataLoader
 12 | 
 13 | from dataset import h5Dataset
 14 | from net import classifier
 15 | 
 16 | parse = argparse.ArgumentParser()
 17 | parse.add_argument(
 18 |     '--model',
 19 |     nargs='+',
 20 |     help='inceptionv3, vgg, resnet152',
 21 |     default=['vgg', 'inceptionv3', 'resnet152'])
 22 | parse.add_argument('--batch_size', type=int, default=64)
 23 | parse.add_argument('--epoch', type=int, default=20)
 24 | parse.add_argument('--n_classes', default=2, type=int)
 25 | parse.add_argument('--num_workers', type=int, default=8)
 26 | opt = parse.parse_args()
 27 | print(opt)
 28 | 
 29 | root = '/media/sherlock/Files/kaggle_dog_vs_cat/'
 30 | train_list = ['train_feature_{}.hd5f'.format(i) for i in opt.model]
 31 | val_list = ['val_feature_{}.hd5f'.format(i) for i in opt.model]
 32 | 
 33 | dataset = {'train': h5Dataset(train_list), 'val': h5Dataset(val_list)}
 34 | 
 35 | datasize = {
 36 |     'train': dataset['train'].dataset.size(0),
 37 |     'val': dataset['val'].dataset.size(0)
 38 | }
 39 | 
 40 | batch_size = opt.batch_size
 41 | epoches = opt.epoch
 42 | 
 43 | dataloader = {
 44 |     'train':
 45 |     DataLoader(
 46 |         dataset['train'],
 47 |         batch_size=batch_size,
 48 |         shuffle=True,
 49 |         num_workers=opt.num_workers),
 50 |     'val':
 51 |     DataLoader(
 52 |         dataset['val'],
 53 |         batch_size=batch_size,
 54 |         shuffle=False,
 55 |         num_workers=opt.num_workers)
 56 | }
 57 | 
 58 | dimension = dataset['train'].dataset.size(1)
 59 | 
 60 | mynet = classifier(dimension, opt.n_classes)
 61 | mynet.cuda()
 62 | 
 63 | criterion = nn.CrossEntropyLoss()
 64 | optimizer = optim.SGD(mynet.parameters(), lr=1e-3)
 65 | # train
 66 | for epoch in range(epoches):
 67 |     print('{}'.format(epoch + 1))
 68 |     print('*' * 10)
 69 |     print('Train')
 70 |     mynet.train()
 71 |     since = time.time()
 72 | 
 73 |     running_loss = 0.0
 74 |     running_acc = 0.0
 75 |     for i, data in enumerate(dataloader['train'], 1):
 76 |         feature, label = data
 77 |         feature = Variable(feature).cuda()
 78 |         label = Variable(label).cuda()
 79 | 
 80 |         # forward
 81 |         out = mynet(feature)
 82 |         loss = criterion(out, label)
 83 |         # backward
 84 |         optimizer.zero_grad()
 85 |         loss.backward()
 86 |         optimizer.step()
 87 | 
 88 |         running_loss += loss.data[0] * label.size(0)
 89 |         _, pred = torch.max(out, 1)
 90 |         num_correct = torch.sum(pred == label)
 91 |         running_acc += num_correct.data[0]
 92 |         if i % 50 == 0:
 93 |             print('Loss: {:.6f}, Acc: {:.6f}'.format(running_loss / (
 94 |                 i * batch_size), running_acc / (i * batch_size)))
 95 | 
 96 |     running_loss /= datasize['train']
 97 |     running_acc /= datasize['train']
 98 |     eplise_time = time.time() - since
 99 |     print('Loss: {:.6f}, Acc: {:.6f}, Time: {:.0f}s'.format(
100 |         running_loss, running_acc, eplise_time))
101 |     print('Validation')
102 |     mynet.eval()
103 |     num_correct = 0.0
104 |     eval_loss = 0.0
105 |     for data in dataloader['val']:
106 |         feature, label = data
107 |         feature = Variable(feature, volatile=True).cuda()
108 |         label = Variable(label, volatile=True).cuda()
109 |         # forward
110 |         out = mynet(feature)
111 |         loss = criterion(out, label)
112 | 
113 |         _, pred = torch.max(out, 1)
114 |         correct = torch.sum(pred == label)
115 |         num_correct += correct.data[0]
116 |         eval_loss += loss.data[0] * label.size(0)
117 | 
118 |     print('Loss: {:.6f}, Acc: {:.6f}'.format(eval_loss / datasize['val'],
119 |                                              num_correct / datasize['val']))
120 | print('Finish Training!')
121 | 
122 | save_path = os.path.join(root, 'model_save')
123 | if not os.path.exists(save_path):
124 |     os.mkdir(save_path)
125 | 
126 | torch.save(mynet.state_dict(), save_path + '/feature_model.pth')
127 | 


--------------------------------------------------------------------------------
/chapter9_Computer-Vision/kaggle_dog_vs_cat/model/fix_train.py:
--------------------------------------------------------------------------------
  1 | __author__ = 'SherlockLiao'
  2 | 
  3 | import os
  4 | import time
  5 | 
  6 | import torch
  7 | from torchvision import models, transforms
  8 | from torch import optim, nn
  9 | from torch.autograd import Variable
 10 | from torchvision.datasets import ImageFolder
 11 | from torch.utils.data import DataLoader
 12 | 
 13 | # define image transforms to do data augumentation
 14 | data_transforms = {
 15 |     'train':
 16 |     transforms.Compose([
 17 |         transforms.RandomSizedCrop(299),
 18 |         transforms.RandomHorizontalFlip(),
 19 |         transforms.ToTensor(),
 20 |         transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
 21 |     ]),
 22 |     'val':
 23 |     transforms.Compose([
 24 |         transforms.Scale(320),
 25 |         transforms.CenterCrop(299),
 26 |         transforms.ToTensor(),
 27 |         transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
 28 |     ])
 29 | }
 30 | 
 31 | # define data folder using ImageFolder to get images and classes from folder
 32 | root = '/media/sherlock/Files/kaggle_dog_vs_cat/'
 33 | data_folder = {
 34 |     'train':
 35 |     ImageFolder(
 36 |         os.path.join(root, 'data/train'), transform=data_transforms['train']),
 37 |     'val':
 38 |     ImageFolder(
 39 |         os.path.join(root, 'data/val'), transform=data_transforms['val'])
 40 | }
 41 | 
 42 | # define dataloader to load images
 43 | batch_size = 32
 44 | dataloader = {
 45 |     'train':
 46 |     DataLoader(
 47 |         data_folder['train'],
 48 |         batch_size=batch_size,
 49 |         shuffle=True,
 50 |         num_workers=4),
 51 |     'val':
 52 |     DataLoader(data_folder['val'], batch_size=batch_size, num_workers=4)
 53 | }
 54 | 
 55 | # get train data size and validation data size
 56 | data_size = {
 57 |     'train': len(dataloader['train'].dataset),
 58 |     'val': len(dataloader['val'].dataset)
 59 | }
 60 | 
 61 | # get numbers of classes
 62 | img_classes = len(dataloader['train'].dataset.classes)
 63 | 
 64 | # test if using GPU
 65 | use_gpu = torch.cuda.is_available()
 66 | fix_param = True
 67 | # define model
 68 | transfer_model = models.resnet18(pretrained=True)
 69 | if fix_param:
 70 |     for param in transfer_model.parameters():
 71 |         param.requires_grad = False
 72 | dim_in = transfer_model.fc.in_features
 73 | transfer_model.fc = nn.Linear(dim_in, 2)
 74 | if use_gpu:
 75 |     transfer_model = transfer_model.cuda()
 76 | 
 77 | # define optimize function and loss function
 78 | if fix_param:
 79 |     optimizer = optim.Adam(transfer_model.fc.parameters(), lr=1e-3)
 80 | else:
 81 |     optimizer = optim.Adam(transfer_model.parameters(), lr=1e-3)
 82 | criterion = nn.CrossEntropyLoss()
 83 | 
 84 | # train
 85 | num_epoch = 10
 86 | 
 87 | for epoch in range(num_epoch):
 88 |     print('{}/{}'.format(epoch + 1, num_epoch))
 89 |     print('*' * 10)
 90 |     print('Train')
 91 |     transfer_model.train()
 92 |     running_loss = 0.0
 93 |     running_acc = 0.0
 94 |     since = time.time()
 95 |     for i, data in enumerate(dataloader['train'], 1):
 96 |         img, label = data
 97 |         if use_gpu:
 98 |             img = img.cuda()
 99 |             label = label.cuda()
100 |         img = Variable(img)
101 |         label = Variable(label)
102 | 
103 |         # forward
104 |         out = transfer_model(img)
105 |         loss = criterion(out, label)
106 |         _, pred = torch.max(out, 1)
107 | 
108 |         # backward
109 |         optimizer.zero_grad()
110 |         loss.backward()
111 |         optimizer.step()
112 | 
113 |         running_loss += loss.data[0] * label.size(0)
114 |         num_correct = torch.sum(pred == label)
115 |         running_acc += num_correct.data[0]
116 |         if i % 100 == 0:
117 |             print('Loss: {:.6f}, Acc: {:.4f}'.format(running_loss / (
118 |                 i * batch_size), running_acc / (i * batch_size)))
119 |     running_loss /= data_size['train']
120 |     running_acc /= data_size['train']
121 |     elips_time = time.time() - since
122 |     print('Loss: {:.6f}, Acc: {:.4f}, Time: {:.0f}s'.format(
123 |         running_loss, running_acc, elips_time))
124 |     print('Validation')
125 |     transfer_model.eval()
126 |     num_correct = 0.0
127 |     total = 0.0
128 |     eval_loss = 0.0
129 |     for data in dataloader['val']:
130 |         img, label = data
131 |         img = Variable(img, volatile=True).cuda()
132 |         label = Variable(label, volatile=True).cuda()
133 |         out = transfer_model(img)
134 |         _, pred = torch.max(out.data, 1)
135 |         loss = criterion(out, label)
136 |         eval_loss += loss.data[0] * label.size(0)
137 |         num_correct += (pred.cpu() == label.data.cpu()).sum()
138 |         total += label.size(0)
139 |     print('Loss: {:.6f} Acc: {:.4f}'.format(eval_loss / total, num_correct /
140 |                                             total))
141 |     print()
142 | print('Finish Training!')
143 | print()
144 | save_path = os.path.join(root, 'model_save')
145 | if not os.path.exists(save_path):
146 |     os.mkdir(save_path)
147 | torch.save(transfer_model.state_dict(), save_path + '/resnet18.pth')
148 | 


--------------------------------------------------------------------------------
/chapter9_Computer-Vision/kaggle_dog_vs_cat/model/net.py:
--------------------------------------------------------------------------------
 1 | __author__ = 'SherlockLiao'
 2 | 
 3 | import torch
 4 | from torchvision import models
 5 | from torch import nn
 6 | 
 7 | 
 8 | class feature_net(nn.Module):
 9 |     def __init__(self, model):
10 |         super(feature_net, self).__init__()
11 | 
12 |         if model == 'vgg':
13 |             vgg = models.vgg19(pretrained=True)
14 |             self.feature = nn.Sequential(*list(vgg.children())[:-1])
15 |             self.feature.add_module('global average', nn.AvgPool2d(9))
16 |         elif model == 'inceptionv3':
17 |             inception = models.inception_v3(pretrained=True)
18 |             self.feature = nn.Sequential(*list(inception.children())[:-1])
19 |             self.feature._modules.pop('13')
20 |             self.feature.add_module('global average', nn.AvgPool2d(35))
21 |         elif model == 'resnet152':
22 |             resnet = models.resnet152(pretrained=True)
23 |             self.feature = nn.Sequential(*list(resnet.children())[:-1])
24 | 
25 |     def forward(self, x):
26 |         """
27 |         model includes vgg19, inceptionv3, resnet152
28 |         """
29 |         x = self.feature(x)
30 |         x = x.view(x.size(0), -1)
31 |         return x
32 | 
33 | 
34 | class classifier(nn.Module):
35 |     def __init__(self, dim, n_classes):
36 |         super(classifier, self).__init__()
37 |         self.fc = nn.Sequential(
38 |             nn.Linear(dim, 1000),
39 |             nn.ReLU(True),
40 |             nn.Dropout(0.5),
41 |             nn.Linear(1000, n_classes)
42 |         )
43 | 
44 |     def forward(self, x):
45 |         x = self.fc(x)
46 |         return x
47 | 


--------------------------------------------------------------------------------
/chapter9_Computer-Vision/neural-transfer/README.md:
--------------------------------------------------------------------------------
1 | # neural-transfer
2 | This is my implement of neural-transfer according to http://pytorch.org/tutorials/advanced/neural_style_tutorial.html#sphx-glr-advanced-neural-style-tutorial-py
3 | 


--------------------------------------------------------------------------------
/chapter9_Computer-Vision/neural-transfer/build_model.py:
--------------------------------------------------------------------------------
 1 | import torch
 2 | import torch.nn as nn
 3 | import torchvision.models as models
 4 | 
 5 | import loss
 6 | 
 7 | vgg = models.vgg19(pretrained=True).features
 8 | if torch.cuda.is_available():
 9 |     vgg = vgg.cuda()
10 | 
11 | content_layers_default = ['conv_4']
12 | style_layers_default = ['conv_1', 'conv_2', 'conv_3', 'conv_4', 'conv_5']
13 | 
14 | 
15 | def get_style_model_and_loss(style_img,
16 |                              content_img,
17 |                              cnn=vgg,
18 |                              style_weight=1000,
19 |                              content_weight=1,
20 |                              content_layers=content_layers_default,
21 |                              style_layers=style_layers_default):
22 | 
23 |     content_loss_list = []
24 |     style_loss_list = []
25 | 
26 |     model = nn.Sequential()
27 |     if torch.cuda.is_available():
28 |         model = model.cuda()
29 |     gram = loss.Gram()
30 |     if torch.cuda.is_available():
31 |         gram = gram.cuda()
32 | 
33 |     i = 1
34 |     for layer in cnn:
35 |         if isinstance(layer, nn.Conv2d):
36 |             name = 'conv_' + str(i)
37 |             model.add_module(name, layer)
38 | 
39 |             if name in content_layers_default:
40 |                 target = model(content_img)
41 |                 content_loss = loss.Content_Loss(target, content_weight)
42 |                 model.add_module('content_loss_' + str(i), content_loss)
43 |                 content_loss_list.append(content_loss)
44 | 
45 |             if name in style_layers_default:
46 |                 target = model(style_img)
47 |                 target = gram(target)
48 |                 style_loss = loss.Style_Loss(target, style_weight)
49 |                 model.add_module('style_loss_' + str(i), style_loss)
50 |                 style_loss_list.append(style_loss)
51 | 
52 |             i += 1
53 |         if isinstance(layer, nn.MaxPool2d):
54 |             name = 'pool_' + str(i)
55 |             model.add_module(name, layer)
56 | 
57 |         if isinstance(layer, nn.ReLU):
58 |             name = 'relu' + str(i)
59 |             model.add_module(name, layer)
60 | 
61 |     return model, style_loss_list, content_loss_list
62 | 


--------------------------------------------------------------------------------
/chapter9_Computer-Vision/neural-transfer/load_img.py:
--------------------------------------------------------------------------------
 1 | import PIL.Image as Image
 2 | import torchvision.transforms as transforms
 3 | 
 4 | img_size = 512
 5 | 
 6 | 
 7 | def load_img(img_path):
 8 |     img = Image.open(img_path).convert('RGB')
 9 |     img = img.resize((img_size, img_size))
10 |     img = transforms.ToTensor()(img)
11 |     img = img.unsqueeze(0)
12 |     return img
13 | 
14 | 
15 | def show_img(img):
16 |     img = img.squeeze(0)
17 |     img = transforms.ToPILImage()(img)
18 |     img.show()
19 | 


--------------------------------------------------------------------------------
/chapter9_Computer-Vision/neural-transfer/loss.py:
--------------------------------------------------------------------------------
 1 | import torch.nn as nn
 2 | import torch
 3 | 
 4 | 
 5 | class Content_Loss(nn.Module):
 6 |     def __init__(self, target, weight):
 7 |         super(Content_Loss, self).__init__()
 8 |         self.weight = weight
 9 |         self.target = target.detach() * self.weight
10 |         # 必须要用detach来分离出target，这时候target不再是一个Variable，这是为了动态计算梯度，否则forward会出错，不能向前传播
11 |         self.criterion = nn.MSELoss()
12 | 
13 |     def forward(self, input):
14 |         self.loss = self.criterion(input * self.weight, self.target)
15 |         out = input.clone()
16 |         return out
17 | 
18 |     def backward(self, retain_variabels=True):
19 |         self.loss.backward(retain_variables=retain_variabels)
20 |         return self.loss
21 | 
22 | 
23 | class Gram(nn.Module):
24 |     def __init__(self):
25 |         super(Gram, self).__init__()
26 | 
27 |     def forward(self, input):
28 |         a, b, c, d = input.size()
29 |         feature = input.view(a * b, c * d)
30 |         gram = torch.mm(feature, feature.t())
31 |         gram /= (a * b * c * d)
32 |         return gram
33 | 
34 | 
35 | class Style_Loss(nn.Module):
36 |     def __init__(self, target, weight):
37 |         super(Style_Loss, self).__init__()
38 |         self.weight = weight
39 |         self.target = target.detach() * self.weight
40 |         self.gram = Gram()
41 |         self.criterion = nn.MSELoss()
42 | 
43 |     def forward(self, input):
44 |         G = self.gram(input) * self.weight
45 |         self.loss = self.criterion(G, self.target)
46 |         out = input.clone()
47 |         return out
48 | 
49 |     def backward(self, retain_variabels=True):
50 |         self.loss.backward(retain_variables=retain_variabels)
51 |         return self.loss
52 | 


--------------------------------------------------------------------------------
/chapter9_Computer-Vision/neural-transfer/picture/content.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/L1aoXingyu/code-of-learn-deep-learning-with-pytorch/ea24810dc26265e85e3eec34e1d091d21202d800/chapter9_Computer-Vision/neural-transfer/picture/content.png


--------------------------------------------------------------------------------
/chapter9_Computer-Vision/neural-transfer/picture/saved_picture.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/L1aoXingyu/code-of-learn-deep-learning-with-pytorch/ea24810dc26265e85e3eec34e1d091d21202d800/chapter9_Computer-Vision/neural-transfer/picture/saved_picture.png


--------------------------------------------------------------------------------
/chapter9_Computer-Vision/neural-transfer/picture/style.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/L1aoXingyu/code-of-learn-deep-learning-with-pytorch/ea24810dc26265e85e3eec34e1d091d21202d800/chapter9_Computer-Vision/neural-transfer/picture/style.png


--------------------------------------------------------------------------------
/chapter9_Computer-Vision/neural-transfer/run_code.py:
--------------------------------------------------------------------------------
 1 | import torch.nn as nn
 2 | import torch.optim as optim
 3 | 
 4 | from build_model import get_style_model_and_loss
 5 | 
 6 | 
 7 | def get_input_param_optimier(input_img):
 8 |     """
 9 |     input_img is a Variable
10 |     """
11 |     input_param = nn.Parameter(input_img.data)
12 |     optimizer = optim.LBFGS([input_param])
13 |     return input_param, optimizer
14 | 
15 | 
16 | def run_style_transfer(content_img, style_img, input_img, num_epoches=300):
17 |     print('Building the style transfer model..')
18 |     model, style_loss_list, content_loss_list = get_style_model_and_loss(
19 |         style_img, content_img)
20 |     input_param, optimizer = get_input_param_optimier(input_img)
21 | 
22 |     print('Opimizing...')
23 |     epoch = [0]
24 |     while epoch[0] < num_epoches:
25 | 
26 |         def closure():
27 |             input_param.data.clamp_(0, 1)
28 | 
29 |             model(input_param)
30 |             style_score = 0
31 |             content_score = 0
32 | 
33 |             optimizer.zero_grad()
34 |             for sl in style_loss_list:
35 |                 style_score += sl.backward()
36 |             for cl in content_loss_list:
37 |                 content_score += cl.backward()
38 | 
39 |             epoch[0] += 1
40 |             if epoch[0] % 50 == 0:
41 |                 print('run {}'.format(epoch))
42 |                 print('Style Loss: {:.4f} Content Loss: {:.4f}'.format(
43 |                     style_score.data[0], content_score.data[0]))
44 |                 print()
45 | 
46 |             return style_score + content_score
47 | 
48 |         optimizer.step(closure)
49 | 
50 |         input_param.data.clamp_(0, 1)
51 | 
52 |     return input_param.data
53 | 


--------------------------------------------------------------------------------
/chapter9_Computer-Vision/segmentation/README.md:
--------------------------------------------------------------------------------
 1 | ## 语意分割教程
 2 | 
 3 | ### Requirements
 4 | 
 5 | [PyTorch 0.3](http://pytorch.org/)
 6 | 
 7 | [MxTorch](https://github.com/SherlockLiao/mxtorch)
 8 | 
 9 | [tensorboardX](https://github.com/lanpa/tensorboard-pytorch)
10 | 
11 | 按照 pytorch 官网安装 pytorch，将 mxtorch 下载下来，放到根目录，安装 tensorboardX 实现 tensorboard 可视化
12 | 
13 | ```bash
14 | \segmentation
15 | 	\mxtorch
16 | 	\data
17 | 	\models
18 | 	\dataset
19 | 	\checkpoints
20 | 	config.py
21 | 	main.py
22 | ```
23 | 
24 | 
25 | 
26 | ### 下载数据
27 | 
28 | 打开终端，运行 bash 脚本来获取数据
29 | 
30 | ```bash
31 | bash get_data.sh
32 | ```
33 | 
34 | 
35 | 
36 | ### 训练模型
37 | 
38 | 所有的配置文件都放在 config.py 里面，通过下面的代码来训练模型
39 | 
40 | ```bash
41 | python main.py train
42 | ```
43 | 
44 | 也可以在终端修改配置，比如改变 epochs 和 batch_size
45 | 
46 | ```bash
47 | python main.py train \ 
48 | 	--max_epochs=100 \
49 | 	--batch_size=16
50 | ```
51 | 
52 | 
53 | 
54 | ### 训练效果
55 | 
56 | #### 准确率，iou 和 loss
57 | 
58 | ![](https://ws3.sinaimg.cn/large/006tNc79gy1fojg2ye52uj30td07sgm6.jpg)
59 | 
60 | #### 分割效果
61 | 
62 | ![](https://ws1.sinaimg.cn/large/006tNc79gy1fojg42xvvaj30us0haq4o.jpg)
63 | 
64 | 
65 | 
66 | ![](https://ws3.sinaimg.cn/large/006tNc79gy1fojiid8vpbj30hk0fvq3l.jpg)


--------------------------------------------------------------------------------
/chapter9_Computer-Vision/segmentation/config.py:
--------------------------------------------------------------------------------
 1 | # encoding: utf-8
 2 | """
 3 | @author: xyliao
 4 | @contact: xyliao1993@qq.com
 5 | """
 6 | import warnings
 7 | from pprint import pprint
 8 | 
 9 | 
10 | class DefaultConfig(object):
11 |     model = 'FcnResNet'
12 | 
13 |     # Dataset.
14 |     voc_root = './dataset/VOCdevkit/VOC2012/'
15 |     crop_size = (320, 480)
16 |     num_classes = 21
17 | 
18 |     # Store result and save models.
19 |     result_file = 'result.txt'
20 |     save_file = './checkpoints/'
21 |     save_freq = 20  # save model every N epochs
22 |     save_best = True  # If save best test metric model.
23 | 
24 |     # Visualization parameters.
25 |     vis_dir = './vis/'
26 |     plot_freq = 30  # plot in tensorboard every N iterations
27 | 
28 |     # Model hyperparameters.
29 |     use_gpu = True  # use GPU or not
30 |     ctx = 0  # running on which cuda device
31 |     batch_size = 32  # batch size
32 |     num_workers = 4  # how many workers for loading data
33 |     max_epoch = 80
34 |     lr = 1e-2  # initial learning rate
35 |     lr_decay = 0.1
36 |     lr_decay_freq = 50
37 |     weight_decay = 1e-4
38 | 
39 |     def _parse(self, kwargs):
40 |         for k, v in kwargs.items():
41 |             if not hasattr(self, k):
42 |                 warnings.warn("Warning: opt has not attribute %s" % k)
43 |             setattr(self, k, v)
44 | 
45 |         print('=========user config==========')
46 |         pprint(self._state_dict())
47 |         print('============end===============')
48 | 
49 |     def _state_dict(self):
50 |         return {
51 |             k: getattr(self, k)
52 |             for k, _ in DefaultConfig.__dict__.items() if not k.startswith('_')
53 |         }
54 | 
55 | 
56 | opt = DefaultConfig()
57 | 


--------------------------------------------------------------------------------
/chapter9_Computer-Vision/segmentation/data/__init__.py:
--------------------------------------------------------------------------------
1 | # encoding: utf-8
2 | """
3 | @author: xyliao
4 | @contact: xyliao1993@qq.com
5 | """
6 | from .voc import VocSegDataset, img_transforms, COLORMAP, CLASSES, inverse_normalization
7 | 


--------------------------------------------------------------------------------
/chapter9_Computer-Vision/segmentation/data/voc.py:
--------------------------------------------------------------------------------
  1 | # encoding: utf-8
  2 | """
  3 | @author: xyliao
  4 | @contact: xyliao1993@qq.com
  5 | """
  6 | import os
  7 | 
  8 | import numpy as np
  9 | import torch
 10 | from PIL import Image
 11 | from mxtorch import transforms as tfs
 12 | 
 13 | 
 14 | def read_images(root, train):
 15 |     txt_fname = os.path.join(root, 'ImageSets/Segmentation/') + ('train.txt' if train else 'val.txt')
 16 |     with open(txt_fname, 'r') as f:
 17 |         images = f.read().split()
 18 |     data = [os.path.join(root, 'JPEGImages', i + '.jpg') for i in images]
 19 |     label = [os.path.join(root, 'SegmentationClass', i + '.png') for i in images]
 20 |     return data, label
 21 | 
 22 | 
 23 | def random_crop(data, label, crop_size):
 24 |     height, width = crop_size
 25 |     data, rect = tfs.RandomCrop((height, width))(data)
 26 |     label = tfs.FixedCrop(*rect)(label)
 27 |     return data, label
 28 | 
 29 | 
 30 | def image2label(img):
 31 |     cm2lbl = np.zeros(256 ** 3)
 32 |     for i, cm in enumerate(COLORMAP):
 33 |         cm2lbl[(cm[0] * 256 + cm[1]) * 256 + cm[2]] = i
 34 | 
 35 |     data = np.array(img, dtype=np.int32)
 36 |     idx = (data[:, :, 0] * 256 + data[:, :, 1] * 256 + data[:, :, 2])
 37 |     return np.array(cm2lbl[idx], dtype=np.int64)
 38 | 
 39 | 
 40 | def img_transforms(img, label, crop_size):
 41 |     img, label = random_crop(img, label, crop_size)
 42 |     img_tfs = tfs.Compose([
 43 |         tfs.ToTensor(),
 44 |         tfs.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
 45 |     ])
 46 | 
 47 |     img = img_tfs(img)
 48 |     label = image2label(label)
 49 |     label = torch.from_numpy(label)
 50 |     return img, label
 51 | 
 52 | 
 53 | def inverse_normalization(img):
 54 |     """Convert normalized image to origin image.
 55 | 
 56 |     :param img:(~torch.FloatTensor) normalized image, (C, H, W)
 57 |     :return:
 58 |         Origin image.
 59 |     """
 60 |     img = img * torch.FloatTensor([0.229, 0.224, 0.225])[:, None, None] \
 61 |           + torch.FloatTensor([0.485, 0.456, 0.406])[:, None, None]
 62 |     origin_img = torch.clamp(img, min=0, max=1) * 255
 63 |     origin_img = origin_img.permute(1, 2, 0).numpy()
 64 |     return origin_img.astype(np.uint8)
 65 | 
 66 | 
 67 | class VocSegDataset(object):
 68 |     def __init__(self, voc_root, train, crop_size, transforms):
 69 |         self.crop_size = crop_size
 70 |         self.transforms = transforms
 71 |         data_list, label_list = read_images(voc_root, train)
 72 |         self.data_list = self._filter(data_list)
 73 |         self.label_list = self._filter(label_list)
 74 | 
 75 |     def _filter(self, images):
 76 |         return [img for img in images if (Image.open(img).size[1] >= self.crop_size[0] and
 77 |                                           Image.open(img).size[0] >= self.crop_size[1])]
 78 | 
 79 |     def __getitem__(self, item):
 80 |         img = self.data_list[item]
 81 |         label = self.label_list[item]
 82 |         img = Image.open(img)
 83 |         label = Image.open(label).convert('RGB')
 84 |         img, label = self.transforms(img, label, self.crop_size)
 85 |         return img, label
 86 | 
 87 |     def __len__(self):
 88 |         return len(self.data_list)
 89 | 
 90 | 
 91 | CLASSES = ['background', 'aeroplane', 'bicycle', 'bird', 'boat',
 92 |            'bottle', 'bus', 'car', 'cat', 'chair', 'cow', 'diningtable',
 93 |            'dog', 'horse', 'motorbike', 'person', 'potted plant',
 94 |            'sheep', 'sofa', 'train', 'tv/monitor']
 95 | 
 96 | # RGB color for each class.
 97 | COLORMAP = [[0, 0, 0], [128, 0, 0], [0, 128, 0], [128, 128, 0], [0, 0, 128],
 98 |             [128, 0, 128], [0, 128, 128], [128, 128, 128], [64, 0, 0], [192, 0, 0],
 99 |             [64, 128, 0], [192, 128, 0], [64, 0, 128], [192, 0, 128],
100 |             [64, 128, 128], [192, 128, 128], [0, 64, 0], [128, 64, 0],
101 |             [0, 192, 0], [128, 192, 0], [0, 64, 128]]
102 | 


--------------------------------------------------------------------------------
/chapter9_Computer-Vision/segmentation/get_data.sh:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env bash
 2 | 
 3 | wget http://host.robots.ox.ac.uk/pascal/VOC/voc2012/VOCtrainval_11-May-2012.tar
 4 | 
 5 | if [ ! -e ./dataset ]; then
 6 |     mkdir ./dataset
 7 | fi
 8 | 
 9 | tar -xf VOCtrainval_11-May-2012.tar -C ./dataset
10 | rm VOCtrainval_11-May-2012.tar
11 | 


--------------------------------------------------------------------------------
/chapter9_Computer-Vision/segmentation/main.py:
--------------------------------------------------------------------------------
  1 | # encoding: utf-8
  2 | """
  3 | @author: xyliao
  4 | @contact: xyliao1993@qq.com
  5 | """
  6 | import warnings
  7 | from copy import deepcopy
  8 | 
  9 | import numpy as np
 10 | import torch
 11 | import torch.nn.functional as F
 12 | from mxtorch import meter
 13 | from mxtorch.trainer import Trainer, ScheduledOptim
 14 | from mxtorch.vision.eval_tools import eval_semantic_segmentation
 15 | from torch.autograd import Variable
 16 | from torch.utils.data import DataLoader
 17 | from tqdm import tqdm
 18 | 
 19 | import models
 20 | from config import opt
 21 | from data import VocSegDataset, img_transforms, COLORMAP, inverse_normalization
 22 | 
 23 | warnings.filterwarnings('ignore')
 24 | 
 25 | cm = np.array(COLORMAP, dtype=np.uint8)
 26 | 
 27 | 
 28 | def get_data(is_train):
 29 |     voc_data = VocSegDataset(opt.voc_root, is_train, opt.crop_size,
 30 |                              img_transforms)
 31 |     return DataLoader(
 32 |         voc_data, opt.batch_size, True, num_workers=opt.num_workers)
 33 | 
 34 | 
 35 | def get_model(num_classes):
 36 |     model = getattr(models, opt.model)(num_classes)
 37 |     if opt.use_gpu:
 38 |         model.cuda()
 39 |     return model
 40 | 
 41 | 
 42 | def get_optimizer(model):
 43 |     optimizer = torch.optim.SGD(
 44 |         model.parameters(), lr=opt.lr, weight_decay=opt.weight_decay)
 45 |     return ScheduledOptim(optimizer)
 46 | 
 47 | 
 48 | def get_loss(scores, labels):
 49 |     scores = F.log_softmax(scores, dim=1)
 50 |     return torch.nn.NLLLoss2d()(scores, labels)
 51 | 
 52 | 
 53 | all_metrcis = ['loss', 'acc', 'iou']
 54 | 
 55 | 
 56 | class FcnTrainer(Trainer):
 57 |     def __init__(self):
 58 |         model = get_model(opt.num_classes)
 59 |         criterion = get_loss
 60 |         optimizer = get_optimizer(model)
 61 | 
 62 |         super().__init__(model=model, criterion=criterion, optimizer=optimizer)
 63 | 
 64 |         self.config += ('Crop size: ' + str(opt.crop_size) + '\n')
 65 |         self.best_metric = 0
 66 |         for m in all_metrcis:
 67 |             self.metric_meter[m] = meter.AverageValueMeter()
 68 | 
 69 |     def train(self, kwargs):
 70 |         self.reset_meter()
 71 |         self.model.train()
 72 |         train_data = kwargs['train_data']
 73 |         for data in tqdm(train_data):
 74 |             imgs, labels = data
 75 |             if opt.use_gpu:
 76 |                 imgs = imgs.cuda()
 77 |                 labels = labels.cuda()
 78 |             imgs = Variable(imgs)
 79 |             labels = Variable(labels)
 80 | 
 81 |             # Forward.
 82 |             scores = self.model(imgs)
 83 |             loss = self.criterion(scores, labels)
 84 | 
 85 |             # Backward.
 86 |             self.optimizer.zero_grad()
 87 |             loss.backward()
 88 |             self.optimizer.step()
 89 | 
 90 |             # Update to metrics.
 91 |             pred_labels = scores.max(dim=1)[1].data.cpu().numpy()
 92 |             pred_labels = [i for i in pred_labels]
 93 | 
 94 |             true_labels = labels.data.cpu().numpy()
 95 |             true_labels = [i for i in true_labels]
 96 | 
 97 |             eval_metrics = eval_semantic_segmentation(pred_labels, true_labels)
 98 |             self.metric_meter['loss'].add(loss.data[0])
 99 |             self.metric_meter['acc'].add(eval_metrics['mean_class_accuracy'])
100 |             self.metric_meter['iou'].add(eval_metrics['miou'])
101 | 
102 |             if (self.n_iter + 1) % opt.plot_freq == 0:
103 |                 # Plot metrics curve in tensorboard.
104 |                 self.writer.add_scalars(
105 |                     'loss', {'train': self.metric_meter['loss'].value()[0]},
106 |                     self.n_plot)
107 |                 self.writer.add_scalars(
108 |                     'acc', {'train': self.metric_meter['acc'].value()[0]},
109 |                     self.n_plot)
110 |                 self.writer.add_scalars(
111 |                     'iou', {'train': self.metric_meter['iou'].value()[0]},
112 |                     self.n_plot)
113 | 
114 |                 # Show segmentation images.
115 |                 # Get prediction segmentation and ground truth segmentation.
116 |                 origin_image = inverse_normalization(imgs[0].cpu().data)
117 |                 pred_seg = cm[pred_labels[0]]
118 |                 gt_seg = cm[true_labels[0]]
119 | 
120 |                 self.writer.add_image('train ori_img', origin_image,
121 |                                       self.n_plot)
122 |                 self.writer.add_image('train gt', gt_seg, self.n_plot)
123 |                 self.writer.add_image('train pred', pred_seg, self.n_plot)
124 |                 self.n_plot += 1
125 | 
126 |             self.n_iter += 1
127 | 
128 |         self.metric_log['Train Loss'] = self.metric_meter['loss'].value()[0]
129 |         self.metric_log['Train Mean Class Accuracy'] = self.metric_meter[
130 |             'acc'].value()[0]
131 |         self.metric_log['Train Mean IoU'] = self.metric_meter['iou'].value()[0]
132 | 
133 |     def test(self, kwargs):
134 |         self.reset_meter()
135 |         self.model.eval()
136 |         test_data = kwargs['test_data']
137 |         for data in tqdm(test_data):
138 |             imgs, labels = data
139 |             if opt.use_gpu:
140 |                 imgs = imgs.cuda()
141 |                 labels = labels.cuda()
142 |             imgs = Variable(imgs, volatile=True)
143 |             labels = Variable(labels, volatile=True)
144 | 
145 |             # Forward.
146 |             scores = self.model(imgs)
147 |             loss = self.criterion(scores, labels)
148 | 
149 |             # Update to metrics.
150 |             pred_labels = scores.max(dim=1)[1].data.cpu().numpy()
151 |             pred_labels = [i for i in pred_labels]
152 | 
153 |             true_labels = labels.data.cpu().numpy()
154 |             true_labels = [i for i in true_labels]
155 | 
156 |             eval_metrics = eval_semantic_segmentation(pred_labels, true_labels)
157 |             self.metric_meter['loss'].add(loss.data[0])
158 |             self.metric_meter['acc'].add(eval_metrics['mean_class_accuracy'])
159 |             self.metric_meter['iou'].add(eval_metrics['miou'])
160 | 
161 |         # Plot metrics curve in tensorboard.
162 |         self.writer.add_scalars('loss',
163 |                                 {'test': self.metric_meter['loss'].value()[0]},
164 |                                 self.n_plot)
165 |         self.writer.add_scalars(
166 |             'acc', {'test': self.metric_meter['acc'].value()[0]}, self.n_plot)
167 |         self.writer.add_scalars(
168 |             'iou', {'test': self.metric_meter['iou'].value()[0]}, self.n_plot)
169 | 
170 |         origin_img = inverse_normalization(imgs[0].cpu().data)
171 |         pred_seg = cm[pred_labels[0]]
172 |         gt_seg = cm[true_labels[0]]
173 |         self.writer.add_image('test ori_img', origin_img, self.n_plot)
174 |         self.writer.add_image('test gt', gt_seg, self.n_plot)
175 |         self.writer.add_image('test pred', pred_seg, self.n_plot)
176 | 
177 |         self.n_plot += 1
178 | 
179 |         self.metric_log['Test Loss'] = self.metric_meter['loss'].value()[0]
180 |         self.metric_log['Test Mean Class Accuracy'] = self.metric_meter[
181 |             'acc'].value()[0]
182 |         self.metric_log['Test Mean IoU'] = self.metric_meter['iou'].value()[0]
183 | 
184 |     def get_best_model(self):
185 |         if self.metric_log['Test Mean IoU'] > self.best_metric:
186 |             self.best_model = deepcopy(self.model.state_dict())
187 |             self.best_metric = self.metric_log['Test Mean IoU']
188 | 
189 | 
190 | def train(**kwargs):
191 |     opt._parse(kwargs)
192 | 
193 |     # Set default cuda device.
194 |     torch.cuda.set_device(opt.ctx)
195 | 
196 |     fcn_trainer = FcnTrainer()
197 |     train_data = get_data(is_train=True)
198 |     test_data = get_data(is_train=False)
199 |     fcn_trainer.fit(
200 |         train_data=train_data, test_data=test_data, epochs=opt.max_epoch)
201 | 
202 | 
203 | if __name__ == '__main__':
204 |     import fire
205 | 
206 |     fire.Fire()
207 | 


--------------------------------------------------------------------------------
/chapter9_Computer-Vision/segmentation/models/__init__.py:
--------------------------------------------------------------------------------
1 | # encoding: utf-8
2 | """
3 | @author: xyliao
4 | @contact: xyliao1993@qq.com
5 | """
6 | 
7 | from .fcn import FcnResNet
8 | 


--------------------------------------------------------------------------------
/chapter9_Computer-Vision/segmentation/models/fcn.py:
--------------------------------------------------------------------------------
 1 | # encoding: utf-8
 2 | """
 3 | @author: xyliao
 4 | @contact: xyliao1993@qq.com
 5 | 
 6 | This file contains three FCN models according to paper "Fully Convolutional Networks for Semantic Segmentation"
 7 | """
 8 | 
 9 | import numpy as np
10 | import torch
11 | from mxtorch.vision import model_zoo
12 | from torch import nn
13 | 
14 | 
15 | def bilinear_kernel(in_channels, out_channels, kernel_size):
16 |     """Define a bilinear kernel according to in channels and out channels.
17 | 
18 |     Returns:
19 |         return a bilinear filter tensor
20 |     """
21 |     factor = (kernel_size + 1) // 2
22 |     if kernel_size % 2 == 1:
23 |         center = factor - 1
24 |     else:
25 |         center = factor - 0.5
26 |     og = np.ogrid[:kernel_size, :kernel_size]
27 |     bilinear_filter = (1 - abs(og[0] - center) / factor) * (1 - abs(og[1] - center) / factor)
28 |     weight = np.zeros((in_channels, out_channels, kernel_size, kernel_size), dtype=np.float32)
29 |     weight[range(in_channels), range(out_channels), :, :] = bilinear_filter
30 |     return torch.from_numpy(weight)
31 | 
32 | 
33 | pretrained_net = model_zoo.resnet34(pretrained=True)
34 | 
35 | 
36 | class FcnResNet(nn.Module):
37 |     def __init__(self, num_classes):
38 |         super().__init__()
39 | 
40 |         self.stage1 = nn.Sequential(*list(pretrained_net.children())[:-4])
41 |         self.stage2 = list(pretrained_net.children())[-4]
42 |         self.stage3 = list(pretrained_net.children())[-3]
43 | 
44 |         self.scores1 = nn.Conv2d(512, num_classes, 1)
45 |         self.scores2 = nn.Conv2d(256, num_classes, 1)
46 |         self.scores3 = nn.Conv2d(128, num_classes, 1)
47 | 
48 |         self.upsample_8x = nn.ConvTranspose2d(num_classes, num_classes, 16, 8, 4, bias=False)
49 |         self.upsample_8x.weight.data = bilinear_kernel(num_classes, num_classes, 16)
50 | 
51 |         self.upsample_4x = nn.ConvTranspose2d(num_classes, num_classes, 4, 2, 1, bias=False)
52 |         self.upsample_4x.weight.data = bilinear_kernel(num_classes, num_classes, 4)
53 | 
54 |         self.upsample_2x = nn.ConvTranspose2d(num_classes, num_classes, 4, 2, 1, bias=False)
55 |         self.upsample_2x.weight.data = bilinear_kernel(num_classes, num_classes, 4)
56 | 
57 |     def forward(self, x):
58 |         x = self.stage1(x)
59 |         s1 = x
60 | 
61 |         x = self.stage2(x)
62 |         s2 = x
63 | 
64 |         x = self.stage3(x)
65 |         s3 = x
66 | 
67 |         s3 = self.scores1(s3)
68 |         s3 = self.upsample_2x(s3)
69 |         s2 = self.scores2(s2)
70 |         s2 = s2 + s3
71 | 
72 |         s1 = self.scores3(s1)
73 |         s2 = self.upsample_4x(s2)
74 |         s = s1 + s2
75 | 
76 |         s = self.upsample_8x(s)
77 |         return s
78 | 
79 | 
80 | 


--------------------------------------------------------------------------------
/utils.py:
--------------------------------------------------------------------------------
  1 | from datetime import datetime
  2 | 
  3 | import torch
  4 | import torch.nn.functional as F
  5 | from torch import nn
  6 | from torch.autograd import Variable
  7 | 
  8 | 
  9 | def get_acc(output, label):
 10 |     total = output.shape[0]
 11 |     _, pred_label = output.max(1)
 12 |     num_correct = (pred_label == label).sum().data[0]
 13 |     return num_correct / total
 14 | 
 15 | 
 16 | def train(net, train_data, valid_data, num_epochs, optimizer, criterion):
 17 |     if torch.cuda.is_available():
 18 |         net = net.cuda()
 19 |     prev_time = datetime.now()
 20 |     for epoch in range(num_epochs):
 21 |         train_loss = 0
 22 |         train_acc = 0
 23 |         net = net.train()
 24 |         for im, label in train_data:
 25 |             if torch.cuda.is_available():
 26 |                 im = Variable(im.cuda())  # (bs, 3, h, w)
 27 |                 label = Variable(label.cuda())  # (bs, h, w)
 28 |             else:
 29 |                 im = Variable(im)
 30 |                 label = Variable(label)
 31 |             # forward
 32 |             output = net(im)
 33 |             loss = criterion(output, label)
 34 |             # backward
 35 |             optimizer.zero_grad()
 36 |             loss.backward()
 37 |             optimizer.step()
 38 | 
 39 |             train_loss += loss.data[0]
 40 |             train_acc += get_acc(output, label)
 41 | 
 42 |         cur_time = datetime.now()
 43 |         h, remainder = divmod((cur_time - prev_time).seconds, 3600)
 44 |         m, s = divmod(remainder, 60)
 45 |         time_str = "Time %02d:%02d:%02d" % (h, m, s)
 46 |         if valid_data is not None:
 47 |             valid_loss = 0
 48 |             valid_acc = 0
 49 |             net = net.eval()
 50 |             for im, label in valid_data:
 51 |                 if torch.cuda.is_available():
 52 |                     im = Variable(im.cuda(), volatile=True)
 53 |                     label = Variable(label.cuda(), volatile=True)
 54 |                 else:
 55 |                     im = Variable(im, volatile=True)
 56 |                     label = Variable(label, volatile=True)
 57 |                 output = net(im)
 58 |                 loss = criterion(output, label)
 59 |                 valid_loss += loss.data[0]
 60 |                 valid_acc += get_acc(output, label)
 61 |             epoch_str = (
 62 |                 "Epoch %d. Train Loss: %f, Train Acc: %f, Valid Loss: %f, Valid Acc: %f, "
 63 |                 % (epoch, train_loss / len(train_data),
 64 |                    train_acc / len(train_data), valid_loss / len(valid_data),
 65 |                    valid_acc / len(valid_data)))
 66 |         else:
 67 |             epoch_str = ("Epoch %d. Train Loss: %f, Train Acc: %f, " %
 68 |                          (epoch, train_loss / len(train_data),
 69 |                           train_acc / len(train_data)))
 70 |         prev_time = cur_time
 71 |         print(epoch_str + time_str)
 72 | 
 73 | 
 74 | def conv3x3(in_channel, out_channel, stride=1):
 75 |     return nn.Conv2d(
 76 |         in_channel, out_channel, 3, stride=stride, padding=1, bias=False)
 77 | 
 78 | 
 79 | class residual_block(nn.Module):
 80 |     def __init__(self, in_channel, out_channel, same_shape=True):
 81 |         super(residual_block, self).__init__()
 82 |         self.same_shape = same_shape
 83 |         stride = 1 if self.same_shape else 2
 84 | 
 85 |         self.conv1 = conv3x3(in_channel, out_channel, stride=stride)
 86 |         self.bn1 = nn.BatchNorm2d(out_channel)
 87 | 
 88 |         self.conv2 = conv3x3(out_channel, out_channel)
 89 |         self.bn2 = nn.BatchNorm2d(out_channel)
 90 |         if not self.same_shape:
 91 |             self.conv3 = nn.Conv2d(in_channel, out_channel, 1, stride=stride)
 92 | 
 93 |     def forward(self, x):
 94 |         out = self.conv1(x)
 95 |         out = F.relu(self.bn1(out), True)
 96 |         out = self.conv2(out)
 97 |         out = F.relu(self.bn2(out), True)
 98 | 
 99 |         if not self.same_shape:
100 |             x = self.conv3(x)
101 |         return F.relu(x + out, True)
102 | 
103 | 
104 | class resnet(nn.Module):
105 |     def __init__(self, in_channel, num_classes, verbose=False):
106 |         super(resnet, self).__init__()
107 |         self.verbose = verbose
108 | 
109 |         self.block1 = nn.Conv2d(in_channel, 64, 7, 2)
110 | 
111 |         self.block2 = nn.Sequential(
112 |             nn.MaxPool2d(3, 2), residual_block(64, 64), residual_block(64, 64))
113 | 
114 |         self.block3 = nn.Sequential(
115 |             residual_block(64, 128, False), residual_block(128, 128))
116 | 
117 |         self.block4 = nn.Sequential(
118 |             residual_block(128, 256, False), residual_block(256, 256))
119 | 
120 |         self.block5 = nn.Sequential(
121 |             residual_block(256, 512, False),
122 |             residual_block(512, 512), nn.AvgPool2d(3))
123 | 
124 |         self.classifier = nn.Linear(512, num_classes)
125 | 
126 |     def forward(self, x):
127 |         x = self.block1(x)
128 |         if self.verbose:
129 |             print('block 1 output: {}'.format(x.shape))
130 |         x = self.block2(x)
131 |         if self.verbose:
132 |             print('block 2 output: {}'.format(x.shape))
133 |         x = self.block3(x)
134 |         if self.verbose:
135 |             print('block 3 output: {}'.format(x.shape))
136 |         x = self.block4(x)
137 |         if self.verbose:
138 |             print('block 4 output: {}'.format(x.shape))
139 |         x = self.block5(x)
140 |         if self.verbose:
141 |             print('block 5 output: {}'.format(x.shape))
142 |         x = x.view(x.shape[0], -1)
143 |         x = self.classifier(x)
144 |         return x
145 | 


--------------------------------------------------------------------------------