├── README.md
├── chapter_attention-mechanisms
    ├── attention-scoring-functions.ipynb
    ├── bahdanau-attention.ipynb
    ├── multihead-attention.ipynb
    ├── nadaraya-waston.ipynb
    ├── rise.css
    ├── self-attention-and-positional-encoding.ipynb
    └── transformer.ipynb
├── chapter_computational-performance
    ├── multiple-gpus-concise.ipynb
    ├── multiple-gpus.ipynb
    └── rise.css
├── chapter_computer-vision
    ├── anchor.ipynb
    ├── bounding-box.ipynb
    ├── fcn.ipynb
    ├── fine-tuning.ipynb
    ├── image-augmentation.ipynb
    ├── kaggle-cifar10.ipynb
    ├── kaggle-dog.ipynb
    ├── multiscale-object-detection.ipynb
    ├── neural-style.ipynb
    ├── object-detection-dataset.ipynb
    ├── rise.css
    ├── semantic-segmentation-and-dataset.ipynb
    ├── ssd.ipynb
    └── transposed-conv.ipynb
├── chapter_convolutional-modern
    ├── alexnet.ipynb
    ├── batch-norm.ipynb
    ├── densenet.ipynb
    ├── googlenet.ipynb
    ├── nin.ipynb
    ├── resnet.ipynb
    ├── rise.css
    └── vgg.ipynb
├── chapter_convolutional-neural-networks
    ├── channels.ipynb
    ├── conv-layer.ipynb
    ├── lenet.ipynb
    ├── padding-and-strides.ipynb
    ├── pooling.ipynb
    └── rise.css
├── chapter_deep-learning-computation
    ├── custom-layer.ipynb
    ├── model-construction.ipynb
    ├── parameters.ipynb
    ├── read-write.ipynb
    ├── rise.css
    └── use-gpu.ipynb
├── chapter_linear-networks
    ├── image-classification-dataset.ipynb
    ├── linear-regression-concise.ipynb
    ├── linear-regression-scratch.ipynb
    ├── linear-regression.ipynb
    ├── rise.css
    ├── softmax-regression-concise.ipynb
    └── softmax-regression-scratch.ipynb
├── chapter_multilayer-perceptrons
    ├── dropout.ipynb
    ├── kaggle-house-price.ipynb
    ├── mlp-concise.ipynb
    ├── mlp-scratch.ipynb
    ├── mlp.ipynb
    ├── numerical-stability-and-init.ipynb
    ├── rise.css
    ├── underfit-overfit.ipynb
    └── weight-decay.ipynb
├── chapter_natural-language-processing-applications
    ├── natural-language-inference-and-dataset.ipynb
    ├── natural-language-inference-bert.ipynb
    └── rise.css
├── chapter_preliminaries
    ├── autograd.ipynb
    ├── calculus.ipynb
    ├── linear-algebra.ipynb
    ├── lookup-api.ipynb
    ├── ndarray.ipynb
    ├── pandas.ipynb
    └── rise.css
├── chapter_recurrent-modern
    ├── bi-rnn.ipynb
    ├── deep-rnn.ipynb
    ├── encoder-decoder.ipynb
    ├── gru.ipynb
    ├── lstm.ipynb
    ├── machine-translation-and-dataset.ipynb
    ├── rise.css
    └── seq2seq.ipynb
└── chapter_recurrent-neural-networks
    ├── language-models-and-dataset.ipynb
    ├── rise.css
    ├── rnn-concise.ipynb
    ├── rnn-scratch.ipynb
    ├── sequence.ipynb
    └── text-preprocessing.ipynb


/README.md:
--------------------------------------------------------------------------------
 1 | # d2l-ai/d2l-zh-pytorch-slides
 2 | 
 3 | This repo contains generated notebook slides. To open them locally, we suggest installing the [rise](https://rise.readthedocs.io/en/stable/) extension.
 4 | 
 5 | You can also preview them in nbviewer:
 6 |  - [chapter_preliminaries/ndarray.ipynb](https://nbviewer.jupyter.org/format/slides/github/d2l-ai/d2l-zh-pytorch-slides/blob/main/chapter_preliminaries/ndarray.ipynb)
 7 |  - [chapter_preliminaries/pandas.ipynb](https://nbviewer.jupyter.org/format/slides/github/d2l-ai/d2l-zh-pytorch-slides/blob/main/chapter_preliminaries/pandas.ipynb)
 8 |  - [chapter_preliminaries/linear-algebra.ipynb](https://nbviewer.jupyter.org/format/slides/github/d2l-ai/d2l-zh-pytorch-slides/blob/main/chapter_preliminaries/linear-algebra.ipynb)
 9 |  - [chapter_preliminaries/calculus.ipynb](https://nbviewer.jupyter.org/format/slides/github/d2l-ai/d2l-zh-pytorch-slides/blob/main/chapter_preliminaries/calculus.ipynb)
10 |  - [chapter_preliminaries/autograd.ipynb](https://nbviewer.jupyter.org/format/slides/github/d2l-ai/d2l-zh-pytorch-slides/blob/main/chapter_preliminaries/autograd.ipynb)
11 |  - [chapter_preliminaries/lookup-api.ipynb](https://nbviewer.jupyter.org/format/slides/github/d2l-ai/d2l-zh-pytorch-slides/blob/main/chapter_preliminaries/lookup-api.ipynb)
12 |  - [chapter_linear-networks/linear-regression.ipynb](https://nbviewer.jupyter.org/format/slides/github/d2l-ai/d2l-zh-pytorch-slides/blob/main/chapter_linear-networks/linear-regression.ipynb)
13 |  - [chapter_linear-networks/linear-regression-scratch.ipynb](https://nbviewer.jupyter.org/format/slides/github/d2l-ai/d2l-zh-pytorch-slides/blob/main/chapter_linear-networks/linear-regression-scratch.ipynb)
14 |  - [chapter_linear-networks/linear-regression-concise.ipynb](https://nbviewer.jupyter.org/format/slides/github/d2l-ai/d2l-zh-pytorch-slides/blob/main/chapter_linear-networks/linear-regression-concise.ipynb)
15 |  - [chapter_linear-networks/image-classification-dataset.ipynb](https://nbviewer.jupyter.org/format/slides/github/d2l-ai/d2l-zh-pytorch-slides/blob/main/chapter_linear-networks/image-classification-dataset.ipynb)
16 |  - [chapter_linear-networks/softmax-regression-scratch.ipynb](https://nbviewer.jupyter.org/format/slides/github/d2l-ai/d2l-zh-pytorch-slides/blob/main/chapter_linear-networks/softmax-regression-scratch.ipynb)
17 |  - [chapter_linear-networks/softmax-regression-concise.ipynb](https://nbviewer.jupyter.org/format/slides/github/d2l-ai/d2l-zh-pytorch-slides/blob/main/chapter_linear-networks/softmax-regression-concise.ipynb)
18 |  - [chapter_multilayer-perceptrons/mlp.ipynb](https://nbviewer.jupyter.org/format/slides/github/d2l-ai/d2l-zh-pytorch-slides/blob/main/chapter_multilayer-perceptrons/mlp.ipynb)
19 |  - [chapter_multilayer-perceptrons/mlp-scratch.ipynb](https://nbviewer.jupyter.org/format/slides/github/d2l-ai/d2l-zh-pytorch-slides/blob/main/chapter_multilayer-perceptrons/mlp-scratch.ipynb)
20 |  - [chapter_multilayer-perceptrons/mlp-concise.ipynb](https://nbviewer.jupyter.org/format/slides/github/d2l-ai/d2l-zh-pytorch-slides/blob/main/chapter_multilayer-perceptrons/mlp-concise.ipynb)
21 |  - [chapter_multilayer-perceptrons/underfit-overfit.ipynb](https://nbviewer.jupyter.org/format/slides/github/d2l-ai/d2l-zh-pytorch-slides/blob/main/chapter_multilayer-perceptrons/underfit-overfit.ipynb)
22 |  - [chapter_multilayer-perceptrons/weight-decay.ipynb](https://nbviewer.jupyter.org/format/slides/github/d2l-ai/d2l-zh-pytorch-slides/blob/main/chapter_multilayer-perceptrons/weight-decay.ipynb)
23 |  - [chapter_multilayer-perceptrons/dropout.ipynb](https://nbviewer.jupyter.org/format/slides/github/d2l-ai/d2l-zh-pytorch-slides/blob/main/chapter_multilayer-perceptrons/dropout.ipynb)
24 |  - [chapter_multilayer-perceptrons/numerical-stability-and-init.ipynb](https://nbviewer.jupyter.org/format/slides/github/d2l-ai/d2l-zh-pytorch-slides/blob/main/chapter_multilayer-perceptrons/numerical-stability-and-init.ipynb)
25 |  - [chapter_multilayer-perceptrons/kaggle-house-price.ipynb](https://nbviewer.jupyter.org/format/slides/github/d2l-ai/d2l-zh-pytorch-slides/blob/main/chapter_multilayer-perceptrons/kaggle-house-price.ipynb)
26 |  - [chapter_deep-learning-computation/model-construction.ipynb](https://nbviewer.jupyter.org/format/slides/github/d2l-ai/d2l-zh-pytorch-slides/blob/main/chapter_deep-learning-computation/model-construction.ipynb)
27 |  - [chapter_deep-learning-computation/parameters.ipynb](https://nbviewer.jupyter.org/format/slides/github/d2l-ai/d2l-zh-pytorch-slides/blob/main/chapter_deep-learning-computation/parameters.ipynb)
28 |  - [chapter_deep-learning-computation/custom-layer.ipynb](https://nbviewer.jupyter.org/format/slides/github/d2l-ai/d2l-zh-pytorch-slides/blob/main/chapter_deep-learning-computation/custom-layer.ipynb)
29 |  - [chapter_deep-learning-computation/read-write.ipynb](https://nbviewer.jupyter.org/format/slides/github/d2l-ai/d2l-zh-pytorch-slides/blob/main/chapter_deep-learning-computation/read-write.ipynb)
30 |  - [chapter_deep-learning-computation/use-gpu.ipynb](https://nbviewer.jupyter.org/format/slides/github/d2l-ai/d2l-zh-pytorch-slides/blob/main/chapter_deep-learning-computation/use-gpu.ipynb)
31 |  - [chapter_convolutional-neural-networks/conv-layer.ipynb](https://nbviewer.jupyter.org/format/slides/github/d2l-ai/d2l-zh-pytorch-slides/blob/main/chapter_convolutional-neural-networks/conv-layer.ipynb)
32 |  - [chapter_convolutional-neural-networks/padding-and-strides.ipynb](https://nbviewer.jupyter.org/format/slides/github/d2l-ai/d2l-zh-pytorch-slides/blob/main/chapter_convolutional-neural-networks/padding-and-strides.ipynb)
33 |  - [chapter_convolutional-neural-networks/channels.ipynb](https://nbviewer.jupyter.org/format/slides/github/d2l-ai/d2l-zh-pytorch-slides/blob/main/chapter_convolutional-neural-networks/channels.ipynb)
34 |  - [chapter_convolutional-neural-networks/pooling.ipynb](https://nbviewer.jupyter.org/format/slides/github/d2l-ai/d2l-zh-pytorch-slides/blob/main/chapter_convolutional-neural-networks/pooling.ipynb)
35 |  - [chapter_convolutional-neural-networks/lenet.ipynb](https://nbviewer.jupyter.org/format/slides/github/d2l-ai/d2l-zh-pytorch-slides/blob/main/chapter_convolutional-neural-networks/lenet.ipynb)
36 |  - [chapter_convolutional-modern/alexnet.ipynb](https://nbviewer.jupyter.org/format/slides/github/d2l-ai/d2l-zh-pytorch-slides/blob/main/chapter_convolutional-modern/alexnet.ipynb)
37 |  - [chapter_convolutional-modern/vgg.ipynb](https://nbviewer.jupyter.org/format/slides/github/d2l-ai/d2l-zh-pytorch-slides/blob/main/chapter_convolutional-modern/vgg.ipynb)
38 |  - [chapter_convolutional-modern/nin.ipynb](https://nbviewer.jupyter.org/format/slides/github/d2l-ai/d2l-zh-pytorch-slides/blob/main/chapter_convolutional-modern/nin.ipynb)
39 |  - [chapter_convolutional-modern/googlenet.ipynb](https://nbviewer.jupyter.org/format/slides/github/d2l-ai/d2l-zh-pytorch-slides/blob/main/chapter_convolutional-modern/googlenet.ipynb)
40 |  - [chapter_convolutional-modern/batch-norm.ipynb](https://nbviewer.jupyter.org/format/slides/github/d2l-ai/d2l-zh-pytorch-slides/blob/main/chapter_convolutional-modern/batch-norm.ipynb)
41 |  - [chapter_convolutional-modern/resnet.ipynb](https://nbviewer.jupyter.org/format/slides/github/d2l-ai/d2l-zh-pytorch-slides/blob/main/chapter_convolutional-modern/resnet.ipynb)
42 |  - [chapter_convolutional-modern/densenet.ipynb](https://nbviewer.jupyter.org/format/slides/github/d2l-ai/d2l-zh-pytorch-slides/blob/main/chapter_convolutional-modern/densenet.ipynb)
43 |  - [chapter_recurrent-neural-networks/sequence.ipynb](https://nbviewer.jupyter.org/format/slides/github/d2l-ai/d2l-zh-pytorch-slides/blob/main/chapter_recurrent-neural-networks/sequence.ipynb)
44 |  - [chapter_recurrent-neural-networks/text-preprocessing.ipynb](https://nbviewer.jupyter.org/format/slides/github/d2l-ai/d2l-zh-pytorch-slides/blob/main/chapter_recurrent-neural-networks/text-preprocessing.ipynb)
45 |  - [chapter_recurrent-neural-networks/language-models-and-dataset.ipynb](https://nbviewer.jupyter.org/format/slides/github/d2l-ai/d2l-zh-pytorch-slides/blob/main/chapter_recurrent-neural-networks/language-models-and-dataset.ipynb)
46 |  - [chapter_recurrent-neural-networks/rnn-scratch.ipynb](https://nbviewer.jupyter.org/format/slides/github/d2l-ai/d2l-zh-pytorch-slides/blob/main/chapter_recurrent-neural-networks/rnn-scratch.ipynb)
47 |  - [chapter_recurrent-neural-networks/rnn-concise.ipynb](https://nbviewer.jupyter.org/format/slides/github/d2l-ai/d2l-zh-pytorch-slides/blob/main/chapter_recurrent-neural-networks/rnn-concise.ipynb)
48 |  - [chapter_recurrent-modern/gru.ipynb](https://nbviewer.jupyter.org/format/slides/github/d2l-ai/d2l-zh-pytorch-slides/blob/main/chapter_recurrent-modern/gru.ipynb)
49 |  - [chapter_recurrent-modern/lstm.ipynb](https://nbviewer.jupyter.org/format/slides/github/d2l-ai/d2l-zh-pytorch-slides/blob/main/chapter_recurrent-modern/lstm.ipynb)
50 |  - [chapter_recurrent-modern/deep-rnn.ipynb](https://nbviewer.jupyter.org/format/slides/github/d2l-ai/d2l-zh-pytorch-slides/blob/main/chapter_recurrent-modern/deep-rnn.ipynb)
51 |  - [chapter_recurrent-modern/bi-rnn.ipynb](https://nbviewer.jupyter.org/format/slides/github/d2l-ai/d2l-zh-pytorch-slides/blob/main/chapter_recurrent-modern/bi-rnn.ipynb)
52 |  - [chapter_recurrent-modern/machine-translation-and-dataset.ipynb](https://nbviewer.jupyter.org/format/slides/github/d2l-ai/d2l-zh-pytorch-slides/blob/main/chapter_recurrent-modern/machine-translation-and-dataset.ipynb)
53 |  - [chapter_recurrent-modern/encoder-decoder.ipynb](https://nbviewer.jupyter.org/format/slides/github/d2l-ai/d2l-zh-pytorch-slides/blob/main/chapter_recurrent-modern/encoder-decoder.ipynb)
54 |  - [chapter_recurrent-modern/seq2seq.ipynb](https://nbviewer.jupyter.org/format/slides/github/d2l-ai/d2l-zh-pytorch-slides/blob/main/chapter_recurrent-modern/seq2seq.ipynb)
55 |  - [chapter_attention-mechanisms/nadaraya-waston.ipynb](https://nbviewer.jupyter.org/format/slides/github/d2l-ai/d2l-zh-pytorch-slides/blob/main/chapter_attention-mechanisms/nadaraya-waston.ipynb)
56 |  - [chapter_attention-mechanisms/attention-scoring-functions.ipynb](https://nbviewer.jupyter.org/format/slides/github/d2l-ai/d2l-zh-pytorch-slides/blob/main/chapter_attention-mechanisms/attention-scoring-functions.ipynb)
57 |  - [chapter_attention-mechanisms/bahdanau-attention.ipynb](https://nbviewer.jupyter.org/format/slides/github/d2l-ai/d2l-zh-pytorch-slides/blob/main/chapter_attention-mechanisms/bahdanau-attention.ipynb)
58 |  - [chapter_attention-mechanisms/multihead-attention.ipynb](https://nbviewer.jupyter.org/format/slides/github/d2l-ai/d2l-zh-pytorch-slides/blob/main/chapter_attention-mechanisms/multihead-attention.ipynb)
59 |  - [chapter_attention-mechanisms/self-attention-and-positional-encoding.ipynb](https://nbviewer.jupyter.org/format/slides/github/d2l-ai/d2l-zh-pytorch-slides/blob/main/chapter_attention-mechanisms/self-attention-and-positional-encoding.ipynb)
60 |  - [chapter_attention-mechanisms/transformer.ipynb](https://nbviewer.jupyter.org/format/slides/github/d2l-ai/d2l-zh-pytorch-slides/blob/main/chapter_attention-mechanisms/transformer.ipynb)
61 |  - [chapter_computational-performance/multiple-gpus.ipynb](https://nbviewer.jupyter.org/format/slides/github/d2l-ai/d2l-zh-pytorch-slides/blob/main/chapter_computational-performance/multiple-gpus.ipynb)
62 |  - [chapter_computational-performance/multiple-gpus-concise.ipynb](https://nbviewer.jupyter.org/format/slides/github/d2l-ai/d2l-zh-pytorch-slides/blob/main/chapter_computational-performance/multiple-gpus-concise.ipynb)
63 |  - [chapter_computer-vision/image-augmentation.ipynb](https://nbviewer.jupyter.org/format/slides/github/d2l-ai/d2l-zh-pytorch-slides/blob/main/chapter_computer-vision/image-augmentation.ipynb)
64 |  - [chapter_computer-vision/fine-tuning.ipynb](https://nbviewer.jupyter.org/format/slides/github/d2l-ai/d2l-zh-pytorch-slides/blob/main/chapter_computer-vision/fine-tuning.ipynb)
65 |  - [chapter_computer-vision/bounding-box.ipynb](https://nbviewer.jupyter.org/format/slides/github/d2l-ai/d2l-zh-pytorch-slides/blob/main/chapter_computer-vision/bounding-box.ipynb)
66 |  - [chapter_computer-vision/anchor.ipynb](https://nbviewer.jupyter.org/format/slides/github/d2l-ai/d2l-zh-pytorch-slides/blob/main/chapter_computer-vision/anchor.ipynb)
67 |  - [chapter_computer-vision/multiscale-object-detection.ipynb](https://nbviewer.jupyter.org/format/slides/github/d2l-ai/d2l-zh-pytorch-slides/blob/main/chapter_computer-vision/multiscale-object-detection.ipynb)
68 |  - [chapter_computer-vision/object-detection-dataset.ipynb](https://nbviewer.jupyter.org/format/slides/github/d2l-ai/d2l-zh-pytorch-slides/blob/main/chapter_computer-vision/object-detection-dataset.ipynb)
69 |  - [chapter_computer-vision/ssd.ipynb](https://nbviewer.jupyter.org/format/slides/github/d2l-ai/d2l-zh-pytorch-slides/blob/main/chapter_computer-vision/ssd.ipynb)
70 |  - [chapter_computer-vision/semantic-segmentation-and-dataset.ipynb](https://nbviewer.jupyter.org/format/slides/github/d2l-ai/d2l-zh-pytorch-slides/blob/main/chapter_computer-vision/semantic-segmentation-and-dataset.ipynb)
71 |  - [chapter_computer-vision/transposed-conv.ipynb](https://nbviewer.jupyter.org/format/slides/github/d2l-ai/d2l-zh-pytorch-slides/blob/main/chapter_computer-vision/transposed-conv.ipynb)
72 |  - [chapter_computer-vision/fcn.ipynb](https://nbviewer.jupyter.org/format/slides/github/d2l-ai/d2l-zh-pytorch-slides/blob/main/chapter_computer-vision/fcn.ipynb)
73 |  - [chapter_computer-vision/neural-style.ipynb](https://nbviewer.jupyter.org/format/slides/github/d2l-ai/d2l-zh-pytorch-slides/blob/main/chapter_computer-vision/neural-style.ipynb)
74 |  - [chapter_computer-vision/kaggle-cifar10.ipynb](https://nbviewer.jupyter.org/format/slides/github/d2l-ai/d2l-zh-pytorch-slides/blob/main/chapter_computer-vision/kaggle-cifar10.ipynb)
75 |  - [chapter_computer-vision/kaggle-dog.ipynb](https://nbviewer.jupyter.org/format/slides/github/d2l-ai/d2l-zh-pytorch-slides/blob/main/chapter_computer-vision/kaggle-dog.ipynb)
76 |  - [chapter_natural-language-processing-applications/natural-language-inference-and-dataset.ipynb](https://nbviewer.jupyter.org/format/slides/github/d2l-ai/d2l-zh-pytorch-slides/blob/main/chapter_natural-language-processing-applications/natural-language-inference-and-dataset.ipynb)
77 |  - [chapter_natural-language-processing-applications/natural-language-inference-bert.ipynb](https://nbviewer.jupyter.org/format/slides/github/d2l-ai/d2l-zh-pytorch-slides/blob/main/chapter_natural-language-processing-applications/natural-language-inference-bert.ipynb)


--------------------------------------------------------------------------------
/chapter_attention-mechanisms/multihead-attention.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "markdown",
  5 |    "id": "cb4d82f7",
  6 |    "metadata": {
  7 |     "slideshow": {
  8 |      "slide_type": "-"
  9 |     }
 10 |    },
 11 |    "source": [
 12 |     "# 多头注意力\n",
 13 |     "\n"
 14 |    ]
 15 |   },
 16 |   {
 17 |    "cell_type": "code",
 18 |    "execution_count": 1,
 19 |    "id": "dc55ba33",
 20 |    "metadata": {
 21 |     "execution": {
 22 |      "iopub.execute_input": "2023-08-18T07:01:32.189972Z",
 23 |      "iopub.status.busy": "2023-08-18T07:01:32.189240Z",
 24 |      "iopub.status.idle": "2023-08-18T07:01:34.516491Z",
 25 |      "shell.execute_reply": "2023-08-18T07:01:34.515475Z"
 26 |     },
 27 |     "origin_pos": 2,
 28 |     "tab": [
 29 |      "pytorch"
 30 |     ]
 31 |    },
 32 |    "outputs": [],
 33 |    "source": [
 34 |     "import math\n",
 35 |     "import torch\n",
 36 |     "from torch import nn\n",
 37 |     "from d2l import torch as d2l"
 38 |    ]
 39 |   },
 40 |   {
 41 |    "cell_type": "markdown",
 42 |    "id": "22964f2f",
 43 |    "metadata": {
 44 |     "slideshow": {
 45 |      "slide_type": "slide"
 46 |     }
 47 |    },
 48 |    "source": [
 49 |     "选择缩放点积注意力作为每一个注意力头"
 50 |    ]
 51 |   },
 52 |   {
 53 |    "cell_type": "code",
 54 |    "execution_count": 2,
 55 |    "id": "1bb10990",
 56 |    "metadata": {
 57 |     "execution": {
 58 |      "iopub.execute_input": "2023-08-18T07:01:34.521491Z",
 59 |      "iopub.status.busy": "2023-08-18T07:01:34.521131Z",
 60 |      "iopub.status.idle": "2023-08-18T07:01:34.530492Z",
 61 |      "shell.execute_reply": "2023-08-18T07:01:34.529556Z"
 62 |     },
 63 |     "origin_pos": 7,
 64 |     "tab": [
 65 |      "pytorch"
 66 |     ]
 67 |    },
 68 |    "outputs": [],
 69 |    "source": [
 70 |     "class MultiHeadAttention(nn.Module):\n",
 71 |     "    \"\"\"多头注意力\"\"\"\n",
 72 |     "    def __init__(self, key_size, query_size, value_size, num_hiddens,\n",
 73 |     "                 num_heads, dropout, bias=False, **kwargs):\n",
 74 |     "        super(MultiHeadAttention, self).__init__(**kwargs)\n",
 75 |     "        self.num_heads = num_heads\n",
 76 |     "        self.attention = d2l.DotProductAttention(dropout)\n",
 77 |     "        self.W_q = nn.Linear(query_size, num_hiddens, bias=bias)\n",
 78 |     "        self.W_k = nn.Linear(key_size, num_hiddens, bias=bias)\n",
 79 |     "        self.W_v = nn.Linear(value_size, num_hiddens, bias=bias)\n",
 80 |     "        self.W_o = nn.Linear(num_hiddens, num_hiddens, bias=bias)\n",
 81 |     "\n",
 82 |     "    def forward(self, queries, keys, values, valid_lens):\n",
 83 |     "        queries = transpose_qkv(self.W_q(queries), self.num_heads)\n",
 84 |     "        keys = transpose_qkv(self.W_k(keys), self.num_heads)\n",
 85 |     "        values = transpose_qkv(self.W_v(values), self.num_heads)\n",
 86 |     "\n",
 87 |     "        if valid_lens is not None:\n",
 88 |     "            valid_lens = torch.repeat_interleave(\n",
 89 |     "                valid_lens, repeats=self.num_heads, dim=0)\n",
 90 |     "\n",
 91 |     "        output = self.attention(queries, keys, values, valid_lens)\n",
 92 |     "\n",
 93 |     "        output_concat = transpose_output(output, self.num_heads)\n",
 94 |     "        return self.W_o(output_concat)"
 95 |    ]
 96 |   },
 97 |   {
 98 |    "cell_type": "markdown",
 99 |    "id": "d376aca2",
100 |    "metadata": {
101 |     "slideshow": {
102 |      "slide_type": "slide"
103 |     }
104 |    },
105 |    "source": [
106 |     "使多个头并行计算"
107 |    ]
108 |   },
109 |   {
110 |    "cell_type": "code",
111 |    "execution_count": 3,
112 |    "id": "b2af5ed8",
113 |    "metadata": {
114 |     "execution": {
115 |      "iopub.execute_input": "2023-08-18T07:01:34.534820Z",
116 |      "iopub.status.busy": "2023-08-18T07:01:34.534308Z",
117 |      "iopub.status.idle": "2023-08-18T07:01:34.540852Z",
118 |      "shell.execute_reply": "2023-08-18T07:01:34.539927Z"
119 |     },
120 |     "origin_pos": 12,
121 |     "tab": [
122 |      "pytorch"
123 |     ]
124 |    },
125 |    "outputs": [],
126 |    "source": [
127 |     "def transpose_qkv(X, num_heads):\n",
128 |     "    \"\"\"为了多注意力头的并行计算而变换形状\"\"\"\n",
129 |     "    X = X.reshape(X.shape[0], X.shape[1], num_heads, -1)\n",
130 |     "\n",
131 |     "    X = X.permute(0, 2, 1, 3)\n",
132 |     "\n",
133 |     "    return X.reshape(-1, X.shape[2], X.shape[3])\n",
134 |     "\n",
135 |     "\n",
136 |     "def transpose_output(X, num_heads):\n",
137 |     "    \"\"\"逆转transpose_qkv函数的操作\"\"\"\n",
138 |     "    X = X.reshape(-1, num_heads, X.shape[1], X.shape[2])\n",
139 |     "    X = X.permute(0, 2, 1, 3)\n",
140 |     "    return X.reshape(X.shape[0], X.shape[1], -1)"
141 |    ]
142 |   },
143 |   {
144 |    "cell_type": "markdown",
145 |    "id": "015e3e67",
146 |    "metadata": {
147 |     "slideshow": {
148 |      "slide_type": "slide"
149 |     }
150 |    },
151 |    "source": [
152 |     "测试"
153 |    ]
154 |   },
155 |   {
156 |    "cell_type": "code",
157 |    "execution_count": 4,
158 |    "id": "d06baadf",
159 |    "metadata": {
160 |     "execution": {
161 |      "iopub.execute_input": "2023-08-18T07:01:34.545405Z",
162 |      "iopub.status.busy": "2023-08-18T07:01:34.544605Z",
163 |      "iopub.status.idle": "2023-08-18T07:01:34.571251Z",
164 |      "shell.execute_reply": "2023-08-18T07:01:34.570476Z"
165 |     },
166 |     "origin_pos": 17,
167 |     "tab": [
168 |      "pytorch"
169 |     ]
170 |    },
171 |    "outputs": [
172 |     {
173 |      "data": {
174 |       "text/plain": [
175 |        "MultiHeadAttention(\n",
176 |        "  (attention): DotProductAttention(\n",
177 |        "    (dropout): Dropout(p=0.5, inplace=False)\n",
178 |        "  )\n",
179 |        "  (W_q): Linear(in_features=100, out_features=100, bias=False)\n",
180 |        "  (W_k): Linear(in_features=100, out_features=100, bias=False)\n",
181 |        "  (W_v): Linear(in_features=100, out_features=100, bias=False)\n",
182 |        "  (W_o): Linear(in_features=100, out_features=100, bias=False)\n",
183 |        ")"
184 |       ]
185 |      },
186 |      "execution_count": 4,
187 |      "metadata": {},
188 |      "output_type": "execute_result"
189 |     }
190 |    ],
191 |    "source": [
192 |     "num_hiddens, num_heads = 100, 5\n",
193 |     "attention = MultiHeadAttention(num_hiddens, num_hiddens, num_hiddens,\n",
194 |     "                               num_hiddens, num_heads, 0.5)\n",
195 |     "attention.eval()"
196 |    ]
197 |   },
198 |   {
199 |    "cell_type": "code",
200 |    "execution_count": 5,
201 |    "id": "8da65afc",
202 |    "metadata": {
203 |     "execution": {
204 |      "iopub.execute_input": "2023-08-18T07:01:34.574642Z",
205 |      "iopub.status.busy": "2023-08-18T07:01:34.574021Z",
206 |      "iopub.status.idle": "2023-08-18T07:01:34.588848Z",
207 |      "shell.execute_reply": "2023-08-18T07:01:34.587945Z"
208 |     },
209 |     "origin_pos": 20,
210 |     "tab": [
211 |      "pytorch"
212 |     ]
213 |    },
214 |    "outputs": [
215 |     {
216 |      "data": {
217 |       "text/plain": [
218 |        "torch.Size([2, 4, 100])"
219 |       ]
220 |      },
221 |      "execution_count": 5,
222 |      "metadata": {},
223 |      "output_type": "execute_result"
224 |     }
225 |    ],
226 |    "source": [
227 |     "batch_size, num_queries = 2, 4\n",
228 |     "num_kvpairs, valid_lens =  6, torch.tensor([3, 2])\n",
229 |     "X = torch.ones((batch_size, num_queries, num_hiddens))\n",
230 |     "Y = torch.ones((batch_size, num_kvpairs, num_hiddens))\n",
231 |     "attention(X, Y, Y, valid_lens).shape"
232 |    ]
233 |   }
234 |  ],
235 |  "metadata": {
236 |   "celltoolbar": "Slideshow",
237 |   "language_info": {
238 |    "name": "python"
239 |   },
240 |   "required_libs": [],
241 |   "rise": {
242 |    "autolaunch": true,
243 |    "enable_chalkboard": true,
244 |    "overlay": "<div class='my-top-right'><img height=80px src='http://d2l.ai/_static/logo-with-text.png'/></div><div class='my-top-left'></div>",
245 |    "scroll": true
246 |   }
247 |  },
248 |  "nbformat": 4,
249 |  "nbformat_minor": 5
250 | }


--------------------------------------------------------------------------------
/chapter_attention-mechanisms/rise.css:
--------------------------------------------------------------------------------
 1 | 
 2 | div.text_cell_render.rendered_html {
 3 |     padding: 0.35em 0.1em;
 4 | }
 5 | 
 6 | div.code_cell {
 7 |     font-size: 120%;
 8 | }
 9 | 
10 | div.my-top-right {
11 |     position: absolute;
12 |     right: 5%;
13 |     top: 1em;
14 |     font-size: 2em;
15 | }
16 | 
17 | div.my-top-left {
18 |     position: absolute;
19 |     left: 5%;
20 |     top: 1em;
21 |     font-size: 2em;
22 | }
23 | 


--------------------------------------------------------------------------------
/chapter_computational-performance/rise.css:
--------------------------------------------------------------------------------
 1 | 
 2 | div.text_cell_render.rendered_html {
 3 |     padding: 0.35em 0.1em;
 4 | }
 5 | 
 6 | div.code_cell {
 7 |     font-size: 120%;
 8 | }
 9 | 
10 | div.my-top-right {
11 |     position: absolute;
12 |     right: 5%;
13 |     top: 1em;
14 |     font-size: 2em;
15 | }
16 | 
17 | div.my-top-left {
18 |     position: absolute;
19 |     left: 5%;
20 |     top: 1em;
21 |     font-size: 2em;
22 | }
23 | 


--------------------------------------------------------------------------------
/chapter_computer-vision/rise.css:
--------------------------------------------------------------------------------
 1 | 
 2 | div.text_cell_render.rendered_html {
 3 |     padding: 0.35em 0.1em;
 4 | }
 5 | 
 6 | div.code_cell {
 7 |     font-size: 120%;
 8 | }
 9 | 
10 | div.my-top-right {
11 |     position: absolute;
12 |     right: 5%;
13 |     top: 1em;
14 |     font-size: 2em;
15 | }
16 | 
17 | div.my-top-left {
18 |     position: absolute;
19 |     left: 5%;
20 |     top: 1em;
21 |     font-size: 2em;
22 | }
23 | 


--------------------------------------------------------------------------------
/chapter_computer-vision/transposed-conv.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "markdown",
  5 |    "id": "db0f208a",
  6 |    "metadata": {
  7 |     "slideshow": {
  8 |      "slide_type": "-"
  9 |     }
 10 |    },
 11 |    "source": [
 12 |     "# 转置卷积\n",
 13 |     "\n"
 14 |    ]
 15 |   },
 16 |   {
 17 |    "cell_type": "code",
 18 |    "execution_count": 1,
 19 |    "id": "1f39b5ef",
 20 |    "metadata": {
 21 |     "execution": {
 22 |      "iopub.execute_input": "2023-08-18T07:05:22.451701Z",
 23 |      "iopub.status.busy": "2023-08-18T07:05:22.451411Z",
 24 |      "iopub.status.idle": "2023-08-18T07:05:24.490785Z",
 25 |      "shell.execute_reply": "2023-08-18T07:05:24.489970Z"
 26 |     },
 27 |     "origin_pos": 2,
 28 |     "tab": [
 29 |      "pytorch"
 30 |     ]
 31 |    },
 32 |    "outputs": [],
 33 |    "source": [
 34 |     "import torch\n",
 35 |     "from torch import nn\n",
 36 |     "from d2l import torch as d2l"
 37 |    ]
 38 |   },
 39 |   {
 40 |    "cell_type": "markdown",
 41 |    "id": "1f3a83f3",
 42 |    "metadata": {
 43 |     "slideshow": {
 44 |      "slide_type": "-"
 45 |     }
 46 |    },
 47 |    "source": [
 48 |     "实现基本的转置卷积运算"
 49 |    ]
 50 |   },
 51 |   {
 52 |    "cell_type": "code",
 53 |    "execution_count": 2,
 54 |    "id": "e6931d90",
 55 |    "metadata": {
 56 |     "execution": {
 57 |      "iopub.execute_input": "2023-08-18T07:05:24.494981Z",
 58 |      "iopub.status.busy": "2023-08-18T07:05:24.494307Z",
 59 |      "iopub.status.idle": "2023-08-18T07:05:24.499745Z",
 60 |      "shell.execute_reply": "2023-08-18T07:05:24.498885Z"
 61 |     },
 62 |     "origin_pos": 5,
 63 |     "tab": [
 64 |      "pytorch"
 65 |     ]
 66 |    },
 67 |    "outputs": [],
 68 |    "source": [
 69 |     "def trans_conv(X, K):\n",
 70 |     "    h, w = K.shape\n",
 71 |     "    Y = torch.zeros((X.shape[0] + h - 1, X.shape[1] + w - 1))\n",
 72 |     "    for i in range(X.shape[0]):\n",
 73 |     "        for j in range(X.shape[1]):\n",
 74 |     "            Y[i: i + h, j: j + w] += X[i, j] * K\n",
 75 |     "    return Y"
 76 |    ]
 77 |   },
 78 |   {
 79 |    "cell_type": "markdown",
 80 |    "id": "f3baa22e",
 81 |    "metadata": {
 82 |     "slideshow": {
 83 |      "slide_type": "slide"
 84 |     }
 85 |    },
 86 |    "source": [
 87 |     "验证上述实现输出"
 88 |    ]
 89 |   },
 90 |   {
 91 |    "cell_type": "code",
 92 |    "execution_count": 3,
 93 |    "id": "a7c6e2fd",
 94 |    "metadata": {
 95 |     "execution": {
 96 |      "iopub.execute_input": "2023-08-18T07:05:24.503202Z",
 97 |      "iopub.status.busy": "2023-08-18T07:05:24.502646Z",
 98 |      "iopub.status.idle": "2023-08-18T07:05:24.531448Z",
 99 |      "shell.execute_reply": "2023-08-18T07:05:24.530730Z"
100 |     },
101 |     "origin_pos": 7,
102 |     "tab": [
103 |      "pytorch"
104 |     ]
105 |    },
106 |    "outputs": [
107 |     {
108 |      "data": {
109 |       "text/plain": [
110 |        "tensor([[ 0.,  0.,  1.],\n",
111 |        "        [ 0.,  4.,  6.],\n",
112 |        "        [ 4., 12.,  9.]])"
113 |       ]
114 |      },
115 |      "execution_count": 3,
116 |      "metadata": {},
117 |      "output_type": "execute_result"
118 |     }
119 |    ],
120 |    "source": [
121 |     "X = torch.tensor([[0.0, 1.0], [2.0, 3.0]])\n",
122 |     "K = torch.tensor([[0.0, 1.0], [2.0, 3.0]])\n",
123 |     "trans_conv(X, K)"
124 |    ]
125 |   },
126 |   {
127 |    "cell_type": "markdown",
128 |    "id": "9f9dd301",
129 |    "metadata": {
130 |     "slideshow": {
131 |      "slide_type": "slide"
132 |     }
133 |    },
134 |    "source": [
135 |     "使用高级API获得相同的结果"
136 |    ]
137 |   },
138 |   {
139 |    "cell_type": "code",
140 |    "execution_count": 4,
141 |    "id": "b9de6d80",
142 |    "metadata": {
143 |     "execution": {
144 |      "iopub.execute_input": "2023-08-18T07:05:24.535386Z",
145 |      "iopub.status.busy": "2023-08-18T07:05:24.534826Z",
146 |      "iopub.status.idle": "2023-08-18T07:05:24.544484Z",
147 |      "shell.execute_reply": "2023-08-18T07:05:24.543747Z"
148 |     },
149 |     "origin_pos": 10,
150 |     "tab": [
151 |      "pytorch"
152 |     ]
153 |    },
154 |    "outputs": [
155 |     {
156 |      "data": {
157 |       "text/plain": [
158 |        "tensor([[[[ 0.,  0.,  1.],\n",
159 |        "          [ 0.,  4.,  6.],\n",
160 |        "          [ 4., 12.,  9.]]]], grad_fn=<ConvolutionBackward0>)"
161 |       ]
162 |      },
163 |      "execution_count": 4,
164 |      "metadata": {},
165 |      "output_type": "execute_result"
166 |     }
167 |    ],
168 |    "source": [
169 |     "X, K = X.reshape(1, 1, 2, 2), K.reshape(1, 1, 2, 2)\n",
170 |     "tconv = nn.ConvTranspose2d(1, 1, kernel_size=2, bias=False)\n",
171 |     "tconv.weight.data = K\n",
172 |     "tconv(X)"
173 |    ]
174 |   },
175 |   {
176 |    "cell_type": "markdown",
177 |    "id": "f8811b58",
178 |    "metadata": {
179 |     "slideshow": {
180 |      "slide_type": "slide"
181 |     }
182 |    },
183 |    "source": [
184 |     "填充、步幅和多通道"
185 |    ]
186 |   },
187 |   {
188 |    "cell_type": "code",
189 |    "execution_count": 5,
190 |    "id": "cd114de1",
191 |    "metadata": {
192 |     "execution": {
193 |      "iopub.execute_input": "2023-08-18T07:05:24.548040Z",
194 |      "iopub.status.busy": "2023-08-18T07:05:24.547398Z",
195 |      "iopub.status.idle": "2023-08-18T07:05:24.553659Z",
196 |      "shell.execute_reply": "2023-08-18T07:05:24.552864Z"
197 |     },
198 |     "origin_pos": 14,
199 |     "tab": [
200 |      "pytorch"
201 |     ]
202 |    },
203 |    "outputs": [
204 |     {
205 |      "data": {
206 |       "text/plain": [
207 |        "tensor([[[[4.]]]], grad_fn=<ConvolutionBackward0>)"
208 |       ]
209 |      },
210 |      "execution_count": 5,
211 |      "metadata": {},
212 |      "output_type": "execute_result"
213 |     }
214 |    ],
215 |    "source": [
216 |     "tconv = nn.ConvTranspose2d(1, 1, kernel_size=2, padding=1, bias=False)\n",
217 |     "tconv.weight.data = K\n",
218 |     "tconv(X)"
219 |    ]
220 |   },
221 |   {
222 |    "cell_type": "code",
223 |    "execution_count": 6,
224 |    "id": "48064406",
225 |    "metadata": {
226 |     "execution": {
227 |      "iopub.execute_input": "2023-08-18T07:05:24.557362Z",
228 |      "iopub.status.busy": "2023-08-18T07:05:24.556727Z",
229 |      "iopub.status.idle": "2023-08-18T07:05:24.563081Z",
230 |      "shell.execute_reply": "2023-08-18T07:05:24.562365Z"
231 |     },
232 |     "origin_pos": 18,
233 |     "tab": [
234 |      "pytorch"
235 |     ]
236 |    },
237 |    "outputs": [
238 |     {
239 |      "data": {
240 |       "text/plain": [
241 |        "tensor([[[[0., 0., 0., 1.],\n",
242 |        "          [0., 0., 2., 3.],\n",
243 |        "          [0., 2., 0., 3.],\n",
244 |        "          [4., 6., 6., 9.]]]], grad_fn=<ConvolutionBackward0>)"
245 |       ]
246 |      },
247 |      "execution_count": 6,
248 |      "metadata": {},
249 |      "output_type": "execute_result"
250 |     }
251 |    ],
252 |    "source": [
253 |     "tconv = nn.ConvTranspose2d(1, 1, kernel_size=2, stride=2, bias=False)\n",
254 |     "tconv.weight.data = K\n",
255 |     "tconv(X)"
256 |    ]
257 |   },
258 |   {
259 |    "cell_type": "code",
260 |    "execution_count": 7,
261 |    "id": "5e7033d7",
262 |    "metadata": {
263 |     "execution": {
264 |      "iopub.execute_input": "2023-08-18T07:05:24.566613Z",
265 |      "iopub.status.busy": "2023-08-18T07:05:24.565990Z",
266 |      "iopub.status.idle": "2023-08-18T07:05:24.577437Z",
267 |      "shell.execute_reply": "2023-08-18T07:05:24.576434Z"
268 |     },
269 |     "origin_pos": 22,
270 |     "tab": [
271 |      "pytorch"
272 |     ]
273 |    },
274 |    "outputs": [
275 |     {
276 |      "data": {
277 |       "text/plain": [
278 |        "True"
279 |       ]
280 |      },
281 |      "execution_count": 7,
282 |      "metadata": {},
283 |      "output_type": "execute_result"
284 |     }
285 |    ],
286 |    "source": [
287 |     "X = torch.rand(size=(1, 10, 16, 16))\n",
288 |     "conv = nn.Conv2d(10, 20, kernel_size=5, padding=2, stride=3)\n",
289 |     "tconv = nn.ConvTranspose2d(20, 10, kernel_size=5, padding=2, stride=3)\n",
290 |     "tconv(conv(X)).shape == X.shape"
291 |    ]
292 |   },
293 |   {
294 |    "cell_type": "markdown",
295 |    "id": "12aa0878",
296 |    "metadata": {
297 |     "slideshow": {
298 |      "slide_type": "slide"
299 |     }
300 |    },
301 |    "source": [
302 |     "与矩阵变换的联系"
303 |    ]
304 |   },
305 |   {
306 |    "cell_type": "code",
307 |    "execution_count": 8,
308 |    "id": "260d5c6d",
309 |    "metadata": {
310 |     "execution": {
311 |      "iopub.execute_input": "2023-08-18T07:05:24.581485Z",
312 |      "iopub.status.busy": "2023-08-18T07:05:24.580866Z",
313 |      "iopub.status.idle": "2023-08-18T07:05:24.589179Z",
314 |      "shell.execute_reply": "2023-08-18T07:05:24.588233Z"
315 |     },
316 |     "origin_pos": 25,
317 |     "tab": [
318 |      "pytorch"
319 |     ]
320 |    },
321 |    "outputs": [
322 |     {
323 |      "data": {
324 |       "text/plain": [
325 |        "tensor([[27., 37.],\n",
326 |        "        [57., 67.]])"
327 |       ]
328 |      },
329 |      "execution_count": 8,
330 |      "metadata": {},
331 |      "output_type": "execute_result"
332 |     }
333 |    ],
334 |    "source": [
335 |     "X = torch.arange(9.0).reshape(3, 3)\n",
336 |     "K = torch.tensor([[1.0, 2.0], [3.0, 4.0]])\n",
337 |     "Y = d2l.corr2d(X, K)\n",
338 |     "Y"
339 |    ]
340 |   },
341 |   {
342 |    "cell_type": "code",
343 |    "execution_count": 9,
344 |    "id": "d9f6ce2b",
345 |    "metadata": {
346 |     "execution": {
347 |      "iopub.execute_input": "2023-08-18T07:05:24.592769Z",
348 |      "iopub.status.busy": "2023-08-18T07:05:24.592164Z",
349 |      "iopub.status.idle": "2023-08-18T07:05:24.602392Z",
350 |      "shell.execute_reply": "2023-08-18T07:05:24.601439Z"
351 |     },
352 |     "origin_pos": 28,
353 |     "tab": [
354 |      "pytorch"
355 |     ]
356 |    },
357 |    "outputs": [
358 |     {
359 |      "data": {
360 |       "text/plain": [
361 |        "tensor([[1., 2., 0., 3., 4., 0., 0., 0., 0.],\n",
362 |        "        [0., 1., 2., 0., 3., 4., 0., 0., 0.],\n",
363 |        "        [0., 0., 0., 1., 2., 0., 3., 4., 0.],\n",
364 |        "        [0., 0., 0., 0., 1., 2., 0., 3., 4.]])"
365 |       ]
366 |      },
367 |      "execution_count": 9,
368 |      "metadata": {},
369 |      "output_type": "execute_result"
370 |     }
371 |    ],
372 |    "source": [
373 |     "def kernel2matrix(K):\n",
374 |     "    k, W = torch.zeros(5), torch.zeros((4, 9))\n",
375 |     "    k[:2], k[3:5] = K[0, :], K[1, :]\n",
376 |     "    W[0, :5], W[1, 1:6], W[2, 3:8], W[3, 4:] = k, k, k, k\n",
377 |     "    return W\n",
378 |     "\n",
379 |     "W = kernel2matrix(K)\n",
380 |     "W"
381 |    ]
382 |   },
383 |   {
384 |    "cell_type": "code",
385 |    "execution_count": 10,
386 |    "id": "1fb803d0",
387 |    "metadata": {
388 |     "execution": {
389 |      "iopub.execute_input": "2023-08-18T07:05:24.606249Z",
390 |      "iopub.status.busy": "2023-08-18T07:05:24.605496Z",
391 |      "iopub.status.idle": "2023-08-18T07:05:24.612872Z",
392 |      "shell.execute_reply": "2023-08-18T07:05:24.611900Z"
393 |     },
394 |     "origin_pos": 31,
395 |     "tab": [
396 |      "pytorch"
397 |     ]
398 |    },
399 |    "outputs": [
400 |     {
401 |      "data": {
402 |       "text/plain": [
403 |        "tensor([[True, True],\n",
404 |        "        [True, True]])"
405 |       ]
406 |      },
407 |      "execution_count": 10,
408 |      "metadata": {},
409 |      "output_type": "execute_result"
410 |     }
411 |    ],
412 |    "source": [
413 |     "Y == torch.matmul(W, X.reshape(-1)).reshape(2, 2)"
414 |    ]
415 |   },
416 |   {
417 |    "cell_type": "code",
418 |    "execution_count": 11,
419 |    "id": "f1a55ff1",
420 |    "metadata": {
421 |     "execution": {
422 |      "iopub.execute_input": "2023-08-18T07:05:24.616575Z",
423 |      "iopub.status.busy": "2023-08-18T07:05:24.615826Z",
424 |      "iopub.status.idle": "2023-08-18T07:05:24.623063Z",
425 |      "shell.execute_reply": "2023-08-18T07:05:24.622144Z"
426 |     },
427 |     "origin_pos": 34,
428 |     "tab": [
429 |      "pytorch"
430 |     ]
431 |    },
432 |    "outputs": [
433 |     {
434 |      "data": {
435 |       "text/plain": [
436 |        "tensor([[True, True, True],\n",
437 |        "        [True, True, True],\n",
438 |        "        [True, True, True]])"
439 |       ]
440 |      },
441 |      "execution_count": 11,
442 |      "metadata": {},
443 |      "output_type": "execute_result"
444 |     }
445 |    ],
446 |    "source": [
447 |     "Z = trans_conv(Y, K)\n",
448 |     "Z == torch.matmul(W.T, Y.reshape(-1)).reshape(3, 3)"
449 |    ]
450 |   }
451 |  ],
452 |  "metadata": {
453 |   "celltoolbar": "Slideshow",
454 |   "language_info": {
455 |    "name": "python"
456 |   },
457 |   "required_libs": [],
458 |   "rise": {
459 |    "autolaunch": true,
460 |    "enable_chalkboard": true,
461 |    "overlay": "<div class='my-top-right'><img height=80px src='http://d2l.ai/_static/logo-with-text.png'/></div><div class='my-top-left'></div>",
462 |    "scroll": true
463 |   }
464 |  },
465 |  "nbformat": 4,
466 |  "nbformat_minor": 5
467 | }


--------------------------------------------------------------------------------
/chapter_convolutional-modern/rise.css:
--------------------------------------------------------------------------------
 1 | 
 2 | div.text_cell_render.rendered_html { /* Padding for rendered text cells (selector presumably matches Jupyter's rendered markdown output; confirm against the notebook DOM). */
 3 |     padding: 0.35em 0.1em; /* 0.35em top/bottom, 0.1em left/right */
 4 | }
 5 | 
 6 | div.code_cell { /* Enlarge text inside code-cell containers. */
 7 |     font-size: 120%; /* percentage is relative to the inherited font size */
 8 | }
 9 | 
10 | div.my-top-right { /* Top-right overlay slot; the notebooks' RISE "overlay" metadata places the logo <img> inside a div with this class. */
11 |     position: absolute; /* taken out of normal flow so it can be pinned to a corner */
12 |     right: 5%; /* inset from the right edge */
13 |     top: 1em; /* inset from the top edge */
14 |     font-size: 2em;
15 | }
16 | 
17 | div.my-top-left { /* Top-left overlay slot; mirrors div.my-top-right. The notebooks' RISE "overlay" metadata emits this div empty. */
18 |     position: absolute; /* taken out of normal flow so it can be pinned to a corner */
19 |     left: 5%; /* inset from the left edge */
20 |     top: 1em; /* inset from the top edge */
21 |     font-size: 2em;
22 | }
23 | 


--------------------------------------------------------------------------------
/chapter_convolutional-neural-networks/channels.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "markdown",
  5 |    "id": "a0f37efc",
  6 |    "metadata": {
  7 |     "slideshow": {
  8 |      "slide_type": "-"
  9 |     }
 10 |    },
 11 |    "source": [
 12 |     "# 多输入多输出通道\n",
 13 |     "\n",
 14 |     "实现一下多输入通道互相关运算"
 15 |    ]
 16 |   },
 17 |   {
 18 |    "cell_type": "code",
 19 |    "execution_count": 2,
 20 |    "id": "0cff24d4",
 21 |    "metadata": {
 22 |     "execution": {
 23 |      "iopub.execute_input": "2023-08-18T07:02:38.339612Z",
 24 |      "iopub.status.busy": "2023-08-18T07:02:38.339031Z",
 25 |      "iopub.status.idle": "2023-08-18T07:02:38.344485Z",
 26 |      "shell.execute_reply": "2023-08-18T07:02:38.343326Z"
 27 |     },
 28 |     "origin_pos": 4,
 29 |     "tab": [
 30 |      "pytorch"
 31 |     ]
 32 |    },
 33 |    "outputs": [],
 34 |    "source": [
 35 |     "import torch\n",
 36 |     "from d2l import torch as d2l\n",
 37 |     "\n",
 38 |     "def corr2d_multi_in(X, K):\n",
 39 |     "    return sum(d2l.corr2d(x, k) for x, k in zip(X, K))"
 40 |    ]
 41 |   },
 42 |   {
 43 |    "cell_type": "markdown",
 44 |    "id": "aafb58cc",
 45 |    "metadata": {
 46 |     "slideshow": {
 47 |      "slide_type": "-"
 48 |     }
 49 |    },
 50 |    "source": [
 51 |     "验证互相关运算的输出"
 52 |    ]
 53 |   },
 54 |   {
 55 |    "cell_type": "code",
 56 |    "execution_count": 3,
 57 |    "id": "5a60b8f9",
 58 |    "metadata": {
 59 |     "execution": {
 60 |      "iopub.execute_input": "2023-08-18T07:02:38.347937Z",
 61 |      "iopub.status.busy": "2023-08-18T07:02:38.347463Z",
 62 |      "iopub.status.idle": "2023-08-18T07:02:38.380997Z",
 63 |      "shell.execute_reply": "2023-08-18T07:02:38.379885Z"
 64 |     },
 65 |     "origin_pos": 7,
 66 |     "tab": [
 67 |      "pytorch"
 68 |     ]
 69 |    },
 70 |    "outputs": [
 71 |     {
 72 |      "data": {
 73 |       "text/plain": [
 74 |        "tensor([[ 56.,  72.],\n",
 75 |        "        [104., 120.]])"
 76 |       ]
 77 |      },
 78 |      "execution_count": 3,
 79 |      "metadata": {},
 80 |      "output_type": "execute_result"
 81 |     }
 82 |    ],
 83 |    "source": [
 84 |     "X = torch.tensor([[[0.0, 1.0, 2.0], [3.0, 4.0, 5.0], [6.0, 7.0, 8.0]],\n",
 85 |     "               [[1.0, 2.0, 3.0], [4.0, 5.0, 6.0], [7.0, 8.0, 9.0]]])\n",
 86 |     "K = torch.tensor([[[0.0, 1.0], [2.0, 3.0]], [[1.0, 2.0], [3.0, 4.0]]])\n",
 87 |     "\n",
 88 |     "corr2d_multi_in(X, K)"
 89 |    ]
 90 |   },
 91 |   {
 92 |    "cell_type": "markdown",
 93 |    "id": "4b4c9aa1",
 94 |    "metadata": {
 95 |     "slideshow": {
 96 |      "slide_type": "slide"
 97 |     }
 98 |    },
 99 |    "source": [
100 |     "实现一个计算多个输出通道的互相关函数"
101 |    ]
102 |   },
103 |   {
104 |    "cell_type": "code",
105 |    "execution_count": 5,
106 |    "id": "6dde7543",
107 |    "metadata": {
108 |     "execution": {
109 |      "iopub.execute_input": "2023-08-18T07:02:38.392733Z",
110 |      "iopub.status.busy": "2023-08-18T07:02:38.392298Z",
111 |      "iopub.status.idle": "2023-08-18T07:02:38.399310Z",
112 |      "shell.execute_reply": "2023-08-18T07:02:38.398211Z"
113 |     },
114 |     "origin_pos": 11,
115 |     "tab": [
116 |      "pytorch"
117 |     ]
118 |    },
119 |    "outputs": [
120 |     {
121 |      "data": {
122 |       "text/plain": [
123 |        "torch.Size([3, 2, 2, 2])"
124 |       ]
125 |      },
126 |      "execution_count": 5,
127 |      "metadata": {},
128 |      "output_type": "execute_result"
129 |     }
130 |    ],
131 |    "source": [
132 |     "def corr2d_multi_in_out(X, K):\n",
133 |     "    return torch.stack([corr2d_multi_in(X, k) for k in K], 0)\n",
134 |     "\n",
135 |     "K = torch.stack((K, K + 1, K + 2), 0)\n",
136 |     "K.shape"
137 |    ]
138 |   },
139 |   {
140 |    "cell_type": "code",
141 |    "execution_count": 6,
142 |    "id": "86b2b71f",
143 |    "metadata": {
144 |     "execution": {
145 |      "iopub.execute_input": "2023-08-18T07:02:38.403159Z",
146 |      "iopub.status.busy": "2023-08-18T07:02:38.402457Z",
147 |      "iopub.status.idle": "2023-08-18T07:02:38.410409Z",
148 |      "shell.execute_reply": "2023-08-18T07:02:38.409310Z"
149 |     },
150 |     "origin_pos": 13,
151 |     "tab": [
152 |      "pytorch"
153 |     ]
154 |    },
155 |    "outputs": [
156 |     {
157 |      "data": {
158 |       "text/plain": [
159 |        "tensor([[[ 56.,  72.],\n",
160 |        "         [104., 120.]],\n",
161 |        "\n",
162 |        "        [[ 76., 100.],\n",
163 |        "         [148., 172.]],\n",
164 |        "\n",
165 |        "        [[ 96., 128.],\n",
166 |        "         [192., 224.]]])"
167 |       ]
168 |      },
169 |      "execution_count": 6,
170 |      "metadata": {},
171 |      "output_type": "execute_result"
172 |     }
173 |    ],
174 |    "source": [
175 |     "corr2d_multi_in_out(X, K)"
176 |    ]
177 |   },
178 |   {
179 |    "cell_type": "markdown",
180 |    "id": "cafe51a2",
181 |    "metadata": {
182 |     "slideshow": {
183 |      "slide_type": "slide"
184 |     }
185 |    },
186 |    "source": [
187 |     "1x1卷积"
188 |    ]
189 |   },
190 |   {
191 |    "cell_type": "code",
192 |    "execution_count": 9,
193 |    "id": "7250eae2",
194 |    "metadata": {
195 |     "execution": {
196 |      "iopub.execute_input": "2023-08-18T07:02:38.430613Z",
197 |      "iopub.status.busy": "2023-08-18T07:02:38.430184Z",
198 |      "iopub.status.idle": "2023-08-18T07:02:38.438715Z",
199 |      "shell.execute_reply": "2023-08-18T07:02:38.437662Z"
200 |     },
201 |     "origin_pos": 19,
202 |     "tab": [
203 |      "pytorch"
204 |     ]
205 |    },
206 |    "outputs": [],
207 |    "source": [
208 |     "def corr2d_multi_in_out_1x1(X, K):\n",
209 |     "    c_i, h, w = X.shape\n",
210 |     "    c_o = K.shape[0]\n",
211 |     "    X = X.reshape((c_i, h * w))\n",
212 |     "    K = K.reshape((c_o, c_i))\n",
213 |     "    Y = torch.matmul(K, X)\n",
214 |     "    return Y.reshape((c_o, h, w))\n",
215 |     "\n",
216 |     "X = torch.normal(0, 1, (3, 3, 3))\n",
217 |     "K = torch.normal(0, 1, (2, 3, 1, 1))\n",
218 |     "\n",
219 |     "Y1 = corr2d_multi_in_out_1x1(X, K)\n",
220 |     "Y2 = corr2d_multi_in_out(X, K)\n",
221 |     "assert float(torch.abs(Y1 - Y2).sum()) < 1e-6"
222 |    ]
223 |   }
224 |  ],
225 |  "metadata": {
226 |   "celltoolbar": "Slideshow",
227 |   "language_info": {
228 |    "name": "python"
229 |   },
230 |   "required_libs": [],
231 |   "rise": {
232 |    "autolaunch": true,
233 |    "enable_chalkboard": true,
234 |    "overlay": "<div class='my-top-right'><img height=80px src='http://d2l.ai/_static/logo-with-text.png'/></div><div class='my-top-left'></div>",
235 |    "scroll": true
236 |   }
237 |  },
238 |  "nbformat": 4,
239 |  "nbformat_minor": 5
240 | }


--------------------------------------------------------------------------------
/chapter_convolutional-neural-networks/conv-layer.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "markdown",
  5 |    "id": "50832220",
  6 |    "metadata": {
  7 |     "slideshow": {
  8 |      "slide_type": "-"
  9 |     }
 10 |    },
 11 |    "source": [
 12 |     "# 图像卷积\n",
 13 |     "\n",
 14 |     "互相关运算"
 15 |    ]
 16 |   },
 17 |   {
 18 |    "cell_type": "code",
 19 |    "execution_count": 2,
 20 |    "id": "16abe7ca",
 21 |    "metadata": {
 22 |     "execution": {
 23 |      "iopub.execute_input": "2023-08-18T07:07:28.563668Z",
 24 |      "iopub.status.busy": "2023-08-18T07:07:28.562986Z",
 25 |      "iopub.status.idle": "2023-08-18T07:07:28.569424Z",
 26 |      "shell.execute_reply": "2023-08-18T07:07:28.568319Z"
 27 |     },
 28 |     "origin_pos": 4,
 29 |     "tab": [
 30 |      "pytorch"
 31 |     ]
 32 |    },
 33 |    "outputs": [],
 34 |    "source": [
 35 |     "import torch\n",
 36 |     "from torch import nn\n",
 37 |     "from d2l import torch as d2l\n",
 38 |     "\n",
 39 |     "def corr2d(X, K):  \n",
 40 |     "    \"\"\"计算二维互相关运算\"\"\"\n",
 41 |     "    h, w = K.shape\n",
 42 |     "    Y = torch.zeros((X.shape[0] - h + 1, X.shape[1] - w + 1))\n",
 43 |     "    for i in range(Y.shape[0]):\n",
 44 |     "        for j in range(Y.shape[1]):\n",
 45 |     "            Y[i, j] = (X[i:i + h, j:j + w] * K).sum()\n",
 46 |     "    return Y"
 47 |    ]
 48 |   },
 49 |   {
 50 |    "cell_type": "markdown",
 51 |    "id": "42171987",
 52 |    "metadata": {
 53 |     "slideshow": {
 54 |      "slide_type": "slide"
 55 |     }
 56 |    },
 57 |    "source": [
 58 |     "验证上述二维互相关运算的输出"
 59 |    ]
 60 |   },
 61 |   {
 62 |    "cell_type": "code",
 63 |    "execution_count": 3,
 64 |    "id": "6f84e512",
 65 |    "metadata": {
 66 |     "execution": {
 67 |      "iopub.execute_input": "2023-08-18T07:07:28.572958Z",
 68 |      "iopub.status.busy": "2023-08-18T07:07:28.572449Z",
 69 |      "iopub.status.idle": "2023-08-18T07:07:28.604854Z",
 70 |      "shell.execute_reply": "2023-08-18T07:07:28.603813Z"
 71 |     },
 72 |     "origin_pos": 7,
 73 |     "tab": [
 74 |      "pytorch"
 75 |     ]
 76 |    },
 77 |    "outputs": [
 78 |     {
 79 |      "data": {
 80 |       "text/plain": [
 81 |        "tensor([[19., 25.],\n",
 82 |        "        [37., 43.]])"
 83 |       ]
 84 |      },
 85 |      "execution_count": 3,
 86 |      "metadata": {},
 87 |      "output_type": "execute_result"
 88 |     }
 89 |    ],
 90 |    "source": [
 91 |     "X = torch.tensor([[0.0, 1.0, 2.0], [3.0, 4.0, 5.0], [6.0, 7.0, 8.0]])\n",
 92 |     "K = torch.tensor([[0.0, 1.0], [2.0, 3.0]])\n",
 93 |     "corr2d(X, K)"
 94 |    ]
 95 |   },
 96 |   {
 97 |    "cell_type": "markdown",
 98 |    "id": "5abc9d97",
 99 |    "metadata": {
100 |     "slideshow": {
101 |      "slide_type": "slide"
102 |     }
103 |    },
104 |    "source": [
105 |     "实现二维卷积层"
106 |    ]
107 |   },
108 |   {
109 |    "cell_type": "code",
110 |    "execution_count": 4,
111 |    "id": "450def67",
112 |    "metadata": {
113 |     "execution": {
114 |      "iopub.execute_input": "2023-08-18T07:07:28.610672Z",
115 |      "iopub.status.busy": "2023-08-18T07:07:28.609819Z",
116 |      "iopub.status.idle": "2023-08-18T07:07:28.615602Z",
117 |      "shell.execute_reply": "2023-08-18T07:07:28.614632Z"
118 |     },
119 |     "origin_pos": 10,
120 |     "tab": [
121 |      "pytorch"
122 |     ]
123 |    },
124 |    "outputs": [],
125 |    "source": [
126 |     "class Conv2D(nn.Module):\n",
127 |     "    def __init__(self, kernel_size):\n",
128 |     "        super().__init__()\n",
129 |     "        self.weight = nn.Parameter(torch.rand(kernel_size))\n",
130 |     "        self.bias = nn.Parameter(torch.zeros(1))\n",
131 |     "\n",
132 |     "    def forward(self, x):\n",
133 |     "        return corr2d(x, self.weight) + self.bias"
134 |    ]
135 |   },
136 |   {
137 |    "cell_type": "markdown",
138 |    "id": "c5b49b95",
139 |    "metadata": {
140 |     "slideshow": {
141 |      "slide_type": "slide"
142 |     }
143 |    },
144 |    "source": [
145 |     "卷积层的一个简单应用：\n",
146 |     "检测图像中不同颜色的边缘"
147 |    ]
148 |   },
149 |   {
150 |    "cell_type": "code",
151 |    "execution_count": 5,
152 |    "id": "dee1bc79",
153 |    "metadata": {
154 |     "execution": {
155 |      "iopub.execute_input": "2023-08-18T07:07:28.620077Z",
156 |      "iopub.status.busy": "2023-08-18T07:07:28.619277Z",
157 |      "iopub.status.idle": "2023-08-18T07:07:28.626719Z",
158 |      "shell.execute_reply": "2023-08-18T07:07:28.625746Z"
159 |     },
160 |     "origin_pos": 14,
161 |     "tab": [
162 |      "pytorch"
163 |     ]
164 |    },
165 |    "outputs": [
166 |     {
167 |      "data": {
168 |       "text/plain": [
169 |        "tensor([[1., 1., 0., 0., 0., 0., 1., 1.],\n",
170 |        "        [1., 1., 0., 0., 0., 0., 1., 1.],\n",
171 |        "        [1., 1., 0., 0., 0., 0., 1., 1.],\n",
172 |        "        [1., 1., 0., 0., 0., 0., 1., 1.],\n",
173 |        "        [1., 1., 0., 0., 0., 0., 1., 1.],\n",
174 |        "        [1., 1., 0., 0., 0., 0., 1., 1.]])"
175 |       ]
176 |      },
177 |      "execution_count": 5,
178 |      "metadata": {},
179 |      "output_type": "execute_result"
180 |     }
181 |    ],
182 |    "source": [
183 |     "X = torch.ones((6, 8))\n",
184 |     "X[:, 2:6] = 0\n",
185 |     "X"
186 |    ]
187 |   },
188 |   {
189 |    "cell_type": "code",
190 |    "execution_count": 6,
191 |    "id": "d042bda0",
192 |    "metadata": {
193 |     "execution": {
194 |      "iopub.execute_input": "2023-08-18T07:07:28.630101Z",
195 |      "iopub.status.busy": "2023-08-18T07:07:28.629606Z",
196 |      "iopub.status.idle": "2023-08-18T07:07:28.634133Z",
197 |      "shell.execute_reply": "2023-08-18T07:07:28.633165Z"
198 |     },
199 |     "origin_pos": 17,
200 |     "tab": [
201 |      "pytorch"
202 |     ]
203 |    },
204 |    "outputs": [],
205 |    "source": [
206 |     "K = torch.tensor([[1.0, -1.0]])"
207 |    ]
208 |   },
209 |   {
210 |    "cell_type": "markdown",
211 |    "id": "02307562",
212 |    "metadata": {
213 |     "slideshow": {
214 |      "slide_type": "slide"
215 |     }
216 |    },
217 |    "source": [
218 |     "输出`Y`中的1代表从白色到黑色的边缘，-1代表从黑色到白色的边缘"
219 |    ]
220 |   },
221 |   {
222 |    "cell_type": "code",
223 |    "execution_count": 7,
224 |    "id": "36de9e2a",
225 |    "metadata": {
226 |     "execution": {
227 |      "iopub.execute_input": "2023-08-18T07:07:28.639056Z",
228 |      "iopub.status.busy": "2023-08-18T07:07:28.638505Z",
229 |      "iopub.status.idle": "2023-08-18T07:07:28.646532Z",
230 |      "shell.execute_reply": "2023-08-18T07:07:28.645509Z"
231 |     },
232 |     "origin_pos": 19,
233 |     "tab": [
234 |      "pytorch"
235 |     ]
236 |    },
237 |    "outputs": [
238 |     {
239 |      "data": {
240 |       "text/plain": [
241 |        "tensor([[ 0.,  1.,  0.,  0.,  0., -1.,  0.],\n",
242 |        "        [ 0.,  1.,  0.,  0.,  0., -1.,  0.],\n",
243 |        "        [ 0.,  1.,  0.,  0.,  0., -1.,  0.],\n",
244 |        "        [ 0.,  1.,  0.,  0.,  0., -1.,  0.],\n",
245 |        "        [ 0.,  1.,  0.,  0.,  0., -1.,  0.],\n",
246 |        "        [ 0.,  1.,  0.,  0.,  0., -1.,  0.]])"
247 |       ]
248 |      },
249 |      "execution_count": 7,
250 |      "metadata": {},
251 |      "output_type": "execute_result"
252 |     }
253 |    ],
254 |    "source": [
255 |     "Y = corr2d(X, K)\n",
256 |     "Y"
257 |    ]
258 |   },
259 |   {
260 |    "cell_type": "markdown",
261 |    "id": "f8fc54d3",
262 |    "metadata": {
263 |     "slideshow": {
264 |      "slide_type": "slide"
265 |     }
266 |    },
267 |    "source": [
268 |     "卷积核`K`只可以检测垂直边缘：对转置后的`X`做互相关，水平边缘检测不到，输出全为0"
269 |    ]
270 |   },
271 |   {
272 |    "cell_type": "code",
273 |    "execution_count": 8,
274 |    "id": "0a754b2d",
275 |    "metadata": {
276 |     "execution": {
277 |      "iopub.execute_input": "2023-08-18T07:07:28.651371Z",
278 |      "iopub.status.busy": "2023-08-18T07:07:28.650819Z",
279 |      "iopub.status.idle": "2023-08-18T07:07:28.658419Z",
280 |      "shell.execute_reply": "2023-08-18T07:07:28.657436Z"
281 |     },
282 |     "origin_pos": 21,
283 |     "tab": [
284 |      "pytorch"
285 |     ]
286 |    },
287 |    "outputs": [
288 |     {
289 |      "data": {
290 |       "text/plain": [
291 |        "tensor([[0., 0., 0., 0., 0.],\n",
292 |        "        [0., 0., 0., 0., 0.],\n",
293 |        "        [0., 0., 0., 0., 0.],\n",
294 |        "        [0., 0., 0., 0., 0.],\n",
295 |        "        [0., 0., 0., 0., 0.],\n",
296 |        "        [0., 0., 0., 0., 0.],\n",
297 |        "        [0., 0., 0., 0., 0.],\n",
298 |        "        [0., 0., 0., 0., 0.]])"
299 |       ]
300 |      },
301 |      "execution_count": 8,
302 |      "metadata": {},
303 |      "output_type": "execute_result"
304 |     }
305 |    ],
306 |    "source": [
307 |     "corr2d(X.t(), K)"
308 |    ]
309 |   },
310 |   {
311 |    "cell_type": "markdown",
312 |    "id": "d3c3d2e5",
313 |    "metadata": {
314 |     "slideshow": {
315 |      "slide_type": "slide"
316 |     }
317 |    },
318 |    "source": [
319 |     "学习由`X`生成`Y`的卷积核"
320 |    ]
321 |   },
322 |   {
323 |    "cell_type": "code",
324 |    "execution_count": 9,
325 |    "id": "2b423578",
326 |    "metadata": {
327 |     "execution": {
328 |      "iopub.execute_input": "2023-08-18T07:07:28.662260Z",
329 |      "iopub.status.busy": "2023-08-18T07:07:28.661527Z",
330 |      "iopub.status.idle": "2023-08-18T07:07:28.681412Z",
331 |      "shell.execute_reply": "2023-08-18T07:07:28.680192Z"
332 |     },
333 |     "origin_pos": 24,
334 |     "tab": [
335 |      "pytorch"
336 |     ]
337 |    },
338 |    "outputs": [
339 |     {
340 |      "name": "stdout",
341 |      "output_type": "stream",
342 |      "text": [
343 |       "epoch 2, loss 6.422\n",
344 |       "epoch 4, loss 1.225\n",
345 |       "epoch 6, loss 0.266\n",
346 |       "epoch 8, loss 0.070\n",
347 |       "epoch 10, loss 0.022\n"
348 |      ]
349 |     }
350 |    ],
351 |    "source": [
352 |     "conv2d = nn.Conv2d(1,1, kernel_size=(1, 2), bias=False)\n",
353 |     "\n",
354 |     "X = X.reshape((1, 1, 6, 8))\n",
355 |     "Y = Y.reshape((1, 1, 6, 7))\n",
356 |     "lr = 3e-2\n",
357 |     "\n",
358 |     "for i in range(10):\n",
359 |     "    Y_hat = conv2d(X)\n",
360 |     "    l = (Y_hat - Y) ** 2\n",
361 |     "    conv2d.zero_grad()\n",
362 |     "    l.sum().backward()\n",
363 |     "    conv2d.weight.data[:] -= lr * conv2d.weight.grad\n",
364 |     "    if (i + 1) % 2 == 0:\n",
365 |     "        print(f'epoch {i+1}, loss {l.sum():.3f}')"
366 |    ]
367 |   },
368 |   {
369 |    "cell_type": "markdown",
370 |    "id": "292614cd",
371 |    "metadata": {
372 |     "slideshow": {
373 |      "slide_type": "slide"
374 |     }
375 |    },
376 |    "source": [
377 |     "所学的卷积核的权重张量"
378 |    ]
379 |   },
380 |   {
381 |    "cell_type": "code",
382 |    "execution_count": 10,
383 |    "id": "b40515e8",
384 |    "metadata": {
385 |     "execution": {
386 |      "iopub.execute_input": "2023-08-18T07:07:28.684721Z",
387 |      "iopub.status.busy": "2023-08-18T07:07:28.684428Z",
388 |      "iopub.status.idle": "2023-08-18T07:07:28.691507Z",
389 |      "shell.execute_reply": "2023-08-18T07:07:28.690512Z"
390 |     },
391 |     "origin_pos": 29,
392 |     "tab": [
393 |      "pytorch"
394 |     ]
395 |    },
396 |    "outputs": [
397 |     {
398 |      "data": {
399 |       "text/plain": [
400 |        "tensor([[ 1.0010, -0.9739]])"
401 |       ]
402 |      },
403 |      "execution_count": 10,
404 |      "metadata": {},
405 |      "output_type": "execute_result"
406 |     }
407 |    ],
408 |    "source": [
409 |     "conv2d.weight.data.reshape((1, 2))"
410 |    ]
411 |   }
412 |  ],
413 |  "metadata": {
414 |   "celltoolbar": "Slideshow",
415 |   "language_info": {
416 |    "name": "python"
417 |   },
418 |   "required_libs": [],
419 |   "rise": {
420 |    "autolaunch": true,
421 |    "enable_chalkboard": true,
422 |    "overlay": "<div class='my-top-right'><img height=80px src='http://d2l.ai/_static/logo-with-text.png'/></div><div class='my-top-left'></div>",
423 |    "scroll": true
424 |   }
425 |  },
426 |  "nbformat": 4,
427 |  "nbformat_minor": 5
428 | }


--------------------------------------------------------------------------------
/chapter_convolutional-neural-networks/padding-and-strides.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "markdown",
  5 |    "id": "7d4404d4",
  6 |    "metadata": {
  7 |     "slideshow": {
  8 |      "slide_type": "-"
  9 |     }
 10 |    },
 11 |    "source": [
 12 |     "# 填充和步幅\n",
 13 |     "\n",
 14 |     "在所有侧边填充1个像素"
 15 |    ]
 16 |   },
 17 |   {
 18 |    "cell_type": "code",
 19 |    "execution_count": 1,
 20 |    "id": "ee25ca28",
 21 |    "metadata": {
 22 |     "execution": {
 23 |      "iopub.execute_input": "2023-08-18T07:00:27.440657Z",
 24 |      "iopub.status.busy": "2023-08-18T07:00:27.439788Z",
 25 |      "iopub.status.idle": "2023-08-18T07:00:28.396461Z",
 26 |      "shell.execute_reply": "2023-08-18T07:00:28.395508Z"
 27 |     },
 28 |     "origin_pos": 2,
 29 |     "tab": [
 30 |      "pytorch"
 31 |     ]
 32 |    },
 33 |    "outputs": [
 34 |     {
 35 |      "data": {
 36 |       "text/plain": [
 37 |        "torch.Size([8, 8])"
 38 |       ]
 39 |      },
 40 |      "execution_count": 1,
 41 |      "metadata": {},
 42 |      "output_type": "execute_result"
 43 |     }
 44 |    ],
 45 |    "source": [
 46 |     "import torch\n",
 47 |     "from torch import nn\n",
 48 |     "\n",
 49 |     "\n",
 50 |     "def comp_conv2d(conv2d, X):\n",
 51 |     "    X = X.reshape((1, 1) + X.shape)\n",
 52 |     "    Y = conv2d(X)\n",
 53 |     "    return Y.reshape(Y.shape[2:])\n",
 54 |     "\n",
 55 |     "conv2d = nn.Conv2d(1, 1, kernel_size=3, padding=1)\n",
 56 |     "X = torch.rand(size=(8, 8))\n",
 57 |     "comp_conv2d(conv2d, X).shape"
 58 |    ]
 59 |   },
 60 |   {
 61 |    "cell_type": "markdown",
 62 |    "id": "04e04824",
 63 |    "metadata": {
 64 |     "slideshow": {
 65 |      "slide_type": "slide"
 66 |     }
 67 |    },
 68 |    "source": [
 69 |     "填充不同的高度和宽度"
 70 |    ]
 71 |   },
 72 |   {
 73 |    "cell_type": "code",
 74 |    "execution_count": 2,
 75 |    "id": "5dadebb1",
 76 |    "metadata": {
 77 |     "execution": {
 78 |      "iopub.execute_input": "2023-08-18T07:00:28.400923Z",
 79 |      "iopub.status.busy": "2023-08-18T07:00:28.400085Z",
 80 |      "iopub.status.idle": "2023-08-18T07:00:28.406887Z",
 81 |      "shell.execute_reply": "2023-08-18T07:00:28.406085Z"
 82 |     },
 83 |     "origin_pos": 7,
 84 |     "tab": [
 85 |      "pytorch"
 86 |     ]
 87 |    },
 88 |    "outputs": [
 89 |     {
 90 |      "data": {
 91 |       "text/plain": [
 92 |        "torch.Size([8, 8])"
 93 |       ]
 94 |      },
 95 |      "execution_count": 2,
 96 |      "metadata": {},
 97 |      "output_type": "execute_result"
 98 |     }
 99 |    ],
100 |    "source": [
101 |     "conv2d = nn.Conv2d(1, 1, kernel_size=(5, 3), padding=(2, 1))\n",
102 |     "comp_conv2d(conv2d, X).shape"
103 |    ]
104 |   },
105 |   {
106 |    "cell_type": "markdown",
107 |    "id": "01e7aa78",
108 |    "metadata": {
109 |     "slideshow": {
110 |      "slide_type": "slide"
111 |     }
112 |    },
113 |    "source": [
114 |     "将高度和宽度的步幅设置为2"
115 |    ]
116 |   },
117 |   {
118 |    "cell_type": "code",
119 |    "execution_count": 3,
120 |    "id": "7b6ac278",
121 |    "metadata": {
122 |     "execution": {
123 |      "iopub.execute_input": "2023-08-18T07:00:28.410395Z",
124 |      "iopub.status.busy": "2023-08-18T07:00:28.410090Z",
125 |      "iopub.status.idle": "2023-08-18T07:00:28.416621Z",
126 |      "shell.execute_reply": "2023-08-18T07:00:28.415848Z"
127 |     },
128 |     "origin_pos": 12,
129 |     "tab": [
130 |      "pytorch"
131 |     ]
132 |    },
133 |    "outputs": [
134 |     {
135 |      "data": {
136 |       "text/plain": [
137 |        "torch.Size([4, 4])"
138 |       ]
139 |      },
140 |      "execution_count": 3,
141 |      "metadata": {},
142 |      "output_type": "execute_result"
143 |     }
144 |    ],
145 |    "source": [
146 |     "conv2d = nn.Conv2d(1, 1, kernel_size=3, padding=1, stride=2)\n",
147 |     "comp_conv2d(conv2d, X).shape"
148 |    ]
149 |   },
150 |   {
151 |    "cell_type": "markdown",
152 |    "id": "53265c61",
153 |    "metadata": {
154 |     "slideshow": {
155 |      "slide_type": "-"
156 |     }
157 |    },
158 |    "source": [
159 |     "一个稍微复杂的例子"
160 |    ]
161 |   },
162 |   {
163 |    "cell_type": "code",
164 |    "execution_count": 4,
165 |    "id": "6f1c0e6c",
166 |    "metadata": {
167 |     "execution": {
168 |      "iopub.execute_input": "2023-08-18T07:00:28.422070Z",
169 |      "iopub.status.busy": "2023-08-18T07:00:28.421461Z",
170 |      "iopub.status.idle": "2023-08-18T07:00:28.429200Z",
171 |      "shell.execute_reply": "2023-08-18T07:00:28.427969Z"
172 |     },
173 |     "origin_pos": 17,
174 |     "tab": [
175 |      "pytorch"
176 |     ]
177 |    },
178 |    "outputs": [
179 |     {
180 |      "data": {
181 |       "text/plain": [
182 |        "torch.Size([2, 2])"
183 |       ]
184 |      },
185 |      "execution_count": 4,
186 |      "metadata": {},
187 |      "output_type": "execute_result"
188 |     }
189 |    ],
190 |    "source": [
191 |     "conv2d = nn.Conv2d(1, 1, kernel_size=(3, 5), padding=(0, 1), stride=(3, 4))\n",
192 |     "comp_conv2d(conv2d, X).shape"
193 |    ]
194 |   }
195 |  ],
196 |  "metadata": {
197 |   "celltoolbar": "Slideshow",
198 |   "language_info": {
199 |    "name": "python"
200 |   },
201 |   "required_libs": [],
202 |   "rise": {
203 |    "autolaunch": true,
204 |    "enable_chalkboard": true,
205 |    "overlay": "<div class='my-top-right'><img height=80px src='http://d2l.ai/_static/logo-with-text.png'/></div><div class='my-top-left'></div>",
206 |    "scroll": true
207 |   }
208 |  },
209 |  "nbformat": 4,
210 |  "nbformat_minor": 5
211 | }


--------------------------------------------------------------------------------
/chapter_convolutional-neural-networks/pooling.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "markdown",
  5 |    "id": "a3a254cb",
  6 |    "metadata": {
  7 |     "slideshow": {
  8 |      "slide_type": "-"
  9 |     }
 10 |    },
 11 |    "source": [
 12 |     "# 汇聚层\n",
 13 |     "\n",
 14 |     "实现汇聚层的前向传播"
 15 |    ]
 16 |   },
 17 |   {
 18 |    "cell_type": "code",
 19 |    "execution_count": 2,
 20 |    "id": "fe35adac",
 21 |    "metadata": {
 22 |     "execution": {
 23 |      "iopub.execute_input": "2023-08-18T07:02:20.228639Z",
 24 |      "iopub.status.busy": "2023-08-18T07:02:20.227964Z",
 25 |      "iopub.status.idle": "2023-08-18T07:02:20.234155Z",
 26 |      "shell.execute_reply": "2023-08-18T07:02:20.233266Z"
 27 |     },
 28 |     "origin_pos": 4,
 29 |     "tab": [
 30 |      "pytorch"
 31 |     ]
 32 |    },
 33 |    "outputs": [],
 34 |    "source": [
 35 |     "import torch\n",
 36 |     "from torch import nn\n",
 37 |     "from d2l import torch as d2l\n",
 38 |     "\n",
 39 |     "def pool2d(X, pool_size, mode='max'):\n",
 40 |     "    p_h, p_w = pool_size\n",
 41 |     "    Y = torch.zeros((X.shape[0] - p_h + 1, X.shape[1] - p_w + 1))\n",
 42 |     "    for i in range(Y.shape[0]):\n",
 43 |     "        for j in range(Y.shape[1]):\n",
 44 |     "            if mode == 'max':\n",
 45 |     "                Y[i, j] = X[i: i + p_h, j: j + p_w].max()\n",
 46 |     "            elif mode == 'avg':\n",
 47 |     "                Y[i, j] = X[i: i + p_h, j: j + p_w].mean()\n",
 48 |     "    return Y"
 49 |    ]
 50 |   },
 51 |   {
 52 |    "cell_type": "markdown",
 53 |    "id": "0cee43f1",
 54 |    "metadata": {
 55 |     "slideshow": {
 56 |      "slide_type": "slide"
 57 |     }
 58 |    },
 59 |    "source": [
 60 |     "验证二维最大汇聚层的输出"
 61 |    ]
 62 |   },
 63 |   {
 64 |    "cell_type": "code",
 65 |    "execution_count": 3,
 66 |    "id": "3a781c85",
 67 |    "metadata": {
 68 |     "execution": {
 69 |      "iopub.execute_input": "2023-08-18T07:02:20.237767Z",
 70 |      "iopub.status.busy": "2023-08-18T07:02:20.237211Z",
 71 |      "iopub.status.idle": "2023-08-18T07:02:20.268065Z",
 72 |      "shell.execute_reply": "2023-08-18T07:02:20.267212Z"
 73 |     },
 74 |     "origin_pos": 7,
 75 |     "tab": [
 76 |      "pytorch"
 77 |     ]
 78 |    },
 79 |    "outputs": [
 80 |     {
 81 |      "data": {
 82 |       "text/plain": [
 83 |        "tensor([[4., 5.],\n",
 84 |        "        [7., 8.]])"
 85 |       ]
 86 |      },
 87 |      "execution_count": 3,
 88 |      "metadata": {},
 89 |      "output_type": "execute_result"
 90 |     }
 91 |    ],
 92 |    "source": [
 93 |     "X = torch.tensor([[0.0, 1.0, 2.0], [3.0, 4.0, 5.0], [6.0, 7.0, 8.0]])\n",
 94 |     "pool2d(X, (2, 2))"
 95 |    ]
 96 |   },
 97 |   {
 98 |    "cell_type": "markdown",
 99 |    "id": "2a7b5d65",
100 |    "metadata": {
101 |     "slideshow": {
102 |      "slide_type": "-"
103 |     }
104 |    },
105 |    "source": [
106 |     "验证平均汇聚层"
107 |    ]
108 |   },
109 |   {
110 |    "cell_type": "code",
111 |    "execution_count": 4,
112 |    "id": "4f9a1ffd",
113 |    "metadata": {
114 |     "execution": {
115 |      "iopub.execute_input": "2023-08-18T07:02:20.272001Z",
116 |      "iopub.status.busy": "2023-08-18T07:02:20.271411Z",
117 |      "iopub.status.idle": "2023-08-18T07:02:20.277849Z",
118 |      "shell.execute_reply": "2023-08-18T07:02:20.276928Z"
119 |     },
120 |     "origin_pos": 9,
121 |     "tab": [
122 |      "pytorch"
123 |     ]
124 |    },
125 |    "outputs": [
126 |     {
127 |      "data": {
128 |       "text/plain": [
129 |        "tensor([[2., 3.],\n",
130 |        "        [5., 6.]])"
131 |       ]
132 |      },
133 |      "execution_count": 4,
134 |      "metadata": {},
135 |      "output_type": "execute_result"
136 |     }
137 |    ],
138 |    "source": [
139 |     "pool2d(X, (2, 2), 'avg')"
140 |    ]
141 |   },
142 |   {
143 |    "cell_type": "markdown",
144 |    "id": "941b8124",
145 |    "metadata": {
146 |     "slideshow": {
147 |      "slide_type": "slide"
148 |     }
149 |    },
150 |    "source": [
151 |     "填充和步幅"
152 |    ]
153 |   },
154 |   {
155 |    "cell_type": "code",
156 |    "execution_count": 5,
157 |    "id": "140d08f5",
158 |    "metadata": {
159 |     "execution": {
160 |      "iopub.execute_input": "2023-08-18T07:02:20.281458Z",
161 |      "iopub.status.busy": "2023-08-18T07:02:20.280874Z",
162 |      "iopub.status.idle": "2023-08-18T07:02:20.287391Z",
163 |      "shell.execute_reply": "2023-08-18T07:02:20.286578Z"
164 |     },
165 |     "origin_pos": 12,
166 |     "tab": [
167 |      "pytorch"
168 |     ]
169 |    },
170 |    "outputs": [
171 |     {
172 |      "data": {
173 |       "text/plain": [
174 |        "tensor([[[[ 0.,  1.,  2.,  3.],\n",
175 |        "          [ 4.,  5.,  6.,  7.],\n",
176 |        "          [ 8.,  9., 10., 11.],\n",
177 |        "          [12., 13., 14., 15.]]]])"
178 |       ]
179 |      },
180 |      "execution_count": 5,
181 |      "metadata": {},
182 |      "output_type": "execute_result"
183 |     }
184 |    ],
185 |    "source": [
186 |     "X = torch.arange(16, dtype=torch.float32).reshape((1, 1, 4, 4))\n",
187 |     "X"
188 |    ]
189 |   },
190 |   {
191 |    "cell_type": "markdown",
192 |    "id": "43710341",
193 |    "metadata": {
194 |     "slideshow": {
195 |      "slide_type": "-"
196 |     }
197 |    },
198 |    "source": [
199 |     "默认情况下，深度学习框架中的步幅与汇聚窗口的大小相同"
200 |    ]
201 |   },
202 |   {
203 |    "cell_type": "code",
204 |    "execution_count": 6,
205 |    "id": "a3cc01e3",
206 |    "metadata": {
207 |     "execution": {
208 |      "iopub.execute_input": "2023-08-18T07:02:20.291052Z",
209 |      "iopub.status.busy": "2023-08-18T07:02:20.290402Z",
210 |      "iopub.status.idle": "2023-08-18T07:02:20.296276Z",
211 |      "shell.execute_reply": "2023-08-18T07:02:20.295476Z"
212 |     },
213 |     "origin_pos": 17,
214 |     "tab": [
215 |      "pytorch"
216 |     ]
217 |    },
218 |    "outputs": [
219 |     {
220 |      "data": {
221 |       "text/plain": [
222 |        "tensor([[[[10.]]]])"
223 |       ]
224 |      },
225 |      "execution_count": 6,
226 |      "metadata": {},
227 |      "output_type": "execute_result"
228 |     }
229 |    ],
230 |    "source": [
231 |     "pool2d = nn.MaxPool2d(3)\n",
232 |     "pool2d(X)"
233 |    ]
234 |   },
235 |   {
236 |    "cell_type": "markdown",
237 |    "id": "4b86f339",
238 |    "metadata": {
239 |     "slideshow": {
240 |      "slide_type": "slide"
241 |     }
242 |    },
243 |    "source": [
244 |     "填充和步幅可以手动设定"
245 |    ]
246 |   },
247 |   {
248 |    "cell_type": "code",
249 |    "execution_count": 7,
250 |    "id": "9c247428",
251 |    "metadata": {
252 |     "execution": {
253 |      "iopub.execute_input": "2023-08-18T07:02:20.299965Z",
254 |      "iopub.status.busy": "2023-08-18T07:02:20.299310Z",
255 |      "iopub.status.idle": "2023-08-18T07:02:20.307455Z",
256 |      "shell.execute_reply": "2023-08-18T07:02:20.306477Z"
257 |     },
258 |     "origin_pos": 22,
259 |     "tab": [
260 |      "pytorch"
261 |     ]
262 |    },
263 |    "outputs": [
264 |     {
265 |      "data": {
266 |       "text/plain": [
267 |        "tensor([[[[ 5.,  7.],\n",
268 |        "          [13., 15.]]]])"
269 |       ]
270 |      },
271 |      "execution_count": 7,
272 |      "metadata": {},
273 |      "output_type": "execute_result"
274 |     }
275 |    ],
276 |    "source": [
277 |     "pool2d = nn.MaxPool2d(3, padding=1, stride=2)\n",
278 |     "pool2d(X)"
279 |    ]
280 |   },
281 |   {
282 |    "cell_type": "markdown",
283 |    "id": "7295d3e3",
284 |    "metadata": {
285 |     "slideshow": {
286 |      "slide_type": "-"
287 |     }
288 |    },
289 |    "source": [
290 |     "设定一个任意大小的矩形汇聚窗口，并分别设定填充和步幅的高度和宽度"
291 |    ]
292 |   },
293 |   {
294 |    "cell_type": "code",
295 |    "execution_count": 8,
296 |    "id": "7c169b2f",
297 |    "metadata": {
298 |     "execution": {
299 |      "iopub.execute_input": "2023-08-18T07:02:20.311794Z",
300 |      "iopub.status.busy": "2023-08-18T07:02:20.311492Z",
301 |      "iopub.status.idle": "2023-08-18T07:02:20.320399Z",
302 |      "shell.execute_reply": "2023-08-18T07:02:20.319108Z"
303 |     },
304 |     "origin_pos": 30,
305 |     "tab": [
306 |      "pytorch"
307 |     ]
308 |    },
309 |    "outputs": [
310 |     {
311 |      "data": {
312 |       "text/plain": [
313 |        "tensor([[[[ 5.,  7.],\n",
314 |        "          [13., 15.]]]])"
315 |       ]
316 |      },
317 |      "execution_count": 8,
318 |      "metadata": {},
319 |      "output_type": "execute_result"
320 |     }
321 |    ],
322 |    "source": [
323 |     "pool2d = nn.MaxPool2d((2, 3), stride=(2, 3), padding=(0, 1))\n",
324 |     "pool2d(X)"
325 |    ]
326 |   },
327 |   {
328 |    "cell_type": "markdown",
329 |    "id": "daa999f2",
330 |    "metadata": {
331 |     "slideshow": {
332 |      "slide_type": "slide"
333 |     }
334 |    },
335 |    "source": [
336 |     "汇聚层在每个输入通道上单独运算"
337 |    ]
338 |   },
339 |   {
340 |    "cell_type": "code",
341 |    "execution_count": 9,
342 |    "id": "c0a30a7f",
343 |    "metadata": {
344 |     "execution": {
345 |      "iopub.execute_input": "2023-08-18T07:02:20.325617Z",
346 |      "iopub.status.busy": "2023-08-18T07:02:20.324879Z",
347 |      "iopub.status.idle": "2023-08-18T07:02:20.335303Z",
348 |      "shell.execute_reply": "2023-08-18T07:02:20.334055Z"
349 |     },
350 |     "origin_pos": 35,
351 |     "tab": [
352 |      "pytorch"
353 |     ]
354 |    },
355 |    "outputs": [
356 |     {
357 |      "data": {
358 |       "text/plain": [
359 |        "tensor([[[[ 0.,  1.,  2.,  3.],\n",
360 |        "          [ 4.,  5.,  6.,  7.],\n",
361 |        "          [ 8.,  9., 10., 11.],\n",
362 |        "          [12., 13., 14., 15.]],\n",
363 |        "\n",
364 |        "         [[ 1.,  2.,  3.,  4.],\n",
365 |        "          [ 5.,  6.,  7.,  8.],\n",
366 |        "          [ 9., 10., 11., 12.],\n",
367 |        "          [13., 14., 15., 16.]]]])"
368 |       ]
369 |      },
370 |      "execution_count": 9,
371 |      "metadata": {},
372 |      "output_type": "execute_result"
373 |     }
374 |    ],
375 |    "source": [
376 |     "X = torch.cat((X, X + 1), 1)\n",
377 |     "X"
378 |    ]
379 |   },
380 |   {
381 |    "cell_type": "code",
382 |    "execution_count": 10,
383 |    "id": "e534c8f3",
384 |    "metadata": {
385 |     "execution": {
386 |      "iopub.execute_input": "2023-08-18T07:02:20.340529Z",
387 |      "iopub.status.busy": "2023-08-18T07:02:20.339767Z",
388 |      "iopub.status.idle": "2023-08-18T07:02:20.349365Z",
389 |      "shell.execute_reply": "2023-08-18T07:02:20.348159Z"
390 |     },
391 |     "origin_pos": 39,
392 |     "tab": [
393 |      "pytorch"
394 |     ]
395 |    },
396 |    "outputs": [
397 |     {
398 |      "data": {
399 |       "text/plain": [
400 |        "tensor([[[[ 5.,  7.],\n",
401 |        "          [13., 15.]],\n",
402 |        "\n",
403 |        "         [[ 6.,  8.],\n",
404 |        "          [14., 16.]]]])"
405 |       ]
406 |      },
407 |      "execution_count": 10,
408 |      "metadata": {},
409 |      "output_type": "execute_result"
410 |     }
411 |    ],
412 |    "source": [
413 |     "pool2d = nn.MaxPool2d(3, padding=1, stride=2)\n",
414 |     "pool2d(X)"
415 |    ]
416 |   }
417 |  ],
418 |  "metadata": {
419 |   "celltoolbar": "Slideshow",
420 |   "language_info": {
421 |    "name": "python"
422 |   },
423 |   "required_libs": [],
424 |   "rise": {
425 |    "autolaunch": true,
426 |    "enable_chalkboard": true,
427 |    "overlay": "<div class='my-top-right'><img height=80px src='http://d2l.ai/_static/logo-with-text.png'/></div><div class='my-top-left'></div>",
428 |    "scroll": true
429 |   }
430 |  },
431 |  "nbformat": 4,
432 |  "nbformat_minor": 5
433 | }


--------------------------------------------------------------------------------
/chapter_convolutional-neural-networks/rise.css:
--------------------------------------------------------------------------------
 1 | 
 2 | div.text_cell_render.rendered_html {
 3 |     padding: 0.35em 0.1em;
 4 | }
 5 | 
 6 | div.code_cell {
 7 |     font-size: 120%;
 8 | }
 9 | 
10 | div.my-top-right {
11 |     position: absolute;
12 |     right: 5%;
13 |     top: 1em;
14 |     font-size: 2em;
15 | }
16 | 
17 | div.my-top-left {
18 |     position: absolute;
19 |     left: 5%;
20 |     top: 1em;
21 |     font-size: 2em;
22 | }
23 | 


--------------------------------------------------------------------------------
/chapter_deep-learning-computation/custom-layer.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "markdown",
  5 |    "id": "e91e1a31",
  6 |    "metadata": {
  7 |     "slideshow": {
  8 |      "slide_type": "-"
  9 |     }
 10 |    },
 11 |    "source": [
 12 |     "# 自定义层\n",
 13 |     "\n",
 14 |     "构造一个没有任何参数的自定义层"
 15 |    ]
 16 |   },
 17 |   {
 18 |    "cell_type": "code",
 19 |    "execution_count": 2,
 20 |    "id": "dec68045",
 21 |    "metadata": {
 22 |     "execution": {
 23 |      "iopub.execute_input": "2023-08-18T07:07:17.497408Z",
 24 |      "iopub.status.busy": "2023-08-18T07:07:17.497077Z",
 25 |      "iopub.status.idle": "2023-08-18T07:07:17.508357Z",
 26 |      "shell.execute_reply": "2023-08-18T07:07:17.507175Z"
 27 |     },
 28 |     "origin_pos": 7,
 29 |     "tab": [
 30 |      "pytorch"
 31 |     ]
 32 |    },
 33 |    "outputs": [
 34 |     {
 35 |      "data": {
 36 |       "text/plain": [
 37 |        "tensor([-2., -1.,  0.,  1.,  2.])"
 38 |       ]
 39 |      },
 40 |      "execution_count": 2,
 41 |      "metadata": {},
 42 |      "output_type": "execute_result"
 43 |     }
 44 |    ],
 45 |    "source": [
 46 |     "import torch\n",
 47 |     "import torch.nn.functional as F\n",
 48 |     "from torch import nn\n",
 49 |     "\n",
 50 |     "\n",
 51 |     "class CenteredLayer(nn.Module):\n",
 52 |     "    def __init__(self):\n",
 53 |     "        super().__init__()\n",
 54 |     "\n",
 55 |     "    def forward(self, X):\n",
 56 |     "        return X - X.mean()\n",
 57 |     "\n",
 58 |     "layer = CenteredLayer()\n",
 59 |     "layer(torch.FloatTensor([1, 2, 3, 4, 5]))"
 60 |    ]
 61 |   },
 62 |   {
 63 |    "cell_type": "markdown",
 64 |    "id": "05f8a223",
 65 |    "metadata": {
 66 |     "slideshow": {
 67 |      "slide_type": "slide"
 68 |     }
 69 |    },
 70 |    "source": [
 71 |     "将层作为组件合并到更复杂的模型中"
 72 |    ]
 73 |   },
 74 |   {
 75 |    "cell_type": "code",
 76 |    "execution_count": 4,
 77 |    "id": "6ab302a0",
 78 |    "metadata": {
 79 |     "execution": {
 80 |      "iopub.execute_input": "2023-08-18T07:07:17.523517Z",
 81 |      "iopub.status.busy": "2023-08-18T07:07:17.523140Z",
 82 |      "iopub.status.idle": "2023-08-18T07:07:17.534718Z",
 83 |      "shell.execute_reply": "2023-08-18T07:07:17.533593Z"
 84 |     },
 85 |     "origin_pos": 16,
 86 |     "tab": [
 87 |      "pytorch"
 88 |     ]
 89 |    },
 90 |    "outputs": [
 91 |     {
 92 |      "data": {
 93 |       "text/plain": [
 94 |        "tensor(7.4506e-09, grad_fn=<MeanBackward0>)"
 95 |       ]
 96 |      },
 97 |      "execution_count": 4,
 98 |      "metadata": {},
 99 |      "output_type": "execute_result"
100 |     }
101 |    ],
102 |    "source": [
103 |     "net = nn.Sequential(nn.Linear(8, 128), CenteredLayer())\n",
104 |     "\n",
105 |     "Y = net(torch.rand(4, 8))\n",
106 |     "Y.mean()"
107 |    ]
108 |   },
109 |   {
110 |    "cell_type": "markdown",
111 |    "id": "568d0d74",
112 |    "metadata": {
113 |     "slideshow": {
114 |      "slide_type": "slide"
115 |     }
116 |    },
117 |    "source": [
118 |     "带参数的层"
119 |    ]
120 |   },
121 |   {
122 |    "cell_type": "code",
123 |    "execution_count": 6,
124 |    "id": "4490005a",
125 |    "metadata": {
126 |     "execution": {
127 |      "iopub.execute_input": "2023-08-18T07:07:17.550522Z",
128 |      "iopub.status.busy": "2023-08-18T07:07:17.550152Z",
129 |      "iopub.status.idle": "2023-08-18T07:07:17.558364Z",
130 |      "shell.execute_reply": "2023-08-18T07:07:17.557338Z"
131 |     },
132 |     "origin_pos": 28,
133 |     "tab": [
134 |      "pytorch"
135 |     ]
136 |    },
137 |    "outputs": [
138 |     {
139 |      "data": {
140 |       "text/plain": [
141 |        "Parameter containing:\n",
142 |        "tensor([[ 0.1775, -1.4539,  0.3972],\n",
143 |        "        [-0.1339,  0.5273,  1.3041],\n",
144 |        "        [-0.3327, -0.2337, -0.6334],\n",
145 |        "        [ 1.2076, -0.3937,  0.6851],\n",
146 |        "        [-0.4716,  0.0894, -0.9195]], requires_grad=True)"
147 |       ]
148 |      },
149 |      "execution_count": 6,
150 |      "metadata": {},
151 |      "output_type": "execute_result"
152 |     }
153 |    ],
154 |    "source": [
155 |     "class MyLinear(nn.Module):\n",
156 |     "    def __init__(self, in_units, units):\n",
157 |     "        super().__init__()\n",
158 |     "        self.weight = nn.Parameter(torch.randn(in_units, units))\n",
159 |     "        self.bias = nn.Parameter(torch.randn(units,))\n",
160 |     "    def forward(self, X):\n",
161 |     "        linear = torch.matmul(X, self.weight) + self.bias\n",
162 |     "        return F.relu(linear)\n",
163 |     "\n",
164 |     "linear = MyLinear(5, 3)\n",
165 |     "linear.weight"
166 |    ]
167 |   },
168 |   {
169 |    "cell_type": "markdown",
170 |    "id": "9c8d6fca",
171 |    "metadata": {
172 |     "slideshow": {
173 |      "slide_type": "slide"
174 |     }
175 |    },
176 |    "source": [
177 |     "使用自定义层直接执行前向传播计算"
178 |    ]
179 |   },
180 |   {
181 |    "cell_type": "code",
182 |    "execution_count": 7,
183 |    "id": "25f2aabf",
184 |    "metadata": {
185 |     "execution": {
186 |      "iopub.execute_input": "2023-08-18T07:07:17.562706Z",
187 |      "iopub.status.busy": "2023-08-18T07:07:17.562337Z",
188 |      "iopub.status.idle": "2023-08-18T07:07:17.570015Z",
189 |      "shell.execute_reply": "2023-08-18T07:07:17.568916Z"
190 |     },
191 |     "origin_pos": 32,
192 |     "tab": [
193 |      "pytorch"
194 |     ]
195 |    },
196 |    "outputs": [
197 |     {
198 |      "data": {
199 |       "text/plain": [
200 |        "tensor([[0., 0., 0.],\n",
201 |        "        [0., 0., 0.]])"
202 |       ]
203 |      },
204 |      "execution_count": 7,
205 |      "metadata": {},
206 |      "output_type": "execute_result"
207 |     }
208 |    ],
209 |    "source": [
210 |     "linear(torch.rand(2, 5))"
211 |    ]
212 |   },
213 |   {
214 |    "cell_type": "markdown",
215 |    "id": "393a154e",
216 |    "metadata": {
217 |     "slideshow": {
218 |      "slide_type": "-"
219 |     }
220 |    },
221 |    "source": [
222 |     "使用自定义层构建模型"
223 |    ]
224 |   },
225 |   {
226 |    "cell_type": "code",
227 |    "execution_count": 8,
228 |    "id": "fb2953e8",
229 |    "metadata": {
230 |     "execution": {
231 |      "iopub.execute_input": "2023-08-18T07:07:17.574378Z",
232 |      "iopub.status.busy": "2023-08-18T07:07:17.574000Z",
233 |      "iopub.status.idle": "2023-08-18T07:07:17.582792Z",
234 |      "shell.execute_reply": "2023-08-18T07:07:17.581735Z"
235 |     },
236 |     "origin_pos": 37,
237 |     "tab": [
238 |      "pytorch"
239 |     ]
240 |    },
241 |    "outputs": [
242 |     {
243 |      "data": {
244 |       "text/plain": [
245 |        "tensor([[0.],\n",
246 |        "        [0.]])"
247 |       ]
248 |      },
249 |      "execution_count": 8,
250 |      "metadata": {},
251 |      "output_type": "execute_result"
252 |     }
253 |    ],
254 |    "source": [
255 |     "net = nn.Sequential(MyLinear(64, 8), MyLinear(8, 1))\n",
256 |     "net(torch.rand(2, 64))"
257 |    ]
258 |   }
259 |  ],
260 |  "metadata": {
261 |   "celltoolbar": "Slideshow",
262 |   "language_info": {
263 |    "name": "python"
264 |   },
265 |   "required_libs": [],
266 |   "rise": {
267 |    "autolaunch": true,
268 |    "enable_chalkboard": true,
269 |    "overlay": "<div class='my-top-right'><img height=80px src='http://d2l.ai/_static/logo-with-text.png'/></div><div class='my-top-left'></div>",
270 |    "scroll": true
271 |   }
272 |  },
273 |  "nbformat": 4,
274 |  "nbformat_minor": 5
275 | }


--------------------------------------------------------------------------------
/chapter_deep-learning-computation/model-construction.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "markdown",
  5 |    "id": "8d628b46",
  6 |    "metadata": {
  7 |     "slideshow": {
  8 |      "slide_type": "-"
  9 |     }
 10 |    },
 11 |    "source": [
 12 |     "# 层和块\n",
 13 |     "\n",
 14 |     "我们先回顾一下多层感知机"
 15 |    ]
 16 |   },
 17 |   {
 18 |    "cell_type": "code",
 19 |    "execution_count": 1,
 20 |    "id": "9895e279",
 21 |    "metadata": {
 22 |     "execution": {
 23 |      "iopub.execute_input": "2023-08-18T06:57:00.244437Z",
 24 |      "iopub.status.busy": "2023-08-18T06:57:00.243813Z",
 25 |      "iopub.status.idle": "2023-08-18T06:57:01.320999Z",
 26 |      "shell.execute_reply": "2023-08-18T06:57:01.320186Z"
 27 |     },
 28 |     "origin_pos": 2,
 29 |     "tab": [
 30 |      "pytorch"
 31 |     ]
 32 |    },
 33 |    "outputs": [
 34 |     {
 35 |      "data": {
 36 |       "text/plain": [
 37 |        "tensor([[ 0.0343,  0.0264,  0.2505, -0.0243,  0.0945,  0.0012, -0.0141,  0.0666,\n",
 38 |        "         -0.0547, -0.0667],\n",
 39 |        "        [ 0.0772, -0.0274,  0.2638, -0.0191,  0.0394, -0.0324,  0.0102,  0.0707,\n",
 40 |        "         -0.1481, -0.1031]], grad_fn=<AddmmBackward0>)"
 41 |       ]
 42 |      },
 43 |      "execution_count": 1,
 44 |      "metadata": {},
 45 |      "output_type": "execute_result"
 46 |     }
 47 |    ],
 48 |    "source": [
 49 |     "import torch\n",
 50 |     "from torch import nn\n",
 51 |     "from torch.nn import functional as F\n",
 52 |     "\n",
 53 |     "net = nn.Sequential(nn.Linear(20, 256), nn.ReLU(), nn.Linear(256, 10))\n",
 54 |     "\n",
 55 |     "X = torch.rand(2, 20)\n",
 56 |     "net(X)"
 57 |    ]
 58 |   },
 59 |   {
 60 |    "cell_type": "markdown",
 61 |    "id": "389483a7",
 62 |    "metadata": {
 63 |     "slideshow": {
 64 |      "slide_type": "-"
 65 |     }
 66 |    },
 67 |    "source": [
 68 |     "`nn.Sequential`定义了一种特殊的`Module`"
 69 |    ]
 70 |   },
 71 |   {
 72 |    "cell_type": "markdown",
 73 |    "id": "c9adf2a5",
 74 |    "metadata": {
 75 |     "slideshow": {
 76 |      "slide_type": "slide"
 77 |     }
 78 |    },
 79 |    "source": [
 80 |     "自定义块"
 81 |    ]
 82 |   },
 83 |   {
 84 |    "cell_type": "code",
 85 |    "execution_count": 2,
 86 |    "id": "876df867",
 87 |    "metadata": {
 88 |     "execution": {
 89 |      "iopub.execute_input": "2023-08-18T06:57:01.325541Z",
 90 |      "iopub.status.busy": "2023-08-18T06:57:01.324828Z",
 91 |      "iopub.status.idle": "2023-08-18T06:57:01.330411Z",
 92 |      "shell.execute_reply": "2023-08-18T06:57:01.329591Z"
 93 |     },
 94 |     "origin_pos": 14,
 95 |     "tab": [
 96 |      "pytorch"
 97 |     ]
 98 |    },
 99 |    "outputs": [],
100 |    "source": [
101 |     "class MLP(nn.Module):\n",
102 |     "    def __init__(self):\n",
103 |     "        super().__init__()\n",
104 |     "        self.hidden = nn.Linear(20, 256)\n",
105 |     "        self.out = nn.Linear(256, 10)\n",
106 |     "\n",
107 |     "    def forward(self, X):\n",
108 |     "        return self.out(F.relu(self.hidden(X)))"
109 |    ]
110 |   },
111 |   {
112 |    "cell_type": "markdown",
113 |    "id": "d63bddd3",
114 |    "metadata": {
115 |     "slideshow": {
116 |      "slide_type": "slide"
117 |     }
118 |    },
119 |    "source": [
120 |     "实例化多层感知机的层，然后在每次调用前向传播函数时调用这些层"
121 |    ]
122 |   },
123 |   {
124 |    "cell_type": "code",
125 |    "execution_count": 3,
126 |    "id": "f7a34ec3",
127 |    "metadata": {
128 |     "execution": {
129 |      "iopub.execute_input": "2023-08-18T06:57:01.334346Z",
130 |      "iopub.status.busy": "2023-08-18T06:57:01.333603Z",
131 |      "iopub.status.idle": "2023-08-18T06:57:01.340473Z",
132 |      "shell.execute_reply": "2023-08-18T06:57:01.339676Z"
133 |     },
134 |     "origin_pos": 19,
135 |     "tab": [
136 |      "pytorch"
137 |     ]
138 |    },
139 |    "outputs": [
140 |     {
141 |      "data": {
142 |       "text/plain": [
143 |        "tensor([[ 0.0669,  0.2202, -0.0912, -0.0064,  0.1474, -0.0577, -0.3006,  0.1256,\n",
144 |        "         -0.0280,  0.4040],\n",
145 |        "        [ 0.0545,  0.2591, -0.0297,  0.1141,  0.1887,  0.0094, -0.2686,  0.0732,\n",
146 |        "         -0.0135,  0.3865]], grad_fn=<AddmmBackward0>)"
147 |       ]
148 |      },
149 |      "execution_count": 3,
150 |      "metadata": {},
151 |      "output_type": "execute_result"
152 |     }
153 |    ],
154 |    "source": [
155 |     "net = MLP()\n",
156 |     "net(X)"
157 |    ]
158 |   },
159 |   {
160 |    "cell_type": "markdown",
161 |    "id": "584a9ee2",
162 |    "metadata": {
163 |     "slideshow": {
164 |      "slide_type": "slide"
165 |     }
166 |    },
167 |    "source": [
168 |     "顺序块"
169 |    ]
170 |   },
171 |   {
172 |    "cell_type": "code",
173 |    "execution_count": 5,
174 |    "id": "9672de9a",
175 |    "metadata": {
176 |     "execution": {
177 |      "iopub.execute_input": "2023-08-18T06:57:01.353302Z",
178 |      "iopub.status.busy": "2023-08-18T06:57:01.352727Z",
179 |      "iopub.status.idle": "2023-08-18T06:57:01.360268Z",
180 |      "shell.execute_reply": "2023-08-18T06:57:01.359462Z"
181 |     },
182 |     "origin_pos": 31,
183 |     "tab": [
184 |      "pytorch"
185 |     ]
186 |    },
187 |    "outputs": [
188 |     {
189 |      "data": {
190 |       "text/plain": [
191 |        "tensor([[ 2.2759e-01, -4.7003e-02,  4.2846e-01, -1.2546e-01,  1.5296e-01,\n",
192 |        "          1.8972e-01,  9.7048e-02,  4.5479e-04, -3.7986e-02,  6.4842e-02],\n",
193 |        "        [ 2.7825e-01, -9.7517e-02,  4.8541e-01, -2.4519e-01, -8.4580e-02,\n",
194 |        "          2.8538e-01,  3.6861e-02,  2.9411e-02, -1.0612e-01,  1.2620e-01]],\n",
195 |        "       grad_fn=<AddmmBackward0>)"
196 |       ]
197 |      },
198 |      "execution_count": 5,
199 |      "metadata": {},
200 |      "output_type": "execute_result"
201 |     }
202 |    ],
203 |    "source": [
204 |     "class MySequential(nn.Module):\n",
205 |     "    def __init__(self, *args):\n",
206 |     "        super().__init__()\n",
207 |     "        for idx, module in enumerate(args):\n",
208 |     "            self._modules[str(idx)] = module\n",
209 |     "\n",
210 |     "    def forward(self, X):\n",
211 |     "        for block in self._modules.values():\n",
212 |     "            X = block(X)\n",
213 |     "        return X\n",
214 |     "\n",
215 |     "net = MySequential(nn.Linear(20, 256), nn.ReLU(), nn.Linear(256, 10))\n",
216 |     "net(X)"
217 |    ]
218 |   },
219 |   {
220 |    "cell_type": "markdown",
221 |    "id": "3ce57d60",
222 |    "metadata": {
223 |     "slideshow": {
224 |      "slide_type": "slide"
225 |     }
226 |    },
227 |    "source": [
228 |     "在前向传播函数中执行代码"
229 |    ]
230 |   },
231 |   {
232 |    "cell_type": "code",
233 |    "execution_count": 7,
234 |    "id": "00ebc567",
235 |    "metadata": {
236 |     "execution": {
237 |      "iopub.execute_input": "2023-08-18T06:57:01.373508Z",
238 |      "iopub.status.busy": "2023-08-18T06:57:01.372789Z",
239 |      "iopub.status.idle": "2023-08-18T06:57:01.380049Z",
240 |      "shell.execute_reply": "2023-08-18T06:57:01.379025Z"
241 |     },
242 |     "origin_pos": 40,
243 |     "tab": [
244 |      "pytorch"
245 |     ]
246 |    },
247 |    "outputs": [
248 |     {
249 |      "data": {
250 |       "text/plain": [
251 |        "tensor(0.1862, grad_fn=<SumBackward0>)"
252 |       ]
253 |      },
254 |      "execution_count": 7,
255 |      "metadata": {},
256 |      "output_type": "execute_result"
257 |     }
258 |    ],
259 |    "source": [
260 |     "class FixedHiddenMLP(nn.Module):\n",
261 |     "    def __init__(self):\n",
262 |     "        super().__init__()\n",
263 |     "        self.rand_weight = torch.rand((20, 20), requires_grad=False)\n",
264 |     "        self.linear = nn.Linear(20, 20)\n",
265 |     "\n",
266 |     "    def forward(self, X):\n",
267 |     "        X = self.linear(X)\n",
268 |     "        X = F.relu(torch.mm(X, self.rand_weight) + 1)\n",
269 |     "        X = self.linear(X)\n",
270 |     "        while X.abs().sum() > 1:\n",
271 |     "            X /= 2\n",
272 |     "        return X.sum()\n",
273 |     "\n",
274 |     "net = FixedHiddenMLP()\n",
275 |     "net(X)"
276 |    ]
277 |   },
278 |   {
279 |    "cell_type": "markdown",
280 |    "id": "053d1e5a",
281 |    "metadata": {
282 |     "slideshow": {
283 |      "slide_type": "slide"
284 |     }
285 |    },
286 |    "source": [
287 |     "混合搭配各种组合块的方法"
288 |    ]
289 |   },
290 |   {
291 |    "cell_type": "code",
292 |    "execution_count": 8,
293 |    "id": "6ca3b399",
294 |    "metadata": {
295 |     "execution": {
296 |      "iopub.execute_input": "2023-08-18T06:57:01.384091Z",
297 |      "iopub.status.busy": "2023-08-18T06:57:01.383236Z",
298 |      "iopub.status.idle": "2023-08-18T06:57:01.394649Z",
299 |      "shell.execute_reply": "2023-08-18T06:57:01.393535Z"
300 |     },
301 |     "origin_pos": 43,
302 |     "tab": [
303 |      "pytorch"
304 |     ]
305 |    },
306 |    "outputs": [
307 |     {
308 |      "data": {
309 |       "text/plain": [
310 |        "tensor(0.2183, grad_fn=<SumBackward0>)"
311 |       ]
312 |      },
313 |      "execution_count": 8,
314 |      "metadata": {},
315 |      "output_type": "execute_result"
316 |     }
317 |    ],
318 |    "source": [
319 |     "class NestMLP(nn.Module):\n",
320 |     "    def __init__(self):\n",
321 |     "        super().__init__()\n",
322 |     "        self.net = nn.Sequential(nn.Linear(20, 64), nn.ReLU(),\n",
323 |     "                                 nn.Linear(64, 32), nn.ReLU())\n",
324 |     "        self.linear = nn.Linear(32, 16)\n",
325 |     "\n",
326 |     "    def forward(self, X):\n",
327 |     "        return self.linear(self.net(X))\n",
328 |     "\n",
329 |     "chimera = nn.Sequential(NestMLP(), nn.Linear(16, 20), FixedHiddenMLP())\n",
330 |     "chimera(X)"
331 |    ]
332 |   }
333 |  ],
334 |  "metadata": {
335 |   "celltoolbar": "Slideshow",
336 |   "language_info": {
337 |    "name": "python"
338 |   },
339 |   "required_libs": [],
340 |   "rise": {
341 |    "autolaunch": true,
342 |    "enable_chalkboard": true,
343 |    "overlay": "<div class='my-top-right'><img height=80px src='http://d2l.ai/_static/logo-with-text.png'/></div><div class='my-top-left'></div>",
344 |    "scroll": true
345 |   }
346 |  },
347 |  "nbformat": 4,
348 |  "nbformat_minor": 5
349 | }


--------------------------------------------------------------------------------
/chapter_deep-learning-computation/parameters.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "markdown",
  5 |    "id": "23850d90",
  6 |    "metadata": {
  7 |     "slideshow": {
  8 |      "slide_type": "-"
  9 |     }
 10 |    },
 11 |    "source": [
 12 |     "# 参数管理\n",
 13 |     "\n",
 14 |     "我们首先看一下具有单隐藏层的多层感知机"
 15 |    ]
 16 |   },
 17 |   {
 18 |    "cell_type": "code",
 19 |    "execution_count": 1,
 20 |    "id": "ab7ef7a0",
 21 |    "metadata": {
 22 |     "execution": {
 23 |      "iopub.execute_input": "2023-08-18T07:01:09.649068Z",
 24 |      "iopub.status.busy": "2023-08-18T07:01:09.648305Z",
 25 |      "iopub.status.idle": "2023-08-18T07:01:10.928992Z",
 26 |      "shell.execute_reply": "2023-08-18T07:01:10.927959Z"
 27 |     },
 28 |     "origin_pos": 2,
 29 |     "tab": [
 30 |      "pytorch"
 31 |     ]
 32 |    },
 33 |    "outputs": [
 34 |     {
 35 |      "data": {
 36 |       "text/plain": [
 37 |        "tensor([[-0.0970],\n",
 38 |        "        [-0.0827]], grad_fn=<AddmmBackward0>)"
 39 |       ]
 40 |      },
 41 |      "execution_count": 1,
 42 |      "metadata": {},
 43 |      "output_type": "execute_result"
 44 |     }
 45 |    ],
 46 |    "source": [
 47 |     "import torch\n",
 48 |     "from torch import nn\n",
 49 |     "\n",
 50 |     "net = nn.Sequential(nn.Linear(4, 8), nn.ReLU(), nn.Linear(8, 1))\n",
 51 |     "X = torch.rand(size=(2, 4))\n",
 52 |     "net(X)"
 53 |    ]
 54 |   },
 55 |   {
 56 |    "cell_type": "markdown",
 57 |    "id": "2dbaff55",
 58 |    "metadata": {
 59 |     "slideshow": {
 60 |      "slide_type": "slide"
 61 |     }
 62 |    },
 63 |    "source": [
 64 |     "参数访问"
 65 |    ]
 66 |   },
 67 |   {
 68 |    "cell_type": "code",
 69 |    "execution_count": 2,
 70 |    "id": "5e2fff9a",
 71 |    "metadata": {
 72 |     "execution": {
 73 |      "iopub.execute_input": "2023-08-18T07:01:10.933865Z",
 74 |      "iopub.status.busy": "2023-08-18T07:01:10.933267Z",
 75 |      "iopub.status.idle": "2023-08-18T07:01:10.939922Z",
 76 |      "shell.execute_reply": "2023-08-18T07:01:10.938931Z"
 77 |     },
 78 |     "origin_pos": 7,
 79 |     "tab": [
 80 |      "pytorch"
 81 |     ]
 82 |    },
 83 |    "outputs": [
 84 |     {
 85 |      "name": "stdout",
 86 |      "output_type": "stream",
 87 |      "text": [
 88 |       "OrderedDict([('weight', tensor([[-0.0427, -0.2939, -0.1894,  0.0220, -0.1709, -0.1522, -0.0334, -0.2263]])), ('bias', tensor([0.0887]))])\n"
 89 |      ]
 90 |     }
 91 |    ],
 92 |    "source": [
 93 |     "print(net[2].state_dict())"
 94 |    ]
 95 |   },
 96 |   {
 97 |    "cell_type": "markdown",
 98 |    "id": "f4e174dc",
 99 |    "metadata": {
100 |     "slideshow": {
101 |      "slide_type": "slide"
102 |     }
103 |    },
104 |    "source": [
105 |     "目标参数"
106 |    ]
107 |   },
108 |   {
109 |    "cell_type": "code",
110 |    "execution_count": 3,
111 |    "id": "d0682fff",
112 |    "metadata": {
113 |     "execution": {
114 |      "iopub.execute_input": "2023-08-18T07:01:10.945104Z",
115 |      "iopub.status.busy": "2023-08-18T07:01:10.944250Z",
116 |      "iopub.status.idle": "2023-08-18T07:01:10.951764Z",
117 |      "shell.execute_reply": "2023-08-18T07:01:10.950790Z"
118 |     },
119 |     "origin_pos": 11,
120 |     "tab": [
121 |      "pytorch"
122 |     ]
123 |    },
124 |    "outputs": [
125 |     {
126 |      "name": "stdout",
127 |      "output_type": "stream",
128 |      "text": [
129 |       "<class 'torch.nn.parameter.Parameter'>\n",
130 |       "Parameter containing:\n",
131 |       "tensor([0.0887], requires_grad=True)\n",
132 |       "tensor([0.0887])\n"
133 |      ]
134 |     }
135 |    ],
136 |    "source": [
137 |     "print(type(net[2].bias))\n",
138 |     "print(net[2].bias)\n",
139 |     "print(net[2].bias.data)"
140 |    ]
141 |   },
142 |   {
143 |    "cell_type": "code",
144 |    "execution_count": 4,
145 |    "id": "3cf4d55b",
146 |    "metadata": {
147 |     "execution": {
148 |      "iopub.execute_input": "2023-08-18T07:01:10.956378Z",
149 |      "iopub.status.busy": "2023-08-18T07:01:10.955542Z",
150 |      "iopub.status.idle": "2023-08-18T07:01:10.961810Z",
151 |      "shell.execute_reply": "2023-08-18T07:01:10.960767Z"
152 |     },
153 |     "origin_pos": 16,
154 |     "tab": [
155 |      "pytorch"
156 |     ]
157 |    },
158 |    "outputs": [
159 |     {
160 |      "data": {
161 |       "text/plain": [
162 |        "True"
163 |       ]
164 |      },
165 |      "execution_count": 4,
166 |      "metadata": {},
167 |      "output_type": "execute_result"
168 |     }
169 |    ],
170 |    "source": [
171 |     "net[2].weight.grad == None"
172 |    ]
173 |   },
174 |   {
175 |    "cell_type": "markdown",
176 |    "id": "170b54ab",
177 |    "metadata": {
178 |     "slideshow": {
179 |      "slide_type": "slide"
180 |     }
181 |    },
182 |    "source": [
183 |     "一次性访问所有参数"
184 |    ]
185 |   },
186 |   {
187 |    "cell_type": "code",
188 |    "execution_count": 5,
189 |    "id": "916939ce",
190 |    "metadata": {
191 |     "execution": {
192 |      "iopub.execute_input": "2023-08-18T07:01:10.966725Z",
193 |      "iopub.status.busy": "2023-08-18T07:01:10.965969Z",
194 |      "iopub.status.idle": "2023-08-18T07:01:10.972600Z",
195 |      "shell.execute_reply": "2023-08-18T07:01:10.971655Z"
196 |     },
197 |     "origin_pos": 19,
198 |     "tab": [
199 |      "pytorch"
200 |     ]
201 |    },
202 |    "outputs": [
203 |     {
204 |      "name": "stdout",
205 |      "output_type": "stream",
206 |      "text": [
207 |       "('weight', torch.Size([8, 4])) ('bias', torch.Size([8]))\n",
208 |       "('0.weight', torch.Size([8, 4])) ('0.bias', torch.Size([8])) ('2.weight', torch.Size([1, 8])) ('2.bias', torch.Size([1]))\n"
209 |      ]
210 |     }
211 |    ],
212 |    "source": [
213 |     "print(*[(name, param.shape) for name, param in net[0].named_parameters()])\n",
214 |     "print(*[(name, param.shape) for name, param in net.named_parameters()])"
215 |    ]
216 |   },
217 |   {
218 |    "cell_type": "code",
219 |    "execution_count": 6,
220 |    "id": "116207ef",
221 |    "metadata": {
222 |     "execution": {
223 |      "iopub.execute_input": "2023-08-18T07:01:10.977269Z",
224 |      "iopub.status.busy": "2023-08-18T07:01:10.976623Z",
225 |      "iopub.status.idle": "2023-08-18T07:01:10.983222Z",
226 |      "shell.execute_reply": "2023-08-18T07:01:10.982309Z"
227 |     },
228 |     "origin_pos": 23,
229 |     "tab": [
230 |      "pytorch"
231 |     ]
232 |    },
233 |    "outputs": [
234 |     {
235 |      "data": {
236 |       "text/plain": [
237 |        "tensor([0.0887])"
238 |       ]
239 |      },
240 |      "execution_count": 6,
241 |      "metadata": {},
242 |      "output_type": "execute_result"
243 |     }
244 |    ],
245 |    "source": [
246 |     "net.state_dict()['2.bias'].data"
247 |    ]
248 |   },
249 |   {
250 |    "cell_type": "markdown",
251 |    "id": "707279d0",
252 |    "metadata": {
253 |     "slideshow": {
254 |      "slide_type": "slide"
255 |     }
256 |    },
257 |    "source": [
258 |     "从嵌套块收集参数"
259 |    ]
260 |   },
261 |   {
262 |    "cell_type": "code",
263 |    "execution_count": 7,
264 |    "id": "712e31fd",
265 |    "metadata": {
266 |     "execution": {
267 |      "iopub.execute_input": "2023-08-18T07:01:10.988088Z",
268 |      "iopub.status.busy": "2023-08-18T07:01:10.987352Z",
269 |      "iopub.status.idle": "2023-08-18T07:01:10.998245Z",
270 |      "shell.execute_reply": "2023-08-18T07:01:10.997197Z"
271 |     },
272 |     "origin_pos": 28,
273 |     "tab": [
274 |      "pytorch"
275 |     ]
276 |    },
277 |    "outputs": [
278 |     {
279 |      "data": {
280 |       "text/plain": [
281 |        "tensor([[0.2596],\n",
282 |        "        [0.2596]], grad_fn=<AddmmBackward0>)"
283 |       ]
284 |      },
285 |      "execution_count": 7,
286 |      "metadata": {},
287 |      "output_type": "execute_result"
288 |     }
289 |    ],
290 |    "source": [
291 |     "def block1():\n",
292 |     "    return nn.Sequential(nn.Linear(4, 8), nn.ReLU(),\n",
293 |     "                         nn.Linear(8, 4), nn.ReLU())\n",
294 |     "\n",
295 |     "def block2():\n",
296 |     "    net = nn.Sequential()\n",
297 |     "    for i in range(4):\n",
298 |     "        net.add_module(f'block {i}', block1())\n",
299 |     "    return net\n",
300 |     "\n",
301 |     "rgnet = nn.Sequential(block2(), nn.Linear(4, 1))\n",
302 |     "rgnet(X)"
303 |    ]
304 |   },
305 |   {
306 |    "cell_type": "markdown",
307 |    "id": "df7a2644",
308 |    "metadata": {
309 |     "slideshow": {
310 |      "slide_type": "slide"
311 |     }
312 |    },
313 |    "source": [
314 |     "设计了网络后，我们看看它是如何组织的"
315 |    ]
316 |   },
317 |   {
318 |    "cell_type": "code",
319 |    "execution_count": 8,
320 |    "id": "c7d7717d",
321 |    "metadata": {
322 |     "execution": {
323 |      "iopub.execute_input": "2023-08-18T07:01:11.002889Z",
324 |      "iopub.status.busy": "2023-08-18T07:01:11.002264Z",
325 |      "iopub.status.idle": "2023-08-18T07:01:11.007643Z",
326 |      "shell.execute_reply": "2023-08-18T07:01:11.006464Z"
327 |     },
328 |     "origin_pos": 33,
329 |     "tab": [
330 |      "pytorch"
331 |     ]
332 |    },
333 |    "outputs": [
334 |     {
335 |      "name": "stdout",
336 |      "output_type": "stream",
337 |      "text": [
338 |       "Sequential(\n",
339 |       "  (0): Sequential(\n",
340 |       "    (block 0): Sequential(\n",
341 |       "      (0): Linear(in_features=4, out_features=8, bias=True)\n",
342 |       "      (1): ReLU()\n",
343 |       "      (2): Linear(in_features=8, out_features=4, bias=True)\n",
344 |       "      (3): ReLU()\n",
345 |       "    )\n",
346 |       "    (block 1): Sequential(\n",
347 |       "      (0): Linear(in_features=4, out_features=8, bias=True)\n",
348 |       "      (1): ReLU()\n",
349 |       "      (2): Linear(in_features=8, out_features=4, bias=True)\n",
350 |       "      (3): ReLU()\n",
351 |       "    )\n",
352 |       "    (block 2): Sequential(\n",
353 |       "      (0): Linear(in_features=4, out_features=8, bias=True)\n",
354 |       "      (1): ReLU()\n",
355 |       "      (2): Linear(in_features=8, out_features=4, bias=True)\n",
356 |       "      (3): ReLU()\n",
357 |       "    )\n",
358 |       "    (block 3): Sequential(\n",
359 |       "      (0): Linear(in_features=4, out_features=8, bias=True)\n",
360 |       "      (1): ReLU()\n",
361 |       "      (2): Linear(in_features=8, out_features=4, bias=True)\n",
362 |       "      (3): ReLU()\n",
363 |       "    )\n",
364 |       "  )\n",
365 |       "  (1): Linear(in_features=4, out_features=1, bias=True)\n",
366 |       ")\n"
367 |      ]
368 |     }
369 |    ],
370 |    "source": [
371 |     "print(rgnet)"
372 |    ]
373 |   },
374 |   {
375 |    "cell_type": "code",
376 |    "execution_count": 9,
377 |    "id": "939ba4d3",
378 |    "metadata": {
379 |     "execution": {
380 |      "iopub.execute_input": "2023-08-18T07:01:11.012522Z",
381 |      "iopub.status.busy": "2023-08-18T07:01:11.011839Z",
382 |      "iopub.status.idle": "2023-08-18T07:01:11.018508Z",
383 |      "shell.execute_reply": "2023-08-18T07:01:11.017590Z"
384 |     },
385 |     "origin_pos": 37,
386 |     "tab": [
387 |      "pytorch"
388 |     ]
389 |    },
390 |    "outputs": [
391 |     {
392 |      "data": {
393 |       "text/plain": [
394 |        "tensor([ 0.1999, -0.4073, -0.1200, -0.2033, -0.1573,  0.3546, -0.2141, -0.2483])"
395 |       ]
396 |      },
397 |      "execution_count": 9,
398 |      "metadata": {},
399 |      "output_type": "execute_result"
400 |     }
401 |    ],
402 |    "source": [
403 |     "rgnet[0][1][0].bias.data"
404 |    ]
405 |   },
406 |   {
407 |    "cell_type": "markdown",
408 |    "id": "77b45fbb",
409 |    "metadata": {
410 |     "slideshow": {
411 |      "slide_type": "slide"
412 |     }
413 |    },
414 |    "source": [
415 |     "内置初始化"
416 |    ]
417 |   },
418 |   {
419 |    "cell_type": "code",
420 |    "execution_count": 10,
421 |    "id": "2f00d5e7",
422 |    "metadata": {
423 |     "execution": {
424 |      "iopub.execute_input": "2023-08-18T07:01:11.023955Z",
425 |      "iopub.status.busy": "2023-08-18T07:01:11.023046Z",
426 |      "iopub.status.idle": "2023-08-18T07:01:11.033287Z",
427 |      "shell.execute_reply": "2023-08-18T07:01:11.032096Z"
428 |     },
429 |     "origin_pos": 47,
430 |     "tab": [
431 |      "pytorch"
432 |     ]
433 |    },
434 |    "outputs": [
435 |     {
436 |      "data": {
437 |       "text/plain": [
438 |        "(tensor([-0.0214, -0.0015, -0.0100, -0.0058]), tensor(0.))"
439 |       ]
440 |      },
441 |      "execution_count": 10,
442 |      "metadata": {},
443 |      "output_type": "execute_result"
444 |     }
445 |    ],
446 |    "source": [
447 |     "def init_normal(m):\n",
448 |     "    if type(m) == nn.Linear:\n",
449 |     "        nn.init.normal_(m.weight, mean=0, std=0.01)\n",
450 |     "        nn.init.zeros_(m.bias)\n",
451 |     "net.apply(init_normal)\n",
452 |     "net[0].weight.data[0], net[0].bias.data[0]"
453 |    ]
454 |   },
455 |   {
456 |    "cell_type": "code",
457 |    "execution_count": 11,
458 |    "id": "49ee306c",
459 |    "metadata": {
460 |     "execution": {
461 |      "iopub.execute_input": "2023-08-18T07:01:11.038321Z",
462 |      "iopub.status.busy": "2023-08-18T07:01:11.037607Z",
463 |      "iopub.status.idle": "2023-08-18T07:01:11.049009Z",
464 |      "shell.execute_reply": "2023-08-18T07:01:11.047793Z"
465 |     },
466 |     "origin_pos": 52,
467 |     "tab": [
468 |      "pytorch"
469 |     ]
470 |    },
471 |    "outputs": [
472 |     {
473 |      "data": {
474 |       "text/plain": [
475 |        "(tensor([1., 1., 1., 1.]), tensor(0.))"
476 |       ]
477 |      },
478 |      "execution_count": 11,
479 |      "metadata": {},
480 |      "output_type": "execute_result"
481 |     }
482 |    ],
483 |    "source": [
484 |     "def init_constant(m):\n",
485 |     "    if type(m) == nn.Linear:\n",
486 |     "        nn.init.constant_(m.weight, 1)\n",
487 |     "        nn.init.zeros_(m.bias)\n",
488 |     "net.apply(init_constant)\n",
489 |     "net[0].weight.data[0], net[0].bias.data[0]"
490 |    ]
491 |   },
492 |   {
493 |    "cell_type": "markdown",
494 |    "id": "478059aa",
495 |    "metadata": {
496 |     "slideshow": {
497 |      "slide_type": "slide"
498 |     }
499 |    },
500 |    "source": [
501 |     "对某些块应用不同的初始化方法"
502 |    ]
503 |   },
504 |   {
505 |    "cell_type": "code",
506 |    "execution_count": 12,
507 |    "id": "1a90ffaa",
508 |    "metadata": {
509 |     "execution": {
510 |      "iopub.execute_input": "2023-08-18T07:01:11.054335Z",
511 |      "iopub.status.busy": "2023-08-18T07:01:11.053550Z",
512 |      "iopub.status.idle": "2023-08-18T07:01:11.063215Z",
513 |      "shell.execute_reply": "2023-08-18T07:01:11.062244Z"
514 |     },
515 |     "origin_pos": 57,
516 |     "tab": [
517 |      "pytorch"
518 |     ]
519 |    },
520 |    "outputs": [
521 |     {
522 |      "name": "stdout",
523 |      "output_type": "stream",
524 |      "text": [
525 |       "tensor([ 0.5236,  0.0516, -0.3236,  0.3794])\n",
526 |       "tensor([[42., 42., 42., 42., 42., 42., 42., 42.]])\n"
527 |      ]
528 |     }
529 |    ],
530 |    "source": [
531 |     "def init_xavier(m):\n",
532 |     "    if type(m) == nn.Linear:\n",
533 |     "        nn.init.xavier_uniform_(m.weight)\n",
534 |     "def init_42(m):\n",
535 |     "    if type(m) == nn.Linear:\n",
536 |     "        nn.init.constant_(m.weight, 42)\n",
537 |     "\n",
538 |     "net[0].apply(init_xavier)\n",
539 |     "net[2].apply(init_42)\n",
540 |     "print(net[0].weight.data[0])\n",
541 |     "print(net[2].weight.data)"
542 |    ]
543 |   },
544 |   {
545 |    "cell_type": "markdown",
546 |    "id": "0a70ae16",
547 |    "metadata": {
548 |     "slideshow": {
549 |      "slide_type": "slide"
550 |     }
551 |    },
552 |    "source": [
553 |     "自定义初始化"
554 |    ]
555 |   },
556 |   {
557 |    "cell_type": "code",
558 |    "execution_count": 13,
559 |    "id": "9166f6e3",
560 |    "metadata": {
561 |     "execution": {
562 |      "iopub.execute_input": "2023-08-18T07:01:11.068164Z",
563 |      "iopub.status.busy": "2023-08-18T07:01:11.067460Z",
564 |      "iopub.status.idle": "2023-08-18T07:01:11.079228Z",
565 |      "shell.execute_reply": "2023-08-18T07:01:11.078069Z"
566 |     },
567 |     "origin_pos": 66,
568 |     "tab": [
569 |      "pytorch"
570 |     ]
571 |    },
572 |    "outputs": [
573 |     {
574 |      "name": "stdout",
575 |      "output_type": "stream",
576 |      "text": [
577 |       "Init weight torch.Size([8, 4])\n",
578 |       "Init weight torch.Size([1, 8])\n"
579 |      ]
580 |     },
581 |     {
582 |      "data": {
583 |       "text/plain": [
584 |        "tensor([[5.4079, 9.3334, 5.0616, 8.3095],\n",
585 |        "        [0.0000, 7.2788, -0.0000, -0.0000]], grad_fn=<SliceBackward0>)"
586 |       ]
587 |      },
588 |      "execution_count": 13,
589 |      "metadata": {},
590 |      "output_type": "execute_result"
591 |     }
592 |    ],
593 |    "source": [
594 |     "def my_init(m):\n",
595 |     "    if type(m) == nn.Linear:\n",
596 |     "        print(\"Init\", *[(name, param.shape)\n",
597 |     "                        for name, param in m.named_parameters()][0])\n",
598 |     "        nn.init.uniform_(m.weight, -10, 10)\n",
599 |     "        m.weight.data *= m.weight.data.abs() >= 5\n",
600 |     "\n",
601 |     "net.apply(my_init)\n",
602 |     "net[0].weight[:2]"
603 |    ]
604 |   },
605 |   {
606 |    "cell_type": "code",
607 |    "execution_count": 14,
608 |    "id": "5b9af1f8",
609 |    "metadata": {
610 |     "execution": {
611 |      "iopub.execute_input": "2023-08-18T07:01:11.084158Z",
612 |      "iopub.status.busy": "2023-08-18T07:01:11.083416Z",
613 |      "iopub.status.idle": "2023-08-18T07:01:11.092672Z",
614 |      "shell.execute_reply": "2023-08-18T07:01:11.091537Z"
615 |     },
616 |     "origin_pos": 71,
617 |     "tab": [
618 |      "pytorch"
619 |     ]
620 |    },
621 |    "outputs": [
622 |     {
623 |      "data": {
624 |       "text/plain": [
625 |        "tensor([42.0000, 10.3334,  6.0616,  9.3095])"
626 |       ]
627 |      },
628 |      "execution_count": 14,
629 |      "metadata": {},
630 |      "output_type": "execute_result"
631 |     }
632 |    ],
633 |    "source": [
634 |     "net[0].weight.data[:] += 1\n",
635 |     "net[0].weight.data[0, 0] = 42\n",
636 |     "net[0].weight.data[0]"
637 |    ]
638 |   },
639 |   {
640 |    "cell_type": "markdown",
641 |    "id": "9031168e",
642 |    "metadata": {
643 |     "slideshow": {
644 |      "slide_type": "slide"
645 |     }
646 |    },
647 |    "source": [
648 |     "参数绑定"
649 |    ]
650 |   },
651 |   {
652 |    "cell_type": "code",
653 |    "execution_count": 15,
654 |    "id": "69660fa7",
655 |    "metadata": {
656 |     "execution": {
657 |      "iopub.execute_input": "2023-08-18T07:01:11.097767Z",
658 |      "iopub.status.busy": "2023-08-18T07:01:11.096948Z",
659 |      "iopub.status.idle": "2023-08-18T07:01:11.108904Z",
660 |      "shell.execute_reply": "2023-08-18T07:01:11.107763Z"
661 |     },
662 |     "origin_pos": 77,
663 |     "tab": [
664 |      "pytorch"
665 |     ]
666 |    },
667 |    "outputs": [
668 |     {
669 |      "name": "stdout",
670 |      "output_type": "stream",
671 |      "text": [
672 |       "tensor([True, True, True, True, True, True, True, True])\n",
673 |       "tensor([True, True, True, True, True, True, True, True])\n"
674 |      ]
675 |     }
676 |    ],
677 |    "source": [
678 |     "shared = nn.Linear(8, 8)\n",
679 |     "net = nn.Sequential(nn.Linear(4, 8), nn.ReLU(),\n",
680 |     "                    shared, nn.ReLU(),\n",
681 |     "                    shared, nn.ReLU(),\n",
682 |     "                    nn.Linear(8, 1))\n",
683 |     "net(X)\n",
684 |     "print(net[2].weight.data[0] == net[4].weight.data[0])\n",
685 |     "net[2].weight.data[0, 0] = 100\n",
686 |     "print(net[2].weight.data[0] == net[4].weight.data[0])"
687 |    ]
688 |   }
689 |  ],
690 |  "metadata": {
691 |   "celltoolbar": "Slideshow",
692 |   "language_info": {
693 |    "name": "python"
694 |   },
695 |   "required_libs": [],
696 |   "rise": {
697 |    "autolaunch": true,
698 |    "enable_chalkboard": true,
699 |    "overlay": "<div class='my-top-right'><img height=80px src='http://d2l.ai/_static/logo-with-text.png'/></div><div class='my-top-left'></div>",
700 |    "scroll": true
701 |   }
702 |  },
703 |  "nbformat": 4,
704 |  "nbformat_minor": 5
705 | }


--------------------------------------------------------------------------------
/chapter_deep-learning-computation/read-write.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "markdown",
  5 |    "id": "3c764167",
  6 |    "metadata": {
  7 |     "slideshow": {
  8 |      "slide_type": "-"
  9 |     }
 10 |    },
 11 |    "source": [
 12 |     "# 读写文件\n",
 13 |     "\n",
 14 |     "加载和保存张量"
 15 |    ]
 16 |   },
 17 |   {
 18 |    "cell_type": "code",
 19 |    "execution_count": 2,
 20 |    "id": "1ab53461",
 21 |    "metadata": {
 22 |     "execution": {
 23 |      "iopub.execute_input": "2023-08-18T06:56:43.733002Z",
 24 |      "iopub.status.busy": "2023-08-18T06:56:43.732347Z",
 25 |      "iopub.status.idle": "2023-08-18T06:56:43.741208Z",
 26 |      "shell.execute_reply": "2023-08-18T06:56:43.740416Z"
 27 |     },
 28 |     "origin_pos": 7,
 29 |     "tab": [
 30 |      "pytorch"
 31 |     ]
 32 |    },
 33 |    "outputs": [
 34 |     {
 35 |      "data": {
 36 |       "text/plain": [
 37 |        "tensor([0, 1, 2, 3])"
 38 |       ]
 39 |      },
 40 |      "execution_count": 2,
 41 |      "metadata": {},
 42 |      "output_type": "execute_result"
 43 |     }
 44 |    ],
 45 |    "source": [
 46 |     "import torch\n",
 47 |     "from torch import nn\n",
 48 |     "from torch.nn import functional as F\n",
 49 |     "\n",
 50 |     "x = torch.arange(4)\n",
 51 |     "torch.save(x, 'x-file')\n",
 52 |     "\n",
 53 |     "x2 = torch.load('x-file')\n",
 54 |     "x2"
 55 |    ]
 56 |   },
 57 |   {
 58 |    "cell_type": "markdown",
 59 |    "id": "6c8adb8b",
 60 |    "metadata": {
 61 |     "slideshow": {
 62 |      "slide_type": "slide"
 63 |     }
 64 |    },
 65 |    "source": [
 66 |     "存储一个张量列表，然后把它们读回内存"
 67 |    ]
 68 |   },
 69 |   {
 70 |    "cell_type": "code",
 71 |    "execution_count": 3,
 72 |    "id": "81027fe1",
 73 |    "metadata": {
 74 |     "execution": {
 75 |      "iopub.execute_input": "2023-08-18T06:56:43.744676Z",
 76 |      "iopub.status.busy": "2023-08-18T06:56:43.744140Z",
 77 |      "iopub.status.idle": "2023-08-18T06:56:43.751376Z",
 78 |      "shell.execute_reply": "2023-08-18T06:56:43.750630Z"
 79 |     },
 80 |     "origin_pos": 12,
 81 |     "tab": [
 82 |      "pytorch"
 83 |     ]
 84 |    },
 85 |    "outputs": [
 86 |     {
 87 |      "data": {
 88 |       "text/plain": [
 89 |        "(tensor([0, 1, 2, 3]), tensor([0., 0., 0., 0.]))"
 90 |       ]
 91 |      },
 92 |      "execution_count": 3,
 93 |      "metadata": {},
 94 |      "output_type": "execute_result"
 95 |     }
 96 |    ],
 97 |    "source": [
 98 |     "y = torch.zeros(4)\n",
 99 |     "torch.save([x, y],'x-files')\n",
100 |     "x2, y2 = torch.load('x-files')\n",
101 |     "(x2, y2)"
102 |    ]
103 |   },
104 |   {
105 |    "cell_type": "markdown",
106 |    "id": "47678e7f",
107 |    "metadata": {
108 |     "slideshow": {
109 |      "slide_type": "-"
110 |     }
111 |    },
112 |    "source": [
113 |     "写入或读取从字符串映射到张量的字典"
114 |    ]
115 |   },
116 |   {
117 |    "cell_type": "code",
118 |    "execution_count": 4,
119 |    "id": "fde1cb33",
120 |    "metadata": {
121 |     "execution": {
122 |      "iopub.execute_input": "2023-08-18T06:56:43.754777Z",
123 |      "iopub.status.busy": "2023-08-18T06:56:43.754313Z",
124 |      "iopub.status.idle": "2023-08-18T06:56:43.761150Z",
125 |      "shell.execute_reply": "2023-08-18T06:56:43.760369Z"
126 |     },
127 |     "origin_pos": 17,
128 |     "tab": [
129 |      "pytorch"
130 |     ]
131 |    },
132 |    "outputs": [
133 |     {
134 |      "data": {
135 |       "text/plain": [
136 |        "{'x': tensor([0, 1, 2, 3]), 'y': tensor([0., 0., 0., 0.])}"
137 |       ]
138 |      },
139 |      "execution_count": 4,
140 |      "metadata": {},
141 |      "output_type": "execute_result"
142 |     }
143 |    ],
144 |    "source": [
145 |     "mydict = {'x': x, 'y': y}\n",
146 |     "torch.save(mydict, 'mydict')\n",
147 |     "mydict2 = torch.load('mydict')\n",
148 |     "mydict2"
149 |    ]
150 |   },
151 |   {
152 |    "cell_type": "markdown",
153 |    "id": "9cc9655d",
154 |    "metadata": {
155 |     "slideshow": {
156 |      "slide_type": "slide"
157 |     }
158 |    },
159 |    "source": [
160 |     "加载和保存模型参数"
161 |    ]
162 |   },
163 |   {
164 |    "cell_type": "code",
165 |    "execution_count": 5,
166 |    "id": "2672b5c2",
167 |    "metadata": {
168 |     "execution": {
169 |      "iopub.execute_input": "2023-08-18T06:56:43.764609Z",
170 |      "iopub.status.busy": "2023-08-18T06:56:43.764090Z",
171 |      "iopub.status.idle": "2023-08-18T06:56:43.773070Z",
172 |      "shell.execute_reply": "2023-08-18T06:56:43.772277Z"
173 |     },
174 |     "origin_pos": 22,
175 |     "tab": [
176 |      "pytorch"
177 |     ]
178 |    },
179 |    "outputs": [],
180 |    "source": [
181 |     "class MLP(nn.Module):\n",
182 |     "    def __init__(self):\n",
183 |     "        super().__init__()\n",
184 |     "        self.hidden = nn.Linear(20, 256)\n",
185 |     "        self.output = nn.Linear(256, 10)\n",
186 |     "\n",
187 |     "    def forward(self, x):\n",
188 |     "        return self.output(F.relu(self.hidden(x)))\n",
189 |     "\n",
190 |     "net = MLP()\n",
191 |     "X = torch.randn(size=(2, 20))\n",
192 |     "Y = net(X)"
193 |    ]
194 |   },
195 |   {
196 |    "cell_type": "markdown",
197 |    "id": "72d77012",
198 |    "metadata": {
199 |     "slideshow": {
200 |      "slide_type": "slide"
201 |     }
202 |    },
203 |    "source": [
204 |     "将模型的参数存储在一个叫做“mlp.params”的文件中"
205 |    ]
206 |   },
207 |   {
208 |    "cell_type": "code",
209 |    "execution_count": 6,
210 |    "id": "a53c1315",
211 |    "metadata": {
212 |     "execution": {
213 |      "iopub.execute_input": "2023-08-18T06:56:43.776452Z",
214 |      "iopub.status.busy": "2023-08-18T06:56:43.775942Z",
215 |      "iopub.status.idle": "2023-08-18T06:56:43.780387Z",
216 |      "shell.execute_reply": "2023-08-18T06:56:43.779636Z"
217 |     },
218 |     "origin_pos": 27,
219 |     "tab": [
220 |      "pytorch"
221 |     ]
222 |    },
223 |    "outputs": [],
224 |    "source": [
225 |     "torch.save(net.state_dict(), 'mlp.params')"
226 |    ]
227 |   },
228 |   {
229 |    "cell_type": "markdown",
230 |    "id": "647cbcef",
231 |    "metadata": {
232 |     "slideshow": {
233 |      "slide_type": "slide"
234 |     }
235 |    },
236 |    "source": [
237 |     "实例化原始多层感知机模型的一个备份，\n",
238 |     "并直接读取文件中存储的参数"
239 |    ]
240 |   },
241 |   {
242 |    "cell_type": "code",
243 |    "execution_count": 7,
244 |    "id": "da5e1b3f",
245 |    "metadata": {
246 |     "execution": {
247 |      "iopub.execute_input": "2023-08-18T06:56:43.783850Z",
248 |      "iopub.status.busy": "2023-08-18T06:56:43.783240Z",
249 |      "iopub.status.idle": "2023-08-18T06:56:43.789905Z",
250 |      "shell.execute_reply": "2023-08-18T06:56:43.789164Z"
251 |     },
252 |     "origin_pos": 32,
253 |     "tab": [
254 |      "pytorch"
255 |     ]
256 |    },
257 |    "outputs": [
258 |     {
259 |      "data": {
260 |       "text/plain": [
261 |        "MLP(\n",
262 |        "  (hidden): Linear(in_features=20, out_features=256, bias=True)\n",
263 |        "  (output): Linear(in_features=256, out_features=10, bias=True)\n",
264 |        ")"
265 |       ]
266 |      },
267 |      "execution_count": 7,
268 |      "metadata": {},
269 |      "output_type": "execute_result"
270 |     }
271 |    ],
272 |    "source": [
273 |     "clone = MLP()\n",
274 |     "clone.load_state_dict(torch.load('mlp.params'))\n",
275 |     "clone.eval()"
276 |    ]
277 |   },
278 |   {
279 |    "cell_type": "code",
280 |    "execution_count": 8,
281 |    "id": "a25ba1f1",
282 |    "metadata": {
283 |     "execution": {
284 |      "iopub.execute_input": "2023-08-18T06:56:43.793400Z",
285 |      "iopub.status.busy": "2023-08-18T06:56:43.792788Z",
286 |      "iopub.status.idle": "2023-08-18T06:56:43.798329Z",
287 |      "shell.execute_reply": "2023-08-18T06:56:43.797576Z"
288 |     },
289 |     "origin_pos": 37,
290 |     "tab": [
291 |      "pytorch"
292 |     ]
293 |    },
294 |    "outputs": [
295 |     {
296 |      "data": {
297 |       "text/plain": [
298 |        "tensor([[True, True, True, True, True, True, True, True, True, True],\n",
299 |        "        [True, True, True, True, True, True, True, True, True, True]])"
300 |       ]
301 |      },
302 |      "execution_count": 8,
303 |      "metadata": {},
304 |      "output_type": "execute_result"
305 |     }
306 |    ],
307 |    "source": [
308 |     "Y_clone = clone(X)\n",
309 |     "Y_clone == Y"
310 |    ]
311 |   }
312 |  ],
313 |  "metadata": {
314 |   "celltoolbar": "Slideshow",
315 |   "language_info": {
316 |    "name": "python"
317 |   },
318 |   "required_libs": [],
319 |   "rise": {
320 |    "autolaunch": true,
321 |    "enable_chalkboard": true,
322 |    "overlay": "<div class='my-top-right'><img height=80px src='http://d2l.ai/_static/logo-with-text.png'/></div><div class='my-top-left'></div>",
323 |    "scroll": true
324 |   }
325 |  },
326 |  "nbformat": 4,
327 |  "nbformat_minor": 5
328 | }


--------------------------------------------------------------------------------
/chapter_deep-learning-computation/rise.css:
--------------------------------------------------------------------------------
 1 | 
 2 | div.text_cell_render.rendered_html { /* text cells (presumably Jupyter's rendered markdown output; confirm) */
 3 |     padding: 0.35em 0.1em; /* 0.35em top/bottom, 0.1em left/right */
 4 | }
 5 | 
 6 | div.code_cell { /* code cells: enlarge the text */
 7 |     font-size: 120%; /* 120% of the inherited font size */
 8 | }
 9 | 
10 | div.my-top-right { /* overlay slot: class='my-top-right' in the notebooks' rise "overlay" HTML, which wraps the logo <img> */
11 |     position: absolute; /* placed by the right/top offsets below */
12 |     right: 5%; /* 5% in from the right edge */
13 |     top: 1em; /* 1em down from the top edge */
14 |     font-size: 2em; /* applies to any text placed in this slot */
15 | }
16 | 
17 | div.my-top-left { /* overlay slot: class='my-top-left' in the notebooks' rise "overlay" HTML (an empty <div> there) */
18 |     position: absolute; /* placed by the left/top offsets below */
19 |     left: 5%; /* 5% in from the left edge */
20 |     top: 1em; /* 1em down from the top edge */
21 |     font-size: 2em; /* applies to any text placed in this slot */
22 | }
23 | 


--------------------------------------------------------------------------------
/chapter_deep-learning-computation/use-gpu.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "markdown",
  5 |    "id": "b867430b",
  6 |    "metadata": {
  7 |     "slideshow": {
  8 |      "slide_type": "-"
  9 |     }
 10 |    },
 11 |    "source": [
 12 |     "# GPU\n",
 13 |     "\n",
 14 |     "查看显卡信息"
 15 |    ]
 16 |   },
 17 |   {
 18 |    "cell_type": "code",
 19 |    "execution_count": 1,
 20 |    "id": "369d9baa",
 21 |    "metadata": {
 22 |     "execution": {
 23 |      "iopub.execute_input": "2023-08-18T06:58:06.499888Z",
 24 |      "iopub.status.busy": "2023-08-18T06:58:06.499324Z",
 25 |      "iopub.status.idle": "2023-08-18T06:58:06.859541Z",
 26 |      "shell.execute_reply": "2023-08-18T06:58:06.858210Z"
 27 |     },
 28 |     "origin_pos": 1,
 29 |     "tab": [
 30 |      "pytorch"
 31 |     ]
 32 |    },
 33 |    "outputs": [
 34 |     {
 35 |      "name": "stdout",
 36 |      "output_type": "stream",
 37 |      "text": [
 38 |       "Fri Aug 18 06:58:06 2023       \r\n",
 39 |       "+-----------------------------------------------------------------------------+\r\n",
 40 |       "| NVIDIA-SMI 470.161.03   Driver Version: 470.161.03   CUDA Version: 11.7     |\r\n",
 41 |       "|-------------------------------+----------------------+----------------------+\r\n",
 42 |       "| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |\r\n",
 43 |       "| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |\r\n",
 44 |       "|                               |                      |               MIG M. |\r\n",
 45 |       "|===============================+======================+======================|\r\n",
 46 |       "|   0  Tesla V100-SXM2...  Off  | 00000000:00:1B.0 Off |                    0 |\r\n",
 47 |       "| N/A   41C    P0    42W / 300W |      0MiB / 16160MiB |      0%      Default |\r\n",
 48 |       "|                               |                      |                  N/A |\r\n",
 49 |       "+-------------------------------+----------------------+----------------------+\r\n"
 50 |      ]
 51 |     },
 52 |     {
 53 |      "name": "stdout",
 54 |      "output_type": "stream",
 55 |      "text": [
 56 |       "|   1  Tesla V100-SXM2...  Off  | 00000000:00:1C.0 Off |                    0 |\r\n",
 57 |       "| N/A   44C    P0   113W / 300W |   1456MiB / 16160MiB |     53%      Default |\r\n",
 58 |       "|                               |                      |                  N/A |\r\n",
 59 |       "+-------------------------------+----------------------+----------------------+\r\n",
 60 |       "|   2  Tesla V100-SXM2...  Off  | 00000000:00:1D.0 Off |                    0 |\r\n",
 61 |       "| N/A   43C    P0   120W / 300W |   1358MiB / 16160MiB |     55%      Default |\r\n",
 62 |       "|                               |                      |                  N/A |\r\n",
 63 |       "+-------------------------------+----------------------+----------------------+\r\n",
 64 |       "|   3  Tesla V100-SXM2...  Off  | 00000000:00:1E.0 Off |                    0 |\r\n",
 65 |       "| N/A   42C    P0    47W / 300W |      0MiB / 16160MiB |      0%      Default |\r\n",
 66 |       "|                               |                      |                  N/A |\r\n",
 67 |       "+-------------------------------+----------------------+----------------------+\r\n",
 68 |       "                                                                               \r\n",
 69 |       "+-----------------------------------------------------------------------------+\r\n",
 70 |       "| Processes:                                                                  |\r\n",
 71 |       "|  GPU   GI   CI        PID   Type   Process name                  GPU Memory |\r\n",
 72 |       "|        ID   ID                                                   Usage      |\r\n",
 73 |       "|=============================================================================|\r\n",
 74 |       "+-----------------------------------------------------------------------------+\r\n"
 75 |      ]
 76 |     }
 77 |    ],
 78 |    "source": [
 79 |     "!nvidia-smi"
 80 |    ]
 81 |   },
 82 |   {
 83 |    "cell_type": "markdown",
 84 |    "id": "19db9813",
 85 |    "metadata": {
 86 |     "slideshow": {
 87 |      "slide_type": "slide"
 88 |     }
 89 |    },
 90 |    "source": [
 91 |     "计算设备"
 92 |    ]
 93 |   },
 94 |   {
 95 |    "cell_type": "code",
 96 |    "execution_count": 2,
 97 |    "id": "9f69ad46",
 98 |    "metadata": {
 99 |     "execution": {
100 |      "iopub.execute_input": "2023-08-18T06:58:06.865430Z",
101 |      "iopub.status.busy": "2023-08-18T06:58:06.864979Z",
102 |      "iopub.status.idle": "2023-08-18T06:58:07.970615Z",
103 |      "shell.execute_reply": "2023-08-18T06:58:07.969801Z"
104 |     },
105 |     "origin_pos": 10,
106 |     "tab": [
107 |      "pytorch"
108 |     ]
109 |    },
110 |    "outputs": [
111 |     {
112 |      "data": {
113 |       "text/plain": [
114 |        "(device(type='cpu'), device(type='cuda'), device(type='cuda', index=1))"
115 |       ]
116 |      },
117 |      "execution_count": 2,
118 |      "metadata": {},
119 |      "output_type": "execute_result"
120 |     }
121 |    ],
122 |    "source": [
123 |     "import torch\n",
124 |     "from torch import nn\n",
125 |     "\n",
126 |     "torch.device('cpu'), torch.device('cuda'), torch.device('cuda:1')"
127 |    ]
128 |   },
129 |   {
130 |    "cell_type": "markdown",
131 |    "id": "1e0f86ba",
132 |    "metadata": {
133 |     "slideshow": {
134 |      "slide_type": "-"
135 |     }
136 |    },
137 |    "source": [
138 |     "查询可用GPU的数量"
139 |    ]
140 |   },
141 |   {
142 |    "cell_type": "code",
143 |    "execution_count": 3,
144 |    "id": "c29151b0",
145 |    "metadata": {
146 |     "execution": {
147 |      "iopub.execute_input": "2023-08-18T06:58:07.974568Z",
148 |      "iopub.status.busy": "2023-08-18T06:58:07.973917Z",
149 |      "iopub.status.idle": "2023-08-18T06:58:07.979097Z",
150 |      "shell.execute_reply": "2023-08-18T06:58:07.978337Z"
151 |     },
152 |     "origin_pos": 15,
153 |     "tab": [
154 |      "pytorch"
155 |     ]
156 |    },
157 |    "outputs": [
158 |     {
159 |      "data": {
160 |       "text/plain": [
161 |        "2"
162 |       ]
163 |      },
164 |      "execution_count": 3,
165 |      "metadata": {},
166 |      "output_type": "execute_result"
167 |     }
168 |    ],
169 |    "source": [
170 |     "torch.cuda.device_count()"
171 |    ]
172 |   },
173 |   {
174 |    "cell_type": "markdown",
175 |    "id": "add6b576",
176 |    "metadata": {
177 |     "slideshow": {
178 |      "slide_type": "slide"
179 |     }
180 |    },
181 |    "source": [
182 |     "下面定义的两个函数允许我们在所需的GPU不存在的情况下也能运行代码"
183 |    ]
184 |   },
185 |   {
186 |    "cell_type": "code",
187 |    "execution_count": 4,
188 |    "id": "cda0ab76",
189 |    "metadata": {
190 |     "execution": {
191 |      "iopub.execute_input": "2023-08-18T06:58:07.983261Z",
192 |      "iopub.status.busy": "2023-08-18T06:58:07.982604Z",
193 |      "iopub.status.idle": "2023-08-18T06:58:07.990309Z",
194 |      "shell.execute_reply": "2023-08-18T06:58:07.989541Z"
195 |     },
196 |     "origin_pos": 20,
197 |     "tab": [
198 |      "pytorch"
199 |     ]
200 |    },
201 |    "outputs": [
202 |     {
203 |      "data": {
204 |       "text/plain": [
205 |        "(device(type='cuda', index=0),\n",
206 |        " device(type='cpu'),\n",
207 |        " [device(type='cuda', index=0), device(type='cuda', index=1)])"
208 |       ]
209 |      },
210 |      "execution_count": 4,
211 |      "metadata": {},
212 |      "output_type": "execute_result"
213 |     }
214 |    ],
215 |    "source": [
216 |     "def try_gpu(i=0):  \n",
217 |     "    \"\"\"如果存在，则返回gpu(i)，否则返回cpu()\"\"\"\n",
218 |     "    if torch.cuda.device_count() >= i + 1:\n",
219 |     "        return torch.device(f'cuda:{i}')\n",
220 |     "    return torch.device('cpu')\n",
221 |     "\n",
222 |     "def try_all_gpus():  \n",
223 |     "    \"\"\"返回所有可用的GPU，如果没有GPU，则返回[cpu(),]\"\"\"\n",
224 |     "    devices = [torch.device(f'cuda:{i}')\n",
225 |     "             for i in range(torch.cuda.device_count())]\n",
226 |     "    return devices if devices else [torch.device('cpu')]\n",
227 |     "\n",
228 |     "try_gpu(), try_gpu(10), try_all_gpus()"
229 |    ]
230 |   },
231 |   {
232 |    "cell_type": "markdown",
233 |    "id": "013f4e4b",
234 |    "metadata": {
235 |     "slideshow": {
236 |      "slide_type": "slide"
237 |     }
238 |    },
239 |    "source": [
240 |     "查询张量所在的设备"
241 |    ]
242 |   },
243 |   {
244 |    "cell_type": "code",
245 |    "execution_count": 5,
246 |    "id": "f6ab0f26",
247 |    "metadata": {
248 |     "execution": {
249 |      "iopub.execute_input": "2023-08-18T06:58:07.994741Z",
250 |      "iopub.status.busy": "2023-08-18T06:58:07.994126Z",
251 |      "iopub.status.idle": "2023-08-18T06:58:07.999439Z",
252 |      "shell.execute_reply": "2023-08-18T06:58:07.998673Z"
253 |     },
254 |     "origin_pos": 25,
255 |     "tab": [
256 |      "pytorch"
257 |     ]
258 |    },
259 |    "outputs": [
260 |     {
261 |      "data": {
262 |       "text/plain": [
263 |        "device(type='cpu')"
264 |       ]
265 |      },
266 |      "execution_count": 5,
267 |      "metadata": {},
268 |      "output_type": "execute_result"
269 |     }
270 |    ],
271 |    "source": [
272 |     "x = torch.tensor([1, 2, 3])\n",
273 |     "x.device"
274 |    ]
275 |   },
276 |   {
277 |    "cell_type": "markdown",
278 |    "id": "9404d10b",
279 |    "metadata": {
280 |     "slideshow": {
281 |      "slide_type": "slide"
282 |     }
283 |    },
284 |    "source": [
285 |     "存储在GPU上"
286 |    ]
287 |   },
288 |   {
289 |    "cell_type": "code",
290 |    "execution_count": 6,
291 |    "id": "a67dbf2f",
292 |    "metadata": {
293 |     "execution": {
294 |      "iopub.execute_input": "2023-08-18T06:58:08.004162Z",
295 |      "iopub.status.busy": "2023-08-18T06:58:08.003541Z",
296 |      "iopub.status.idle": "2023-08-18T06:58:09.277879Z",
297 |      "shell.execute_reply": "2023-08-18T06:58:09.277008Z"
298 |     },
299 |     "origin_pos": 30,
300 |     "tab": [
301 |      "pytorch"
302 |     ]
303 |    },
304 |    "outputs": [
305 |     {
306 |      "data": {
307 |       "text/plain": [
308 |        "tensor([[1., 1., 1.],\n",
309 |        "        [1., 1., 1.]], device='cuda:0')"
310 |       ]
311 |      },
312 |      "execution_count": 6,
313 |      "metadata": {},
314 |      "output_type": "execute_result"
315 |     }
316 |    ],
317 |    "source": [
318 |     "X = torch.ones(2, 3, device=try_gpu())\n",
319 |     "X"
320 |    ]
321 |   },
322 |   {
323 |    "cell_type": "markdown",
324 |    "id": "5bf59dc4",
325 |    "metadata": {
326 |     "slideshow": {
327 |      "slide_type": "-"
328 |     }
329 |    },
330 |    "source": [
331 |     "在第二个GPU上创建一个随机张量"
332 |    ]
333 |   },
334 |   {
335 |    "cell_type": "code",
336 |    "execution_count": 7,
337 |    "id": "7c0d4a84",
338 |    "metadata": {
339 |     "execution": {
340 |      "iopub.execute_input": "2023-08-18T06:58:09.282814Z",
341 |      "iopub.status.busy": "2023-08-18T06:58:09.282230Z",
342 |      "iopub.status.idle": "2023-08-18T06:58:10.279046Z",
343 |      "shell.execute_reply": "2023-08-18T06:58:10.278227Z"
344 |     },
345 |     "origin_pos": 35,
346 |     "tab": [
347 |      "pytorch"
348 |     ]
349 |    },
350 |    "outputs": [
351 |     {
352 |      "data": {
353 |       "text/plain": [
354 |        "tensor([[0.4860, 0.1285, 0.0440],\n",
355 |        "        [0.9743, 0.4159, 0.9979]], device='cuda:1')"
356 |       ]
357 |      },
358 |      "execution_count": 7,
359 |      "metadata": {},
360 |      "output_type": "execute_result"
361 |     }
362 |    ],
363 |    "source": [
364 |     "Y = torch.rand(2, 3, device=try_gpu(1))\n",
365 |     "Y"
366 |    ]
367 |   },
368 |   {
369 |    "cell_type": "markdown",
370 |    "id": "397b13bf",
371 |    "metadata": {
372 |     "slideshow": {
373 |      "slide_type": "slide"
374 |     }
375 |    },
376 |    "source": [
377 |     "要计算`X + Y`，我们需要决定在哪里执行这个操作"
378 |    ]
379 |   },
380 |   {
381 |    "cell_type": "code",
382 |    "execution_count": 8,
383 |    "id": "9e700cd2",
384 |    "metadata": {
385 |     "execution": {
386 |      "iopub.execute_input": "2023-08-18T06:58:10.284097Z",
387 |      "iopub.status.busy": "2023-08-18T06:58:10.283529Z",
388 |      "iopub.status.idle": "2023-08-18T06:58:10.290795Z",
389 |      "shell.execute_reply": "2023-08-18T06:58:10.290007Z"
390 |     },
391 |     "origin_pos": 40,
392 |     "tab": [
393 |      "pytorch"
394 |     ]
395 |    },
396 |    "outputs": [
397 |     {
398 |      "name": "stdout",
399 |      "output_type": "stream",
400 |      "text": [
401 |       "tensor([[1., 1., 1.],\n",
402 |       "        [1., 1., 1.]], device='cuda:0')\n",
403 |       "tensor([[1., 1., 1.],\n",
404 |       "        [1., 1., 1.]], device='cuda:1')\n"
405 |      ]
406 |     }
407 |    ],
408 |    "source": [
409 |     "Z = X.cuda(1)\n",
410 |     "print(X)\n",
411 |     "print(Z)"
412 |    ]
413 |   },
414 |   {
415 |    "cell_type": "markdown",
416 |    "id": "a8407698",
417 |    "metadata": {
418 |     "slideshow": {
419 |      "slide_type": "slide"
420 |     }
421 |    },
422 |    "source": [
423 |     "现在数据在同一个GPU上（`Z`和`Y`都在），我们可以将它们相加"
424 |    ]
425 |   },
426 |   {
427 |    "cell_type": "code",
428 |    "execution_count": 9,
429 |    "id": "b2f04f35",
430 |    "metadata": {
431 |     "execution": {
432 |      "iopub.execute_input": "2023-08-18T06:58:10.295377Z",
433 |      "iopub.status.busy": "2023-08-18T06:58:10.294845Z",
434 |      "iopub.status.idle": "2023-08-18T06:58:10.301122Z",
435 |      "shell.execute_reply": "2023-08-18T06:58:10.300297Z"
436 |     },
437 |     "origin_pos": 43,
438 |     "tab": [
439 |      "pytorch"
440 |     ]
441 |    },
442 |    "outputs": [
443 |     {
444 |      "data": {
445 |       "text/plain": [
446 |        "tensor([[1.4860, 1.1285, 1.0440],\n",
447 |        "        [1.9743, 1.4159, 1.9979]], device='cuda:1')"
448 |       ]
449 |      },
450 |      "execution_count": 9,
451 |      "metadata": {},
452 |      "output_type": "execute_result"
453 |     }
454 |    ],
455 |    "source": [
456 |     "Y + Z"
457 |    ]
458 |   },
459 |   {
460 |    "cell_type": "code",
461 |    "execution_count": 10,
462 |    "id": "d6b95aa1",
463 |    "metadata": {
464 |     "execution": {
465 |      "iopub.execute_input": "2023-08-18T06:58:10.305143Z",
466 |      "iopub.status.busy": "2023-08-18T06:58:10.304592Z",
467 |      "iopub.status.idle": "2023-08-18T06:58:10.309707Z",
468 |      "shell.execute_reply": "2023-08-18T06:58:10.308894Z"
469 |     },
470 |     "origin_pos": 48,
471 |     "tab": [
472 |      "pytorch"
473 |     ]
474 |    },
475 |    "outputs": [
476 |     {
477 |      "data": {
478 |       "text/plain": [
479 |        "True"
480 |       ]
481 |      },
482 |      "execution_count": 10,
483 |      "metadata": {},
484 |      "output_type": "execute_result"
485 |     }
486 |    ],
487 |    "source": [
488 |     "Z.cuda(1) is Z"
489 |    ]
490 |   },
491 |   {
492 |    "cell_type": "markdown",
493 |    "id": "86e67482",
494 |    "metadata": {
495 |     "slideshow": {
496 |      "slide_type": "slide"
497 |     }
498 |    },
499 |    "source": [
500 |     "神经网络与GPU"
501 |    ]
502 |   },
503 |   {
504 |    "cell_type": "code",
505 |    "execution_count": 12,
506 |    "id": "955f7f67",
507 |    "metadata": {
508 |     "execution": {
509 |      "iopub.execute_input": "2023-08-18T06:58:10.340989Z",
510 |      "iopub.status.busy": "2023-08-18T06:58:10.340312Z",
511 |      "iopub.status.idle": "2023-08-18T06:58:10.930969Z",
512 |      "shell.execute_reply": "2023-08-18T06:58:10.930143Z"
513 |     },
514 |     "origin_pos": 56,
515 |     "tab": [
516 |      "pytorch"
517 |     ]
518 |    },
519 |    "outputs": [
520 |     {
521 |      "data": {
522 |       "text/plain": [
523 |        "tensor([[-0.4275],\n",
524 |        "        [-0.4275]], device='cuda:0', grad_fn=<AddmmBackward0>)"
525 |       ]
526 |      },
527 |      "execution_count": 12,
528 |      "metadata": {},
529 |      "output_type": "execute_result"
530 |     }
531 |    ],
532 |    "source": [
533 |     "net = nn.Sequential(nn.Linear(3, 1))\n",
534 |     "net = net.to(device=try_gpu())\n",
535 |     "\n",
536 |     "net(X)"
537 |    ]
538 |   },
539 |   {
540 |    "cell_type": "markdown",
541 |    "id": "b1ae89a4",
542 |    "metadata": {
543 |     "slideshow": {
544 |      "slide_type": "-"
545 |     }
546 |    },
547 |    "source": [
548 |     "确认模型参数存储在同一个GPU上"
549 |    ]
550 |   },
551 |   {
552 |    "cell_type": "code",
553 |    "execution_count": 13,
554 |    "id": "bd727993",
555 |    "metadata": {
556 |     "execution": {
557 |      "iopub.execute_input": "2023-08-18T06:58:10.935087Z",
558 |      "iopub.status.busy": "2023-08-18T06:58:10.934497Z",
559 |      "iopub.status.idle": "2023-08-18T06:58:10.939740Z",
560 |      "shell.execute_reply": "2023-08-18T06:58:10.938974Z"
561 |     },
562 |     "origin_pos": 59,
563 |     "tab": [
564 |      "pytorch"
565 |     ]
566 |    },
567 |    "outputs": [
568 |     {
569 |      "data": {
570 |       "text/plain": [
571 |        "device(type='cuda', index=0)"
572 |       ]
573 |      },
574 |      "execution_count": 13,
575 |      "metadata": {},
576 |      "output_type": "execute_result"
577 |     }
578 |    ],
579 |    "source": [
580 |     "net[0].weight.data.device"
581 |    ]
582 |   }
583 |  ],
584 |  "metadata": {
585 |   "celltoolbar": "Slideshow",
586 |   "language_info": {
587 |    "name": "python"
588 |   },
589 |   "required_libs": [],
590 |   "rise": {
591 |    "autolaunch": true,
592 |    "enable_chalkboard": true,
593 |    "overlay": "<div class='my-top-right'><img height=80px src='http://d2l.ai/_static/logo-with-text.png'/></div><div class='my-top-left'></div>",
594 |    "scroll": true
595 |   }
596 |  },
597 |  "nbformat": 4,
598 |  "nbformat_minor": 5
599 | }


--------------------------------------------------------------------------------
/chapter_linear-networks/linear-regression-concise.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "markdown",
  5 |    "id": "a5b9ec6e",
  6 |    "metadata": {
  7 |     "slideshow": {
  8 |      "slide_type": "slide"
  9 |     }
 10 |    },
 11 |    "source": [
 12 |     "# 线性回归的简洁实现\n",
 13 |     "\n",
 14 |     "通过使用深度学习框架来简洁地实现线性回归模型\n",
 15 |     "\n",
 16 |     "生成数据集"
 17 |    ]
 18 |   },
 19 |   {
 20 |    "cell_type": "code",
 21 |    "execution_count": 2,
 22 |    "id": "c26b741f",
 23 |    "metadata": {
 24 |     "execution": {
 25 |      "iopub.execute_input": "2023-08-18T07:01:54.616404Z",
 26 |      "iopub.status.busy": "2023-08-18T07:01:54.615685Z",
 27 |      "iopub.status.idle": "2023-08-18T07:01:54.643472Z",
 28 |      "shell.execute_reply": "2023-08-18T07:01:54.642512Z"
 29 |     },
 30 |     "origin_pos": 5,
 31 |     "tab": [
 32 |      "pytorch"
 33 |     ]
 34 |    },
 35 |    "outputs": [],
 36 |    "source": [
 37 |     "import numpy as np\n",
 38 |     "import torch\n",
 39 |     "from torch.utils import data\n",
 40 |     "from d2l import torch as d2l\n",
 41 |     "\n",
 42 |     "true_w = torch.tensor([2, -3.4])\n",
 43 |     "true_b = 4.2\n",
 44 |     "features, labels = d2l.synthetic_data(true_w, true_b, 1000)"
 45 |    ]
 46 |   },
 47 |   {
 48 |    "cell_type": "markdown",
 49 |    "id": "25eda004",
 50 |    "metadata": {
 51 |     "slideshow": {
 52 |      "slide_type": "slide"
 53 |     }
 54 |    },
 55 |    "source": [
 56 |     "调用框架中现有的API来读取数据"
 57 |    ]
 58 |   },
 59 |   {
 60 |    "cell_type": "code",
 61 |    "execution_count": 5,
 62 |    "id": "7c6919b8",
 63 |    "metadata": {
 64 |     "execution": {
 65 |      "iopub.execute_input": "2023-08-18T07:01:54.665574Z",
 66 |      "iopub.status.busy": "2023-08-18T07:01:54.664999Z",
 67 |      "iopub.status.idle": "2023-08-18T07:01:54.673523Z",
 68 |      "shell.execute_reply": "2023-08-18T07:01:54.672688Z"
 69 |     },
 70 |     "origin_pos": 13,
 71 |     "tab": [
 72 |      "pytorch"
 73 |     ]
 74 |    },
 75 |    "outputs": [
 76 |     {
 77 |      "data": {
 78 |       "text/plain": [
 79 |        "[tensor([[-1.3116, -0.3062],\n",
 80 |        "         [-1.5653,  0.4830],\n",
 81 |        "         [-0.8893, -0.9466],\n",
 82 |        "         [-1.2417,  1.6891],\n",
 83 |        "         [-0.7148,  0.1376],\n",
 84 |        "         [-0.2162, -0.6122],\n",
 85 |        "         [ 2.4048, -0.3211],\n",
 86 |        "         [-0.1516,  0.4997],\n",
 87 |        "         [ 1.5298, -0.2291],\n",
 88 |        "         [ 1.3895,  1.2602]]),\n",
 89 |        " tensor([[ 2.6073],\n",
 90 |        "         [-0.5787],\n",
 91 |        "         [ 5.6339],\n",
 92 |        "         [-4.0211],\n",
 93 |        "         [ 2.3117],\n",
 94 |        "         [ 5.8492],\n",
 95 |        "         [10.0926],\n",
 96 |        "         [ 2.1932],\n",
 97 |        "         [ 8.0441],\n",
 98 |        "         [ 2.6943]])]"
 99 |       ]
100 |      },
101 |      "execution_count": 5,
102 |      "metadata": {},
103 |      "output_type": "execute_result"
104 |     }
105 |    ],
106 |    "source": [
107 |     "def load_array(data_arrays, batch_size, is_train=True):  \n",
108 |     "    \"\"\"构造一个PyTorch数据迭代器\"\"\"\n",
109 |     "    dataset = data.TensorDataset(*data_arrays)\n",
110 |     "    return data.DataLoader(dataset, batch_size, shuffle=is_train)\n",
111 |     "\n",
112 |     "batch_size = 10\n",
113 |     "data_iter = load_array((features, labels), batch_size)\n",
114 |     "\n",
115 |     "next(iter(data_iter))"
116 |    ]
117 |   },
118 |   {
119 |    "cell_type": "markdown",
120 |    "id": "b4c6012b",
121 |    "metadata": {
122 |     "slideshow": {
123 |      "slide_type": "slide"
124 |     }
125 |    },
126 |    "source": [
127 |     "使用框架的预定义好的层"
128 |    ]
129 |   },
130 |   {
131 |    "cell_type": "code",
132 |    "execution_count": 6,
133 |    "id": "85c54a1a",
134 |    "metadata": {
135 |     "execution": {
136 |      "iopub.execute_input": "2023-08-18T07:01:54.677177Z",
137 |      "iopub.status.busy": "2023-08-18T07:01:54.676580Z",
138 |      "iopub.status.idle": "2023-08-18T07:01:54.680914Z",
139 |      "shell.execute_reply": "2023-08-18T07:01:54.680130Z"
140 |     },
141 |     "origin_pos": 20,
142 |     "tab": [
143 |      "pytorch"
144 |     ]
145 |    },
146 |    "outputs": [],
147 |    "source": [
148 |     "from torch import nn\n",
149 |     "\n",
150 |     "net = nn.Sequential(nn.Linear(2, 1))"
151 |    ]
152 |   },
153 |   {
154 |    "cell_type": "markdown",
155 |    "id": "9bf96a4d",
156 |    "metadata": {
157 |     "slideshow": {
158 |      "slide_type": "-"
159 |     }
160 |    },
161 |    "source": [
162 |     "初始化模型参数"
163 |    ]
164 |   },
165 |   {
166 |    "cell_type": "code",
167 |    "execution_count": 7,
168 |    "id": "31716c55",
169 |    "metadata": {
170 |     "execution": {
171 |      "iopub.execute_input": "2023-08-18T07:01:54.684561Z",
172 |      "iopub.status.busy": "2023-08-18T07:01:54.684036Z",
173 |      "iopub.status.idle": "2023-08-18T07:01:54.690673Z",
174 |      "shell.execute_reply": "2023-08-18T07:01:54.689754Z"
175 |     },
176 |     "origin_pos": 29,
177 |     "tab": [
178 |      "pytorch"
179 |     ]
180 |    },
181 |    "outputs": [
182 |     {
183 |      "data": {
184 |       "text/plain": [
185 |        "tensor([0.])"
186 |       ]
187 |      },
188 |      "execution_count": 7,
189 |      "metadata": {},
190 |      "output_type": "execute_result"
191 |     }
192 |    ],
193 |    "source": [
194 |     "net[0].weight.data.normal_(0, 0.01)\n",
195 |     "net[0].bias.data.fill_(0)"
196 |    ]
197 |   },
198 |   {
199 |    "cell_type": "markdown",
200 |    "id": "11c01887",
201 |    "metadata": {
202 |     "slideshow": {
203 |      "slide_type": "slide"
204 |     }
205 |    },
206 |    "source": [
207 |     "使用`MSELoss`类计算均方误差（也称为平方$L_2$范数）"
208 |    ]
209 |   },
210 |   {
211 |    "cell_type": "code",
212 |    "execution_count": 8,
213 |    "id": "19a417ac",
214 |    "metadata": {
215 |     "execution": {
216 |      "iopub.execute_input": "2023-08-18T07:01:54.695575Z",
217 |      "iopub.status.busy": "2023-08-18T07:01:54.694922Z",
218 |      "iopub.status.idle": "2023-08-18T07:01:54.699373Z",
219 |      "shell.execute_reply": "2023-08-18T07:01:54.698348Z"
220 |     },
221 |     "origin_pos": 41,
222 |     "tab": [
223 |      "pytorch"
224 |     ]
225 |    },
226 |    "outputs": [],
227 |    "source": [
228 |     "loss = nn.MSELoss()"
229 |    ]
230 |   },
231 |   {
232 |    "cell_type": "markdown",
233 |    "id": "68315d47",
234 |    "metadata": {
235 |     "slideshow": {
236 |      "slide_type": "-"
237 |     }
238 |    },
239 |    "source": [
240 |     "创建一个`SGD`优化器实例"
241 |    ]
242 |   },
243 |   {
244 |    "cell_type": "code",
245 |    "execution_count": 9,
246 |    "id": "1ae0989f",
247 |    "metadata": {
248 |     "execution": {
249 |      "iopub.execute_input": "2023-08-18T07:01:54.703905Z",
250 |      "iopub.status.busy": "2023-08-18T07:01:54.703368Z",
251 |      "iopub.status.idle": "2023-08-18T07:01:54.708081Z",
252 |      "shell.execute_reply": "2023-08-18T07:01:54.706987Z"
253 |     },
254 |     "origin_pos": 50,
255 |     "tab": [
256 |      "pytorch"
257 |     ]
258 |    },
259 |    "outputs": [],
260 |    "source": [
261 |     "trainer = torch.optim.SGD(net.parameters(), lr=0.03)"
262 |    ]
263 |   },
264 |   {
265 |    "cell_type": "markdown",
266 |    "id": "d5991157",
267 |    "metadata": {
268 |     "slideshow": {
269 |      "slide_type": "slide"
270 |     }
271 |    },
272 |    "source": [
273 |     "训练过程代码与我们从零开始实现时所做的非常相似"
274 |    ]
275 |   },
276 |   {
277 |    "cell_type": "code",
278 |    "execution_count": 10,
279 |    "id": "1270d706",
280 |    "metadata": {
281 |     "execution": {
282 |      "iopub.execute_input": "2023-08-18T07:01:54.712705Z",
283 |      "iopub.status.busy": "2023-08-18T07:01:54.712113Z",
284 |      "iopub.status.idle": "2023-08-18T07:01:54.922720Z",
285 |      "shell.execute_reply": "2023-08-18T07:01:54.921580Z"
286 |     },
287 |     "origin_pos": 55,
288 |     "tab": [
289 |      "pytorch"
290 |     ]
291 |    },
292 |    "outputs": [
293 |     {
294 |      "name": "stdout",
295 |      "output_type": "stream",
296 |      "text": [
297 |       "epoch 1, loss 0.000248\n",
298 |       "epoch 2, loss 0.000103\n",
299 |       "epoch 3, loss 0.000103\n"
300 |      ]
301 |     }
302 |    ],
303 |    "source": [
304 |     "num_epochs = 3\n",
305 |     "for epoch in range(num_epochs):\n",
306 |     "    for X, y in data_iter:\n",
307 |     "        l = loss(net(X) ,y)\n",
308 |     "        trainer.zero_grad()\n",
309 |     "        l.backward()\n",
310 |     "        trainer.step()\n",
311 |     "    l = loss(net(features), labels)\n",
312 |     "    print(f'epoch {epoch + 1}, loss {l:f}')"
313 |    ]
314 |   },
315 |   {
316 |    "cell_type": "markdown",
317 |    "id": "7c8e4f61",
318 |    "metadata": {
319 |     "slideshow": {
320 |      "slide_type": "slide"
321 |     }
322 |    },
323 |    "source": [
324 |     "比较生成数据集的真实参数和通过有限数据训练获得的模型参数"
325 |    ]
326 |   },
327 |   {
328 |    "cell_type": "code",
329 |    "execution_count": 11,
330 |    "id": "aa7cef5a",
331 |    "metadata": {
332 |     "execution": {
333 |      "iopub.execute_input": "2023-08-18T07:01:54.927464Z",
334 |      "iopub.status.busy": "2023-08-18T07:01:54.927072Z",
335 |      "iopub.status.idle": "2023-08-18T07:01:54.935672Z",
336 |      "shell.execute_reply": "2023-08-18T07:01:54.934585Z"
337 |     },
338 |     "origin_pos": 60,
339 |     "tab": [
340 |      "pytorch"
341 |     ]
342 |    },
343 |    "outputs": [
344 |     {
345 |      "name": "stdout",
346 |      "output_type": "stream",
347 |      "text": [
348 |       "w的估计误差： tensor([-0.0010, -0.0003])\n",
349 |       "b的估计误差： tensor([-0.0003])\n"
350 |      ]
351 |     }
352 |    ],
353 |    "source": [
354 |     "w = net[0].weight.data\n",
355 |     "print('w的估计误差：', true_w - w.reshape(true_w.shape))\n",
356 |     "b = net[0].bias.data\n",
357 |     "print('b的估计误差：', true_b - b)"
358 |    ]
359 |   }
360 |  ],
361 |  "metadata": {
362 |   "celltoolbar": "Slideshow",
363 |   "language_info": {
364 |    "name": "python"
365 |   },
366 |   "required_libs": [],
367 |   "rise": {
368 |    "autolaunch": true,
369 |    "enable_chalkboard": true,
370 |    "overlay": "<div class='my-top-right'><img height=80px src='http://d2l.ai/_static/logo-with-text.png'/></div><div class='my-top-left'></div>",
371 |    "scroll": true
372 |   }
373 |  },
374 |  "nbformat": 4,
375 |  "nbformat_minor": 5
376 | }


--------------------------------------------------------------------------------
/chapter_linear-networks/rise.css:
--------------------------------------------------------------------------------
 1 | 
 2 | div.text_cell_render.rendered_html { /* text cells (presumably Jupyter's rendered markdown output; confirm) */
 3 |     padding: 0.35em 0.1em; /* 0.35em top/bottom, 0.1em left/right */
 4 | }
 5 | 
 6 | div.code_cell { /* code cells: enlarge the text */
 7 |     font-size: 120%; /* 120% of the inherited font size */
 8 | }
 9 | 
10 | div.my-top-right { /* overlay slot: class='my-top-right' in the notebooks' rise "overlay" HTML, which wraps the logo <img> */
11 |     position: absolute; /* placed by the right/top offsets below */
12 |     right: 5%; /* 5% in from the right edge */
13 |     top: 1em; /* 1em down from the top edge */
14 |     font-size: 2em; /* applies to any text placed in this slot */
15 | }
16 | 
17 | div.my-top-left { /* overlay slot: class='my-top-left' in the notebooks' rise "overlay" HTML (an empty <div> there) */
18 |     position: absolute; /* placed by the left/top offsets below */
19 |     left: 5%; /* 5% in from the left edge */
20 |     top: 1em; /* 1em down from the top edge */
21 |     font-size: 2em; /* applies to any text placed in this slot */
22 | }
23 | 


--------------------------------------------------------------------------------
/chapter_multilayer-perceptrons/rise.css:
--------------------------------------------------------------------------------
 1 | 
 2 | div.text_cell_render.rendered_html { /* text cells (presumably Jupyter's rendered markdown output; confirm) */
 3 |     padding: 0.35em 0.1em; /* 0.35em top/bottom, 0.1em left/right */
 4 | }
 5 | 
 6 | div.code_cell { /* code cells: enlarge the text */
 7 |     font-size: 120%; /* 120% of the inherited font size */
 8 | }
 9 | 
10 | div.my-top-right { /* NOTE(review): same class name as the rise "overlay" HTML in other chapters' notebooks; presumably the logo slot here too, confirm */
11 |     position: absolute; /* placed by the right/top offsets below */
12 |     right: 5%; /* 5% in from the right edge */
13 |     top: 1em; /* 1em down from the top edge */
14 |     font-size: 2em; /* applies to any text placed in this slot */
15 | }
16 | 
17 | div.my-top-left { /* NOTE(review): same class name as the rise "overlay" HTML in other chapters' notebooks; presumably the top-left slot here too, confirm */
18 |     position: absolute; /* placed by the left/top offsets below */
19 |     left: 5%; /* 5% in from the left edge */
20 |     top: 1em; /* 1em down from the top edge */
21 |     font-size: 2em; /* applies to any text placed in this slot */
22 | }
23 | 


--------------------------------------------------------------------------------
/chapter_natural-language-processing-applications/natural-language-inference-and-dataset.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "markdown",
  5 |    "id": "e030be85",
  6 |    "metadata": {
  7 |     "slideshow": {
  8 |      "slide_type": "slide"
  9 |     }
 10 |    },
 11 |    "source": [
 12 |     "# 自然语言推断与数据集\n",
 13 |     "\n",
 14 |     "斯坦福自然语言推断语料库（Stanford Natural Language Inference，SNLI）"
 15 |    ]
 16 |   },
 17 |   {
 18 |    "cell_type": "code",
 19 |    "execution_count": 1,
 20 |    "id": "85ccbfd4",
 21 |    "metadata": {
 22 |     "execution": {
 23 |      "iopub.execute_input": "2023-08-18T07:06:00.201212Z",
 24 |      "iopub.status.busy": "2023-08-18T07:06:00.200144Z",
 25 |      "iopub.status.idle": "2023-08-18T07:06:09.370822Z",
 26 |      "shell.execute_reply": "2023-08-18T07:06:09.368591Z"
 27 |     },
 28 |     "origin_pos": 2,
 29 |     "tab": [
 30 |      "pytorch"
 31 |     ]
 32 |    },
 33 |    "outputs": [],
 34 |    "source": [
 35 |     "import os\n",
 36 |     "import re\n",
 37 |     "import torch\n",
 38 |     "from torch import nn\n",
 39 |     "from d2l import torch as d2l\n",
 40 |     "\n",
 41 |     "d2l.DATA_HUB['SNLI'] = (\n",
 42 |     "    'https://nlp.stanford.edu/projects/snli/snli_1.0.zip',\n",
 43 |     "    '9fcde07509c7e87ec61c640c1b2753d9041758e4')\n",
 44 |     "\n",
 45 |     "data_dir = d2l.download_extract('SNLI')"
 46 |    ]
 47 |   },
 48 |   {
 49 |    "cell_type": "markdown",
 50 |    "id": "4b6dcd15",
 51 |    "metadata": {
 52 |     "slideshow": {
 53 |      "slide_type": "slide"
 54 |     }
 55 |    },
 56 |    "source": [
 57 |     "读取数据集"
 58 |    ]
 59 |   },
 60 |   {
 61 |    "cell_type": "code",
 62 |    "execution_count": 2,
 63 |    "id": "fa839f80",
 64 |    "metadata": {
 65 |     "execution": {
 66 |      "iopub.execute_input": "2023-08-18T07:06:09.377922Z",
 67 |      "iopub.status.busy": "2023-08-18T07:06:09.377380Z",
 68 |      "iopub.status.idle": "2023-08-18T07:06:09.392203Z",
 69 |      "shell.execute_reply": "2023-08-18T07:06:09.390984Z"
 70 |     },
 71 |     "origin_pos": 5,
 72 |     "tab": [
 73 |      "pytorch"
 74 |     ]
 75 |    },
 76 |    "outputs": [],
 77 |    "source": [
 78 |     "def read_snli(data_dir, is_train):\n",
 79 |     "    \"\"\"将SNLI数据集解析为前提、假设和标签\"\"\"\n",
 80 |     "    def extract_text(s):\n",
 81 |     "        s = re.sub('\\\\(', '', s)\n",
 82 |     "        s = re.sub('\\\\)', '', s)\n",
 83 |     "        s = re.sub('\\\\s{2,}', ' ', s)\n",
 84 |     "        return s.strip()\n",
 85 |     "    label_set = {'entailment': 0, 'contradiction': 1, 'neutral': 2}\n",
 86 |     "    file_name = os.path.join(data_dir, 'snli_1.0_train.txt'\n",
 87 |     "                             if is_train else 'snli_1.0_test.txt')\n",
 88 |     "    with open(file_name, 'r') as f:\n",
 89 |     "        rows = [row.split('\\t') for row in f.readlines()[1:]]\n",
 90 |     "    premises = [extract_text(row[1]) for row in rows if row[0] in label_set]\n",
 91 |     "    hypotheses = [extract_text(row[2]) for row in rows if row[0] \\\n",
 92 |     "                in label_set]\n",
 93 |     "    labels = [label_set[row[0]] for row in rows if row[0] in label_set]\n",
 94 |     "    return premises, hypotheses, labels"
 95 |    ]
 96 |   },
 97 |   {
 98 |    "cell_type": "markdown",
 99 |    "id": "ab3a4dc3",
100 |    "metadata": {
101 |     "slideshow": {
102 |      "slide_type": "slide"
103 |     }
104 |    },
105 |    "source": [
106 |     "打印前3对"
107 |    ]
108 |   },
109 |   {
110 |    "cell_type": "code",
111 |    "execution_count": 3,
112 |    "id": "19101f9e",
113 |    "metadata": {
114 |     "execution": {
115 |      "iopub.execute_input": "2023-08-18T07:06:09.397297Z",
116 |      "iopub.status.busy": "2023-08-18T07:06:09.396407Z",
117 |      "iopub.status.idle": "2023-08-18T07:06:23.206512Z",
118 |      "shell.execute_reply": "2023-08-18T07:06:23.205574Z"
119 |     },
120 |     "origin_pos": 7,
121 |     "tab": [
122 |      "pytorch"
123 |     ]
124 |    },
125 |    "outputs": [
126 |     {
127 |      "name": "stdout",
128 |      "output_type": "stream",
129 |      "text": [
130 |       "前提： A person on a horse jumps over a broken down airplane .\n",
131 |       "假设： A person is training his horse for a competition .\n",
132 |       "标签： 2\n",
133 |       "前提： A person on a horse jumps over a broken down airplane .\n",
134 |       "假设： A person is at a diner , ordering an omelette .\n",
135 |       "标签： 1\n",
136 |       "前提： A person on a horse jumps over a broken down airplane .\n",
137 |       "假设： A person is outdoors , on a horse .\n",
138 |       "标签： 0\n"
139 |      ]
140 |     }
141 |    ],
142 |    "source": [
143 |     "train_data = read_snli(data_dir, is_train=True)\n",
144 |     "for x0, x1, y in zip(train_data[0][:3], train_data[1][:3], train_data[2][:3]):\n",
145 |     "    print('前提：', x0)\n",
146 |     "    print('假设：', x1)\n",
147 |     "    print('标签：', y)"
148 |    ]
149 |   },
150 |   {
151 |    "cell_type": "markdown",
152 |    "id": "ffb6b0b0",
153 |    "metadata": {
154 |     "slideshow": {
155 |      "slide_type": "slide"
156 |     }
157 |    },
158 |    "source": [
159 |     "标签“蕴涵”“矛盾”和“中性”是平衡的"
160 |    ]
161 |   },
162 |   {
163 |    "cell_type": "code",
164 |    "execution_count": 4,
165 |    "id": "972ca3d1",
166 |    "metadata": {
167 |     "execution": {
168 |      "iopub.execute_input": "2023-08-18T07:06:23.210300Z",
169 |      "iopub.status.busy": "2023-08-18T07:06:23.209728Z",
170 |      "iopub.status.idle": "2023-08-18T07:06:23.531128Z",
171 |      "shell.execute_reply": "2023-08-18T07:06:23.530246Z"
172 |     },
173 |     "origin_pos": 9,
174 |     "tab": [
175 |      "pytorch"
176 |     ]
177 |    },
178 |    "outputs": [
179 |     {
180 |      "name": "stdout",
181 |      "output_type": "stream",
182 |      "text": [
183 |       "[183416, 183187, 182764]\n",
184 |       "[3368, 3237, 3219]\n"
185 |      ]
186 |     }
187 |    ],
188 |    "source": [
189 |     "test_data = read_snli(data_dir, is_train=False)\n",
190 |     "for data in [train_data, test_data]:\n",
191 |     "    print([[row for row in data[2]].count(i) for i in range(3)])"
192 |    ]
193 |   },
194 |   {
195 |    "cell_type": "markdown",
196 |    "id": "8fe876db",
197 |    "metadata": {
198 |     "slideshow": {
199 |      "slide_type": "slide"
200 |     }
201 |    },
202 |    "source": [
203 |     "定义用于加载数据集的类"
204 |    ]
205 |   },
206 |   {
207 |    "cell_type": "code",
208 |    "execution_count": 5,
209 |    "id": "b8b15f65",
210 |    "metadata": {
211 |     "execution": {
212 |      "iopub.execute_input": "2023-08-18T07:06:23.534933Z",
213 |      "iopub.status.busy": "2023-08-18T07:06:23.534365Z",
214 |      "iopub.status.idle": "2023-08-18T07:06:23.542550Z",
215 |      "shell.execute_reply": "2023-08-18T07:06:23.541714Z"
216 |     },
217 |     "origin_pos": 12,
218 |     "tab": [
219 |      "pytorch"
220 |     ]
221 |    },
222 |    "outputs": [],
223 |    "source": [
224 |     "class SNLIDataset(torch.utils.data.Dataset):\n",
225 |     "    \"\"\"用于加载SNLI数据集的自定义数据集\"\"\"\n",
226 |     "    def __init__(self, dataset, num_steps, vocab=None):\n",
227 |     "        self.num_steps = num_steps\n",
228 |     "        all_premise_tokens = d2l.tokenize(dataset[0])\n",
229 |     "        all_hypothesis_tokens = d2l.tokenize(dataset[1])\n",
230 |     "        if vocab is None:\n",
231 |     "            self.vocab = d2l.Vocab(all_premise_tokens + \\\n",
232 |     "                all_hypothesis_tokens, min_freq=5, reserved_tokens=['<pad>'])\n",
233 |     "        else:\n",
234 |     "            self.vocab = vocab\n",
235 |     "        self.premises = self._pad(all_premise_tokens)\n",
236 |     "        self.hypotheses = self._pad(all_hypothesis_tokens)\n",
237 |     "        self.labels = torch.tensor(dataset[2])\n",
238 |     "        print('read ' + str(len(self.premises)) + ' examples')\n",
239 |     "\n",
240 |     "    def _pad(self, lines):\n",
241 |     "        return torch.tensor([d2l.truncate_pad(\n",
242 |     "            self.vocab[line], self.num_steps, self.vocab['<pad>'])\n",
243 |     "                         for line in lines])\n",
244 |     "\n",
245 |     "    def __getitem__(self, idx):\n",
246 |     "        return (self.premises[idx], self.hypotheses[idx]), self.labels[idx]\n",
247 |     "\n",
248 |     "    def __len__(self):\n",
249 |     "        return len(self.premises)"
250 |    ]
251 |   },
252 |   {
253 |    "cell_type": "markdown",
254 |    "id": "f4ab0616",
255 |    "metadata": {
256 |     "slideshow": {
257 |      "slide_type": "slide"
258 |     }
259 |    },
260 |    "source": [
261 |     "整合代码"
262 |    ]
263 |   },
264 |   {
265 |    "cell_type": "code",
266 |    "execution_count": 7,
267 |    "id": "08d0c755",
268 |    "metadata": {
269 |     "execution": {
270 |      "iopub.execute_input": "2023-08-18T07:06:23.554839Z",
271 |      "iopub.status.busy": "2023-08-18T07:06:23.554288Z",
272 |      "iopub.status.idle": "2023-08-18T07:07:02.488484Z",
273 |      "shell.execute_reply": "2023-08-18T07:07:02.487658Z"
274 |     },
275 |     "origin_pos": 19,
276 |     "tab": [
277 |      "pytorch"
278 |     ]
279 |    },
280 |    "outputs": [
281 |     {
282 |      "name": "stdout",
283 |      "output_type": "stream",
284 |      "text": [
285 |       "read 549367 examples\n"
286 |      ]
287 |     },
288 |     {
289 |      "name": "stdout",
290 |      "output_type": "stream",
291 |      "text": [
292 |       "read 9824 examples\n"
293 |      ]
294 |     },
295 |     {
296 |      "data": {
297 |       "text/plain": [
298 |        "18678"
299 |       ]
300 |      },
301 |      "execution_count": 7,
302 |      "metadata": {},
303 |      "output_type": "execute_result"
304 |     }
305 |    ],
306 |    "source": [
307 |     "def load_data_snli(batch_size, num_steps=50):\n",
308 |     "    \"\"\"下载SNLI数据集并返回数据迭代器和词表\"\"\"\n",
309 |     "    num_workers = d2l.get_dataloader_workers()\n",
310 |     "    data_dir = d2l.download_extract('SNLI')\n",
311 |     "    train_data = read_snli(data_dir, True)\n",
312 |     "    test_data = read_snli(data_dir, False)\n",
313 |     "    train_set = SNLIDataset(train_data, num_steps)\n",
314 |     "    test_set = SNLIDataset(test_data, num_steps, train_set.vocab)\n",
315 |     "    train_iter = torch.utils.data.DataLoader(train_set, batch_size,\n",
316 |     "                                             shuffle=True,\n",
317 |     "                                             num_workers=num_workers)\n",
318 |     "    test_iter = torch.utils.data.DataLoader(test_set, batch_size,\n",
319 |     "                                            shuffle=False,\n",
320 |     "                                            num_workers=num_workers)\n",
321 |     "    return train_iter, test_iter, train_set.vocab\n",
322 |     "\n",
323 |     "train_iter, test_iter, vocab = load_data_snli(128, 50)\n",
324 |     "len(vocab)"
325 |    ]
326 |   },
327 |   {
328 |    "cell_type": "code",
329 |    "execution_count": 8,
330 |    "id": "d7411a33",
331 |    "metadata": {
332 |     "execution": {
333 |      "iopub.execute_input": "2023-08-18T07:07:02.492220Z",
334 |      "iopub.status.busy": "2023-08-18T07:07:02.491909Z",
335 |      "iopub.status.idle": "2023-08-18T07:07:02.966465Z",
336 |      "shell.execute_reply": "2023-08-18T07:07:02.965137Z"
337 |     },
338 |     "origin_pos": 21,
339 |     "tab": [
340 |      "pytorch"
341 |     ]
342 |    },
343 |    "outputs": [
344 |     {
345 |      "name": "stdout",
346 |      "output_type": "stream",
347 |      "text": [
348 |       "torch.Size([128, 50])\n",
349 |       "torch.Size([128, 50])\n",
350 |       "torch.Size([128])\n"
351 |      ]
352 |     }
353 |    ],
354 |    "source": [
355 |     "for X, Y in train_iter:\n",
356 |     "    print(X[0].shape)\n",
357 |     "    print(X[1].shape)\n",
358 |     "    print(Y.shape)\n",
359 |     "    break"
360 |    ]
361 |   }
362 |  ],
363 |  "metadata": {
364 |   "celltoolbar": "Slideshow",
365 |   "language_info": {
366 |    "name": "python"
367 |   },
368 |   "required_libs": [],
369 |   "rise": {
370 |    "autolaunch": true,
371 |    "enable_chalkboard": true,
372 |    "overlay": "<div class='my-top-right'><img height=80px src='http://d2l.ai/_static/logo-with-text.png'/></div><div class='my-top-left'></div>",
373 |    "scroll": true
374 |   }
375 |  },
376 |  "nbformat": 4,
377 |  "nbformat_minor": 5
378 | }


--------------------------------------------------------------------------------
/chapter_natural-language-processing-applications/rise.css:
--------------------------------------------------------------------------------
 1 | 
 2 | div.text_cell_render.rendered_html { /* Rendered text cells (class names presumably come from the Jupyter notebook UI; verify). */
 3 |     padding: 0.35em 0.1em; /* shorthand: 0.35em top/bottom, 0.1em left/right */
 4 | }
 5 | 
 6 | div.code_cell { /* Code cells: scale text up, presumably for readability in slideshow mode. */
 7 |     font-size: 120%; /* 1.2x the inherited font size */
 8 | }
 9 | 
10 | div.my-top-right { /* Top-right overlay slot: the notebooks' "rise.overlay" metadata emits a div with this class wrapping the logo <img>. */
11 |     position: absolute; /* taken out of normal flow; offsets below are relative to the containing block */
12 |     right: 5%; /* inset from the right edge */
13 |     top: 1em; /* inset from the top edge */
14 |     font-size: 2em; /* 2x the inherited font size for any text placed in the slot */
15 | }
16 | 
17 | div.my-top-left { /* Top-left overlay slot: mirror of .my-top-right; the "rise.overlay" metadata in the notebooks emits this div empty. */
18 |     position: absolute; /* taken out of normal flow; offsets below are relative to the containing block */
19 |     left: 5%; /* inset from the left edge */
20 |     top: 1em; /* inset from the top edge */
21 |     font-size: 2em; /* 2x the inherited font size for any text placed in the slot */
22 | }
23 | 


--------------------------------------------------------------------------------
/chapter_preliminaries/autograd.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "markdown",
  5 |    "id": "807b7862",
  6 |    "metadata": {
  7 |     "slideshow": {
  8 |      "slide_type": "-"
  9 |     }
 10 |    },
 11 |    "source": [
 12 |     "# 自动微分\n",
 13 |     "\n",
 14 |     "假设我们想对函数$y=2\\mathbf{x}^{\\top}\\mathbf{x}$关于列向量$\\mathbf{x}$求导"
 15 |    ]
 16 |   },
 17 |   {
 18 |    "cell_type": "code",
 19 |    "execution_count": 1,
 20 |    "id": "98cd8a9e",
 21 |    "metadata": {
 22 |     "execution": {
 23 |      "iopub.execute_input": "2023-08-18T07:07:31.627945Z",
 24 |      "iopub.status.busy": "2023-08-18T07:07:31.627424Z",
 25 |      "iopub.status.idle": "2023-08-18T07:07:32.686372Z",
 26 |      "shell.execute_reply": "2023-08-18T07:07:32.685559Z"
 27 |     },
 28 |     "origin_pos": 2,
 29 |     "tab": [
 30 |      "pytorch"
 31 |     ]
 32 |    },
 33 |    "outputs": [
 34 |     {
 35 |      "data": {
 36 |       "text/plain": [
 37 |        "tensor([0., 1., 2., 3.])"
 38 |       ]
 39 |      },
 40 |      "execution_count": 1,
 41 |      "metadata": {},
 42 |      "output_type": "execute_result"
 43 |     }
 44 |    ],
 45 |    "source": [
 46 |     "import torch\n",
 47 |     "\n",
 48 |     "x = torch.arange(4.0)\n",
 49 |     "x"
 50 |    ]
 51 |   },
 52 |   {
 53 |    "cell_type": "markdown",
 54 |    "id": "4bd68367",
 55 |    "metadata": {
 56 |     "slideshow": {
 57 |      "slide_type": "slide"
 58 |     }
 59 |    },
 60 |    "source": [
 61 |     "在我们计算$y$关于$\\mathbf{x}$的梯度之前，需要一个地方来存储梯度"
 62 |    ]
 63 |   },
 64 |   {
 65 |    "cell_type": "code",
 66 |    "execution_count": 2,
 67 |    "id": "e27a5df4",
 68 |    "metadata": {
 69 |     "execution": {
 70 |      "iopub.execute_input": "2023-08-18T07:07:32.690633Z",
 71 |      "iopub.status.busy": "2023-08-18T07:07:32.689882Z",
 72 |      "iopub.status.idle": "2023-08-18T07:07:32.694159Z",
 73 |      "shell.execute_reply": "2023-08-18T07:07:32.693367Z"
 74 |     },
 75 |     "origin_pos": 7,
 76 |     "tab": [
 77 |      "pytorch"
 78 |     ]
 79 |    },
 80 |    "outputs": [],
 81 |    "source": [
 82 |     "x.requires_grad_(True)\n",
 83 |     "x.grad"
 84 |    ]
 85 |   },
 86 |   {
 87 |    "cell_type": "markdown",
 88 |    "id": "aeab4090",
 89 |    "metadata": {
 90 |     "slideshow": {
 91 |      "slide_type": "-"
 92 |     }
 93 |    },
 94 |    "source": [
 95 |     "现在计算$y$"
 96 |    ]
 97 |   },
 98 |   {
 99 |    "cell_type": "code",
100 |    "execution_count": 3,
101 |    "id": "4c3f80b7",
102 |    "metadata": {
103 |     "execution": {
104 |      "iopub.execute_input": "2023-08-18T07:07:32.698006Z",
105 |      "iopub.status.busy": "2023-08-18T07:07:32.697167Z",
106 |      "iopub.status.idle": "2023-08-18T07:07:32.705385Z",
107 |      "shell.execute_reply": "2023-08-18T07:07:32.704593Z"
108 |     },
109 |     "origin_pos": 12,
110 |     "tab": [
111 |      "pytorch"
112 |     ]
113 |    },
114 |    "outputs": [
115 |     {
116 |      "data": {
117 |       "text/plain": [
118 |        "tensor(28., grad_fn=<MulBackward0>)"
119 |       ]
120 |      },
121 |      "execution_count": 3,
122 |      "metadata": {},
123 |      "output_type": "execute_result"
124 |     }
125 |    ],
126 |    "source": [
127 |     "y = 2 * torch.dot(x, x)\n",
128 |     "y"
129 |    ]
130 |   },
131 |   {
132 |    "cell_type": "markdown",
133 |    "id": "c6ffaf8b",
134 |    "metadata": {
135 |     "slideshow": {
136 |      "slide_type": "slide"
137 |     }
138 |    },
139 |    "source": [
140 |     "通过调用反向传播函数来自动计算`y`关于`x`每个分量的梯度"
141 |    ]
142 |   },
143 |   {
144 |    "cell_type": "code",
145 |    "execution_count": 4,
146 |    "id": "a1c3a419",
147 |    "metadata": {
148 |     "execution": {
149 |      "iopub.execute_input": "2023-08-18T07:07:32.708698Z",
150 |      "iopub.status.busy": "2023-08-18T07:07:32.708196Z",
151 |      "iopub.status.idle": "2023-08-18T07:07:32.713924Z",
152 |      "shell.execute_reply": "2023-08-18T07:07:32.713091Z"
153 |     },
154 |     "origin_pos": 17,
155 |     "tab": [
156 |      "pytorch"
157 |     ]
158 |    },
159 |    "outputs": [
160 |     {
161 |      "data": {
162 |       "text/plain": [
163 |        "tensor([ 0.,  4.,  8., 12.])"
164 |       ]
165 |      },
166 |      "execution_count": 4,
167 |      "metadata": {},
168 |      "output_type": "execute_result"
169 |     }
170 |    ],
171 |    "source": [
172 |     "y.backward()\n",
173 |     "x.grad"
174 |    ]
175 |   },
176 |   {
177 |    "cell_type": "code",
178 |    "execution_count": 5,
179 |    "id": "b8493d0a",
180 |    "metadata": {
181 |     "execution": {
182 |      "iopub.execute_input": "2023-08-18T07:07:32.718858Z",
183 |      "iopub.status.busy": "2023-08-18T07:07:32.718156Z",
184 |      "iopub.status.idle": "2023-08-18T07:07:32.724091Z",
185 |      "shell.execute_reply": "2023-08-18T07:07:32.723104Z"
186 |     },
187 |     "origin_pos": 22,
188 |     "tab": [
189 |      "pytorch"
190 |     ]
191 |    },
192 |    "outputs": [
193 |     {
194 |      "data": {
195 |       "text/plain": [
196 |        "tensor([True, True, True, True])"
197 |       ]
198 |      },
199 |      "execution_count": 5,
200 |      "metadata": {},
201 |      "output_type": "execute_result"
202 |     }
203 |    ],
204 |    "source": [
205 |     "x.grad == 4 * x"
206 |    ]
207 |   },
208 |   {
209 |    "cell_type": "markdown",
210 |    "id": "dcb2d3a8",
211 |    "metadata": {
212 |     "slideshow": {
213 |      "slide_type": "slide"
214 |     }
215 |    },
216 |    "source": [
217 |     "现在计算`x`的另一个函数"
218 |    ]
219 |   },
220 |   {
221 |    "cell_type": "code",
222 |    "execution_count": 6,
223 |    "id": "f2fcd392",
224 |    "metadata": {
225 |     "execution": {
226 |      "iopub.execute_input": "2023-08-18T07:07:32.729368Z",
227 |      "iopub.status.busy": "2023-08-18T07:07:32.728433Z",
228 |      "iopub.status.idle": "2023-08-18T07:07:32.736493Z",
229 |      "shell.execute_reply": "2023-08-18T07:07:32.735715Z"
230 |     },
231 |     "origin_pos": 27,
232 |     "tab": [
233 |      "pytorch"
234 |     ]
235 |    },
236 |    "outputs": [
237 |     {
238 |      "data": {
239 |       "text/plain": [
240 |        "tensor([1., 1., 1., 1.])"
241 |       ]
242 |      },
243 |      "execution_count": 6,
244 |      "metadata": {},
245 |      "output_type": "execute_result"
246 |     }
247 |    ],
248 |    "source": [
249 |     "x.grad.zero_()\n",
250 |     "y = x.sum()\n",
251 |     "y.backward()\n",
252 |     "x.grad"
253 |    ]
254 |   },
255 |   {
256 |    "cell_type": "markdown",
257 |    "id": "6ab86a5c",
258 |    "metadata": {
259 |     "slideshow": {
260 |      "slide_type": "slide"
261 |     }
262 |    },
263 |    "source": [
264 |     "深度学习中，",
265 |     "我们的目的不是计算微分矩阵，而是单独计算批量中每个样本的偏导数之和"
266 |    ]
267 |   },
268 |   {
269 |    "cell_type": "code",
270 |    "execution_count": 7,
271 |    "id": "f4e62a5d",
272 |    "metadata": {
273 |     "execution": {
274 |      "iopub.execute_input": "2023-08-18T07:07:32.740109Z",
275 |      "iopub.status.busy": "2023-08-18T07:07:32.739419Z",
276 |      "iopub.status.idle": "2023-08-18T07:07:32.745803Z",
277 |      "shell.execute_reply": "2023-08-18T07:07:32.744893Z"
278 |     },
279 |     "origin_pos": 32,
280 |     "tab": [
281 |      "pytorch"
282 |     ]
283 |    },
284 |    "outputs": [
285 |     {
286 |      "data": {
287 |       "text/plain": [
288 |        "tensor([0., 2., 4., 6.])"
289 |       ]
290 |      },
291 |      "execution_count": 7,
292 |      "metadata": {},
293 |      "output_type": "execute_result"
294 |     }
295 |    ],
296 |    "source": [
297 |     "x.grad.zero_()\n",
298 |     "y = x * x\n",
299 |     "y.sum().backward()\n",
300 |     "x.grad"
301 |    ]
302 |   },
303 |   {
304 |    "cell_type": "markdown",
305 |    "id": "aba72343",
306 |    "metadata": {
307 |     "slideshow": {
308 |      "slide_type": "slide"
309 |     }
310 |    },
311 |    "source": [
312 |     "将某些计算移动到记录的计算图之外"
313 |    ]
314 |   },
315 |   {
316 |    "cell_type": "code",
317 |    "execution_count": 8,
318 |    "id": "8dab493d",
319 |    "metadata": {
320 |     "execution": {
321 |      "iopub.execute_input": "2023-08-18T07:07:32.749398Z",
322 |      "iopub.status.busy": "2023-08-18T07:07:32.748759Z",
323 |      "iopub.status.idle": "2023-08-18T07:07:32.755280Z",
324 |      "shell.execute_reply": "2023-08-18T07:07:32.754543Z"
325 |     },
326 |     "origin_pos": 37,
327 |     "tab": [
328 |      "pytorch"
329 |     ]
330 |    },
331 |    "outputs": [
332 |     {
333 |      "data": {
334 |       "text/plain": [
335 |        "tensor([True, True, True, True])"
336 |       ]
337 |      },
338 |      "execution_count": 8,
339 |      "metadata": {},
340 |      "output_type": "execute_result"
341 |     }
342 |    ],
343 |    "source": [
344 |     "x.grad.zero_()\n",
345 |     "y = x * x\n",
346 |     "u = y.detach()\n",
347 |     "z = u * x\n",
348 |     "\n",
349 |     "z.sum().backward()\n",
350 |     "x.grad == u"
351 |    ]
352 |   },
353 |   {
354 |    "cell_type": "code",
355 |    "execution_count": 9,
356 |    "id": "271a9b3a",
357 |    "metadata": {
358 |     "execution": {
359 |      "iopub.execute_input": "2023-08-18T07:07:32.759344Z",
360 |      "iopub.status.busy": "2023-08-18T07:07:32.758633Z",
361 |      "iopub.status.idle": "2023-08-18T07:07:32.764663Z",
362 |      "shell.execute_reply": "2023-08-18T07:07:32.763922Z"
363 |     },
364 |     "origin_pos": 42,
365 |     "tab": [
366 |      "pytorch"
367 |     ]
368 |    },
369 |    "outputs": [
370 |     {
371 |      "data": {
372 |       "text/plain": [
373 |        "tensor([True, True, True, True])"
374 |       ]
375 |      },
376 |      "execution_count": 9,
377 |      "metadata": {},
378 |      "output_type": "execute_result"
379 |     }
380 |    ],
381 |    "source": [
382 |     "x.grad.zero_()\n",
383 |     "y.sum().backward()\n",
384 |     "x.grad == 2 * x"
385 |    ]
386 |   },
387 |   {
388 |    "cell_type": "markdown",
389 |    "id": "491b3462",
390 |    "metadata": {
391 |     "slideshow": {
392 |      "slide_type": "slide"
393 |     }
394 |    },
395 |    "source": [
396 |     "即使构建函数的计算图需要通过Python控制流（例如，条件、循环或任意函数调用），我们仍然可以计算得到的变量的梯度"
397 |    ]
398 |   },
399 |   {
400 |    "cell_type": "code",
401 |    "execution_count": 12,
402 |    "id": "2595bdc0",
403 |    "metadata": {
404 |     "execution": {
405 |      "iopub.execute_input": "2023-08-18T07:07:32.785728Z",
406 |      "iopub.status.busy": "2023-08-18T07:07:32.785179Z",
407 |      "iopub.status.idle": "2023-08-18T07:07:32.790672Z",
408 |      "shell.execute_reply": "2023-08-18T07:07:32.789892Z"
409 |     },
410 |     "origin_pos": 57,
411 |     "tab": [
412 |      "pytorch"
413 |     ]
414 |    },
415 |    "outputs": [
416 |     {
417 |      "data": {
418 |       "text/plain": [
419 |        "tensor(True)"
420 |       ]
421 |      },
422 |      "execution_count": 12,
423 |      "metadata": {},
424 |      "output_type": "execute_result"
425 |     }
426 |    ],
427 |    "source": [
428 |     "def f(a):\n",
429 |     "    b = a * 2\n",
430 |     "    while b.norm() < 1000:\n",
431 |     "        b = b * 2\n",
432 |     "    if b.sum() > 0:\n",
433 |     "        c = b\n",
434 |     "    else:\n",
435 |     "        c = 100 * b\n",
436 |     "    return c\n",
437 |     "\n",
438 |     "a = torch.randn(size=(), requires_grad=True)\n",
439 |     "d = f(a)\n",
440 |     "d.backward()\n",
441 |     "\n",
442 |     "a.grad == d / a"
443 |    ]
444 |   }
445 |  ],
446 |  "metadata": {
447 |   "celltoolbar": "Slideshow",
448 |   "language_info": {
449 |    "name": "python"
450 |   },
451 |   "required_libs": [],
452 |   "rise": {
453 |    "autolaunch": true,
454 |    "enable_chalkboard": true,
455 |    "overlay": "<div class='my-top-right'><img height=80px src='http://d2l.ai/_static/logo-with-text.png'/></div><div class='my-top-left'></div>",
456 |    "scroll": true
457 |   }
458 |  },
459 |  "nbformat": 4,
460 |  "nbformat_minor": 5
461 | }


--------------------------------------------------------------------------------
/chapter_preliminaries/lookup-api.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "markdown",
  5 |    "id": "a9a80d6d",
  6 |    "metadata": {
  7 |     "slideshow": {
  8 |      "slide_type": "-"
  9 |     }
 10 |    },
 11 |    "source": [
 12 |     "# 查阅文档\n",
 13 |     "\n"
 14 |    ]
 15 |   },
 16 |   {
 17 |    "cell_type": "markdown",
 18 |    "id": "0be06f74",
 19 |    "metadata": {
 20 |     "slideshow": {
 21 |      "slide_type": "-"
 22 |     }
 23 |    },
 24 |    "source": [
 25 |     "查询随机数生成模块中的所有属性"
 26 |    ]
 27 |   },
 28 |   {
 29 |    "cell_type": "code",
 30 |    "execution_count": 1,
 31 |    "id": "8f7f4d63",
 32 |    "metadata": {
 33 |     "execution": {
 34 |      "iopub.execute_input": "2023-08-18T07:05:30.519062Z",
 35 |      "iopub.status.busy": "2023-08-18T07:05:30.518501Z",
 36 |      "iopub.status.idle": "2023-08-18T07:05:31.469749Z",
 37 |      "shell.execute_reply": "2023-08-18T07:05:31.468858Z"
 38 |     },
 39 |     "origin_pos": 6,
 40 |     "tab": [
 41 |      "pytorch"
 42 |     ]
 43 |    },
 44 |    "outputs": [
 45 |     {
 46 |      "name": "stdout",
 47 |      "output_type": "stream",
 48 |      "text": [
 49 |       "['AbsTransform', 'AffineTransform', 'Bernoulli', 'Beta', 'Binomial', 'CatTransform', 'Categorical', 'Cauchy', 'Chi2', 'ComposeTransform', 'ContinuousBernoulli', 'CorrCholeskyTransform', 'CumulativeDistributionTransform', 'Dirichlet', 'Distribution', 'ExpTransform', 'Exponential', 'ExponentialFamily', 'FisherSnedecor', 'Gamma', 'Geometric', 'Gumbel', 'HalfCauchy', 'HalfNormal', 'Independent', 'IndependentTransform', 'Kumaraswamy', 'LKJCholesky', 'Laplace', 'LogNormal', 'LogisticNormal', 'LowRankMultivariateNormal', 'LowerCholeskyTransform', 'MixtureSameFamily', 'Multinomial', 'MultivariateNormal', 'NegativeBinomial', 'Normal', 'OneHotCategorical', 'OneHotCategoricalStraightThrough', 'Pareto', 'Poisson', 'PowerTransform', 'RelaxedBernoulli', 'RelaxedOneHotCategorical', 'ReshapeTransform', 'SigmoidTransform', 'SoftmaxTransform', 'SoftplusTransform', 'StackTransform', 'StickBreakingTransform', 'StudentT', 'TanhTransform', 'Transform', 'TransformedDistribution', 'Uniform', 'VonMises', 'Weibull', 'Wishart', '__all__', '__builtins__', '__cached__', '__doc__', '__file__', '__loader__', '__name__', '__package__', '__path__', '__spec__', 'bernoulli', 'beta', 'biject_to', 'binomial', 'categorical', 'cauchy', 'chi2', 'constraint_registry', 'constraints', 'continuous_bernoulli', 'dirichlet', 'distribution', 'exp_family', 'exponential', 'fishersnedecor', 'gamma', 'geometric', 'gumbel', 'half_cauchy', 'half_normal', 'identity_transform', 'independent', 'kl', 'kl_divergence', 'kumaraswamy', 'laplace', 'lkj_cholesky', 'log_normal', 'logistic_normal', 'lowrank_multivariate_normal', 'mixture_same_family', 'multinomial', 'multivariate_normal', 'negative_binomial', 'normal', 'one_hot_categorical', 'pareto', 'poisson', 'register_kl', 'relaxed_bernoulli', 'relaxed_categorical', 'studentT', 'transform_to', 'transformed_distribution', 'transforms', 'uniform', 'utils', 'von_mises', 'weibull', 'wishart']\n"
 50 |      ]
 51 |     }
 52 |    ],
 53 |    "source": [
 54 |     "import torch\n",
 55 |     "\n",
 56 |     "print(dir(torch.distributions))"
 57 |    ]
 58 |   },
 59 |   {
 60 |    "cell_type": "markdown",
 61 |    "id": "4b506160",
 62 |    "metadata": {
 63 |     "slideshow": {
 64 |      "slide_type": "slide"
 65 |     }
 66 |    },
 67 |    "source": [
 68 |     "查看张量`ones`函数的用法"
 69 |    ]
 70 |   },
 71 |   {
 72 |    "cell_type": "code",
 73 |    "execution_count": 2,
 74 |    "id": "a16494ed",
 75 |    "metadata": {
 76 |     "execution": {
 77 |      "iopub.execute_input": "2023-08-18T07:05:31.473606Z",
 78 |      "iopub.status.busy": "2023-08-18T07:05:31.472946Z",
 79 |      "iopub.status.idle": "2023-08-18T07:05:31.477780Z",
 80 |      "shell.execute_reply": "2023-08-18T07:05:31.476938Z"
 81 |     },
 82 |     "origin_pos": 11,
 83 |     "tab": [
 84 |      "pytorch"
 85 |     ]
 86 |    },
 87 |    "outputs": [
 88 |     {
 89 |      "name": "stdout",
 90 |      "output_type": "stream",
 91 |      "text": [
 92 |       "Help on built-in function ones in module torch:\n",
 93 |       "\n",
 94 |       "ones(...)\n",
 95 |       "    ones(*size, *, out=None, dtype=None, layout=torch.strided, device=None, requires_grad=False) -> Tensor\n",
 96 |       "    \n",
 97 |       "    Returns a tensor filled with the scalar value `1`, with the shape defined\n",
 98 |       "    by the variable argument :attr:`size`.\n",
 99 |       "    \n",
100 |       "    Args:\n",
101 |       "        size (int...): a sequence of integers defining the shape of the output tensor.\n",
102 |       "            Can be a variable number of arguments or a collection like a list or tuple.\n",
103 |       "    \n",
104 |       "    Keyword arguments:\n",
105 |       "        out (Tensor, optional): the output tensor.\n",
106 |       "        dtype (:class:`torch.dtype`, optional): the desired data type of returned tensor.\n",
107 |       "            Default: if ``None``, uses a global default (see :func:`torch.set_default_tensor_type`).\n",
108 |       "        layout (:class:`torch.layout`, optional): the desired layout of returned Tensor.\n",
109 |       "            Default: ``torch.strided``.\n",
110 |       "        device (:class:`torch.device`, optional): the desired device of returned tensor.\n",
111 |       "            Default: if ``None``, uses the current device for the default tensor type\n",
112 |       "            (see :func:`torch.set_default_tensor_type`). :attr:`device` will be the CPU\n",
113 |       "            for CPU tensor types and the current CUDA device for CUDA tensor types.\n",
114 |       "        requires_grad (bool, optional): If autograd should record operations on the\n",
115 |       "            returned tensor. Default: ``False``.\n",
116 |       "    \n",
117 |       "    Example::\n",
118 |       "    \n",
119 |       "        >>> torch.ones(2, 3)\n",
120 |       "        tensor([[ 1.,  1.,  1.],\n",
121 |       "                [ 1.,  1.,  1.]])\n",
122 |       "    \n",
123 |       "        >>> torch.ones(5)\n",
124 |       "        tensor([ 1.,  1.,  1.,  1.,  1.])\n",
125 |       "\n"
126 |      ]
127 |     }
128 |    ],
129 |    "source": [
130 |     "help(torch.ones)"
131 |    ]
132 |   },
133 |   {
134 |    "cell_type": "markdown",
135 |    "id": "5bb71da8",
136 |    "metadata": {
137 |     "slideshow": {
138 |      "slide_type": "slide"
139 |     }
140 |    },
141 |    "source": [
142 |     "运行一个快速测试"
143 |    ]
144 |   },
145 |   {
146 |    "cell_type": "code",
147 |    "execution_count": 3,
148 |    "id": "7870b2f5",
149 |    "metadata": {
150 |     "execution": {
151 |      "iopub.execute_input": "2023-08-18T07:05:31.481310Z",
152 |      "iopub.status.busy": "2023-08-18T07:05:31.480685Z",
153 |      "iopub.status.idle": "2023-08-18T07:05:31.490398Z",
154 |      "shell.execute_reply": "2023-08-18T07:05:31.489581Z"
155 |     },
156 |     "origin_pos": 16,
157 |     "tab": [
158 |      "pytorch"
159 |     ]
160 |    },
161 |    "outputs": [
162 |     {
163 |      "data": {
164 |       "text/plain": [
165 |        "tensor([1., 1., 1., 1.])"
166 |       ]
167 |      },
168 |      "execution_count": 3,
169 |      "metadata": {},
170 |      "output_type": "execute_result"
171 |     }
172 |    ],
173 |    "source": [
174 |     "torch.ones(4)"
175 |    ]
176 |   }
177 |  ],
178 |  "metadata": {
179 |   "celltoolbar": "Slideshow",
180 |   "language_info": {
181 |    "name": "python"
182 |   },
183 |   "required_libs": [],
184 |   "rise": {
185 |    "autolaunch": true,
186 |    "enable_chalkboard": true,
187 |    "overlay": "<div class='my-top-right'><img height=80px src='http://d2l.ai/_static/logo-with-text.png'/></div><div class='my-top-left'></div>",
188 |    "scroll": true
189 |   }
190 |  },
191 |  "nbformat": 4,
192 |  "nbformat_minor": 5
193 | }


--------------------------------------------------------------------------------
/chapter_preliminaries/ndarray.ipynb:
--------------------------------------------------------------------------------
   1 | {
   2 |  "cells": [
   3 |   {
   4 |    "cell_type": "markdown",
   5 |    "id": "ee4089f8",
   6 |    "metadata": {
   7 |     "slideshow": {
   8 |      "slide_type": "-"
   9 |     }
  10 |    },
  11 |    "source": [
  12 |     "# 数据操作\n",
  13 |     "\n"
  14 |    ]
  15 |   },
  16 |   {
  17 |    "cell_type": "markdown",
  18 |    "id": "52ec48cf",
  19 |    "metadata": {
  20 |     "slideshow": {
  21 |      "slide_type": "-"
  22 |     }
  23 |    },
  24 |    "source": [
  25 |     "首先，我们导入`torch`。请注意，虽然它被称为PyTorch，但是代码中使用`torch`而不是`pytorch`"
  26 |    ]
  27 |   },
  28 |   {
  29 |    "cell_type": "code",
  30 |    "execution_count": 1,
  31 |    "id": "278e6d3f",
  32 |    "metadata": {
  33 |     "execution": {
  34 |      "iopub.execute_input": "2023-08-18T07:05:01.545874Z",
  35 |      "iopub.status.busy": "2023-08-18T07:05:01.545147Z",
  36 |      "iopub.status.idle": "2023-08-18T07:05:02.992816Z",
  37 |      "shell.execute_reply": "2023-08-18T07:05:02.991719Z"
  38 |     },
  39 |     "origin_pos": 5,
  40 |     "tab": [
  41 |      "pytorch"
  42 |     ]
  43 |    },
  44 |    "outputs": [],
  45 |    "source": [
  46 |     "import torch"
  47 |    ]
  48 |   },
  49 |   {
  50 |    "cell_type": "markdown",
  51 |    "id": "8f89b9c0",
  52 |    "metadata": {
  53 |     "slideshow": {
  54 |      "slide_type": "slide"
  55 |     }
  56 |    },
  57 |    "source": [
  58 |     "张量表示一个由数值组成的数组，这个数组可能有多个维度"
  59 |    ]
  60 |   },
  61 |   {
  62 |    "cell_type": "code",
  63 |    "execution_count": 2,
  64 |    "id": "b1700627",
  65 |    "metadata": {
  66 |     "execution": {
  67 |      "iopub.execute_input": "2023-08-18T07:05:02.997386Z",
  68 |      "iopub.status.busy": "2023-08-18T07:05:02.996970Z",
  69 |      "iopub.status.idle": "2023-08-18T07:05:03.007632Z",
  70 |      "shell.execute_reply": "2023-08-18T07:05:03.006483Z"
  71 |     },
  72 |     "origin_pos": 13,
  73 |     "tab": [
  74 |      "pytorch"
  75 |     ]
  76 |    },
  77 |    "outputs": [
  78 |     {
  79 |      "data": {
  80 |       "text/plain": [
  81 |        "tensor([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11])"
  82 |       ]
  83 |      },
  84 |      "execution_count": 2,
  85 |      "metadata": {},
  86 |      "output_type": "execute_result"
  87 |     }
  88 |    ],
  89 |    "source": [
  90 |     "x = torch.arange(12)\n",
  91 |     "x"
  92 |    ]
  93 |   },
  94 |   {
  95 |    "cell_type": "markdown",
  96 |    "id": "8ed94c74",
  97 |    "metadata": {
  98 |     "slideshow": {
  99 |      "slide_type": "slide"
 100 |     }
 101 |    },
 102 |    "source": [
 103 |     "可以通过张量的`shape`属性来访问张量（沿每个轴的长度）的*形状*，\n",
 104 |     "并通过`numel()`方法获取张量中元素的总数"
 105 |    ]
 106 |   },
 107 |   {
 108 |    "cell_type": "code",
 109 |    "execution_count": 3,
 110 |    "id": "b86b6572",
 111 |    "metadata": {
 112 |     "execution": {
 113 |      "iopub.execute_input": "2023-08-18T07:05:03.011628Z",
 114 |      "iopub.status.busy": "2023-08-18T07:05:03.011110Z",
 115 |      "iopub.status.idle": "2023-08-18T07:05:03.017191Z",
 116 |      "shell.execute_reply": "2023-08-18T07:05:03.016193Z"
 117 |     },
 118 |     "origin_pos": 17,
 119 |     "tab": [
 120 |      "pytorch"
 121 |     ]
 122 |    },
 123 |    "outputs": [
 124 |     {
 125 |      "data": {
 126 |       "text/plain": [
 127 |        "torch.Size([12])"
 128 |       ]
 129 |      },
 130 |      "execution_count": 3,
 131 |      "metadata": {},
 132 |      "output_type": "execute_result"
 133 |     }
 134 |    ],
 135 |    "source": [
 136 |     "x.shape"
 137 |    ]
 138 |   },
 139 |   {
 140 |    "cell_type": "code",
 141 |    "execution_count": 4,
 142 |    "id": "b8b69ca9",
 143 |    "metadata": {
 144 |     "execution": {
 145 |      "iopub.execute_input": "2023-08-18T07:05:03.020938Z",
 146 |      "iopub.status.busy": "2023-08-18T07:05:03.020483Z",
 147 |      "iopub.status.idle": "2023-08-18T07:05:03.026998Z",
 148 |      "shell.execute_reply": "2023-08-18T07:05:03.025752Z"
 149 |     },
 150 |     "origin_pos": 20,
 151 |     "tab": [
 152 |      "pytorch"
 153 |     ]
 154 |    },
 155 |    "outputs": [
 156 |     {
 157 |      "data": {
 158 |       "text/plain": [
 159 |        "12"
 160 |       ]
 161 |      },
 162 |      "execution_count": 4,
 163 |      "metadata": {},
 164 |      "output_type": "execute_result"
 165 |     }
 166 |    ],
 167 |    "source": [
 168 |     "x.numel()"
 169 |    ]
 170 |   },
 171 |   {
 172 |    "cell_type": "markdown",
 173 |    "id": "588600c4",
 174 |    "metadata": {
 175 |     "slideshow": {
 176 |      "slide_type": "slide"
 177 |     }
 178 |    },
 179 |    "source": [
 180 |     "要想改变一个张量的形状而不改变元素数量和元素值，可以调用`reshape`函数"
 181 |    ]
 182 |   },
 183 |   {
 184 |    "cell_type": "code",
 185 |    "execution_count": 5,
 186 |    "id": "0f294243",
 187 |    "metadata": {
 188 |     "execution": {
 189 |      "iopub.execute_input": "2023-08-18T07:05:03.031842Z",
 190 |      "iopub.status.busy": "2023-08-18T07:05:03.031448Z",
 191 |      "iopub.status.idle": "2023-08-18T07:05:03.039288Z",
 192 |      "shell.execute_reply": "2023-08-18T07:05:03.038227Z"
 193 |     },
 194 |     "origin_pos": 24,
 195 |     "tab": [
 196 |      "pytorch"
 197 |     ]
 198 |    },
 199 |    "outputs": [
 200 |     {
 201 |      "data": {
 202 |       "text/plain": [
 203 |        "tensor([[ 0,  1,  2,  3],\n",
 204 |        "        [ 4,  5,  6,  7],\n",
 205 |        "        [ 8,  9, 10, 11]])"
 206 |       ]
 207 |      },
 208 |      "execution_count": 5,
 209 |      "metadata": {},
 210 |      "output_type": "execute_result"
 211 |     }
 212 |    ],
 213 |    "source": [
 214 |     "X = x.reshape(3, 4)\n",
 215 |     "X"
 216 |    ]
 217 |   },
 218 |   {
 219 |    "cell_type": "markdown",
 220 |    "id": "403fad02",
 221 |    "metadata": {
 222 |     "slideshow": {
 223 |      "slide_type": "slide"
 224 |     }
 225 |    },
 226 |    "source": [
 227 |     "使用全0、全1、其他常量，或者从特定分布中随机采样的数字来初始化张量"
 228 |    ]
 229 |   },
 230 |   {
 231 |    "cell_type": "code",
 232 |    "execution_count": 6,
 233 |    "id": "b23c3056",
 234 |    "metadata": {
 235 |     "execution": {
 236 |      "iopub.execute_input": "2023-08-18T07:05:03.044733Z",
 237 |      "iopub.status.busy": "2023-08-18T07:05:03.043866Z",
 238 |      "iopub.status.idle": "2023-08-18T07:05:03.052195Z",
 239 |      "shell.execute_reply": "2023-08-18T07:05:03.051146Z"
 240 |     },
 241 |     "origin_pos": 29,
 242 |     "tab": [
 243 |      "pytorch"
 244 |     ]
 245 |    },
 246 |    "outputs": [
 247 |     {
 248 |      "data": {
 249 |       "text/plain": [
 250 |        "tensor([[[0., 0., 0., 0.],\n",
 251 |        "         [0., 0., 0., 0.],\n",
 252 |        "         [0., 0., 0., 0.]],\n",
 253 |        "\n",
 254 |        "        [[0., 0., 0., 0.],\n",
 255 |        "         [0., 0., 0., 0.],\n",
 256 |        "         [0., 0., 0., 0.]]])"
 257 |       ]
 258 |      },
 259 |      "execution_count": 6,
 260 |      "metadata": {},
 261 |      "output_type": "execute_result"
 262 |     }
 263 |    ],
 264 |    "source": [
 265 |     "torch.zeros((2, 3, 4))"
 266 |    ]
 267 |   },
 268 |   {
 269 |    "cell_type": "code",
 270 |    "execution_count": 7,
 271 |    "id": "25981960",
 272 |    "metadata": {
 273 |     "execution": {
 274 |      "iopub.execute_input": "2023-08-18T07:05:03.057264Z",
 275 |      "iopub.status.busy": "2023-08-18T07:05:03.056578Z",
 276 |      "iopub.status.idle": "2023-08-18T07:05:03.064973Z",
 277 |      "shell.execute_reply": "2023-08-18T07:05:03.063853Z"
 278 |     },
 279 |     "origin_pos": 34,
 280 |     "tab": [
 281 |      "pytorch"
 282 |     ]
 283 |    },
 284 |    "outputs": [
 285 |     {
 286 |      "data": {
 287 |       "text/plain": [
 288 |        "tensor([[[1., 1., 1., 1.],\n",
 289 |        "         [1., 1., 1., 1.],\n",
 290 |        "         [1., 1., 1., 1.]],\n",
 291 |        "\n",
 292 |        "        [[1., 1., 1., 1.],\n",
 293 |        "         [1., 1., 1., 1.],\n",
 294 |        "         [1., 1., 1., 1.]]])"
 295 |       ]
 296 |      },
 297 |      "execution_count": 7,
 298 |      "metadata": {},
 299 |      "output_type": "execute_result"
 300 |     }
 301 |    ],
 302 |    "source": [
 303 |     "torch.ones((2, 3, 4))"
 304 |    ]
 305 |   },
 306 |   {
 307 |    "cell_type": "code",
 308 |    "execution_count": 8,
 309 |    "id": "2493f09a",
 310 |    "metadata": {
 311 |     "execution": {
 312 |      "iopub.execute_input": "2023-08-18T07:05:03.069946Z",
 313 |      "iopub.status.busy": "2023-08-18T07:05:03.069231Z",
 314 |      "iopub.status.idle": "2023-08-18T07:05:03.077304Z",
 315 |      "shell.execute_reply": "2023-08-18T07:05:03.076139Z"
 316 |     },
 317 |     "origin_pos": 39,
 318 |     "tab": [
 319 |      "pytorch"
 320 |     ]
 321 |    },
 322 |    "outputs": [
 323 |     {
 324 |      "data": {
 325 |       "text/plain": [
 326 |        "tensor([[-0.0135,  0.0665,  0.0912,  0.3212],\n",
 327 |        "        [ 1.4653,  0.1843, -1.6995, -0.3036],\n",
 328 |        "        [ 1.7646,  1.0450,  0.2457, -0.7732]])"
 329 |       ]
 330 |      },
 331 |      "execution_count": 8,
 332 |      "metadata": {},
 333 |      "output_type": "execute_result"
 334 |     }
 335 |    ],
 336 |    "source": [
 337 |     "torch.randn(3, 4)"
 338 |    ]
 339 |   },
 340 |   {
 341 |    "cell_type": "markdown",
 342 |    "id": "211d5b8e",
 343 |    "metadata": {
 344 |     "slideshow": {
 345 |      "slide_type": "slide"
 346 |     }
 347 |    },
 348 |    "source": [
 349 |     "通过提供包含数值的Python列表（或嵌套列表），来为所需张量中的每个元素赋予确定值"
 350 |    ]
 351 |   },
 352 |   {
 353 |    "cell_type": "code",
 354 |    "execution_count": 9,
 355 |    "id": "708be494",
 356 |    "metadata": {
 357 |     "execution": {
 358 |      "iopub.execute_input": "2023-08-18T07:05:03.082360Z",
 359 |      "iopub.status.busy": "2023-08-18T07:05:03.081424Z",
 360 |      "iopub.status.idle": "2023-08-18T07:05:03.090148Z",
 361 |      "shell.execute_reply": "2023-08-18T07:05:03.088973Z"
 362 |     },
 363 |     "origin_pos": 44,
 364 |     "tab": [
 365 |      "pytorch"
 366 |     ]
 367 |    },
 368 |    "outputs": [
 369 |     {
 370 |      "data": {
 371 |       "text/plain": [
 372 |        "tensor([[2, 1, 4, 3],\n",
 373 |        "        [1, 2, 3, 4],\n",
 374 |        "        [4, 3, 2, 1]])"
 375 |       ]
 376 |      },
 377 |      "execution_count": 9,
 378 |      "metadata": {},
 379 |      "output_type": "execute_result"
 380 |     }
 381 |    ],
 382 |    "source": [
 383 |     "torch.tensor([[2, 1, 4, 3], [1, 2, 3, 4], [4, 3, 2, 1]])"
 384 |    ]
 385 |   },
 386 |   {
 387 |    "cell_type": "markdown",
 388 |    "id": "14185106",
 389 |    "metadata": {
 390 |     "slideshow": {
 391 |      "slide_type": "slide"
 392 |     }
 393 |    },
 394 |    "source": [
 395 |     "常见的标准算术运算符（`+`、`-`、`*`、`/`和`**`）都可以被升级为按元素运算"
 396 |    ]
 397 |   },
 398 |   {
 399 |    "cell_type": "code",
 400 |    "execution_count": 10,
 401 |    "id": "99b28553",
 402 |    "metadata": {
 403 |     "execution": {
 404 |      "iopub.execute_input": "2023-08-18T07:05:03.095504Z",
 405 |      "iopub.status.busy": "2023-08-18T07:05:03.094688Z",
 406 |      "iopub.status.idle": "2023-08-18T07:05:03.106084Z",
 407 |      "shell.execute_reply": "2023-08-18T07:05:03.104976Z"
 408 |     },
 409 |     "origin_pos": 49,
 410 |     "tab": [
 411 |      "pytorch"
 412 |     ]
 413 |    },
 414 |    "outputs": [
 415 |     {
 416 |      "data": {
 417 |       "text/plain": [
 418 |        "(tensor([ 3.,  4.,  6., 10.]),\n",
 419 |        " tensor([-1.,  0.,  2.,  6.]),\n",
 420 |        " tensor([ 2.,  4.,  8., 16.]),\n",
 421 |        " tensor([0.5000, 1.0000, 2.0000, 4.0000]),\n",
 422 |        " tensor([ 1.,  4., 16., 64.]))"
 423 |       ]
 424 |      },
 425 |      "execution_count": 10,
 426 |      "metadata": {},
 427 |      "output_type": "execute_result"
 428 |     }
 429 |    ],
 430 |    "source": [
 431 |     "x = torch.tensor([1.0, 2, 4, 8])\n",
 432 |     "y = torch.tensor([2, 2, 2, 2])\n",
 433 |     "x + y, x - y, x * y, x / y, x ** y"
 434 |    ]
 435 |   },
 436 |   {
 437 |    "cell_type": "markdown",
 438 |    "id": "7e770d36",
 439 |    "metadata": {
 440 |     "slideshow": {
 441 |      "slide_type": "-"
 442 |     }
 443 |    },
 444 |    "source": [
 445 |     "“按元素”方式可以应用更多的计算，包括像求幂这样的一元运算"
 446 |    ]
 447 |   },
 448 |   {
 449 |    "cell_type": "code",
 450 |    "execution_count": 11,
 451 |    "id": "ef07c995",
 452 |    "metadata": {
 453 |     "execution": {
 454 |      "iopub.execute_input": "2023-08-18T07:05:03.110973Z",
 455 |      "iopub.status.busy": "2023-08-18T07:05:03.110221Z",
 456 |      "iopub.status.idle": "2023-08-18T07:05:03.120389Z",
 457 |      "shell.execute_reply": "2023-08-18T07:05:03.119471Z"
 458 |     },
 459 |     "origin_pos": 54,
 460 |     "tab": [
 461 |      "pytorch"
 462 |     ]
 463 |    },
 464 |    "outputs": [
 465 |     {
 466 |      "data": {
 467 |       "text/plain": [
 468 |        "tensor([2.7183e+00, 7.3891e+00, 5.4598e+01, 2.9810e+03])"
 469 |       ]
 470 |      },
 471 |      "execution_count": 11,
 472 |      "metadata": {},
 473 |      "output_type": "execute_result"
 474 |     }
 475 |    ],
 476 |    "source": [
 477 |     "torch.exp(x)"
 478 |    ]
 479 |   },
 480 |   {
 481 |    "cell_type": "markdown",
 482 |    "id": "e8a09ecd",
 483 |    "metadata": {
 484 |     "slideshow": {
 485 |      "slide_type": "slide"
 486 |     }
 487 |    },
 488 |    "source": [
 489 |     "我们也可以把多个张量*连结*（concatenate）在一起"
 490 |    ]
 491 |   },
 492 |   {
 493 |    "cell_type": "code",
 494 |    "execution_count": 12,
 495 |    "id": "a583b891",
 496 |    "metadata": {
 497 |     "execution": {
 498 |      "iopub.execute_input": "2023-08-18T07:05:03.125263Z",
 499 |      "iopub.status.busy": "2023-08-18T07:05:03.124477Z",
 500 |      "iopub.status.idle": "2023-08-18T07:05:03.136328Z",
 501 |      "shell.execute_reply": "2023-08-18T07:05:03.135199Z"
 502 |     },
 503 |     "origin_pos": 59,
 504 |     "tab": [
 505 |      "pytorch"
 506 |     ]
 507 |    },
 508 |    "outputs": [
 509 |     {
 510 |      "data": {
 511 |       "text/plain": [
 512 |        "(tensor([[ 0.,  1.,  2.,  3.],\n",
 513 |        "         [ 4.,  5.,  6.,  7.],\n",
 514 |        "         [ 8.,  9., 10., 11.],\n",
 515 |        "         [ 2.,  1.,  4.,  3.],\n",
 516 |        "         [ 1.,  2.,  3.,  4.],\n",
 517 |        "         [ 4.,  3.,  2.,  1.]]),\n",
 518 |        " tensor([[ 0.,  1.,  2.,  3.,  2.,  1.,  4.,  3.],\n",
 519 |        "         [ 4.,  5.,  6.,  7.,  1.,  2.,  3.,  4.],\n",
 520 |        "         [ 8.,  9., 10., 11.,  4.,  3.,  2.,  1.]]))"
 521 |       ]
 522 |      },
 523 |      "execution_count": 12,
 524 |      "metadata": {},
 525 |      "output_type": "execute_result"
 526 |     }
 527 |    ],
 528 |    "source": [
 529 |     "X = torch.arange(12, dtype=torch.float32).reshape((3,4))\n",
 530 |     "Y = torch.tensor([[2.0, 1, 4, 3], [1, 2, 3, 4], [4, 3, 2, 1]])\n",
 531 |     "torch.cat((X, Y), dim=0), torch.cat((X, Y), dim=1)"
 532 |    ]
 533 |   },
 534 |   {
 535 |    "cell_type": "markdown",
 536 |    "id": "fac225a4",
 537 |    "metadata": {
 538 |     "slideshow": {
 539 |      "slide_type": "slide"
 540 |     }
 541 |    },
 542 |    "source": [
 543 |     "通过*逻辑运算符*构建二元张量"
 544 |    ]
 545 |   },
 546 |   {
 547 |    "cell_type": "code",
 548 |    "execution_count": 13,
 549 |    "id": "6405ec63",
 550 |    "metadata": {
 551 |     "execution": {
 552 |      "iopub.execute_input": "2023-08-18T07:05:03.141449Z",
 553 |      "iopub.status.busy": "2023-08-18T07:05:03.140776Z",
 554 |      "iopub.status.idle": "2023-08-18T07:05:03.148692Z",
 555 |      "shell.execute_reply": "2023-08-18T07:05:03.147491Z"
 556 |     },
 557 |     "origin_pos": 63,
 558 |     "tab": [
 559 |      "pytorch"
 560 |     ]
 561 |    },
 562 |    "outputs": [
 563 |     {
 564 |      "data": {
 565 |       "text/plain": [
 566 |        "tensor([[False,  True, False,  True],\n",
 567 |        "        [False, False, False, False],\n",
 568 |        "        [False, False, False, False]])"
 569 |       ]
 570 |      },
 571 |      "execution_count": 13,
 572 |      "metadata": {},
 573 |      "output_type": "execute_result"
 574 |     }
 575 |    ],
 576 |    "source": [
 577 |     "X == Y"
 578 |    ]
 579 |   },
 580 |   {
 581 |    "cell_type": "markdown",
 582 |    "id": "ab37c495",
 583 |    "metadata": {
 584 |     "slideshow": {
 585 |      "slide_type": "slide"
 586 |     }
 587 |    },
 588 |    "source": [
 589 |     "对张量中的所有元素进行求和，会产生一个单元素张量"
 590 |    ]
 591 |   },
 592 |   {
 593 |    "cell_type": "code",
 594 |    "execution_count": 14,
 595 |    "id": "a13cb291",
 596 |    "metadata": {
 597 |     "execution": {
 598 |      "iopub.execute_input": "2023-08-18T07:05:03.153907Z",
 599 |      "iopub.status.busy": "2023-08-18T07:05:03.152814Z",
 600 |      "iopub.status.idle": "2023-08-18T07:05:03.160277Z",
 601 |      "shell.execute_reply": "2023-08-18T07:05:03.159188Z"
 602 |     },
 603 |     "origin_pos": 65,
 604 |     "tab": [
 605 |      "pytorch"
 606 |     ]
 607 |    },
 608 |    "outputs": [
 609 |     {
 610 |      "data": {
 611 |       "text/plain": [
 612 |        "tensor(66.)"
 613 |       ]
 614 |      },
 615 |      "execution_count": 14,
 616 |      "metadata": {},
 617 |      "output_type": "execute_result"
 618 |     }
 619 |    ],
 620 |    "source": [
 621 |     "X.sum()"
 622 |    ]
 623 |   },
 624 |   {
 625 |    "cell_type": "markdown",
 626 |    "id": "63f0408f",
 627 |    "metadata": {
 628 |     "slideshow": {
 629 |      "slide_type": "slide"
 630 |     }
 631 |    },
 632 |    "source": [
 633 |     "即使形状不同，我们仍然可以通过调用\n",
 634 |     "*广播机制*（broadcasting mechanism）来执行按元素操作"
 635 |    ]
 636 |   },
 637 |   {
 638 |    "cell_type": "code",
 639 |    "execution_count": 15,
 640 |    "id": "a1de79a2",
 641 |    "metadata": {
 642 |     "execution": {
 643 |      "iopub.execute_input": "2023-08-18T07:05:03.165305Z",
 644 |      "iopub.status.busy": "2023-08-18T07:05:03.164274Z",
 645 |      "iopub.status.idle": "2023-08-18T07:05:03.172771Z",
 646 |      "shell.execute_reply": "2023-08-18T07:05:03.171692Z"
 647 |     },
 648 |     "origin_pos": 69,
 649 |     "tab": [
 650 |      "pytorch"
 651 |     ]
 652 |    },
 653 |    "outputs": [
 654 |     {
 655 |      "data": {
 656 |       "text/plain": [
 657 |        "(tensor([[0],\n",
 658 |        "         [1],\n",
 659 |        "         [2]]),\n",
 660 |        " tensor([[0, 1]]))"
 661 |       ]
 662 |      },
 663 |      "execution_count": 15,
 664 |      "metadata": {},
 665 |      "output_type": "execute_result"
 666 |     }
 667 |    ],
 668 |    "source": [
 669 |     "a = torch.arange(3).reshape((3, 1))\n",
 670 |     "b = torch.arange(2).reshape((1, 2))\n",
 671 |     "a, b"
 672 |    ]
 673 |   },
 674 |   {
 675 |    "cell_type": "code",
 676 |    "execution_count": 16,
 677 |    "id": "4d8904b1",
 678 |    "metadata": {
 679 |     "execution": {
 680 |      "iopub.execute_input": "2023-08-18T07:05:03.177900Z",
 681 |      "iopub.status.busy": "2023-08-18T07:05:03.176935Z",
 682 |      "iopub.status.idle": "2023-08-18T07:05:03.184212Z",
 683 |      "shell.execute_reply": "2023-08-18T07:05:03.183156Z"
 684 |     },
 685 |     "origin_pos": 73,
 686 |     "tab": [
 687 |      "pytorch"
 688 |     ]
 689 |    },
 690 |    "outputs": [
 691 |     {
 692 |      "data": {
 693 |       "text/plain": [
 694 |        "tensor([[0, 1],\n",
 695 |        "        [1, 2],\n",
 696 |        "        [2, 3]])"
 697 |       ]
 698 |      },
 699 |      "execution_count": 16,
 700 |      "metadata": {},
 701 |      "output_type": "execute_result"
 702 |     }
 703 |    ],
 704 |    "source": [
 705 |     "a + b"
 706 |    ]
 707 |   },
 708 |   {
 709 |    "cell_type": "markdown",
 710 |    "id": "16a2cb7b",
 711 |    "metadata": {
 712 |     "slideshow": {
 713 |      "slide_type": "slide"
 714 |     }
 715 |    },
 716 |    "source": [
 717 |     "可以用`[-1]`选择最后一个元素，可以用`[1:3]`选择第二个和第三个元素"
 718 |    ]
 719 |   },
 720 |   {
 721 |    "cell_type": "code",
 722 |    "execution_count": 17,
 723 |    "id": "b62b00c7",
 724 |    "metadata": {
 725 |     "execution": {
 726 |      "iopub.execute_input": "2023-08-18T07:05:03.189786Z",
 727 |      "iopub.status.busy": "2023-08-18T07:05:03.188961Z",
 728 |      "iopub.status.idle": "2023-08-18T07:05:03.197712Z",
 729 |      "shell.execute_reply": "2023-08-18T07:05:03.196559Z"
 730 |     },
 731 |     "origin_pos": 75,
 732 |     "tab": [
 733 |      "pytorch"
 734 |     ]
 735 |    },
 736 |    "outputs": [
 737 |     {
 738 |      "data": {
 739 |       "text/plain": [
 740 |        "(tensor([ 8.,  9., 10., 11.]),\n",
 741 |        " tensor([[ 4.,  5.,  6.,  7.],\n",
 742 |        "         [ 8.,  9., 10., 11.]]))"
 743 |       ]
 744 |      },
 745 |      "execution_count": 17,
 746 |      "metadata": {},
 747 |      "output_type": "execute_result"
 748 |     }
 749 |    ],
 750 |    "source": [
 751 |     "X[-1], X[1:3]"
 752 |    ]
 753 |   },
 754 |   {
 755 |    "cell_type": "markdown",
 756 |    "id": "9d9f38fe",
 757 |    "metadata": {
 758 |     "slideshow": {
 759 |      "slide_type": "slide"
 760 |     }
 761 |    },
 762 |    "source": [
 763 |     "除读取外，我们还可以通过指定索引来将元素写入矩阵"
 764 |    ]
 765 |   },
 766 |   {
 767 |    "cell_type": "code",
 768 |    "execution_count": 18,
 769 |    "id": "56a8261a",
 770 |    "metadata": {
 771 |     "execution": {
 772 |      "iopub.execute_input": "2023-08-18T07:05:03.203157Z",
 773 |      "iopub.status.busy": "2023-08-18T07:05:03.202390Z",
 774 |      "iopub.status.idle": "2023-08-18T07:05:03.210176Z",
 775 |      "shell.execute_reply": "2023-08-18T07:05:03.209097Z"
 776 |     },
 777 |     "origin_pos": 78,
 778 |     "tab": [
 779 |      "pytorch"
 780 |     ]
 781 |    },
 782 |    "outputs": [
 783 |     {
 784 |      "data": {
 785 |       "text/plain": [
 786 |        "tensor([[ 0.,  1.,  2.,  3.],\n",
 787 |        "        [ 4.,  5.,  9.,  7.],\n",
 788 |        "        [ 8.,  9., 10., 11.]])"
 789 |       ]
 790 |      },
 791 |      "execution_count": 18,
 792 |      "metadata": {},
 793 |      "output_type": "execute_result"
 794 |     }
 795 |    ],
 796 |    "source": [
 797 |     "X[1, 2] = 9\n",
 798 |     "X"
 799 |    ]
 800 |   },
 801 |   {
 802 |    "cell_type": "markdown",
 803 |    "id": "70b1b082",
 804 |    "metadata": {
 805 |     "slideshow": {
 806 |      "slide_type": "slide"
 807 |     }
 808 |    },
 809 |    "source": [
 810 |     "为多个元素赋值相同的值，我们只需要索引所有元素，然后为它们赋值"
 811 |    ]
 812 |   },
 813 |   {
 814 |    "cell_type": "code",
 815 |    "execution_count": 19,
 816 |    "id": "bd48bae9",
 817 |    "metadata": {
 818 |     "execution": {
 819 |      "iopub.execute_input": "2023-08-18T07:05:03.214118Z",
 820 |      "iopub.status.busy": "2023-08-18T07:05:03.213430Z",
 821 |      "iopub.status.idle": "2023-08-18T07:05:03.221215Z",
 822 |      "shell.execute_reply": "2023-08-18T07:05:03.220084Z"
 823 |     },
 824 |     "origin_pos": 81,
 825 |     "tab": [
 826 |      "pytorch"
 827 |     ]
 828 |    },
 829 |    "outputs": [
 830 |     {
 831 |      "data": {
 832 |       "text/plain": [
 833 |        "tensor([[12., 12., 12., 12.],\n",
 834 |        "        [12., 12., 12., 12.],\n",
 835 |        "        [ 8.,  9., 10., 11.]])"
 836 |       ]
 837 |      },
 838 |      "execution_count": 19,
 839 |      "metadata": {},
 840 |      "output_type": "execute_result"
 841 |     }
 842 |    ],
 843 |    "source": [
 844 |     "X[0:2, :] = 12\n",
 845 |     "X"
 846 |    ]
 847 |   },
 848 |   {
 849 |    "cell_type": "markdown",
 850 |    "id": "9360fd66",
 851 |    "metadata": {
 852 |     "slideshow": {
 853 |      "slide_type": "slide"
 854 |     }
 855 |    },
 856 |    "source": [
 857 |     "运行一些操作可能会导致为新结果分配内存"
 858 |    ]
 859 |   },
 860 |   {
 861 |    "cell_type": "code",
 862 |    "execution_count": 20,
 863 |    "id": "6bcd6d07",
 864 |    "metadata": {
 865 |     "execution": {
 866 |      "iopub.execute_input": "2023-08-18T07:05:03.225106Z",
 867 |      "iopub.status.busy": "2023-08-18T07:05:03.224353Z",
 868 |      "iopub.status.idle": "2023-08-18T07:05:03.231715Z",
 869 |      "shell.execute_reply": "2023-08-18T07:05:03.230626Z"
 870 |     },
 871 |     "origin_pos": 84,
 872 |     "tab": [
 873 |      "pytorch"
 874 |     ]
 875 |    },
 876 |    "outputs": [
 877 |     {
 878 |      "data": {
 879 |       "text/plain": [
 880 |        "False"
 881 |       ]
 882 |      },
 883 |      "execution_count": 20,
 884 |      "metadata": {},
 885 |      "output_type": "execute_result"
 886 |     }
 887 |    ],
 888 |    "source": [
 889 |     "before = id(Y)\n",
 890 |     "Y = Y + X\n",
 891 |     "id(Y) == before"
 892 |    ]
 893 |   },
 894 |   {
 895 |    "cell_type": "markdown",
 896 |    "id": "1764d724",
 897 |    "metadata": {
 898 |     "slideshow": {
 899 |      "slide_type": "-"
 900 |     }
 901 |    },
 902 |    "source": [
 903 |     "执行原地操作"
 904 |    ]
 905 |   },
 906 |   {
 907 |    "cell_type": "code",
 908 |    "execution_count": 21,
 909 |    "id": "13b7fdf6",
 910 |    "metadata": {
 911 |     "execution": {
 912 |      "iopub.execute_input": "2023-08-18T07:05:03.236933Z",
 913 |      "iopub.status.busy": "2023-08-18T07:05:03.236016Z",
 914 |      "iopub.status.idle": "2023-08-18T07:05:03.243252Z",
 915 |      "shell.execute_reply": "2023-08-18T07:05:03.242153Z"
 916 |     },
 917 |     "origin_pos": 89,
 918 |     "tab": [
 919 |      "pytorch"
 920 |     ]
 921 |    },
 922 |    "outputs": [
 923 |     {
 924 |      "name": "stdout",
 925 |      "output_type": "stream",
 926 |      "text": [
 927 |       "id(Z): 140327634811696\n",
 928 |       "id(Z): 140327634811696\n"
 929 |      ]
 930 |     }
 931 |    ],
 932 |    "source": [
 933 |     "Z = torch.zeros_like(Y)\n",
 934 |     "print('id(Z):', id(Z))\n",
 935 |     "Z[:] = X + Y\n",
 936 |     "print('id(Z):', id(Z))"
 937 |    ]
 938 |   },
 939 |   {
 940 |    "cell_type": "markdown",
 941 |    "id": "08b6232e",
 942 |    "metadata": {
 943 |     "slideshow": {
 944 |      "slide_type": "slide"
 945 |     }
 946 |    },
 947 |    "source": [
 948 |     "如果在后续计算中没有重复使用`X`，\n",
 949 |     "我们也可以使用`X[:] = X + Y`或`X += Y`来减少操作的内存开销"
 950 |    ]
 951 |   },
 952 |   {
 953 |    "cell_type": "code",
 954 |    "execution_count": 22,
 955 |    "id": "c8a97d75",
 956 |    "metadata": {
 957 |     "execution": {
 958 |      "iopub.execute_input": "2023-08-18T07:05:03.248290Z",
 959 |      "iopub.status.busy": "2023-08-18T07:05:03.247521Z",
 960 |      "iopub.status.idle": "2023-08-18T07:05:03.255046Z",
 961 |      "shell.execute_reply": "2023-08-18T07:05:03.253935Z"
 962 |     },
 963 |     "origin_pos": 94,
 964 |     "tab": [
 965 |      "pytorch"
 966 |     ]
 967 |    },
 968 |    "outputs": [
 969 |     {
 970 |      "data": {
 971 |       "text/plain": [
 972 |        "True"
 973 |       ]
 974 |      },
 975 |      "execution_count": 22,
 976 |      "metadata": {},
 977 |      "output_type": "execute_result"
 978 |     }
 979 |    ],
 980 |    "source": [
 981 |     "before = id(X)\n",
 982 |     "X += Y\n",
 983 |     "id(X) == before"
 984 |    ]
 985 |   },
 986 |   {
 987 |    "cell_type": "markdown",
 988 |    "id": "62d8bda0",
 989 |    "metadata": {
 990 |     "slideshow": {
 991 |      "slide_type": "slide"
 992 |     }
 993 |    },
 994 |    "source": [
 995 |     "转换为NumPy张量（`ndarray`）"
 996 |    ]
 997 |   },
 998 |   {
 999 |    "cell_type": "code",
1000 |    "execution_count": 23,
1001 |    "id": "7386f580",
1002 |    "metadata": {
1003 |     "execution": {
1004 |      "iopub.execute_input": "2023-08-18T07:05:03.259655Z",
1005 |      "iopub.status.busy": "2023-08-18T07:05:03.259273Z",
1006 |      "iopub.status.idle": "2023-08-18T07:05:03.266501Z",
1007 |      "shell.execute_reply": "2023-08-18T07:05:03.265738Z"
1008 |     },
1009 |     "origin_pos": 100,
1010 |     "tab": [
1011 |      "pytorch"
1012 |     ]
1013 |    },
1014 |    "outputs": [
1015 |     {
1016 |      "data": {
1017 |       "text/plain": [
1018 |        "(numpy.ndarray, torch.Tensor)"
1019 |       ]
1020 |      },
1021 |      "execution_count": 23,
1022 |      "metadata": {},
1023 |      "output_type": "execute_result"
1024 |     }
1025 |    ],
1026 |    "source": [
1027 |     "A = X.numpy()\n",
1028 |     "B = torch.tensor(A)\n",
1029 |     "type(A), type(B)"
1030 |    ]
1031 |   },
1032 |   {
1033 |    "cell_type": "markdown",
1034 |    "id": "bd310838",
1035 |    "metadata": {
1036 |     "slideshow": {
1037 |      "slide_type": "-"
1038 |     }
1039 |    },
1040 |    "source": [
1041 |     "将大小为1的张量转换为Python标量"
1042 |    ]
1043 |   },
1044 |   {
1045 |    "cell_type": "code",
1046 |    "execution_count": 24,
1047 |    "id": "10a429bd",
1048 |    "metadata": {
1049 |     "execution": {
1050 |      "iopub.execute_input": "2023-08-18T07:05:03.270566Z",
1051 |      "iopub.status.busy": "2023-08-18T07:05:03.270102Z",
1052 |      "iopub.status.idle": "2023-08-18T07:05:03.276982Z",
1053 |      "shell.execute_reply": "2023-08-18T07:05:03.276051Z"
1054 |     },
1055 |     "origin_pos": 105,
1056 |     "tab": [
1057 |      "pytorch"
1058 |     ]
1059 |    },
1060 |    "outputs": [
1061 |     {
1062 |      "data": {
1063 |       "text/plain": [
1064 |        "(tensor([3.5000]), 3.5, 3.5, 3)"
1065 |       ]
1066 |      },
1067 |      "execution_count": 24,
1068 |      "metadata": {},
1069 |      "output_type": "execute_result"
1070 |     }
1071 |    ],
1072 |    "source": [
1073 |     "a = torch.tensor([3.5])\n",
1074 |     "a, a.item(), float(a), int(a)"
1075 |    ]
1076 |   }
1077 |  ],
1078 |  "metadata": {
1079 |   "celltoolbar": "Slideshow",
1080 |   "language_info": {
1081 |    "name": "python"
1082 |   },
1083 |   "required_libs": [],
1084 |   "rise": {
1085 |    "autolaunch": true,
1086 |    "enable_chalkboard": true,
1087 |    "overlay": "<div class='my-top-right'><img height=80px src='http://d2l.ai/_static/logo-with-text.png'/></div><div class='my-top-left'></div>",
1088 |    "scroll": true
1089 |   }
1090 |  },
1091 |  "nbformat": 4,
1092 |  "nbformat_minor": 5
1093 | }


--------------------------------------------------------------------------------
/chapter_preliminaries/pandas.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "markdown",
  5 |    "id": "bbe8a716",
  6 |    "metadata": {
  7 |     "slideshow": {
  8 |      "slide_type": "-"
  9 |     }
 10 |    },
 11 |    "source": [
 12 |     "# 数据预处理\n",
 13 |     "\n",
 14 |     "创建一个人工数据集，并存储在CSV（逗号分隔值）文件中"
 15 |    ]
 16 |   },
 17 |   {
 18 |    "cell_type": "code",
 19 |    "execution_count": 1,
 20 |    "id": "ee72fd16",
 21 |    "metadata": {
 22 |     "execution": {
 23 |      "iopub.execute_input": "2023-08-18T07:03:38.903209Z",
 24 |      "iopub.status.busy": "2023-08-18T07:03:38.902351Z",
 25 |      "iopub.status.idle": "2023-08-18T07:03:38.918117Z",
 26 |      "shell.execute_reply": "2023-08-18T07:03:38.916775Z"
 27 |     },
 28 |     "origin_pos": 1,
 29 |     "tab": [
 30 |      "pytorch"
 31 |     ]
 32 |    },
 33 |    "outputs": [],
 34 |    "source": [
 35 |     "import os\n",
 36 |     "\n",
 37 |     "os.makedirs(os.path.join('..', 'data'), exist_ok=True)\n",
 38 |     "data_file = os.path.join('..', 'data', 'house_tiny.csv')\n",
 39 |     "with open(data_file, 'w') as f:\n",
 40 |     "    f.write('NumRooms,Alley,Price\\n')\n",
 41 |     "    f.write('NA,Pave,127500\\n')\n",
 42 |     "    f.write('2,NA,106000\\n')\n",
 43 |     "    f.write('4,NA,178100\\n')\n",
 44 |     "    f.write('NA,NA,140000\\n')"
 45 |    ]
 46 |   },
 47 |   {
 48 |    "cell_type": "markdown",
 49 |    "id": "50063800",
 50 |    "metadata": {
 51 |     "slideshow": {
 52 |      "slide_type": "slide"
 53 |     }
 54 |    },
 55 |    "source": [
 56 |     "从创建的CSV文件中加载原始数据集"
 57 |    ]
 58 |   },
 59 |   {
 60 |    "cell_type": "code",
 61 |    "execution_count": 2,
 62 |    "id": "5fb16e52",
 63 |    "metadata": {
 64 |     "execution": {
 65 |      "iopub.execute_input": "2023-08-18T07:03:38.923957Z",
 66 |      "iopub.status.busy": "2023-08-18T07:03:38.923101Z",
 67 |      "iopub.status.idle": "2023-08-18T07:03:39.372116Z",
 68 |      "shell.execute_reply": "2023-08-18T07:03:39.371151Z"
 69 |     },
 70 |     "origin_pos": 3,
 71 |     "tab": [
 72 |      "pytorch"
 73 |     ]
 74 |    },
 75 |    "outputs": [
 76 |     {
 77 |      "name": "stdout",
 78 |      "output_type": "stream",
 79 |      "text": [
 80 |       "   NumRooms Alley   Price\n",
 81 |       "0       NaN  Pave  127500\n",
 82 |       "1       2.0   NaN  106000\n",
 83 |       "2       4.0   NaN  178100\n",
 84 |       "3       NaN   NaN  140000\n"
 85 |      ]
 86 |     }
 87 |    ],
 88 |    "source": [
 89 |     "import pandas as pd\n",
 90 |     "\n",
 91 |     "data = pd.read_csv(data_file)\n",
 92 |     "print(data)"
 93 |    ]
 94 |   },
 95 |   {
 96 |    "cell_type": "markdown",
 97 |    "id": "2a0d9ba1",
 98 |    "metadata": {
 99 |     "slideshow": {
100 |      "slide_type": "slide"
101 |     }
102 |    },
103 |    "source": [
104 |     "为了处理缺失的数据，典型的方法包括*插值法*和*删除法*，\n",
105 |     "这里，我们将考虑插值法"
106 |    ]
107 |   },
108 |   {
109 |    "cell_type": "code",
110 |    "execution_count": 3,
111 |    "id": "d460a301",
112 |    "metadata": {
113 |     "execution": {
114 |      "iopub.execute_input": "2023-08-18T07:03:39.375828Z",
115 |      "iopub.status.busy": "2023-08-18T07:03:39.375535Z",
116 |      "iopub.status.idle": "2023-08-18T07:03:39.389220Z",
117 |      "shell.execute_reply": "2023-08-18T07:03:39.387998Z"
118 |     },
119 |     "origin_pos": 5,
120 |     "tab": [
121 |      "pytorch"
122 |     ]
123 |    },
124 |    "outputs": [
125 |     {
126 |      "name": "stdout",
127 |      "output_type": "stream",
128 |      "text": [
129 |       "   NumRooms Alley\n",
130 |       "0       3.0  Pave\n",
131 |       "1       2.0   NaN\n",
132 |       "2       4.0   NaN\n",
133 |       "3       3.0   NaN\n"
134 |      ]
135 |     }
136 |    ],
137 |    "source": [
138 |     "inputs, outputs = data.iloc[:, 0:2], data.iloc[:, 2]\n",
139 |     "inputs = inputs.fillna(inputs.mean())\n",
140 |     "print(inputs)"
141 |    ]
142 |   },
143 |   {
144 |    "cell_type": "markdown",
145 |    "id": "13fbca82",
146 |    "metadata": {
147 |     "slideshow": {
148 |      "slide_type": "slide"
149 |     }
150 |    },
151 |    "source": [
152 |     "对于`inputs`中的类别值或离散值，我们将“NaN”视为一个类别"
153 |    ]
154 |   },
155 |   {
156 |    "cell_type": "code",
157 |    "execution_count": 4,
158 |    "id": "09ab8738",
159 |    "metadata": {
160 |     "execution": {
161 |      "iopub.execute_input": "2023-08-18T07:03:39.394176Z",
162 |      "iopub.status.busy": "2023-08-18T07:03:39.393444Z",
163 |      "iopub.status.idle": "2023-08-18T07:03:39.409892Z",
164 |      "shell.execute_reply": "2023-08-18T07:03:39.408559Z"
165 |     },
166 |     "origin_pos": 7,
167 |     "tab": [
168 |      "pytorch"
169 |     ]
170 |    },
171 |    "outputs": [
172 |     {
173 |      "name": "stdout",
174 |      "output_type": "stream",
175 |      "text": [
176 |       "   NumRooms  Alley_Pave  Alley_nan\n",
177 |       "0       3.0           1          0\n",
178 |       "1       2.0           0          1\n",
179 |       "2       4.0           0          1\n",
180 |       "3       3.0           0          1\n"
181 |      ]
182 |     }
183 |    ],
184 |    "source": [
185 |     "inputs = pd.get_dummies(inputs, dummy_na=True)\n",
186 |     "print(inputs)"
187 |    ]
188 |   },
189 |   {
190 |    "cell_type": "markdown",
191 |    "id": "56c7800a",
192 |    "metadata": {
193 |     "slideshow": {
194 |      "slide_type": "slide"
195 |     }
196 |    },
197 |    "source": [
198 |     "现在`inputs`和`outputs`中的所有条目都是数值类型，它们可以转换为张量格式"
199 |    ]
200 |   },
201 |   {
202 |    "cell_type": "code",
203 |    "execution_count": 5,
204 |    "id": "4f551c6d",
205 |    "metadata": {
206 |     "execution": {
207 |      "iopub.execute_input": "2023-08-18T07:03:39.414531Z",
208 |      "iopub.status.busy": "2023-08-18T07:03:39.413831Z",
209 |      "iopub.status.idle": "2023-08-18T07:03:40.467689Z",
210 |      "shell.execute_reply": "2023-08-18T07:03:40.466637Z"
211 |     },
212 |     "origin_pos": 10,
213 |     "tab": [
214 |      "pytorch"
215 |     ]
216 |    },
217 |    "outputs": [
218 |     {
219 |      "data": {
220 |       "text/plain": [
221 |        "(tensor([[3., 1., 0.],\n",
222 |        "         [2., 0., 1.],\n",
223 |        "         [4., 0., 1.],\n",
224 |        "         [3., 0., 1.]], dtype=torch.float64),\n",
225 |        " tensor([127500., 106000., 178100., 140000.], dtype=torch.float64))"
226 |       ]
227 |      },
228 |      "execution_count": 5,
229 |      "metadata": {},
230 |      "output_type": "execute_result"
231 |     }
232 |    ],
233 |    "source": [
234 |     "import torch\n",
235 |     "\n",
236 |     "X = torch.tensor(inputs.to_numpy(dtype=float))\n",
237 |     "y = torch.tensor(outputs.to_numpy(dtype=float))\n",
238 |     "X, y"
239 |    ]
240 |   }
241 |  ],
242 |  "metadata": {
243 |   "celltoolbar": "Slideshow",
244 |   "language_info": {
245 |    "name": "python"
246 |   },
247 |   "required_libs": [],
248 |   "rise": {
249 |    "autolaunch": true,
250 |    "enable_chalkboard": true,
251 |    "overlay": "<div class='my-top-right'><img height=80px src='http://d2l.ai/_static/logo-with-text.png'/></div><div class='my-top-left'></div>",
252 |    "scroll": true
253 |   }
254 |  },
255 |  "nbformat": 4,
256 |  "nbformat_minor": 5
257 | }


--------------------------------------------------------------------------------
/chapter_preliminaries/rise.css:
--------------------------------------------------------------------------------
 1 | 
 2 | div.text_cell_render.rendered_html {
 3 |     padding: 0.35em 0.1em;
 4 | }
 5 | 
 6 | div.code_cell {
 7 |     font-size: 120%;
 8 | }
 9 | 
10 | div.my-top-right {
11 |     position: absolute;
12 |     right: 5%;
13 |     top: 1em;
14 |     font-size: 2em;
15 | }
16 | 
17 | div.my-top-left {
18 |     position: absolute;
19 |     left: 5%;
20 |     top: 1em;
21 |     font-size: 2em;
22 | }
23 | 


--------------------------------------------------------------------------------
/chapter_recurrent-modern/encoder-decoder.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "markdown",
  5 |    "id": "80a4901a",
  6 |    "metadata": {
  7 |     "slideshow": {
  8 |      "slide_type": "-"
  9 |     }
 10 |    },
 11 |    "source": [
 12 |     "# 编码器-解码器架构\n",
 13 |     "\n",
 14 |     "编码器"
 15 |    ]
 16 |   },
 17 |   {
 18 |    "cell_type": "code",
 19 |    "execution_count": 1,
 20 |    "id": "17f77c60",
 21 |    "metadata": {
 22 |     "execution": {
 23 |      "iopub.execute_input": "2023-08-18T07:05:48.406295Z",
 24 |      "iopub.status.busy": "2023-08-18T07:05:48.405469Z",
 25 |      "iopub.status.idle": "2023-08-18T07:05:49.653322Z",
 26 |      "shell.execute_reply": "2023-08-18T07:05:49.651979Z"
 27 |     },
 28 |     "origin_pos": 2,
 29 |     "tab": [
 30 |      "pytorch"
 31 |     ]
 32 |    },
 33 |    "outputs": [],
 34 |    "source": [
 35 |     "from torch import nn\n",
 36 |     "\n",
 37 |     "\n",
 38 |     "class Encoder(nn.Module):\n",
 39 |     "    \"\"\"编码器-解码器架构的基本编码器接口\"\"\"\n",
 40 |     "    def __init__(self, **kwargs):\n",
 41 |     "        super(Encoder, self).__init__(**kwargs)\n",
 42 |     "\n",
 43 |     "    def forward(self, X, *args):\n",
 44 |     "        raise NotImplementedError"
 45 |    ]
 46 |   },
 47 |   {
 48 |    "cell_type": "markdown",
 49 |    "id": "614d0a9c",
 50 |    "metadata": {
 51 |     "slideshow": {
 52 |      "slide_type": "slide"
 53 |     }
 54 |    },
 55 |    "source": [
 56 |     "解码器"
 57 |    ]
 58 |   },
 59 |   {
 60 |    "cell_type": "code",
 61 |    "execution_count": 2,
 62 |    "id": "5c7a6471",
 63 |    "metadata": {
 64 |     "execution": {
 65 |      "iopub.execute_input": "2023-08-18T07:05:49.659889Z",
 66 |      "iopub.status.busy": "2023-08-18T07:05:49.659020Z",
 67 |      "iopub.status.idle": "2023-08-18T07:05:49.666360Z",
 68 |      "shell.execute_reply": "2023-08-18T07:05:49.665230Z"
 69 |     },
 70 |     "origin_pos": 7,
 71 |     "tab": [
 72 |      "pytorch"
 73 |     ]
 74 |    },
 75 |    "outputs": [],
 76 |    "source": [
 77 |     "class Decoder(nn.Module):\n",
 78 |     "    \"\"\"编码器-解码器架构的基本解码器接口\"\"\"\n",
 79 |     "    def __init__(self, **kwargs):\n",
 80 |     "        super(Decoder, self).__init__(**kwargs)\n",
 81 |     "\n",
 82 |     "    def init_state(self, enc_outputs, *args):\n",
 83 |     "        raise NotImplementedError\n",
 84 |     "\n",
 85 |     "    def forward(self, X, state):\n",
 86 |     "        raise NotImplementedError"
 87 |    ]
 88 |   },
 89 |   {
 90 |    "cell_type": "markdown",
 91 |    "id": "eae87cdc",
 92 |    "metadata": {
 93 |     "slideshow": {
 94 |      "slide_type": "slide"
 95 |     }
 96 |    },
 97 |    "source": [
 98 |     "合并编码器和解码器"
 99 |    ]
100 |   },
101 |   {
102 |    "cell_type": "code",
103 |    "execution_count": 3,
104 |    "id": "53fb0929",
105 |    "metadata": {
106 |     "execution": {
107 |      "iopub.execute_input": "2023-08-18T07:05:49.671685Z",
108 |      "iopub.status.busy": "2023-08-18T07:05:49.670944Z",
109 |      "iopub.status.idle": "2023-08-18T07:05:49.678831Z",
110 |      "shell.execute_reply": "2023-08-18T07:05:49.677718Z"
111 |     },
112 |     "origin_pos": 12,
113 |     "tab": [
114 |      "pytorch"
115 |     ]
116 |    },
117 |    "outputs": [],
118 |    "source": [
119 |     "class EncoderDecoder(nn.Module):\n",
120 |     "    \"\"\"编码器-解码器架构的基类\"\"\"\n",
121 |     "    def __init__(self, encoder, decoder, **kwargs):\n",
122 |     "        super(EncoderDecoder, self).__init__(**kwargs)\n",
123 |     "        self.encoder = encoder\n",
124 |     "        self.decoder = decoder\n",
125 |     "\n",
126 |     "    def forward(self, enc_X, dec_X, *args):\n",
127 |     "        enc_outputs = self.encoder(enc_X, *args)\n",
128 |     "        dec_state = self.decoder.init_state(enc_outputs, *args)\n",
129 |     "        return self.decoder(dec_X, dec_state)"
130 |    ]
131 |   }
132 |  ],
133 |  "metadata": {
134 |   "celltoolbar": "Slideshow",
135 |   "language_info": {
136 |    "name": "python"
137 |   },
138 |   "required_libs": [],
139 |   "rise": {
140 |    "autolaunch": true,
141 |    "enable_chalkboard": true,
142 |    "overlay": "<div class='my-top-right'><img height=80px src='http://d2l.ai/_static/logo-with-text.png'/></div><div class='my-top-left'></div>",
143 |    "scroll": true
144 |   }
145 |  },
146 |  "nbformat": 4,
147 |  "nbformat_minor": 5
148 | }


--------------------------------------------------------------------------------
/chapter_recurrent-modern/rise.css:
--------------------------------------------------------------------------------
 1 | 
 2 | div.text_cell_render.rendered_html {
 3 |     padding: 0.35em 0.1em;
 4 | }
 5 | 
 6 | div.code_cell {
 7 |     font-size: 120%;
 8 | }
 9 | 
10 | div.my-top-right {
11 |     position: absolute;
12 |     right: 5%;
13 |     top: 1em;
14 |     font-size: 2em;
15 | }
16 | 
17 | div.my-top-left {
18 |     position: absolute;
19 |     left: 5%;
20 |     top: 1em;
21 |     font-size: 2em;
22 | }
23 | 


--------------------------------------------------------------------------------
/chapter_recurrent-neural-networks/rise.css:
--------------------------------------------------------------------------------
 1 | 
 2 | div.text_cell_render.rendered_html {
 3 |     padding: 0.35em 0.1em;
 4 | }
 5 | 
 6 | div.code_cell {
 7 |     font-size: 120%;
 8 | }
 9 | 
10 | div.my-top-right {
11 |     position: absolute;
12 |     right: 5%;
13 |     top: 1em;
14 |     font-size: 2em;
15 | }
16 | 
17 | div.my-top-left {
18 |     position: absolute;
19 |     left: 5%;
20 |     top: 1em;
21 |     font-size: 2em;
22 | }
23 | 


--------------------------------------------------------------------------------
/chapter_recurrent-neural-networks/text-preprocessing.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "markdown",
  5 |    "id": "0afcc24a",
  6 |    "metadata": {
  7 |     "slideshow": {
  8 |      "slide_type": "-"
  9 |     }
 10 |    },
 11 |    "source": [
 12 |     "# 文本预处理\n",
 13 |     "\n"
 14 |    ]
 15 |   },
 16 |   {
 17 |    "cell_type": "code",
 18 |    "execution_count": 1,
 19 |    "id": "bb8907ca",
 20 |    "metadata": {
 21 |     "execution": {
 22 |      "iopub.execute_input": "2023-08-18T07:02:24.243885Z",
 23 |      "iopub.status.busy": "2023-08-18T07:02:24.243343Z",
 24 |      "iopub.status.idle": "2023-08-18T07:02:26.213654Z",
 25 |      "shell.execute_reply": "2023-08-18T07:02:26.212745Z"
 26 |     },
 27 |     "origin_pos": 2,
 28 |     "tab": [
 29 |      "pytorch"
 30 |     ]
 31 |    },
 32 |    "outputs": [],
 33 |    "source": [
 34 |     "import collections\n",
 35 |     "import re\n",
 36 |     "from d2l import torch as d2l"
 37 |    ]
 38 |   },
 39 |   {
 40 |    "cell_type": "markdown",
 41 |    "id": "80e119b6",
 42 |    "metadata": {
 43 |     "slideshow": {
 44 |      "slide_type": "-"
 45 |     }
 46 |    },
 47 |    "source": [
 48 |     "将数据集读取到由多条文本行组成的列表中"
 49 |    ]
 50 |   },
 51 |   {
 52 |    "cell_type": "code",
 53 |    "execution_count": 2,
 54 |    "id": "ac0f9f0d",
 55 |    "metadata": {
 56 |     "execution": {
 57 |      "iopub.execute_input": "2023-08-18T07:02:26.218338Z",
 58 |      "iopub.status.busy": "2023-08-18T07:02:26.217685Z",
 59 |      "iopub.status.idle": "2023-08-18T07:02:26.304928Z",
 60 |      "shell.execute_reply": "2023-08-18T07:02:26.304151Z"
 61 |     },
 62 |     "origin_pos": 6,
 63 |     "tab": [
 64 |      "pytorch"
 65 |     ]
 66 |    },
 67 |    "outputs": [
 68 |     {
 69 |      "name": "stdout",
 70 |      "output_type": "stream",
 71 |      "text": [
 72 |       "Downloading ../data/timemachine.txt from http://d2l-data.s3-accelerate.amazonaws.com/timemachine.txt...\n",
 73 |       "# 文本总行数: 3221\n",
 74 |       "the time machine by h g wells\n",
 75 |       "twinkled and his usually pale face was flushed and animated the\n"
 76 |      ]
 77 |     }
 78 |    ],
 79 |    "source": [
 80 |     "d2l.DATA_HUB['time_machine'] = (d2l.DATA_URL + 'timemachine.txt',\n",
 81 |     "                                '090b5e7e70c295757f55df93cb0a180b9691891a')\n",
 82 |     "\n",
 83 |     "def read_time_machine():  \n",
 84 |     "    \"\"\"将时间机器数据集加载到文本行的列表中\"\"\"\n",
 85 |     "    with open(d2l.download('time_machine'), 'r') as f:\n",
 86 |     "        lines = f.readlines()\n",
 87 |     "    return [re.sub('[^A-Za-z]+', ' ', line).strip().lower() for line in lines]\n",
 88 |     "\n",
 89 |     "lines = read_time_machine()\n",
 90 |     "print(f'# 文本总行数: {len(lines)}')\n",
 91 |     "print(lines[0])\n",
 92 |     "print(lines[10])"
 93 |    ]
 94 |   },
 95 |   {
 96 |    "cell_type": "markdown",
 97 |    "id": "b899d3e4",
 98 |    "metadata": {
 99 |     "slideshow": {
100 |      "slide_type": "slide"
101 |     }
102 |    },
103 |    "source": [
104 |     "每个文本序列又被拆分成一个词元列表"
105 |    ]
106 |   },
107 |   {
108 |    "cell_type": "code",
109 |    "execution_count": 3,
110 |    "id": "afd6a9df",
111 |    "metadata": {
112 |     "execution": {
113 |      "iopub.execute_input": "2023-08-18T07:02:26.308604Z",
114 |      "iopub.status.busy": "2023-08-18T07:02:26.308048Z",
115 |      "iopub.status.idle": "2023-08-18T07:02:26.317083Z",
116 |      "shell.execute_reply": "2023-08-18T07:02:26.316264Z"
117 |     },
118 |     "origin_pos": 8,
119 |     "tab": [
120 |      "pytorch"
121 |     ]
122 |    },
123 |    "outputs": [
124 |     {
125 |      "name": "stdout",
126 |      "output_type": "stream",
127 |      "text": [
128 |       "['the', 'time', 'machine', 'by', 'h', 'g', 'wells']\n",
129 |       "[]\n",
130 |       "[]\n",
131 |       "[]\n",
132 |       "[]\n",
133 |       "['i']\n",
134 |       "[]\n",
135 |       "[]\n",
136 |       "['the', 'time', 'traveller', 'for', 'so', 'it', 'will', 'be', 'convenient', 'to', 'speak', 'of', 'him']\n",
137 |       "['was', 'expounding', 'a', 'recondite', 'matter', 'to', 'us', 'his', 'grey', 'eyes', 'shone', 'and']\n",
138 |       "['twinkled', 'and', 'his', 'usually', 'pale', 'face', 'was', 'flushed', 'and', 'animated', 'the']\n"
139 |      ]
140 |     }
141 |    ],
142 |    "source": [
143 |     "def tokenize(lines, token='word'):  \n",
144 |     "    \"\"\"将文本行拆分为单词或字符词元\"\"\"\n",
145 |     "    if token == 'word':\n",
146 |     "        return [line.split() for line in lines]\n",
147 |     "    elif token == 'char':\n",
148 |     "        return [list(line) for line in lines]\n",
149 |     "    else:\n",
150 |     "        print('错误：未知词元类型：' + token)\n",
151 |     "\n",
152 |     "tokens = tokenize(lines)\n",
153 |     "for i in range(11):\n",
154 |     "    print(tokens[i])"
155 |    ]
156 |   },
157 |   {
158 |    "cell_type": "markdown",
159 |    "id": "4f4bc35b",
160 |    "metadata": {
161 |     "slideshow": {
162 |      "slide_type": "slide"
163 |     }
164 |    },
165 |    "source": [
166 |     "构建一个字典，通常也叫做*词表*（vocabulary），\n",
167 |     "用来将字符串类型的词元映射到从$0$开始的数字索引中"
168 |    ]
169 |   },
170 |   {
171 |    "cell_type": "code",
172 |    "execution_count": 4,
173 |    "id": "16db7dad",
174 |    "metadata": {
175 |     "execution": {
176 |      "iopub.execute_input": "2023-08-18T07:02:26.320587Z",
177 |      "iopub.status.busy": "2023-08-18T07:02:26.320050Z",
178 |      "iopub.status.idle": "2023-08-18T07:02:26.330519Z",
179 |      "shell.execute_reply": "2023-08-18T07:02:26.329736Z"
180 |     },
181 |     "origin_pos": 10,
182 |     "tab": [
183 |      "pytorch"
184 |     ]
185 |    },
186 |    "outputs": [],
187 |    "source": [
188 |     "class Vocab:  \n",
189 |     "    \"\"\"文本词表\"\"\"\n",
190 |     "    def __init__(self, tokens=None, min_freq=0, reserved_tokens=None):\n",
191 |     "        if tokens is None:\n",
192 |     "            tokens = []\n",
193 |     "        if reserved_tokens is None:\n",
194 |     "            reserved_tokens = []\n",
195 |     "        counter = count_corpus(tokens)\n",
196 |     "        self._token_freqs = sorted(counter.items(), key=lambda x: x[1],\n",
197 |     "                                   reverse=True)\n",
198 |     "        self.idx_to_token = ['<unk>'] + reserved_tokens\n",
199 |     "        self.token_to_idx = {token: idx\n",
200 |     "                             for idx, token in enumerate(self.idx_to_token)}\n",
201 |     "        for token, freq in self._token_freqs:\n",
202 |     "            if freq < min_freq:\n",
203 |     "                break\n",
204 |     "            if token not in self.token_to_idx:\n",
205 |     "                self.idx_to_token.append(token)\n",
206 |     "                self.token_to_idx[token] = len(self.idx_to_token) - 1\n",
207 |     "\n",
208 |     "    def __len__(self):\n",
209 |     "        return len(self.idx_to_token)\n",
210 |     "\n",
211 |     "    def __getitem__(self, tokens):\n",
212 |     "        if not isinstance(tokens, (list, tuple)):\n",
213 |     "            return self.token_to_idx.get(tokens, self.unk)\n",
214 |     "        return [self.__getitem__(token) for token in tokens]\n",
215 |     "\n",
216 |     "    def to_tokens(self, indices):\n",
217 |     "        if not isinstance(indices, (list, tuple)):\n",
218 |     "            return self.idx_to_token[indices]\n",
219 |     "        return [self.idx_to_token[index] for index in indices]\n",
220 |     "\n",
221 |     "    @property\n",
222 |     "    def unk(self):\n",
223 |     "        return 0\n",
224 |     "\n",
225 |     "    @property\n",
226 |     "    def token_freqs(self):\n",
227 |     "        return self._token_freqs\n",
228 |     "\n",
229 |     "def count_corpus(tokens):  \n",
230 |     "    \"\"\"统计词元的频率\"\"\"\n",
231 |     "    if len(tokens) == 0 or isinstance(tokens[0], list):\n",
232 |     "        tokens = [token for line in tokens for token in line]\n",
233 |     "    return collections.Counter(tokens)"
234 |    ]
235 |   },
236 |   {
237 |    "cell_type": "markdown",
238 |    "id": "8bea4a87",
239 |    "metadata": {
240 |     "slideshow": {
241 |      "slide_type": "slide"
242 |     }
243 |    },
244 |    "source": [
245 |     "构建词表"
246 |    ]
247 |   },
248 |   {
249 |    "cell_type": "code",
250 |    "execution_count": 5,
251 |    "id": "1501d478",
252 |    "metadata": {
253 |     "execution": {
254 |      "iopub.execute_input": "2023-08-18T07:02:26.333942Z",
255 |      "iopub.status.busy": "2023-08-18T07:02:26.333382Z",
256 |      "iopub.status.idle": "2023-08-18T07:02:26.346927Z",
257 |      "shell.execute_reply": "2023-08-18T07:02:26.346182Z"
258 |     },
259 |     "origin_pos": 12,
260 |     "tab": [
261 |      "pytorch"
262 |     ]
263 |    },
264 |    "outputs": [
265 |     {
266 |      "name": "stdout",
267 |      "output_type": "stream",
268 |      "text": [
269 |       "[('<unk>', 0), ('the', 1), ('i', 2), ('and', 3), ('of', 4), ('a', 5), ('to', 6), ('was', 7), ('in', 8), ('that', 9)]\n"
270 |      ]
271 |     }
272 |    ],
273 |    "source": [
274 |     "vocab = Vocab(tokens)\n",
275 |     "print(list(vocab.token_to_idx.items())[:10])"
276 |    ]
277 |   },
278 |   {
279 |    "cell_type": "markdown",
280 |    "id": "f1cfdd0c",
281 |    "metadata": {
282 |     "slideshow": {
283 |      "slide_type": "-"
284 |     }
285 |    },
286 |    "source": [
287 |     "将每一条文本行转换成一个数字索引列表"
288 |    ]
289 |   },
290 |   {
291 |    "cell_type": "code",
292 |    "execution_count": 6,
293 |    "id": "f0244f09",
294 |    "metadata": {
295 |     "execution": {
296 |      "iopub.execute_input": "2023-08-18T07:02:26.350343Z",
297 |      "iopub.status.busy": "2023-08-18T07:02:26.349779Z",
298 |      "iopub.status.idle": "2023-08-18T07:02:26.354215Z",
299 |      "shell.execute_reply": "2023-08-18T07:02:26.353468Z"
300 |     },
301 |     "origin_pos": 14,
302 |     "tab": [
303 |      "pytorch"
304 |     ]
305 |    },
306 |    "outputs": [
307 |     {
308 |      "name": "stdout",
309 |      "output_type": "stream",
310 |      "text": [
311 |       "文本: ['the', 'time', 'machine', 'by', 'h', 'g', 'wells']\n",
312 |       "索引: [1, 19, 50, 40, 2183, 2184, 400]\n",
313 |       "文本: ['twinkled', 'and', 'his', 'usually', 'pale', 'face', 'was', 'flushed', 'and', 'animated', 'the']\n",
314 |       "索引: [2186, 3, 25, 1044, 362, 113, 7, 1421, 3, 1045, 1]\n"
315 |      ]
316 |     }
317 |    ],
318 |    "source": [
319 |     "for i in [0, 10]:\n",
320 |     "    print('文本:', tokens[i])\n",
321 |     "    print('索引:', vocab[tokens[i]])"
322 |    ]
323 |   },
324 |   {
325 |    "cell_type": "markdown",
326 |    "id": "b400e092",
327 |    "metadata": {
328 |     "slideshow": {
329 |      "slide_type": "slide"
330 |     }
331 |    },
332 |    "source": [
333 |     "将所有功能打包到`load_corpus_time_machine`函数中"
334 |    ]
335 |   },
336 |   {
337 |    "cell_type": "code",
338 |    "execution_count": 7,
339 |    "id": "578ed76f",
340 |    "metadata": {
341 |     "execution": {
342 |      "iopub.execute_input": "2023-08-18T07:02:26.357414Z",
343 |      "iopub.status.busy": "2023-08-18T07:02:26.357141Z",
344 |      "iopub.status.idle": "2023-08-18T07:02:26.470812Z",
345 |      "shell.execute_reply": "2023-08-18T07:02:26.470008Z"
346 |     },
347 |     "origin_pos": 16,
348 |     "tab": [
349 |      "pytorch"
350 |     ]
351 |    },
352 |    "outputs": [
353 |     {
354 |      "data": {
355 |       "text/plain": [
356 |        "(170580, 28)"
357 |       ]
358 |      },
359 |      "execution_count": 7,
360 |      "metadata": {},
361 |      "output_type": "execute_result"
362 |     }
363 |    ],
364 |    "source": [
365 |     "def load_corpus_time_machine(max_tokens=-1):  \n",
366 |     "    \"\"\"返回时光机器数据集的词元索引列表和词表\"\"\"\n",
367 |     "    lines = read_time_machine()\n",
368 |     "    tokens = tokenize(lines, 'char')\n",
369 |     "    vocab = Vocab(tokens)\n",
370 |     "    corpus = [vocab[token] for line in tokens for token in line]\n",
371 |     "    if max_tokens > 0:\n",
372 |     "        corpus = corpus[:max_tokens]\n",
373 |     "    return corpus, vocab\n",
374 |     "\n",
375 |     "corpus, vocab = load_corpus_time_machine()\n",
376 |     "len(corpus), len(vocab)"
377 |    ]
378 |   }
379 |  ],
380 |  "metadata": {
381 |   "celltoolbar": "Slideshow",
382 |   "language_info": {
383 |    "name": "python"
384 |   },
385 |   "required_libs": [],
386 |   "rise": {
387 |    "autolaunch": true,
388 |    "enable_chalkboard": true,
389 |    "overlay": "<div class='my-top-right'><img height=80px src='http://d2l.ai/_static/logo-with-text.png'/></div><div class='my-top-left'></div>",
390 |    "scroll": true
391 |   }
392 |  },
393 |  "nbformat": 4,
394 |  "nbformat_minor": 5
395 | }


--------------------------------------------------------------------------------