├── README.md
├── chapter_attention-mechanisms
├── attention-scoring-functions.ipynb
├── bahdanau-attention.ipynb
├── multihead-attention.ipynb
├── nadaraya-waston.ipynb
├── rise.css
├── self-attention-and-positional-encoding.ipynb
└── transformer.ipynb
├── chapter_computational-performance
├── multiple-gpus-concise.ipynb
├── multiple-gpus.ipynb
└── rise.css
├── chapter_computer-vision
├── anchor.ipynb
├── bounding-box.ipynb
├── fcn.ipynb
├── fine-tuning.ipynb
├── image-augmentation.ipynb
├── kaggle-cifar10.ipynb
├── kaggle-dog.ipynb
├── multiscale-object-detection.ipynb
├── neural-style.ipynb
├── object-detection-dataset.ipynb
├── rise.css
├── semantic-segmentation-and-dataset.ipynb
├── ssd.ipynb
└── transposed-conv.ipynb
├── chapter_convolutional-modern
├── alexnet.ipynb
├── batch-norm.ipynb
├── densenet.ipynb
├── googlenet.ipynb
├── nin.ipynb
├── resnet.ipynb
├── rise.css
└── vgg.ipynb
├── chapter_convolutional-neural-networks
├── channels.ipynb
├── conv-layer.ipynb
├── lenet.ipynb
├── padding-and-strides.ipynb
├── pooling.ipynb
└── rise.css
├── chapter_deep-learning-computation
├── custom-layer.ipynb
├── model-construction.ipynb
├── parameters.ipynb
├── read-write.ipynb
├── rise.css
└── use-gpu.ipynb
├── chapter_linear-networks
├── image-classification-dataset.ipynb
├── linear-regression-concise.ipynb
├── linear-regression-scratch.ipynb
├── linear-regression.ipynb
├── rise.css
├── softmax-regression-concise.ipynb
└── softmax-regression-scratch.ipynb
├── chapter_multilayer-perceptrons
├── dropout.ipynb
├── kaggle-house-price.ipynb
├── mlp-concise.ipynb
├── mlp-scratch.ipynb
├── mlp.ipynb
├── numerical-stability-and-init.ipynb
├── rise.css
├── underfit-overfit.ipynb
└── weight-decay.ipynb
├── chapter_natural-language-processing-applications
├── natural-language-inference-and-dataset.ipynb
├── natural-language-inference-bert.ipynb
└── rise.css
├── chapter_preliminaries
├── autograd.ipynb
├── calculus.ipynb
├── linear-algebra.ipynb
├── lookup-api.ipynb
├── ndarray.ipynb
├── pandas.ipynb
└── rise.css
├── chapter_recurrent-modern
├── bi-rnn.ipynb
├── deep-rnn.ipynb
├── encoder-decoder.ipynb
├── gru.ipynb
├── lstm.ipynb
├── machine-translation-and-dataset.ipynb
├── rise.css
└── seq2seq.ipynb
└── chapter_recurrent-neural-networks
├── language-models-and-dataset.ipynb
├── rise.css
├── rnn-concise.ipynb
├── rnn-scratch.ipynb
├── sequence.ipynb
└── text-preprocessing.ipynb
/README.md:
--------------------------------------------------------------------------------
1 | # d2l-ai/d2l-zh-pytorch-slides
2 |
3 | This repo contains generated notebook slides. To open them locally, we suggest installing the [rise](https://rise.readthedocs.io/en/stable/) extension.
4 |
5 | You can also preview them in nbviewer:
6 | - [chapter_preliminaries/ndarray.ipynb](https://nbviewer.jupyter.org/format/slides/github/d2l-ai/d2l-zh-pytorch-slides/blob/main/chapter_preliminaries/ndarray.ipynb)
7 | - [chapter_preliminaries/pandas.ipynb](https://nbviewer.jupyter.org/format/slides/github/d2l-ai/d2l-zh-pytorch-slides/blob/main/chapter_preliminaries/pandas.ipynb)
8 | - [chapter_preliminaries/linear-algebra.ipynb](https://nbviewer.jupyter.org/format/slides/github/d2l-ai/d2l-zh-pytorch-slides/blob/main/chapter_preliminaries/linear-algebra.ipynb)
9 | - [chapter_preliminaries/calculus.ipynb](https://nbviewer.jupyter.org/format/slides/github/d2l-ai/d2l-zh-pytorch-slides/blob/main/chapter_preliminaries/calculus.ipynb)
10 | - [chapter_preliminaries/autograd.ipynb](https://nbviewer.jupyter.org/format/slides/github/d2l-ai/d2l-zh-pytorch-slides/blob/main/chapter_preliminaries/autograd.ipynb)
11 | - [chapter_preliminaries/lookup-api.ipynb](https://nbviewer.jupyter.org/format/slides/github/d2l-ai/d2l-zh-pytorch-slides/blob/main/chapter_preliminaries/lookup-api.ipynb)
12 | - [chapter_linear-networks/linear-regression.ipynb](https://nbviewer.jupyter.org/format/slides/github/d2l-ai/d2l-zh-pytorch-slides/blob/main/chapter_linear-networks/linear-regression.ipynb)
13 | - [chapter_linear-networks/linear-regression-scratch.ipynb](https://nbviewer.jupyter.org/format/slides/github/d2l-ai/d2l-zh-pytorch-slides/blob/main/chapter_linear-networks/linear-regression-scratch.ipynb)
14 | - [chapter_linear-networks/linear-regression-concise.ipynb](https://nbviewer.jupyter.org/format/slides/github/d2l-ai/d2l-zh-pytorch-slides/blob/main/chapter_linear-networks/linear-regression-concise.ipynb)
15 | - [chapter_linear-networks/image-classification-dataset.ipynb](https://nbviewer.jupyter.org/format/slides/github/d2l-ai/d2l-zh-pytorch-slides/blob/main/chapter_linear-networks/image-classification-dataset.ipynb)
16 | - [chapter_linear-networks/softmax-regression-scratch.ipynb](https://nbviewer.jupyter.org/format/slides/github/d2l-ai/d2l-zh-pytorch-slides/blob/main/chapter_linear-networks/softmax-regression-scratch.ipynb)
17 | - [chapter_linear-networks/softmax-regression-concise.ipynb](https://nbviewer.jupyter.org/format/slides/github/d2l-ai/d2l-zh-pytorch-slides/blob/main/chapter_linear-networks/softmax-regression-concise.ipynb)
18 | - [chapter_multilayer-perceptrons/mlp.ipynb](https://nbviewer.jupyter.org/format/slides/github/d2l-ai/d2l-zh-pytorch-slides/blob/main/chapter_multilayer-perceptrons/mlp.ipynb)
19 | - [chapter_multilayer-perceptrons/mlp-scratch.ipynb](https://nbviewer.jupyter.org/format/slides/github/d2l-ai/d2l-zh-pytorch-slides/blob/main/chapter_multilayer-perceptrons/mlp-scratch.ipynb)
20 | - [chapter_multilayer-perceptrons/mlp-concise.ipynb](https://nbviewer.jupyter.org/format/slides/github/d2l-ai/d2l-zh-pytorch-slides/blob/main/chapter_multilayer-perceptrons/mlp-concise.ipynb)
21 | - [chapter_multilayer-perceptrons/underfit-overfit.ipynb](https://nbviewer.jupyter.org/format/slides/github/d2l-ai/d2l-zh-pytorch-slides/blob/main/chapter_multilayer-perceptrons/underfit-overfit.ipynb)
22 | - [chapter_multilayer-perceptrons/weight-decay.ipynb](https://nbviewer.jupyter.org/format/slides/github/d2l-ai/d2l-zh-pytorch-slides/blob/main/chapter_multilayer-perceptrons/weight-decay.ipynb)
23 | - [chapter_multilayer-perceptrons/dropout.ipynb](https://nbviewer.jupyter.org/format/slides/github/d2l-ai/d2l-zh-pytorch-slides/blob/main/chapter_multilayer-perceptrons/dropout.ipynb)
24 | - [chapter_multilayer-perceptrons/numerical-stability-and-init.ipynb](https://nbviewer.jupyter.org/format/slides/github/d2l-ai/d2l-zh-pytorch-slides/blob/main/chapter_multilayer-perceptrons/numerical-stability-and-init.ipynb)
25 | - [chapter_multilayer-perceptrons/kaggle-house-price.ipynb](https://nbviewer.jupyter.org/format/slides/github/d2l-ai/d2l-zh-pytorch-slides/blob/main/chapter_multilayer-perceptrons/kaggle-house-price.ipynb)
26 | - [chapter_deep-learning-computation/model-construction.ipynb](https://nbviewer.jupyter.org/format/slides/github/d2l-ai/d2l-zh-pytorch-slides/blob/main/chapter_deep-learning-computation/model-construction.ipynb)
27 | - [chapter_deep-learning-computation/parameters.ipynb](https://nbviewer.jupyter.org/format/slides/github/d2l-ai/d2l-zh-pytorch-slides/blob/main/chapter_deep-learning-computation/parameters.ipynb)
28 | - [chapter_deep-learning-computation/custom-layer.ipynb](https://nbviewer.jupyter.org/format/slides/github/d2l-ai/d2l-zh-pytorch-slides/blob/main/chapter_deep-learning-computation/custom-layer.ipynb)
29 | - [chapter_deep-learning-computation/read-write.ipynb](https://nbviewer.jupyter.org/format/slides/github/d2l-ai/d2l-zh-pytorch-slides/blob/main/chapter_deep-learning-computation/read-write.ipynb)
30 | - [chapter_deep-learning-computation/use-gpu.ipynb](https://nbviewer.jupyter.org/format/slides/github/d2l-ai/d2l-zh-pytorch-slides/blob/main/chapter_deep-learning-computation/use-gpu.ipynb)
31 | - [chapter_convolutional-neural-networks/conv-layer.ipynb](https://nbviewer.jupyter.org/format/slides/github/d2l-ai/d2l-zh-pytorch-slides/blob/main/chapter_convolutional-neural-networks/conv-layer.ipynb)
32 | - [chapter_convolutional-neural-networks/padding-and-strides.ipynb](https://nbviewer.jupyter.org/format/slides/github/d2l-ai/d2l-zh-pytorch-slides/blob/main/chapter_convolutional-neural-networks/padding-and-strides.ipynb)
33 | - [chapter_convolutional-neural-networks/channels.ipynb](https://nbviewer.jupyter.org/format/slides/github/d2l-ai/d2l-zh-pytorch-slides/blob/main/chapter_convolutional-neural-networks/channels.ipynb)
34 | - [chapter_convolutional-neural-networks/pooling.ipynb](https://nbviewer.jupyter.org/format/slides/github/d2l-ai/d2l-zh-pytorch-slides/blob/main/chapter_convolutional-neural-networks/pooling.ipynb)
35 | - [chapter_convolutional-neural-networks/lenet.ipynb](https://nbviewer.jupyter.org/format/slides/github/d2l-ai/d2l-zh-pytorch-slides/blob/main/chapter_convolutional-neural-networks/lenet.ipynb)
36 | - [chapter_convolutional-modern/alexnet.ipynb](https://nbviewer.jupyter.org/format/slides/github/d2l-ai/d2l-zh-pytorch-slides/blob/main/chapter_convolutional-modern/alexnet.ipynb)
37 | - [chapter_convolutional-modern/vgg.ipynb](https://nbviewer.jupyter.org/format/slides/github/d2l-ai/d2l-zh-pytorch-slides/blob/main/chapter_convolutional-modern/vgg.ipynb)
38 | - [chapter_convolutional-modern/nin.ipynb](https://nbviewer.jupyter.org/format/slides/github/d2l-ai/d2l-zh-pytorch-slides/blob/main/chapter_convolutional-modern/nin.ipynb)
39 | - [chapter_convolutional-modern/googlenet.ipynb](https://nbviewer.jupyter.org/format/slides/github/d2l-ai/d2l-zh-pytorch-slides/blob/main/chapter_convolutional-modern/googlenet.ipynb)
40 | - [chapter_convolutional-modern/batch-norm.ipynb](https://nbviewer.jupyter.org/format/slides/github/d2l-ai/d2l-zh-pytorch-slides/blob/main/chapter_convolutional-modern/batch-norm.ipynb)
41 | - [chapter_convolutional-modern/resnet.ipynb](https://nbviewer.jupyter.org/format/slides/github/d2l-ai/d2l-zh-pytorch-slides/blob/main/chapter_convolutional-modern/resnet.ipynb)
42 | - [chapter_convolutional-modern/densenet.ipynb](https://nbviewer.jupyter.org/format/slides/github/d2l-ai/d2l-zh-pytorch-slides/blob/main/chapter_convolutional-modern/densenet.ipynb)
43 | - [chapter_recurrent-neural-networks/sequence.ipynb](https://nbviewer.jupyter.org/format/slides/github/d2l-ai/d2l-zh-pytorch-slides/blob/main/chapter_recurrent-neural-networks/sequence.ipynb)
44 | - [chapter_recurrent-neural-networks/text-preprocessing.ipynb](https://nbviewer.jupyter.org/format/slides/github/d2l-ai/d2l-zh-pytorch-slides/blob/main/chapter_recurrent-neural-networks/text-preprocessing.ipynb)
45 | - [chapter_recurrent-neural-networks/language-models-and-dataset.ipynb](https://nbviewer.jupyter.org/format/slides/github/d2l-ai/d2l-zh-pytorch-slides/blob/main/chapter_recurrent-neural-networks/language-models-and-dataset.ipynb)
46 | - [chapter_recurrent-neural-networks/rnn-scratch.ipynb](https://nbviewer.jupyter.org/format/slides/github/d2l-ai/d2l-zh-pytorch-slides/blob/main/chapter_recurrent-neural-networks/rnn-scratch.ipynb)
47 | - [chapter_recurrent-neural-networks/rnn-concise.ipynb](https://nbviewer.jupyter.org/format/slides/github/d2l-ai/d2l-zh-pytorch-slides/blob/main/chapter_recurrent-neural-networks/rnn-concise.ipynb)
48 | - [chapter_recurrent-modern/gru.ipynb](https://nbviewer.jupyter.org/format/slides/github/d2l-ai/d2l-zh-pytorch-slides/blob/main/chapter_recurrent-modern/gru.ipynb)
49 | - [chapter_recurrent-modern/lstm.ipynb](https://nbviewer.jupyter.org/format/slides/github/d2l-ai/d2l-zh-pytorch-slides/blob/main/chapter_recurrent-modern/lstm.ipynb)
50 | - [chapter_recurrent-modern/deep-rnn.ipynb](https://nbviewer.jupyter.org/format/slides/github/d2l-ai/d2l-zh-pytorch-slides/blob/main/chapter_recurrent-modern/deep-rnn.ipynb)
51 | - [chapter_recurrent-modern/bi-rnn.ipynb](https://nbviewer.jupyter.org/format/slides/github/d2l-ai/d2l-zh-pytorch-slides/blob/main/chapter_recurrent-modern/bi-rnn.ipynb)
52 | - [chapter_recurrent-modern/machine-translation-and-dataset.ipynb](https://nbviewer.jupyter.org/format/slides/github/d2l-ai/d2l-zh-pytorch-slides/blob/main/chapter_recurrent-modern/machine-translation-and-dataset.ipynb)
53 | - [chapter_recurrent-modern/encoder-decoder.ipynb](https://nbviewer.jupyter.org/format/slides/github/d2l-ai/d2l-zh-pytorch-slides/blob/main/chapter_recurrent-modern/encoder-decoder.ipynb)
54 | - [chapter_recurrent-modern/seq2seq.ipynb](https://nbviewer.jupyter.org/format/slides/github/d2l-ai/d2l-zh-pytorch-slides/blob/main/chapter_recurrent-modern/seq2seq.ipynb)
55 | - [chapter_attention-mechanisms/nadaraya-waston.ipynb](https://nbviewer.jupyter.org/format/slides/github/d2l-ai/d2l-zh-pytorch-slides/blob/main/chapter_attention-mechanisms/nadaraya-waston.ipynb)
56 | - [chapter_attention-mechanisms/attention-scoring-functions.ipynb](https://nbviewer.jupyter.org/format/slides/github/d2l-ai/d2l-zh-pytorch-slides/blob/main/chapter_attention-mechanisms/attention-scoring-functions.ipynb)
57 | - [chapter_attention-mechanisms/bahdanau-attention.ipynb](https://nbviewer.jupyter.org/format/slides/github/d2l-ai/d2l-zh-pytorch-slides/blob/main/chapter_attention-mechanisms/bahdanau-attention.ipynb)
58 | - [chapter_attention-mechanisms/multihead-attention.ipynb](https://nbviewer.jupyter.org/format/slides/github/d2l-ai/d2l-zh-pytorch-slides/blob/main/chapter_attention-mechanisms/multihead-attention.ipynb)
59 | - [chapter_attention-mechanisms/self-attention-and-positional-encoding.ipynb](https://nbviewer.jupyter.org/format/slides/github/d2l-ai/d2l-zh-pytorch-slides/blob/main/chapter_attention-mechanisms/self-attention-and-positional-encoding.ipynb)
60 | - [chapter_attention-mechanisms/transformer.ipynb](https://nbviewer.jupyter.org/format/slides/github/d2l-ai/d2l-zh-pytorch-slides/blob/main/chapter_attention-mechanisms/transformer.ipynb)
61 | - [chapter_computational-performance/multiple-gpus.ipynb](https://nbviewer.jupyter.org/format/slides/github/d2l-ai/d2l-zh-pytorch-slides/blob/main/chapter_computational-performance/multiple-gpus.ipynb)
62 | - [chapter_computational-performance/multiple-gpus-concise.ipynb](https://nbviewer.jupyter.org/format/slides/github/d2l-ai/d2l-zh-pytorch-slides/blob/main/chapter_computational-performance/multiple-gpus-concise.ipynb)
63 | - [chapter_computer-vision/image-augmentation.ipynb](https://nbviewer.jupyter.org/format/slides/github/d2l-ai/d2l-zh-pytorch-slides/blob/main/chapter_computer-vision/image-augmentation.ipynb)
64 | - [chapter_computer-vision/fine-tuning.ipynb](https://nbviewer.jupyter.org/format/slides/github/d2l-ai/d2l-zh-pytorch-slides/blob/main/chapter_computer-vision/fine-tuning.ipynb)
65 | - [chapter_computer-vision/bounding-box.ipynb](https://nbviewer.jupyter.org/format/slides/github/d2l-ai/d2l-zh-pytorch-slides/blob/main/chapter_computer-vision/bounding-box.ipynb)
66 | - [chapter_computer-vision/anchor.ipynb](https://nbviewer.jupyter.org/format/slides/github/d2l-ai/d2l-zh-pytorch-slides/blob/main/chapter_computer-vision/anchor.ipynb)
67 | - [chapter_computer-vision/multiscale-object-detection.ipynb](https://nbviewer.jupyter.org/format/slides/github/d2l-ai/d2l-zh-pytorch-slides/blob/main/chapter_computer-vision/multiscale-object-detection.ipynb)
68 | - [chapter_computer-vision/object-detection-dataset.ipynb](https://nbviewer.jupyter.org/format/slides/github/d2l-ai/d2l-zh-pytorch-slides/blob/main/chapter_computer-vision/object-detection-dataset.ipynb)
69 | - [chapter_computer-vision/ssd.ipynb](https://nbviewer.jupyter.org/format/slides/github/d2l-ai/d2l-zh-pytorch-slides/blob/main/chapter_computer-vision/ssd.ipynb)
70 | - [chapter_computer-vision/semantic-segmentation-and-dataset.ipynb](https://nbviewer.jupyter.org/format/slides/github/d2l-ai/d2l-zh-pytorch-slides/blob/main/chapter_computer-vision/semantic-segmentation-and-dataset.ipynb)
71 | - [chapter_computer-vision/transposed-conv.ipynb](https://nbviewer.jupyter.org/format/slides/github/d2l-ai/d2l-zh-pytorch-slides/blob/main/chapter_computer-vision/transposed-conv.ipynb)
72 | - [chapter_computer-vision/fcn.ipynb](https://nbviewer.jupyter.org/format/slides/github/d2l-ai/d2l-zh-pytorch-slides/blob/main/chapter_computer-vision/fcn.ipynb)
73 | - [chapter_computer-vision/neural-style.ipynb](https://nbviewer.jupyter.org/format/slides/github/d2l-ai/d2l-zh-pytorch-slides/blob/main/chapter_computer-vision/neural-style.ipynb)
74 | - [chapter_computer-vision/kaggle-cifar10.ipynb](https://nbviewer.jupyter.org/format/slides/github/d2l-ai/d2l-zh-pytorch-slides/blob/main/chapter_computer-vision/kaggle-cifar10.ipynb)
75 | - [chapter_computer-vision/kaggle-dog.ipynb](https://nbviewer.jupyter.org/format/slides/github/d2l-ai/d2l-zh-pytorch-slides/blob/main/chapter_computer-vision/kaggle-dog.ipynb)
76 | - [chapter_natural-language-processing-applications/natural-language-inference-and-dataset.ipynb](https://nbviewer.jupyter.org/format/slides/github/d2l-ai/d2l-zh-pytorch-slides/blob/main/chapter_natural-language-processing-applications/natural-language-inference-and-dataset.ipynb)
77 | - [chapter_natural-language-processing-applications/natural-language-inference-bert.ipynb](https://nbviewer.jupyter.org/format/slides/github/d2l-ai/d2l-zh-pytorch-slides/blob/main/chapter_natural-language-processing-applications/natural-language-inference-bert.ipynb)
--------------------------------------------------------------------------------
/chapter_attention-mechanisms/multihead-attention.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "id": "cb4d82f7",
6 | "metadata": {
7 | "slideshow": {
8 | "slide_type": "-"
9 | }
10 | },
11 | "source": [
12 | "# 多头注意力\n",
13 | "\n"
14 | ]
15 | },
16 | {
17 | "cell_type": "code",
18 | "execution_count": 1,
19 | "id": "dc55ba33",
20 | "metadata": {
21 | "execution": {
22 | "iopub.execute_input": "2023-08-18T07:01:32.189972Z",
23 | "iopub.status.busy": "2023-08-18T07:01:32.189240Z",
24 | "iopub.status.idle": "2023-08-18T07:01:34.516491Z",
25 | "shell.execute_reply": "2023-08-18T07:01:34.515475Z"
26 | },
27 | "origin_pos": 2,
28 | "tab": [
29 | "pytorch"
30 | ]
31 | },
32 | "outputs": [],
33 | "source": [
34 | "import math\n",
35 | "import torch\n",
36 | "from torch import nn\n",
37 | "from d2l import torch as d2l"
38 | ]
39 | },
40 | {
41 | "cell_type": "markdown",
42 | "id": "22964f2f",
43 | "metadata": {
44 | "slideshow": {
45 | "slide_type": "slide"
46 | }
47 | },
48 | "source": [
49 | "选择缩放点积注意力作为每一个注意力头"
50 | ]
51 | },
52 | {
53 | "cell_type": "code",
54 | "execution_count": 2,
55 | "id": "1bb10990",
56 | "metadata": {
57 | "execution": {
58 | "iopub.execute_input": "2023-08-18T07:01:34.521491Z",
59 | "iopub.status.busy": "2023-08-18T07:01:34.521131Z",
60 | "iopub.status.idle": "2023-08-18T07:01:34.530492Z",
61 | "shell.execute_reply": "2023-08-18T07:01:34.529556Z"
62 | },
63 | "origin_pos": 7,
64 | "tab": [
65 | "pytorch"
66 | ]
67 | },
68 | "outputs": [],
69 | "source": [
70 | "class MultiHeadAttention(nn.Module):\n",
71 | " \"\"\"多头注意力\"\"\"\n",
72 | " def __init__(self, key_size, query_size, value_size, num_hiddens,\n",
73 | " num_heads, dropout, bias=False, **kwargs):\n",
74 | " super(MultiHeadAttention, self).__init__(**kwargs)\n",
75 | " self.num_heads = num_heads\n",
76 | " self.attention = d2l.DotProductAttention(dropout)\n",
77 | " self.W_q = nn.Linear(query_size, num_hiddens, bias=bias)\n",
78 | " self.W_k = nn.Linear(key_size, num_hiddens, bias=bias)\n",
79 | " self.W_v = nn.Linear(value_size, num_hiddens, bias=bias)\n",
80 | " self.W_o = nn.Linear(num_hiddens, num_hiddens, bias=bias)\n",
81 | "\n",
82 | " def forward(self, queries, keys, values, valid_lens):\n",
83 | " queries = transpose_qkv(self.W_q(queries), self.num_heads)\n",
84 | " keys = transpose_qkv(self.W_k(keys), self.num_heads)\n",
85 | " values = transpose_qkv(self.W_v(values), self.num_heads)\n",
86 | "\n",
87 | " if valid_lens is not None:\n",
88 | " valid_lens = torch.repeat_interleave(\n",
89 | " valid_lens, repeats=self.num_heads, dim=0)\n",
90 | "\n",
91 | " output = self.attention(queries, keys, values, valid_lens)\n",
92 | "\n",
93 | " output_concat = transpose_output(output, self.num_heads)\n",
94 | " return self.W_o(output_concat)"
95 | ]
96 | },
97 | {
98 | "cell_type": "markdown",
99 | "id": "d376aca2",
100 | "metadata": {
101 | "slideshow": {
102 | "slide_type": "slide"
103 | }
104 | },
105 | "source": [
106 | "使多个头并行计算"
107 | ]
108 | },
109 | {
110 | "cell_type": "code",
111 | "execution_count": 3,
112 | "id": "b2af5ed8",
113 | "metadata": {
114 | "execution": {
115 | "iopub.execute_input": "2023-08-18T07:01:34.534820Z",
116 | "iopub.status.busy": "2023-08-18T07:01:34.534308Z",
117 | "iopub.status.idle": "2023-08-18T07:01:34.540852Z",
118 | "shell.execute_reply": "2023-08-18T07:01:34.539927Z"
119 | },
120 | "origin_pos": 12,
121 | "tab": [
122 | "pytorch"
123 | ]
124 | },
125 | "outputs": [],
126 | "source": [
127 | "def transpose_qkv(X, num_heads):\n",
128 | " \"\"\"为了多注意力头的并行计算而变换形状\"\"\"\n",
129 | " X = X.reshape(X.shape[0], X.shape[1], num_heads, -1)\n",
130 | "\n",
131 | " X = X.permute(0, 2, 1, 3)\n",
132 | "\n",
133 | " return X.reshape(-1, X.shape[2], X.shape[3])\n",
134 | "\n",
135 | "\n",
136 | "def transpose_output(X, num_heads):\n",
137 | " \"\"\"逆转transpose_qkv函数的操作\"\"\"\n",
138 | " X = X.reshape(-1, num_heads, X.shape[1], X.shape[2])\n",
139 | " X = X.permute(0, 2, 1, 3)\n",
140 | " return X.reshape(X.shape[0], X.shape[1], -1)"
141 | ]
142 | },
143 | {
144 | "cell_type": "markdown",
145 | "id": "015e3e67",
146 | "metadata": {
147 | "slideshow": {
148 | "slide_type": "slide"
149 | }
150 | },
151 | "source": [
152 | "测试"
153 | ]
154 | },
155 | {
156 | "cell_type": "code",
157 | "execution_count": 4,
158 | "id": "d06baadf",
159 | "metadata": {
160 | "execution": {
161 | "iopub.execute_input": "2023-08-18T07:01:34.545405Z",
162 | "iopub.status.busy": "2023-08-18T07:01:34.544605Z",
163 | "iopub.status.idle": "2023-08-18T07:01:34.571251Z",
164 | "shell.execute_reply": "2023-08-18T07:01:34.570476Z"
165 | },
166 | "origin_pos": 17,
167 | "tab": [
168 | "pytorch"
169 | ]
170 | },
171 | "outputs": [
172 | {
173 | "data": {
174 | "text/plain": [
175 | "MultiHeadAttention(\n",
176 | " (attention): DotProductAttention(\n",
177 | " (dropout): Dropout(p=0.5, inplace=False)\n",
178 | " )\n",
179 | " (W_q): Linear(in_features=100, out_features=100, bias=False)\n",
180 | " (W_k): Linear(in_features=100, out_features=100, bias=False)\n",
181 | " (W_v): Linear(in_features=100, out_features=100, bias=False)\n",
182 | " (W_o): Linear(in_features=100, out_features=100, bias=False)\n",
183 | ")"
184 | ]
185 | },
186 | "execution_count": 4,
187 | "metadata": {},
188 | "output_type": "execute_result"
189 | }
190 | ],
191 | "source": [
192 | "num_hiddens, num_heads = 100, 5\n",
193 | "attention = MultiHeadAttention(num_hiddens, num_hiddens, num_hiddens,\n",
194 | " num_hiddens, num_heads, 0.5)\n",
195 | "attention.eval()"
196 | ]
197 | },
198 | {
199 | "cell_type": "code",
200 | "execution_count": 5,
201 | "id": "8da65afc",
202 | "metadata": {
203 | "execution": {
204 | "iopub.execute_input": "2023-08-18T07:01:34.574642Z",
205 | "iopub.status.busy": "2023-08-18T07:01:34.574021Z",
206 | "iopub.status.idle": "2023-08-18T07:01:34.588848Z",
207 | "shell.execute_reply": "2023-08-18T07:01:34.587945Z"
208 | },
209 | "origin_pos": 20,
210 | "tab": [
211 | "pytorch"
212 | ]
213 | },
214 | "outputs": [
215 | {
216 | "data": {
217 | "text/plain": [
218 | "torch.Size([2, 4, 100])"
219 | ]
220 | },
221 | "execution_count": 5,
222 | "metadata": {},
223 | "output_type": "execute_result"
224 | }
225 | ],
226 | "source": [
227 | "batch_size, num_queries = 2, 4\n",
228 | "num_kvpairs, valid_lens = 6, torch.tensor([3, 2])\n",
229 | "X = torch.ones((batch_size, num_queries, num_hiddens))\n",
230 | "Y = torch.ones((batch_size, num_kvpairs, num_hiddens))\n",
231 | "attention(X, Y, Y, valid_lens).shape"
232 | ]
233 | }
234 | ],
235 | "metadata": {
236 | "celltoolbar": "Slideshow",
237 | "language_info": {
238 | "name": "python"
239 | },
240 | "required_libs": [],
241 | "rise": {
242 | "autolaunch": true,
243 | "enable_chalkboard": true,
244 | "overlay": "<div class='my-top-right'></div><div class='my-top-left'></div>",
245 | "scroll": true
246 | }
247 | },
248 | "nbformat": 4,
249 | "nbformat_minor": 5
250 | }
--------------------------------------------------------------------------------
/chapter_attention-mechanisms/rise.css:
--------------------------------------------------------------------------------
1 |
2 | div.text_cell_render.rendered_html {
3 | padding: 0.35em 0.1em;
4 | }
5 |
6 | div.code_cell {
7 | font-size: 120%;
8 | }
9 |
10 | div.my-top-right {
11 | position: absolute;
12 | right: 5%;
13 | top: 1em;
14 | font-size: 2em;
15 | }
16 |
17 | div.my-top-left {
18 | position: absolute;
19 | left: 5%;
20 | top: 1em;
21 | font-size: 2em;
22 | }
23 |
--------------------------------------------------------------------------------
/chapter_computational-performance/rise.css:
--------------------------------------------------------------------------------
1 |
2 | div.text_cell_render.rendered_html {
3 | padding: 0.35em 0.1em;
4 | }
5 |
6 | div.code_cell {
7 | font-size: 120%;
8 | }
9 |
10 | div.my-top-right {
11 | position: absolute;
12 | right: 5%;
13 | top: 1em;
14 | font-size: 2em;
15 | }
16 |
17 | div.my-top-left {
18 | position: absolute;
19 | left: 5%;
20 | top: 1em;
21 | font-size: 2em;
22 | }
23 |
--------------------------------------------------------------------------------
/chapter_computer-vision/rise.css:
--------------------------------------------------------------------------------
1 |
2 | div.text_cell_render.rendered_html {
3 | padding: 0.35em 0.1em;
4 | }
5 |
6 | div.code_cell {
7 | font-size: 120%;
8 | }
9 |
10 | div.my-top-right {
11 | position: absolute;
12 | right: 5%;
13 | top: 1em;
14 | font-size: 2em;
15 | }
16 |
17 | div.my-top-left {
18 | position: absolute;
19 | left: 5%;
20 | top: 1em;
21 | font-size: 2em;
22 | }
23 |
--------------------------------------------------------------------------------
/chapter_computer-vision/transposed-conv.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "id": "db0f208a",
6 | "metadata": {
7 | "slideshow": {
8 | "slide_type": "-"
9 | }
10 | },
11 | "source": [
12 | "# 转置卷积\n",
13 | "\n"
14 | ]
15 | },
16 | {
17 | "cell_type": "code",
18 | "execution_count": 1,
19 | "id": "1f39b5ef",
20 | "metadata": {
21 | "execution": {
22 | "iopub.execute_input": "2023-08-18T07:05:22.451701Z",
23 | "iopub.status.busy": "2023-08-18T07:05:22.451411Z",
24 | "iopub.status.idle": "2023-08-18T07:05:24.490785Z",
25 | "shell.execute_reply": "2023-08-18T07:05:24.489970Z"
26 | },
27 | "origin_pos": 2,
28 | "tab": [
29 | "pytorch"
30 | ]
31 | },
32 | "outputs": [],
33 | "source": [
34 | "import torch\n",
35 | "from torch import nn\n",
36 | "from d2l import torch as d2l"
37 | ]
38 | },
39 | {
40 | "cell_type": "markdown",
41 | "id": "1f3a83f3",
42 | "metadata": {
43 | "slideshow": {
44 | "slide_type": "-"
45 | }
46 | },
47 | "source": [
48 | "实现基本的转置卷积运算"
49 | ]
50 | },
51 | {
52 | "cell_type": "code",
53 | "execution_count": 2,
54 | "id": "e6931d90",
55 | "metadata": {
56 | "execution": {
57 | "iopub.execute_input": "2023-08-18T07:05:24.494981Z",
58 | "iopub.status.busy": "2023-08-18T07:05:24.494307Z",
59 | "iopub.status.idle": "2023-08-18T07:05:24.499745Z",
60 | "shell.execute_reply": "2023-08-18T07:05:24.498885Z"
61 | },
62 | "origin_pos": 5,
63 | "tab": [
64 | "pytorch"
65 | ]
66 | },
67 | "outputs": [],
68 | "source": [
69 | "def trans_conv(X, K):\n",
70 | " h, w = K.shape\n",
71 | " Y = torch.zeros((X.shape[0] + h - 1, X.shape[1] + w - 1))\n",
72 | " for i in range(X.shape[0]):\n",
73 | " for j in range(X.shape[1]):\n",
74 | " Y[i: i + h, j: j + w] += X[i, j] * K\n",
75 | " return Y"
76 | ]
77 | },
78 | {
79 | "cell_type": "markdown",
80 | "id": "f3baa22e",
81 | "metadata": {
82 | "slideshow": {
83 | "slide_type": "slide"
84 | }
85 | },
86 | "source": [
87 | "验证上述实现输出"
88 | ]
89 | },
90 | {
91 | "cell_type": "code",
92 | "execution_count": 3,
93 | "id": "a7c6e2fd",
94 | "metadata": {
95 | "execution": {
96 | "iopub.execute_input": "2023-08-18T07:05:24.503202Z",
97 | "iopub.status.busy": "2023-08-18T07:05:24.502646Z",
98 | "iopub.status.idle": "2023-08-18T07:05:24.531448Z",
99 | "shell.execute_reply": "2023-08-18T07:05:24.530730Z"
100 | },
101 | "origin_pos": 7,
102 | "tab": [
103 | "pytorch"
104 | ]
105 | },
106 | "outputs": [
107 | {
108 | "data": {
109 | "text/plain": [
110 | "tensor([[ 0., 0., 1.],\n",
111 | " [ 0., 4., 6.],\n",
112 | " [ 4., 12., 9.]])"
113 | ]
114 | },
115 | "execution_count": 3,
116 | "metadata": {},
117 | "output_type": "execute_result"
118 | }
119 | ],
120 | "source": [
121 | "X = torch.tensor([[0.0, 1.0], [2.0, 3.0]])\n",
122 | "K = torch.tensor([[0.0, 1.0], [2.0, 3.0]])\n",
123 | "trans_conv(X, K)"
124 | ]
125 | },
126 | {
127 | "cell_type": "markdown",
128 | "id": "9f9dd301",
129 | "metadata": {
130 | "slideshow": {
131 | "slide_type": "slide"
132 | }
133 | },
134 | "source": [
135 | "使用高级API获得相同的结果"
136 | ]
137 | },
138 | {
139 | "cell_type": "code",
140 | "execution_count": 4,
141 | "id": "b9de6d80",
142 | "metadata": {
143 | "execution": {
144 | "iopub.execute_input": "2023-08-18T07:05:24.535386Z",
145 | "iopub.status.busy": "2023-08-18T07:05:24.534826Z",
146 | "iopub.status.idle": "2023-08-18T07:05:24.544484Z",
147 | "shell.execute_reply": "2023-08-18T07:05:24.543747Z"
148 | },
149 | "origin_pos": 10,
150 | "tab": [
151 | "pytorch"
152 | ]
153 | },
154 | "outputs": [
155 | {
156 | "data": {
157 | "text/plain": [
158 | "tensor([[[[ 0., 0., 1.],\n",
159 | " [ 0., 4., 6.],\n",
160 | " [ 4., 12., 9.]]]], grad_fn=<ConvolutionBackward0>)"
161 | ]
162 | },
163 | "execution_count": 4,
164 | "metadata": {},
165 | "output_type": "execute_result"
166 | }
167 | ],
168 | "source": [
169 | "X, K = X.reshape(1, 1, 2, 2), K.reshape(1, 1, 2, 2)\n",
170 | "tconv = nn.ConvTranspose2d(1, 1, kernel_size=2, bias=False)\n",
171 | "tconv.weight.data = K\n",
172 | "tconv(X)"
173 | ]
174 | },
175 | {
176 | "cell_type": "markdown",
177 | "id": "f8811b58",
178 | "metadata": {
179 | "slideshow": {
180 | "slide_type": "slide"
181 | }
182 | },
183 | "source": [
184 | "填充、步幅和多通道"
185 | ]
186 | },
187 | {
188 | "cell_type": "code",
189 | "execution_count": 5,
190 | "id": "cd114de1",
191 | "metadata": {
192 | "execution": {
193 | "iopub.execute_input": "2023-08-18T07:05:24.548040Z",
194 | "iopub.status.busy": "2023-08-18T07:05:24.547398Z",
195 | "iopub.status.idle": "2023-08-18T07:05:24.553659Z",
196 | "shell.execute_reply": "2023-08-18T07:05:24.552864Z"
197 | },
198 | "origin_pos": 14,
199 | "tab": [
200 | "pytorch"
201 | ]
202 | },
203 | "outputs": [
204 | {
205 | "data": {
206 | "text/plain": [
207 | "tensor([[[[4.]]]], grad_fn=<ConvolutionBackward0>)"
208 | ]
209 | },
210 | "execution_count": 5,
211 | "metadata": {},
212 | "output_type": "execute_result"
213 | }
214 | ],
215 | "source": [
216 | "tconv = nn.ConvTranspose2d(1, 1, kernel_size=2, padding=1, bias=False)\n",
217 | "tconv.weight.data = K\n",
218 | "tconv(X)"
219 | ]
220 | },
221 | {
222 | "cell_type": "code",
223 | "execution_count": 6,
224 | "id": "48064406",
225 | "metadata": {
226 | "execution": {
227 | "iopub.execute_input": "2023-08-18T07:05:24.557362Z",
228 | "iopub.status.busy": "2023-08-18T07:05:24.556727Z",
229 | "iopub.status.idle": "2023-08-18T07:05:24.563081Z",
230 | "shell.execute_reply": "2023-08-18T07:05:24.562365Z"
231 | },
232 | "origin_pos": 18,
233 | "tab": [
234 | "pytorch"
235 | ]
236 | },
237 | "outputs": [
238 | {
239 | "data": {
240 | "text/plain": [
241 | "tensor([[[[0., 0., 0., 1.],\n",
242 | " [0., 0., 2., 3.],\n",
243 | " [0., 2., 0., 3.],\n",
244 | " [4., 6., 6., 9.]]]], grad_fn=<ConvolutionBackward0>)"
245 | ]
246 | },
247 | "execution_count": 6,
248 | "metadata": {},
249 | "output_type": "execute_result"
250 | }
251 | ],
252 | "source": [
253 | "tconv = nn.ConvTranspose2d(1, 1, kernel_size=2, stride=2, bias=False)\n",
254 | "tconv.weight.data = K\n",
255 | "tconv(X)"
256 | ]
257 | },
258 | {
259 | "cell_type": "code",
260 | "execution_count": 7,
261 | "id": "5e7033d7",
262 | "metadata": {
263 | "execution": {
264 | "iopub.execute_input": "2023-08-18T07:05:24.566613Z",
265 | "iopub.status.busy": "2023-08-18T07:05:24.565990Z",
266 | "iopub.status.idle": "2023-08-18T07:05:24.577437Z",
267 | "shell.execute_reply": "2023-08-18T07:05:24.576434Z"
268 | },
269 | "origin_pos": 22,
270 | "tab": [
271 | "pytorch"
272 | ]
273 | },
274 | "outputs": [
275 | {
276 | "data": {
277 | "text/plain": [
278 | "True"
279 | ]
280 | },
281 | "execution_count": 7,
282 | "metadata": {},
283 | "output_type": "execute_result"
284 | }
285 | ],
286 | "source": [
287 | "X = torch.rand(size=(1, 10, 16, 16))\n",
288 | "conv = nn.Conv2d(10, 20, kernel_size=5, padding=2, stride=3)\n",
289 | "tconv = nn.ConvTranspose2d(20, 10, kernel_size=5, padding=2, stride=3)\n",
290 | "tconv(conv(X)).shape == X.shape"
291 | ]
292 | },
293 | {
294 | "cell_type": "markdown",
295 | "id": "12aa0878",
296 | "metadata": {
297 | "slideshow": {
298 | "slide_type": "slide"
299 | }
300 | },
301 | "source": [
302 | "与矩阵变换的联系"
303 | ]
304 | },
305 | {
306 | "cell_type": "code",
307 | "execution_count": 8,
308 | "id": "260d5c6d",
309 | "metadata": {
310 | "execution": {
311 | "iopub.execute_input": "2023-08-18T07:05:24.581485Z",
312 | "iopub.status.busy": "2023-08-18T07:05:24.580866Z",
313 | "iopub.status.idle": "2023-08-18T07:05:24.589179Z",
314 | "shell.execute_reply": "2023-08-18T07:05:24.588233Z"
315 | },
316 | "origin_pos": 25,
317 | "tab": [
318 | "pytorch"
319 | ]
320 | },
321 | "outputs": [
322 | {
323 | "data": {
324 | "text/plain": [
325 | "tensor([[27., 37.],\n",
326 | " [57., 67.]])"
327 | ]
328 | },
329 | "execution_count": 8,
330 | "metadata": {},
331 | "output_type": "execute_result"
332 | }
333 | ],
334 | "source": [
335 | "X = torch.arange(9.0).reshape(3, 3)\n",
336 | "K = torch.tensor([[1.0, 2.0], [3.0, 4.0]])\n",
337 | "Y = d2l.corr2d(X, K)\n",
338 | "Y"
339 | ]
340 | },
341 | {
342 | "cell_type": "code",
343 | "execution_count": 9,
344 | "id": "d9f6ce2b",
345 | "metadata": {
346 | "execution": {
347 | "iopub.execute_input": "2023-08-18T07:05:24.592769Z",
348 | "iopub.status.busy": "2023-08-18T07:05:24.592164Z",
349 | "iopub.status.idle": "2023-08-18T07:05:24.602392Z",
350 | "shell.execute_reply": "2023-08-18T07:05:24.601439Z"
351 | },
352 | "origin_pos": 28,
353 | "tab": [
354 | "pytorch"
355 | ]
356 | },
357 | "outputs": [
358 | {
359 | "data": {
360 | "text/plain": [
361 | "tensor([[1., 2., 0., 3., 4., 0., 0., 0., 0.],\n",
362 | " [0., 1., 2., 0., 3., 4., 0., 0., 0.],\n",
363 | " [0., 0., 0., 1., 2., 0., 3., 4., 0.],\n",
364 | " [0., 0., 0., 0., 1., 2., 0., 3., 4.]])"
365 | ]
366 | },
367 | "execution_count": 9,
368 | "metadata": {},
369 | "output_type": "execute_result"
370 | }
371 | ],
372 | "source": [
373 | "def kernel2matrix(K):\n",
374 | " k, W = torch.zeros(5), torch.zeros((4, 9))\n",
375 | " k[:2], k[3:5] = K[0, :], K[1, :]\n",
376 | " W[0, :5], W[1, 1:6], W[2, 3:8], W[3, 4:] = k, k, k, k\n",
377 | " return W\n",
378 | "\n",
379 | "W = kernel2matrix(K)\n",
380 | "W"
381 | ]
382 | },
383 | {
384 | "cell_type": "code",
385 | "execution_count": 10,
386 | "id": "1fb803d0",
387 | "metadata": {
388 | "execution": {
389 | "iopub.execute_input": "2023-08-18T07:05:24.606249Z",
390 | "iopub.status.busy": "2023-08-18T07:05:24.605496Z",
391 | "iopub.status.idle": "2023-08-18T07:05:24.612872Z",
392 | "shell.execute_reply": "2023-08-18T07:05:24.611900Z"
393 | },
394 | "origin_pos": 31,
395 | "tab": [
396 | "pytorch"
397 | ]
398 | },
399 | "outputs": [
400 | {
401 | "data": {
402 | "text/plain": [
403 | "tensor([[True, True],\n",
404 | " [True, True]])"
405 | ]
406 | },
407 | "execution_count": 10,
408 | "metadata": {},
409 | "output_type": "execute_result"
410 | }
411 | ],
412 | "source": [
413 | "Y == torch.matmul(W, X.reshape(-1)).reshape(2, 2)"
414 | ]
415 | },
416 | {
417 | "cell_type": "code",
418 | "execution_count": 11,
419 | "id": "f1a55ff1",
420 | "metadata": {
421 | "execution": {
422 | "iopub.execute_input": "2023-08-18T07:05:24.616575Z",
423 | "iopub.status.busy": "2023-08-18T07:05:24.615826Z",
424 | "iopub.status.idle": "2023-08-18T07:05:24.623063Z",
425 | "shell.execute_reply": "2023-08-18T07:05:24.622144Z"
426 | },
427 | "origin_pos": 34,
428 | "tab": [
429 | "pytorch"
430 | ]
431 | },
432 | "outputs": [
433 | {
434 | "data": {
435 | "text/plain": [
436 | "tensor([[True, True, True],\n",
437 | " [True, True, True],\n",
438 | " [True, True, True]])"
439 | ]
440 | },
441 | "execution_count": 11,
442 | "metadata": {},
443 | "output_type": "execute_result"
444 | }
445 | ],
446 | "source": [
447 | "Z = trans_conv(Y, K)\n",
448 | "Z == torch.matmul(W.T, Y.reshape(-1)).reshape(3, 3)"
449 | ]
450 | }
451 | ],
452 | "metadata": {
453 | "celltoolbar": "Slideshow",
454 | "language_info": {
455 | "name": "python"
456 | },
457 | "required_libs": [],
458 | "rise": {
459 | "autolaunch": true,
460 | "enable_chalkboard": true,
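461 | "overlay": "<div class='my-top-right'><img height=80px src='http://d2l.ai/_static/logo-with-text.png'/></div><div class='my-top-left'></div>",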
462 | "scroll": true
463 | }
464 | },
465 | "nbformat": 4,
466 | "nbformat_minor": 5
467 | }
--------------------------------------------------------------------------------
/chapter_convolutional-modern/rise.css:
--------------------------------------------------------------------------------
1 |
2 | div.text_cell_render.rendered_html {
3 | padding: 0.35em 0.1em;
4 | }
5 |
6 | div.code_cell {
7 | font-size: 120%;
8 | }
9 |
10 | div.my-top-right {
11 | position: absolute;
12 | right: 5%;
13 | top: 1em;
14 | font-size: 2em;
15 | }
16 |
17 | div.my-top-left {
18 | position: absolute;
19 | left: 5%;
20 | top: 1em;
21 | font-size: 2em;
22 | }
23 |
--------------------------------------------------------------------------------
/chapter_convolutional-neural-networks/channels.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "id": "a0f37efc",
6 | "metadata": {
7 | "slideshow": {
8 | "slide_type": "-"
9 | }
10 | },
11 | "source": [
12 | "# 多输入多输出通道\n",
13 | "\n",
14 | "实现一下多输入通道互相关运算"
15 | ]
16 | },
17 | {
18 | "cell_type": "code",
19 | "execution_count": 2,
20 | "id": "0cff24d4",
21 | "metadata": {
22 | "execution": {
23 | "iopub.execute_input": "2023-08-18T07:02:38.339612Z",
24 | "iopub.status.busy": "2023-08-18T07:02:38.339031Z",
25 | "iopub.status.idle": "2023-08-18T07:02:38.344485Z",
26 | "shell.execute_reply": "2023-08-18T07:02:38.343326Z"
27 | },
28 | "origin_pos": 4,
29 | "tab": [
30 | "pytorch"
31 | ]
32 | },
33 | "outputs": [],
34 | "source": [
35 | "import torch\n",
36 | "from d2l import torch as d2l\n",
37 | "\n",
38 | "def corr2d_multi_in(X, K):\n",
39 | " return sum(d2l.corr2d(x, k) for x, k in zip(X, K))"
40 | ]
41 | },
42 | {
43 | "cell_type": "markdown",
44 | "id": "aafb58cc",
45 | "metadata": {
46 | "slideshow": {
47 | "slide_type": "-"
48 | }
49 | },
50 | "source": [
51 | "验证互相关运算的输出"
52 | ]
53 | },
54 | {
55 | "cell_type": "code",
56 | "execution_count": 3,
57 | "id": "5a60b8f9",
58 | "metadata": {
59 | "execution": {
60 | "iopub.execute_input": "2023-08-18T07:02:38.347937Z",
61 | "iopub.status.busy": "2023-08-18T07:02:38.347463Z",
62 | "iopub.status.idle": "2023-08-18T07:02:38.380997Z",
63 | "shell.execute_reply": "2023-08-18T07:02:38.379885Z"
64 | },
65 | "origin_pos": 7,
66 | "tab": [
67 | "pytorch"
68 | ]
69 | },
70 | "outputs": [
71 | {
72 | "data": {
73 | "text/plain": [
74 | "tensor([[ 56., 72.],\n",
75 | " [104., 120.]])"
76 | ]
77 | },
78 | "execution_count": 3,
79 | "metadata": {},
80 | "output_type": "execute_result"
81 | }
82 | ],
83 | "source": [
84 | "X = torch.tensor([[[0.0, 1.0, 2.0], [3.0, 4.0, 5.0], [6.0, 7.0, 8.0]],\n",
85 | " [[1.0, 2.0, 3.0], [4.0, 5.0, 6.0], [7.0, 8.0, 9.0]]])\n",
86 | "K = torch.tensor([[[0.0, 1.0], [2.0, 3.0]], [[1.0, 2.0], [3.0, 4.0]]])\n",
87 | "\n",
88 | "corr2d_multi_in(X, K)"
89 | ]
90 | },
91 | {
92 | "cell_type": "markdown",
93 | "id": "4b4c9aa1",
94 | "metadata": {
95 | "slideshow": {
96 | "slide_type": "slide"
97 | }
98 | },
99 | "source": [
100 | "计算多个通道的输出的互相关函数"
101 | ]
102 | },
103 | {
104 | "cell_type": "code",
105 | "execution_count": 5,
106 | "id": "6dde7543",
107 | "metadata": {
108 | "execution": {
109 | "iopub.execute_input": "2023-08-18T07:02:38.392733Z",
110 | "iopub.status.busy": "2023-08-18T07:02:38.392298Z",
111 | "iopub.status.idle": "2023-08-18T07:02:38.399310Z",
112 | "shell.execute_reply": "2023-08-18T07:02:38.398211Z"
113 | },
114 | "origin_pos": 11,
115 | "tab": [
116 | "pytorch"
117 | ]
118 | },
119 | "outputs": [
120 | {
121 | "data": {
122 | "text/plain": [
123 | "torch.Size([3, 2, 2, 2])"
124 | ]
125 | },
126 | "execution_count": 5,
127 | "metadata": {},
128 | "output_type": "execute_result"
129 | }
130 | ],
131 | "source": [
132 | "def corr2d_multi_in_out(X, K):\n",
133 | " return torch.stack([corr2d_multi_in(X, k) for k in K], 0)\n",
134 | "\n",
135 | "K = torch.stack((K, K + 1, K + 2), 0)\n",
136 | "K.shape"
137 | ]
138 | },
139 | {
140 | "cell_type": "code",
141 | "execution_count": 6,
142 | "id": "86b2b71f",
143 | "metadata": {
144 | "execution": {
145 | "iopub.execute_input": "2023-08-18T07:02:38.403159Z",
146 | "iopub.status.busy": "2023-08-18T07:02:38.402457Z",
147 | "iopub.status.idle": "2023-08-18T07:02:38.410409Z",
148 | "shell.execute_reply": "2023-08-18T07:02:38.409310Z"
149 | },
150 | "origin_pos": 13,
151 | "tab": [
152 | "pytorch"
153 | ]
154 | },
155 | "outputs": [
156 | {
157 | "data": {
158 | "text/plain": [
159 | "tensor([[[ 56., 72.],\n",
160 | " [104., 120.]],\n",
161 | "\n",
162 | " [[ 76., 100.],\n",
163 | " [148., 172.]],\n",
164 | "\n",
165 | " [[ 96., 128.],\n",
166 | " [192., 224.]]])"
167 | ]
168 | },
169 | "execution_count": 6,
170 | "metadata": {},
171 | "output_type": "execute_result"
172 | }
173 | ],
174 | "source": [
175 | "corr2d_multi_in_out(X, K)"
176 | ]
177 | },
178 | {
179 | "cell_type": "markdown",
180 | "id": "cafe51a2",
181 | "metadata": {
182 | "slideshow": {
183 | "slide_type": "slide"
184 | }
185 | },
186 | "source": [
187 | "1x1卷积"
188 | ]
189 | },
190 | {
191 | "cell_type": "code",
192 | "execution_count": 9,
193 | "id": "7250eae2",
194 | "metadata": {
195 | "execution": {
196 | "iopub.execute_input": "2023-08-18T07:02:38.430613Z",
197 | "iopub.status.busy": "2023-08-18T07:02:38.430184Z",
198 | "iopub.status.idle": "2023-08-18T07:02:38.438715Z",
199 | "shell.execute_reply": "2023-08-18T07:02:38.437662Z"
200 | },
201 | "origin_pos": 19,
202 | "tab": [
203 | "pytorch"
204 | ]
205 | },
206 | "outputs": [],
207 | "source": [
208 | "def corr2d_multi_in_out_1x1(X, K):\n",
209 | " c_i, h, w = X.shape\n",
210 | " c_o = K.shape[0]\n",
211 | " X = X.reshape((c_i, h * w))\n",
212 | " K = K.reshape((c_o, c_i))\n",
213 | " Y = torch.matmul(K, X)\n",
214 | " return Y.reshape((c_o, h, w))\n",
215 | "\n",
216 | "X = torch.normal(0, 1, (3, 3, 3))\n",
217 | "K = torch.normal(0, 1, (2, 3, 1, 1))\n",
218 | "\n",
219 | "Y1 = corr2d_multi_in_out_1x1(X, K)\n",
220 | "Y2 = corr2d_multi_in_out(X, K)\n",
221 | "assert float(torch.abs(Y1 - Y2).sum()) < 1e-6"
222 | ]
223 | }
224 | ],
225 | "metadata": {
226 | "celltoolbar": "Slideshow",
227 | "language_info": {
228 | "name": "python"
229 | },
230 | "required_libs": [],
231 | "rise": {
232 | "autolaunch": true,
233 | "enable_chalkboard": true,
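234 | "overlay": "<div class='my-top-right'><img height=80px src='http://d2l.ai/_static/logo-with-text.png'/></div><div class='my-top-left'></div>",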
235 | "scroll": true
236 | }
237 | },
238 | "nbformat": 4,
239 | "nbformat_minor": 5
240 | }
--------------------------------------------------------------------------------
/chapter_convolutional-neural-networks/conv-layer.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "id": "50832220",
6 | "metadata": {
7 | "slideshow": {
8 | "slide_type": "-"
9 | }
10 | },
11 | "source": [
12 | "# 图像卷积\n",
13 | "\n",
14 | "互相关运算"
15 | ]
16 | },
17 | {
18 | "cell_type": "code",
19 | "execution_count": 2,
20 | "id": "16abe7ca",
21 | "metadata": {
22 | "execution": {
23 | "iopub.execute_input": "2023-08-18T07:07:28.563668Z",
24 | "iopub.status.busy": "2023-08-18T07:07:28.562986Z",
25 | "iopub.status.idle": "2023-08-18T07:07:28.569424Z",
26 | "shell.execute_reply": "2023-08-18T07:07:28.568319Z"
27 | },
28 | "origin_pos": 4,
29 | "tab": [
30 | "pytorch"
31 | ]
32 | },
33 | "outputs": [],
34 | "source": [
35 | "import torch\n",
36 | "from torch import nn\n",
37 | "from d2l import torch as d2l\n",
38 | "\n",
39 | "def corr2d(X, K): \n",
40 | " \"\"\"计算二维互相关运算\"\"\"\n",
41 | " h, w = K.shape\n",
42 | " Y = torch.zeros((X.shape[0] - h + 1, X.shape[1] - w + 1))\n",
43 | " for i in range(Y.shape[0]):\n",
44 | " for j in range(Y.shape[1]):\n",
45 | " Y[i, j] = (X[i:i + h, j:j + w] * K).sum()\n",
46 | " return Y"
47 | ]
48 | },
49 | {
50 | "cell_type": "markdown",
51 | "id": "42171987",
52 | "metadata": {
53 | "slideshow": {
54 | "slide_type": "slide"
55 | }
56 | },
57 | "source": [
58 | "验证上述二维互相关运算的输出"
59 | ]
60 | },
61 | {
62 | "cell_type": "code",
63 | "execution_count": 3,
64 | "id": "6f84e512",
65 | "metadata": {
66 | "execution": {
67 | "iopub.execute_input": "2023-08-18T07:07:28.572958Z",
68 | "iopub.status.busy": "2023-08-18T07:07:28.572449Z",
69 | "iopub.status.idle": "2023-08-18T07:07:28.604854Z",
70 | "shell.execute_reply": "2023-08-18T07:07:28.603813Z"
71 | },
72 | "origin_pos": 7,
73 | "tab": [
74 | "pytorch"
75 | ]
76 | },
77 | "outputs": [
78 | {
79 | "data": {
80 | "text/plain": [
81 | "tensor([[19., 25.],\n",
82 | " [37., 43.]])"
83 | ]
84 | },
85 | "execution_count": 3,
86 | "metadata": {},
87 | "output_type": "execute_result"
88 | }
89 | ],
90 | "source": [
91 | "X = torch.tensor([[0.0, 1.0, 2.0], [3.0, 4.0, 5.0], [6.0, 7.0, 8.0]])\n",
92 | "K = torch.tensor([[0.0, 1.0], [2.0, 3.0]])\n",
93 | "corr2d(X, K)"
94 | ]
95 | },
96 | {
97 | "cell_type": "markdown",
98 | "id": "5abc9d97",
99 | "metadata": {
100 | "slideshow": {
101 | "slide_type": "slide"
102 | }
103 | },
104 | "source": [
105 | "实现二维卷积层"
106 | ]
107 | },
108 | {
109 | "cell_type": "code",
110 | "execution_count": 4,
111 | "id": "450def67",
112 | "metadata": {
113 | "execution": {
114 | "iopub.execute_input": "2023-08-18T07:07:28.610672Z",
115 | "iopub.status.busy": "2023-08-18T07:07:28.609819Z",
116 | "iopub.status.idle": "2023-08-18T07:07:28.615602Z",
117 | "shell.execute_reply": "2023-08-18T07:07:28.614632Z"
118 | },
119 | "origin_pos": 10,
120 | "tab": [
121 | "pytorch"
122 | ]
123 | },
124 | "outputs": [],
125 | "source": [
126 | "class Conv2D(nn.Module):\n",
127 | " def __init__(self, kernel_size):\n",
128 | " super().__init__()\n",
129 | " self.weight = nn.Parameter(torch.rand(kernel_size))\n",
130 | " self.bias = nn.Parameter(torch.zeros(1))\n",
131 | "\n",
132 | " def forward(self, x):\n",
133 | " return corr2d(x, self.weight) + self.bias"
134 | ]
135 | },
136 | {
137 | "cell_type": "markdown",
138 | "id": "c5b49b95",
139 | "metadata": {
140 | "slideshow": {
141 | "slide_type": "slide"
142 | }
143 | },
144 | "source": [
145 | "卷积层的一个简单应用:\n",
146 | "检测图像中不同颜色的边缘"
147 | ]
148 | },
149 | {
150 | "cell_type": "code",
151 | "execution_count": 5,
152 | "id": "dee1bc79",
153 | "metadata": {
154 | "execution": {
155 | "iopub.execute_input": "2023-08-18T07:07:28.620077Z",
156 | "iopub.status.busy": "2023-08-18T07:07:28.619277Z",
157 | "iopub.status.idle": "2023-08-18T07:07:28.626719Z",
158 | "shell.execute_reply": "2023-08-18T07:07:28.625746Z"
159 | },
160 | "origin_pos": 14,
161 | "tab": [
162 | "pytorch"
163 | ]
164 | },
165 | "outputs": [
166 | {
167 | "data": {
168 | "text/plain": [
169 | "tensor([[1., 1., 0., 0., 0., 0., 1., 1.],\n",
170 | " [1., 1., 0., 0., 0., 0., 1., 1.],\n",
171 | " [1., 1., 0., 0., 0., 0., 1., 1.],\n",
172 | " [1., 1., 0., 0., 0., 0., 1., 1.],\n",
173 | " [1., 1., 0., 0., 0., 0., 1., 1.],\n",
174 | " [1., 1., 0., 0., 0., 0., 1., 1.]])"
175 | ]
176 | },
177 | "execution_count": 5,
178 | "metadata": {},
179 | "output_type": "execute_result"
180 | }
181 | ],
182 | "source": [
183 | "X = torch.ones((6, 8))\n",
184 | "X[:, 2:6] = 0\n",
185 | "X"
186 | ]
187 | },
188 | {
189 | "cell_type": "code",
190 | "execution_count": 6,
191 | "id": "d042bda0",
192 | "metadata": {
193 | "execution": {
194 | "iopub.execute_input": "2023-08-18T07:07:28.630101Z",
195 | "iopub.status.busy": "2023-08-18T07:07:28.629606Z",
196 | "iopub.status.idle": "2023-08-18T07:07:28.634133Z",
197 | "shell.execute_reply": "2023-08-18T07:07:28.633165Z"
198 | },
199 | "origin_pos": 17,
200 | "tab": [
201 | "pytorch"
202 | ]
203 | },
204 | "outputs": [],
205 | "source": [
206 | "K = torch.tensor([[1.0, -1.0]])"
207 | ]
208 | },
209 | {
210 | "cell_type": "markdown",
211 | "id": "02307562",
212 | "metadata": {
213 | "slideshow": {
214 | "slide_type": "slide"
215 | }
216 | },
217 | "source": [
218 | "输出`Y`中的1代表从白色到黑色的边缘,-1代表从黑色到白色的边缘"
219 | ]
220 | },
221 | {
222 | "cell_type": "code",
223 | "execution_count": 7,
224 | "id": "36de9e2a",
225 | "metadata": {
226 | "execution": {
227 | "iopub.execute_input": "2023-08-18T07:07:28.639056Z",
228 | "iopub.status.busy": "2023-08-18T07:07:28.638505Z",
229 | "iopub.status.idle": "2023-08-18T07:07:28.646532Z",
230 | "shell.execute_reply": "2023-08-18T07:07:28.645509Z"
231 | },
232 | "origin_pos": 19,
233 | "tab": [
234 | "pytorch"
235 | ]
236 | },
237 | "outputs": [
238 | {
239 | "data": {
240 | "text/plain": [
241 | "tensor([[ 0., 1., 0., 0., 0., -1., 0.],\n",
242 | " [ 0., 1., 0., 0., 0., -1., 0.],\n",
243 | " [ 0., 1., 0., 0., 0., -1., 0.],\n",
244 | " [ 0., 1., 0., 0., 0., -1., 0.],\n",
245 | " [ 0., 1., 0., 0., 0., -1., 0.],\n",
246 | " [ 0., 1., 0., 0., 0., -1., 0.]])"
247 | ]
248 | },
249 | "execution_count": 7,
250 | "metadata": {},
251 | "output_type": "execute_result"
252 | }
253 | ],
254 | "source": [
255 | "Y = corr2d(X, K)\n",
256 | "Y"
257 | ]
258 | },
259 | {
260 | "cell_type": "markdown",
261 | "id": "f8fc54d3",
262 | "metadata": {
263 | "slideshow": {
264 | "slide_type": "slide"
265 | }
266 | },
267 | "source": [
268 | "卷积核`K`只可以检测垂直边缘"
269 | ]
270 | },
271 | {
272 | "cell_type": "code",
273 | "execution_count": 8,
274 | "id": "0a754b2d",
275 | "metadata": {
276 | "execution": {
277 | "iopub.execute_input": "2023-08-18T07:07:28.651371Z",
278 | "iopub.status.busy": "2023-08-18T07:07:28.650819Z",
279 | "iopub.status.idle": "2023-08-18T07:07:28.658419Z",
280 | "shell.execute_reply": "2023-08-18T07:07:28.657436Z"
281 | },
282 | "origin_pos": 21,
283 | "tab": [
284 | "pytorch"
285 | ]
286 | },
287 | "outputs": [
288 | {
289 | "data": {
290 | "text/plain": [
291 | "tensor([[0., 0., 0., 0., 0.],\n",
292 | " [0., 0., 0., 0., 0.],\n",
293 | " [0., 0., 0., 0., 0.],\n",
294 | " [0., 0., 0., 0., 0.],\n",
295 | " [0., 0., 0., 0., 0.],\n",
296 | " [0., 0., 0., 0., 0.],\n",
297 | " [0., 0., 0., 0., 0.],\n",
298 | " [0., 0., 0., 0., 0.]])"
299 | ]
300 | },
301 | "execution_count": 8,
302 | "metadata": {},
303 | "output_type": "execute_result"
304 | }
305 | ],
306 | "source": [
307 | "corr2d(X.t(), K)"
308 | ]
309 | },
310 | {
311 | "cell_type": "markdown",
312 | "id": "d3c3d2e5",
313 | "metadata": {
314 | "slideshow": {
315 | "slide_type": "slide"
316 | }
317 | },
318 | "source": [
319 | "学习由`X`生成`Y`的卷积核"
320 | ]
321 | },
322 | {
323 | "cell_type": "code",
324 | "execution_count": 9,
325 | "id": "2b423578",
326 | "metadata": {
327 | "execution": {
328 | "iopub.execute_input": "2023-08-18T07:07:28.662260Z",
329 | "iopub.status.busy": "2023-08-18T07:07:28.661527Z",
330 | "iopub.status.idle": "2023-08-18T07:07:28.681412Z",
331 | "shell.execute_reply": "2023-08-18T07:07:28.680192Z"
332 | },
333 | "origin_pos": 24,
334 | "tab": [
335 | "pytorch"
336 | ]
337 | },
338 | "outputs": [
339 | {
340 | "name": "stdout",
341 | "output_type": "stream",
342 | "text": [
343 | "epoch 2, loss 6.422\n",
344 | "epoch 4, loss 1.225\n",
345 | "epoch 6, loss 0.266\n",
346 | "epoch 8, loss 0.070\n",
347 | "epoch 10, loss 0.022\n"
348 | ]
349 | }
350 | ],
351 | "source": [
352 | "conv2d = nn.Conv2d(1,1, kernel_size=(1, 2), bias=False)\n",
353 | "\n",
354 | "X = X.reshape((1, 1, 6, 8))\n",
355 | "Y = Y.reshape((1, 1, 6, 7))\n",
356 | "lr = 3e-2\n",
357 | "\n",
358 | "for i in range(10):\n",
359 | " Y_hat = conv2d(X)\n",
360 | " l = (Y_hat - Y) ** 2\n",
361 | " conv2d.zero_grad()\n",
362 | " l.sum().backward()\n",
363 | " conv2d.weight.data[:] -= lr * conv2d.weight.grad\n",
364 | " if (i + 1) % 2 == 0:\n",
365 | " print(f'epoch {i+1}, loss {l.sum():.3f}')"
366 | ]
367 | },
368 | {
369 | "cell_type": "markdown",
370 | "id": "292614cd",
371 | "metadata": {
372 | "slideshow": {
373 | "slide_type": "slide"
374 | }
375 | },
376 | "source": [
377 | "所学的卷积核的权重张量"
378 | ]
379 | },
380 | {
381 | "cell_type": "code",
382 | "execution_count": 10,
383 | "id": "b40515e8",
384 | "metadata": {
385 | "execution": {
386 | "iopub.execute_input": "2023-08-18T07:07:28.684721Z",
387 | "iopub.status.busy": "2023-08-18T07:07:28.684428Z",
388 | "iopub.status.idle": "2023-08-18T07:07:28.691507Z",
389 | "shell.execute_reply": "2023-08-18T07:07:28.690512Z"
390 | },
391 | "origin_pos": 29,
392 | "tab": [
393 | "pytorch"
394 | ]
395 | },
396 | "outputs": [
397 | {
398 | "data": {
399 | "text/plain": [
400 | "tensor([[ 1.0010, -0.9739]])"
401 | ]
402 | },
403 | "execution_count": 10,
404 | "metadata": {},
405 | "output_type": "execute_result"
406 | }
407 | ],
408 | "source": [
409 | "conv2d.weight.data.reshape((1, 2))"
410 | ]
411 | }
412 | ],
413 | "metadata": {
414 | "celltoolbar": "Slideshow",
415 | "language_info": {
416 | "name": "python"
417 | },
418 | "required_libs": [],
419 | "rise": {
420 | "autolaunch": true,
421 | "enable_chalkboard": true,
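422 | "overlay": "<div class='my-top-right'><img height=80px src='http://d2l.ai/_static/logo-with-text.png'/></div><div class='my-top-left'></div>",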
423 | "scroll": true
424 | }
425 | },
426 | "nbformat": 4,
427 | "nbformat_minor": 5
428 | }
--------------------------------------------------------------------------------
/chapter_convolutional-neural-networks/padding-and-strides.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "id": "7d4404d4",
6 | "metadata": {
7 | "slideshow": {
8 | "slide_type": "-"
9 | }
10 | },
11 | "source": [
12 | "# 填充和步幅\n",
13 | "\n",
14 | "在所有侧边填充1个像素"
15 | ]
16 | },
17 | {
18 | "cell_type": "code",
19 | "execution_count": 1,
20 | "id": "ee25ca28",
21 | "metadata": {
22 | "execution": {
23 | "iopub.execute_input": "2023-08-18T07:00:27.440657Z",
24 | "iopub.status.busy": "2023-08-18T07:00:27.439788Z",
25 | "iopub.status.idle": "2023-08-18T07:00:28.396461Z",
26 | "shell.execute_reply": "2023-08-18T07:00:28.395508Z"
27 | },
28 | "origin_pos": 2,
29 | "tab": [
30 | "pytorch"
31 | ]
32 | },
33 | "outputs": [
34 | {
35 | "data": {
36 | "text/plain": [
37 | "torch.Size([8, 8])"
38 | ]
39 | },
40 | "execution_count": 1,
41 | "metadata": {},
42 | "output_type": "execute_result"
43 | }
44 | ],
45 | "source": [
46 | "import torch\n",
47 | "from torch import nn\n",
48 | "\n",
49 | "\n",
50 | "def comp_conv2d(conv2d, X):\n",
51 | " X = X.reshape((1, 1) + X.shape)\n",
52 | " Y = conv2d(X)\n",
53 | " return Y.reshape(Y.shape[2:])\n",
54 | "\n",
55 | "conv2d = nn.Conv2d(1, 1, kernel_size=3, padding=1)\n",
56 | "X = torch.rand(size=(8, 8))\n",
57 | "comp_conv2d(conv2d, X).shape"
58 | ]
59 | },
60 | {
61 | "cell_type": "markdown",
62 | "id": "04e04824",
63 | "metadata": {
64 | "slideshow": {
65 | "slide_type": "slide"
66 | }
67 | },
68 | "source": [
69 | "填充不同的高度和宽度"
70 | ]
71 | },
72 | {
73 | "cell_type": "code",
74 | "execution_count": 2,
75 | "id": "5dadebb1",
76 | "metadata": {
77 | "execution": {
78 | "iopub.execute_input": "2023-08-18T07:00:28.400923Z",
79 | "iopub.status.busy": "2023-08-18T07:00:28.400085Z",
80 | "iopub.status.idle": "2023-08-18T07:00:28.406887Z",
81 | "shell.execute_reply": "2023-08-18T07:00:28.406085Z"
82 | },
83 | "origin_pos": 7,
84 | "tab": [
85 | "pytorch"
86 | ]
87 | },
88 | "outputs": [
89 | {
90 | "data": {
91 | "text/plain": [
92 | "torch.Size([8, 8])"
93 | ]
94 | },
95 | "execution_count": 2,
96 | "metadata": {},
97 | "output_type": "execute_result"
98 | }
99 | ],
100 | "source": [
101 | "conv2d = nn.Conv2d(1, 1, kernel_size=(5, 3), padding=(2, 1))\n",
102 | "comp_conv2d(conv2d, X).shape"
103 | ]
104 | },
105 | {
106 | "cell_type": "markdown",
107 | "id": "01e7aa78",
108 | "metadata": {
109 | "slideshow": {
110 | "slide_type": "slide"
111 | }
112 | },
113 | "source": [
114 | "将高度和宽度的步幅设置为2"
115 | ]
116 | },
117 | {
118 | "cell_type": "code",
119 | "execution_count": 3,
120 | "id": "7b6ac278",
121 | "metadata": {
122 | "execution": {
123 | "iopub.execute_input": "2023-08-18T07:00:28.410395Z",
124 | "iopub.status.busy": "2023-08-18T07:00:28.410090Z",
125 | "iopub.status.idle": "2023-08-18T07:00:28.416621Z",
126 | "shell.execute_reply": "2023-08-18T07:00:28.415848Z"
127 | },
128 | "origin_pos": 12,
129 | "tab": [
130 | "pytorch"
131 | ]
132 | },
133 | "outputs": [
134 | {
135 | "data": {
136 | "text/plain": [
137 | "torch.Size([4, 4])"
138 | ]
139 | },
140 | "execution_count": 3,
141 | "metadata": {},
142 | "output_type": "execute_result"
143 | }
144 | ],
145 | "source": [
146 | "conv2d = nn.Conv2d(1, 1, kernel_size=3, padding=1, stride=2)\n",
147 | "comp_conv2d(conv2d, X).shape"
148 | ]
149 | },
150 | {
151 | "cell_type": "markdown",
152 | "id": "53265c61",
153 | "metadata": {
154 | "slideshow": {
155 | "slide_type": "-"
156 | }
157 | },
158 | "source": [
159 | "一个稍微复杂的例子"
160 | ]
161 | },
162 | {
163 | "cell_type": "code",
164 | "execution_count": 4,
165 | "id": "6f1c0e6c",
166 | "metadata": {
167 | "execution": {
168 | "iopub.execute_input": "2023-08-18T07:00:28.422070Z",
169 | "iopub.status.busy": "2023-08-18T07:00:28.421461Z",
170 | "iopub.status.idle": "2023-08-18T07:00:28.429200Z",
171 | "shell.execute_reply": "2023-08-18T07:00:28.427969Z"
172 | },
173 | "origin_pos": 17,
174 | "tab": [
175 | "pytorch"
176 | ]
177 | },
178 | "outputs": [
179 | {
180 | "data": {
181 | "text/plain": [
182 | "torch.Size([2, 2])"
183 | ]
184 | },
185 | "execution_count": 4,
186 | "metadata": {},
187 | "output_type": "execute_result"
188 | }
189 | ],
190 | "source": [
191 | "conv2d = nn.Conv2d(1, 1, kernel_size=(3, 5), padding=(0, 1), stride=(3, 4))\n",
192 | "comp_conv2d(conv2d, X).shape"
193 | ]
194 | }
195 | ],
196 | "metadata": {
197 | "celltoolbar": "Slideshow",
198 | "language_info": {
199 | "name": "python"
200 | },
201 | "required_libs": [],
202 | "rise": {
203 | "autolaunch": true,
204 | "enable_chalkboard": true,
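205 | "overlay": "<div class='my-top-right'><img height=80px src='http://d2l.ai/_static/logo-with-text.png'/></div><div class='my-top-left'></div>",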
206 | "scroll": true
207 | }
208 | },
209 | "nbformat": 4,
210 | "nbformat_minor": 5
211 | }
--------------------------------------------------------------------------------
/chapter_convolutional-neural-networks/pooling.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "id": "a3a254cb",
6 | "metadata": {
7 | "slideshow": {
8 | "slide_type": "-"
9 | }
10 | },
11 | "source": [
12 | "# 汇聚层\n",
13 | "\n",
14 | "实现汇聚层的前向传播"
15 | ]
16 | },
17 | {
18 | "cell_type": "code",
19 | "execution_count": 2,
20 | "id": "fe35adac",
21 | "metadata": {
22 | "execution": {
23 | "iopub.execute_input": "2023-08-18T07:02:20.228639Z",
24 | "iopub.status.busy": "2023-08-18T07:02:20.227964Z",
25 | "iopub.status.idle": "2023-08-18T07:02:20.234155Z",
26 | "shell.execute_reply": "2023-08-18T07:02:20.233266Z"
27 | },
28 | "origin_pos": 4,
29 | "tab": [
30 | "pytorch"
31 | ]
32 | },
33 | "outputs": [],
34 | "source": [
35 | "import torch\n",
36 | "from torch import nn\n",
37 | "from d2l import torch as d2l\n",
38 | "\n",
39 | "def pool2d(X, pool_size, mode='max'):\n",
40 | " p_h, p_w = pool_size\n",
41 | " Y = torch.zeros((X.shape[0] - p_h + 1, X.shape[1] - p_w + 1))\n",
42 | " for i in range(Y.shape[0]):\n",
43 | " for j in range(Y.shape[1]):\n",
44 | " if mode == 'max':\n",
45 | " Y[i, j] = X[i: i + p_h, j: j + p_w].max()\n",
46 | " elif mode == 'avg':\n",
47 | " Y[i, j] = X[i: i + p_h, j: j + p_w].mean()\n",
48 | " return Y"
49 | ]
50 | },
51 | {
52 | "cell_type": "markdown",
53 | "id": "0cee43f1",
54 | "metadata": {
55 | "slideshow": {
56 | "slide_type": "slide"
57 | }
58 | },
59 | "source": [
60 | "验证二维最大汇聚层的输出"
61 | ]
62 | },
63 | {
64 | "cell_type": "code",
65 | "execution_count": 3,
66 | "id": "3a781c85",
67 | "metadata": {
68 | "execution": {
69 | "iopub.execute_input": "2023-08-18T07:02:20.237767Z",
70 | "iopub.status.busy": "2023-08-18T07:02:20.237211Z",
71 | "iopub.status.idle": "2023-08-18T07:02:20.268065Z",
72 | "shell.execute_reply": "2023-08-18T07:02:20.267212Z"
73 | },
74 | "origin_pos": 7,
75 | "tab": [
76 | "pytorch"
77 | ]
78 | },
79 | "outputs": [
80 | {
81 | "data": {
82 | "text/plain": [
83 | "tensor([[4., 5.],\n",
84 | " [7., 8.]])"
85 | ]
86 | },
87 | "execution_count": 3,
88 | "metadata": {},
89 | "output_type": "execute_result"
90 | }
91 | ],
92 | "source": [
93 | "X = torch.tensor([[0.0, 1.0, 2.0], [3.0, 4.0, 5.0], [6.0, 7.0, 8.0]])\n",
94 | "pool2d(X, (2, 2))"
95 | ]
96 | },
97 | {
98 | "cell_type": "markdown",
99 | "id": "2a7b5d65",
100 | "metadata": {
101 | "slideshow": {
102 | "slide_type": "-"
103 | }
104 | },
105 | "source": [
106 | "验证平均汇聚层"
107 | ]
108 | },
109 | {
110 | "cell_type": "code",
111 | "execution_count": 4,
112 | "id": "4f9a1ffd",
113 | "metadata": {
114 | "execution": {
115 | "iopub.execute_input": "2023-08-18T07:02:20.272001Z",
116 | "iopub.status.busy": "2023-08-18T07:02:20.271411Z",
117 | "iopub.status.idle": "2023-08-18T07:02:20.277849Z",
118 | "shell.execute_reply": "2023-08-18T07:02:20.276928Z"
119 | },
120 | "origin_pos": 9,
121 | "tab": [
122 | "pytorch"
123 | ]
124 | },
125 | "outputs": [
126 | {
127 | "data": {
128 | "text/plain": [
129 | "tensor([[2., 3.],\n",
130 | " [5., 6.]])"
131 | ]
132 | },
133 | "execution_count": 4,
134 | "metadata": {},
135 | "output_type": "execute_result"
136 | }
137 | ],
138 | "source": [
139 | "pool2d(X, (2, 2), 'avg')"
140 | ]
141 | },
142 | {
143 | "cell_type": "markdown",
144 | "id": "941b8124",
145 | "metadata": {
146 | "slideshow": {
147 | "slide_type": "slide"
148 | }
149 | },
150 | "source": [
151 | "填充和步幅"
152 | ]
153 | },
154 | {
155 | "cell_type": "code",
156 | "execution_count": 5,
157 | "id": "140d08f5",
158 | "metadata": {
159 | "execution": {
160 | "iopub.execute_input": "2023-08-18T07:02:20.281458Z",
161 | "iopub.status.busy": "2023-08-18T07:02:20.280874Z",
162 | "iopub.status.idle": "2023-08-18T07:02:20.287391Z",
163 | "shell.execute_reply": "2023-08-18T07:02:20.286578Z"
164 | },
165 | "origin_pos": 12,
166 | "tab": [
167 | "pytorch"
168 | ]
169 | },
170 | "outputs": [
171 | {
172 | "data": {
173 | "text/plain": [
174 | "tensor([[[[ 0., 1., 2., 3.],\n",
175 | " [ 4., 5., 6., 7.],\n",
176 | " [ 8., 9., 10., 11.],\n",
177 | " [12., 13., 14., 15.]]]])"
178 | ]
179 | },
180 | "execution_count": 5,
181 | "metadata": {},
182 | "output_type": "execute_result"
183 | }
184 | ],
185 | "source": [
186 | "X = torch.arange(16, dtype=torch.float32).reshape((1, 1, 4, 4))\n",
187 | "X"
188 | ]
189 | },
190 | {
191 | "cell_type": "markdown",
192 | "id": "43710341",
193 | "metadata": {
194 | "slideshow": {
195 | "slide_type": "-"
196 | }
197 | },
198 | "source": [
199 | "默认情况下,深度学习框架中的步幅与汇聚窗口的大小相同"
200 | ]
201 | },
202 | {
203 | "cell_type": "code",
204 | "execution_count": 6,
205 | "id": "a3cc01e3",
206 | "metadata": {
207 | "execution": {
208 | "iopub.execute_input": "2023-08-18T07:02:20.291052Z",
209 | "iopub.status.busy": "2023-08-18T07:02:20.290402Z",
210 | "iopub.status.idle": "2023-08-18T07:02:20.296276Z",
211 | "shell.execute_reply": "2023-08-18T07:02:20.295476Z"
212 | },
213 | "origin_pos": 17,
214 | "tab": [
215 | "pytorch"
216 | ]
217 | },
218 | "outputs": [
219 | {
220 | "data": {
221 | "text/plain": [
222 | "tensor([[[[10.]]]])"
223 | ]
224 | },
225 | "execution_count": 6,
226 | "metadata": {},
227 | "output_type": "execute_result"
228 | }
229 | ],
230 | "source": [
231 | "pool2d = nn.MaxPool2d(3)\n",
232 | "pool2d(X)"
233 | ]
234 | },
235 | {
236 | "cell_type": "markdown",
237 | "id": "4b86f339",
238 | "metadata": {
239 | "slideshow": {
240 | "slide_type": "slide"
241 | }
242 | },
243 | "source": [
244 | "填充和步幅可以手动设定"
245 | ]
246 | },
247 | {
248 | "cell_type": "code",
249 | "execution_count": 7,
250 | "id": "9c247428",
251 | "metadata": {
252 | "execution": {
253 | "iopub.execute_input": "2023-08-18T07:02:20.299965Z",
254 | "iopub.status.busy": "2023-08-18T07:02:20.299310Z",
255 | "iopub.status.idle": "2023-08-18T07:02:20.307455Z",
256 | "shell.execute_reply": "2023-08-18T07:02:20.306477Z"
257 | },
258 | "origin_pos": 22,
259 | "tab": [
260 | "pytorch"
261 | ]
262 | },
263 | "outputs": [
264 | {
265 | "data": {
266 | "text/plain": [
267 | "tensor([[[[ 5., 7.],\n",
268 | " [13., 15.]]]])"
269 | ]
270 | },
271 | "execution_count": 7,
272 | "metadata": {},
273 | "output_type": "execute_result"
274 | }
275 | ],
276 | "source": [
277 | "pool2d = nn.MaxPool2d(3, padding=1, stride=2)\n",
278 | "pool2d(X)"
279 | ]
280 | },
281 | {
282 | "cell_type": "markdown",
283 | "id": "7295d3e3",
284 | "metadata": {
285 | "slideshow": {
286 | "slide_type": "-"
287 | }
288 | },
289 | "source": [
290 | "设定一个任意大小的矩形汇聚窗口,并分别设定填充和步幅的高度和宽度"
291 | ]
292 | },
293 | {
294 | "cell_type": "code",
295 | "execution_count": 8,
296 | "id": "7c169b2f",
297 | "metadata": {
298 | "execution": {
299 | "iopub.execute_input": "2023-08-18T07:02:20.311794Z",
300 | "iopub.status.busy": "2023-08-18T07:02:20.311492Z",
301 | "iopub.status.idle": "2023-08-18T07:02:20.320399Z",
302 | "shell.execute_reply": "2023-08-18T07:02:20.319108Z"
303 | },
304 | "origin_pos": 30,
305 | "tab": [
306 | "pytorch"
307 | ]
308 | },
309 | "outputs": [
310 | {
311 | "data": {
312 | "text/plain": [
313 | "tensor([[[[ 5., 7.],\n",
314 | " [13., 15.]]]])"
315 | ]
316 | },
317 | "execution_count": 8,
318 | "metadata": {},
319 | "output_type": "execute_result"
320 | }
321 | ],
322 | "source": [
323 | "pool2d = nn.MaxPool2d((2, 3), stride=(2, 3), padding=(0, 1))\n",
324 | "pool2d(X)"
325 | ]
326 | },
327 | {
328 | "cell_type": "markdown",
329 | "id": "daa999f2",
330 | "metadata": {
331 | "slideshow": {
332 | "slide_type": "slide"
333 | }
334 | },
335 | "source": [
336 | "汇聚层在每个输入通道上单独运算"
337 | ]
338 | },
339 | {
340 | "cell_type": "code",
341 | "execution_count": 9,
342 | "id": "c0a30a7f",
343 | "metadata": {
344 | "execution": {
345 | "iopub.execute_input": "2023-08-18T07:02:20.325617Z",
346 | "iopub.status.busy": "2023-08-18T07:02:20.324879Z",
347 | "iopub.status.idle": "2023-08-18T07:02:20.335303Z",
348 | "shell.execute_reply": "2023-08-18T07:02:20.334055Z"
349 | },
350 | "origin_pos": 35,
351 | "tab": [
352 | "pytorch"
353 | ]
354 | },
355 | "outputs": [
356 | {
357 | "data": {
358 | "text/plain": [
359 | "tensor([[[[ 0., 1., 2., 3.],\n",
360 | " [ 4., 5., 6., 7.],\n",
361 | " [ 8., 9., 10., 11.],\n",
362 | " [12., 13., 14., 15.]],\n",
363 | "\n",
364 | " [[ 1., 2., 3., 4.],\n",
365 | " [ 5., 6., 7., 8.],\n",
366 | " [ 9., 10., 11., 12.],\n",
367 | " [13., 14., 15., 16.]]]])"
368 | ]
369 | },
370 | "execution_count": 9,
371 | "metadata": {},
372 | "output_type": "execute_result"
373 | }
374 | ],
375 | "source": [
376 | "X = torch.cat((X, X + 1), 1)\n",
377 | "X"
378 | ]
379 | },
380 | {
381 | "cell_type": "code",
382 | "execution_count": 10,
383 | "id": "e534c8f3",
384 | "metadata": {
385 | "execution": {
386 | "iopub.execute_input": "2023-08-18T07:02:20.340529Z",
387 | "iopub.status.busy": "2023-08-18T07:02:20.339767Z",
388 | "iopub.status.idle": "2023-08-18T07:02:20.349365Z",
389 | "shell.execute_reply": "2023-08-18T07:02:20.348159Z"
390 | },
391 | "origin_pos": 39,
392 | "tab": [
393 | "pytorch"
394 | ]
395 | },
396 | "outputs": [
397 | {
398 | "data": {
399 | "text/plain": [
400 | "tensor([[[[ 5., 7.],\n",
401 | " [13., 15.]],\n",
402 | "\n",
403 | " [[ 6., 8.],\n",
404 | " [14., 16.]]]])"
405 | ]
406 | },
407 | "execution_count": 10,
408 | "metadata": {},
409 | "output_type": "execute_result"
410 | }
411 | ],
412 | "source": [
413 | "pool2d = nn.MaxPool2d(3, padding=1, stride=2)\n",
414 | "pool2d(X)"
415 | ]
416 | }
417 | ],
418 | "metadata": {
419 | "celltoolbar": "Slideshow",
420 | "language_info": {
421 | "name": "python"
422 | },
423 | "required_libs": [],
424 | "rise": {
425 | "autolaunch": true,
426 | "enable_chalkboard": true,
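427 | "overlay": "<div class='my-top-right'><img height=80px src='http://d2l.ai/_static/logo-with-text.png'/></div><div class='my-top-left'></div>",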
428 | "scroll": true
429 | }
430 | },
431 | "nbformat": 4,
432 | "nbformat_minor": 5
433 | }
--------------------------------------------------------------------------------
/chapter_convolutional-neural-networks/rise.css:
--------------------------------------------------------------------------------
1 |
2 | div.text_cell_render.rendered_html {
3 | padding: 0.35em 0.1em;
4 | }
5 |
6 | div.code_cell {
7 | font-size: 120%;
8 | }
9 |
10 | div.my-top-right {
11 | position: absolute;
12 | right: 5%;
13 | top: 1em;
14 | font-size: 2em;
15 | }
16 |
17 | div.my-top-left {
18 | position: absolute;
19 | left: 5%;
20 | top: 1em;
21 | font-size: 2em;
22 | }
23 |
--------------------------------------------------------------------------------
/chapter_deep-learning-computation/custom-layer.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "id": "e91e1a31",
6 | "metadata": {
7 | "slideshow": {
8 | "slide_type": "-"
9 | }
10 | },
11 | "source": [
12 | "# 自定义层\n",
13 | "\n",
14 | "构造一个没有任何参数的自定义层"
15 | ]
16 | },
17 | {
18 | "cell_type": "code",
19 | "execution_count": 2,
20 | "id": "dec68045",
21 | "metadata": {
22 | "execution": {
23 | "iopub.execute_input": "2023-08-18T07:07:17.497408Z",
24 | "iopub.status.busy": "2023-08-18T07:07:17.497077Z",
25 | "iopub.status.idle": "2023-08-18T07:07:17.508357Z",
26 | "shell.execute_reply": "2023-08-18T07:07:17.507175Z"
27 | },
28 | "origin_pos": 7,
29 | "tab": [
30 | "pytorch"
31 | ]
32 | },
33 | "outputs": [
34 | {
35 | "data": {
36 | "text/plain": [
37 | "tensor([-2., -1., 0., 1., 2.])"
38 | ]
39 | },
40 | "execution_count": 2,
41 | "metadata": {},
42 | "output_type": "execute_result"
43 | }
44 | ],
45 | "source": [
46 | "import torch\n",
47 | "import torch.nn.functional as F\n",
48 | "from torch import nn\n",
49 | "\n",
50 | "\n",
51 | "class CenteredLayer(nn.Module):  # parameter-free layer: subtracts the mean of its input\n",
52 | "    def __init__(self):\n",
53 | "        super().__init__()\n",
54 | "\n",
55 | "    def forward(self, X):\n",
56 | "        return X - X.mean()  # mean is taken over all elements of X\n",
57 | "\n",
58 | "layer = CenteredLayer()\n",
59 | "layer(torch.tensor([1, 2, 3, 4, 5], dtype=torch.float32))"
60 | ]
61 | },
62 | {
63 | "cell_type": "markdown",
64 | "id": "05f8a223",
65 | "metadata": {
66 | "slideshow": {
67 | "slide_type": "slide"
68 | }
69 | },
70 | "source": [
71 | "将层作为组件合并到更复杂的模型中"
72 | ]
73 | },
74 | {
75 | "cell_type": "code",
76 | "execution_count": 4,
77 | "id": "6ab302a0",
78 | "metadata": {
79 | "execution": {
80 | "iopub.execute_input": "2023-08-18T07:07:17.523517Z",
81 | "iopub.status.busy": "2023-08-18T07:07:17.523140Z",
82 | "iopub.status.idle": "2023-08-18T07:07:17.534718Z",
83 | "shell.execute_reply": "2023-08-18T07:07:17.533593Z"
84 | },
85 | "origin_pos": 16,
86 | "tab": [
87 | "pytorch"
88 | ]
89 | },
90 | "outputs": [
91 | {
92 | "data": {
93 | "text/plain": [
94 | "tensor(7.4506e-09, grad_fn=<MeanBackward0>)"
95 | ]
96 | },
97 | "execution_count": 4,
98 | "metadata": {},
99 | "output_type": "execute_result"
100 | }
101 | ],
102 | "source": [
103 | "net = nn.Sequential(nn.Linear(8, 128), CenteredLayer())\n",
104 | "\n",
105 | "Y = net(torch.rand(4, 8))\n",
106 | "Y.mean()"
107 | ]
108 | },
109 | {
110 | "cell_type": "markdown",
111 | "id": "568d0d74",
112 | "metadata": {
113 | "slideshow": {
114 | "slide_type": "slide"
115 | }
116 | },
117 | "source": [
118 | "带参数的层"
119 | ]
120 | },
121 | {
122 | "cell_type": "code",
123 | "execution_count": 6,
124 | "id": "4490005a",
125 | "metadata": {
126 | "execution": {
127 | "iopub.execute_input": "2023-08-18T07:07:17.550522Z",
128 | "iopub.status.busy": "2023-08-18T07:07:17.550152Z",
129 | "iopub.status.idle": "2023-08-18T07:07:17.558364Z",
130 | "shell.execute_reply": "2023-08-18T07:07:17.557338Z"
131 | },
132 | "origin_pos": 28,
133 | "tab": [
134 | "pytorch"
135 | ]
136 | },
137 | "outputs": [
138 | {
139 | "data": {
140 | "text/plain": [
141 | "Parameter containing:\n",
142 | "tensor([[ 0.1775, -1.4539, 0.3972],\n",
143 | " [-0.1339, 0.5273, 1.3041],\n",
144 | " [-0.3327, -0.2337, -0.6334],\n",
145 | " [ 1.2076, -0.3937, 0.6851],\n",
146 | " [-0.4716, 0.0894, -0.9195]], requires_grad=True)"
147 | ]
148 | },
149 | "execution_count": 6,
150 | "metadata": {},
151 | "output_type": "execute_result"
152 | }
153 | ],
154 | "source": [
155 | "class MyLinear(nn.Module):  # fully connected layer with learnable weight/bias followed by ReLU\n",
156 | "    def __init__(self, in_units, units):\n",
157 | "        super().__init__()\n",
158 | "        self.weight = nn.Parameter(torch.randn(in_units, units))\n",
159 | "        self.bias = nn.Parameter(torch.randn(units,))\n",
160 | "    def forward(self, X):\n",
161 | "        linear = torch.matmul(X, self.weight) + self.bias  # no .data: keep autograd tracking so the layer can train\n",
162 | "        return F.relu(linear)\n",
163 | "\n",
164 | "linear = MyLinear(5, 3)\n",
165 | "linear.weight"
166 | ]
167 | },
168 | {
169 | "cell_type": "markdown",
170 | "id": "9c8d6fca",
171 | "metadata": {
172 | "slideshow": {
173 | "slide_type": "slide"
174 | }
175 | },
176 | "source": [
177 | "使用自定义层直接执行前向传播计算"
178 | ]
179 | },
180 | {
181 | "cell_type": "code",
182 | "execution_count": 7,
183 | "id": "25f2aabf",
184 | "metadata": {
185 | "execution": {
186 | "iopub.execute_input": "2023-08-18T07:07:17.562706Z",
187 | "iopub.status.busy": "2023-08-18T07:07:17.562337Z",
188 | "iopub.status.idle": "2023-08-18T07:07:17.570015Z",
189 | "shell.execute_reply": "2023-08-18T07:07:17.568916Z"
190 | },
191 | "origin_pos": 32,
192 | "tab": [
193 | "pytorch"
194 | ]
195 | },
196 | "outputs": [
197 | {
198 | "data": {
199 | "text/plain": [
200 | "tensor([[0., 0., 0.],\n",
201 | " [0., 0., 0.]])"
202 | ]
203 | },
204 | "execution_count": 7,
205 | "metadata": {},
206 | "output_type": "execute_result"
207 | }
208 | ],
209 | "source": [
210 | "linear(torch.rand(2, 5))"
211 | ]
212 | },
213 | {
214 | "cell_type": "markdown",
215 | "id": "393a154e",
216 | "metadata": {
217 | "slideshow": {
218 | "slide_type": "-"
219 | }
220 | },
221 | "source": [
222 | "使用自定义层构建模型"
223 | ]
224 | },
225 | {
226 | "cell_type": "code",
227 | "execution_count": 8,
228 | "id": "fb2953e8",
229 | "metadata": {
230 | "execution": {
231 | "iopub.execute_input": "2023-08-18T07:07:17.574378Z",
232 | "iopub.status.busy": "2023-08-18T07:07:17.574000Z",
233 | "iopub.status.idle": "2023-08-18T07:07:17.582792Z",
234 | "shell.execute_reply": "2023-08-18T07:07:17.581735Z"
235 | },
236 | "origin_pos": 37,
237 | "tab": [
238 | "pytorch"
239 | ]
240 | },
241 | "outputs": [
242 | {
243 | "data": {
244 | "text/plain": [
245 | "tensor([[0.],\n",
246 | " [0.]])"
247 | ]
248 | },
249 | "execution_count": 8,
250 | "metadata": {},
251 | "output_type": "execute_result"
252 | }
253 | ],
254 | "source": [
255 | "net = nn.Sequential(MyLinear(64, 8), MyLinear(8, 1))\n",
256 | "net(torch.rand(2, 64))"
257 | ]
258 | }
259 | ],
260 | "metadata": {
261 | "celltoolbar": "Slideshow",
262 | "language_info": {
263 | "name": "python"
264 | },
265 | "required_libs": [],
266 | "rise": {
267 | "autolaunch": true,
268 | "enable_chalkboard": true,
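269 | "overlay": "<div class='my-top-right'><img height=80px src='http://d2l.ai/_static/logo-with-text.png'/></div><div class='my-top-left'></div>",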
270 | "scroll": true
271 | }
272 | },
273 | "nbformat": 4,
274 | "nbformat_minor": 5
275 | }
--------------------------------------------------------------------------------
/chapter_deep-learning-computation/model-construction.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "id": "8d628b46",
6 | "metadata": {
7 | "slideshow": {
8 | "slide_type": "-"
9 | }
10 | },
11 | "source": [
12 | "# 层和块\n",
13 | "\n",
14 | "我们先回顾一下多层感知机"
15 | ]
16 | },
17 | {
18 | "cell_type": "code",
19 | "execution_count": 1,
20 | "id": "9895e279",
21 | "metadata": {
22 | "execution": {
23 | "iopub.execute_input": "2023-08-18T06:57:00.244437Z",
24 | "iopub.status.busy": "2023-08-18T06:57:00.243813Z",
25 | "iopub.status.idle": "2023-08-18T06:57:01.320999Z",
26 | "shell.execute_reply": "2023-08-18T06:57:01.320186Z"
27 | },
28 | "origin_pos": 2,
29 | "tab": [
30 | "pytorch"
31 | ]
32 | },
33 | "outputs": [
34 | {
35 | "data": {
36 | "text/plain": [
37 | "tensor([[ 0.0343, 0.0264, 0.2505, -0.0243, 0.0945, 0.0012, -0.0141, 0.0666,\n",
38 | " -0.0547, -0.0667],\n",
39 | " [ 0.0772, -0.0274, 0.2638, -0.0191, 0.0394, -0.0324, 0.0102, 0.0707,\n",
40 | " -0.1481, -0.1031]], grad_fn=<AddmmBackward0>)"
41 | ]
42 | },
43 | "execution_count": 1,
44 | "metadata": {},
45 | "output_type": "execute_result"
46 | }
47 | ],
48 | "source": [
49 | "import torch\n",
50 | "from torch import nn\n",
51 | "from torch.nn import functional as F\n",
52 | "\n",
53 | "net = nn.Sequential(nn.Linear(20, 256), nn.ReLU(), nn.Linear(256, 10))\n",
54 | "\n",
55 | "X = torch.rand(2, 20)\n",
56 | "net(X)"
57 | ]
58 | },
59 | {
60 | "cell_type": "markdown",
61 | "id": "389483a7",
62 | "metadata": {
63 | "slideshow": {
64 | "slide_type": "-"
65 | }
66 | },
67 | "source": [
68 | "`nn.Sequential`定义了一种特殊的`Module`"
69 | ]
70 | },
71 | {
72 | "cell_type": "markdown",
73 | "id": "c9adf2a5",
74 | "metadata": {
75 | "slideshow": {
76 | "slide_type": "slide"
77 | }
78 | },
79 | "source": [
80 | "自定义块"
81 | ]
82 | },
83 | {
84 | "cell_type": "code",
85 | "execution_count": 2,
86 | "id": "876df867",
87 | "metadata": {
88 | "execution": {
89 | "iopub.execute_input": "2023-08-18T06:57:01.325541Z",
90 | "iopub.status.busy": "2023-08-18T06:57:01.324828Z",
91 | "iopub.status.idle": "2023-08-18T06:57:01.330411Z",
92 | "shell.execute_reply": "2023-08-18T06:57:01.329591Z"
93 | },
94 | "origin_pos": 14,
95 | "tab": [
96 | "pytorch"
97 | ]
98 | },
99 | "outputs": [],
100 | "source": [
101 | "class MLP(nn.Module):  # multilayer perceptron with one hidden layer: 20 -> 256 -> 10\n",
102 | "    def __init__(self):\n",
103 | "        super().__init__()\n",
104 | "        self.hidden = nn.Linear(20, 256)  # hidden layer\n",
105 | "        self.out = nn.Linear(256, 10)  # output layer\n",
106 | "\n",
107 | "    def forward(self, X):\n",
108 | "        return self.out(F.relu(self.hidden(X)))"
109 | ]
110 | },
111 | {
112 | "cell_type": "markdown",
113 | "id": "d63bddd3",
114 | "metadata": {
115 | "slideshow": {
116 | "slide_type": "slide"
117 | }
118 | },
119 | "source": [
120 | "实例化多层感知机的层,然后在每次调用前向传播函数时调用这些层"
121 | ]
122 | },
123 | {
124 | "cell_type": "code",
125 | "execution_count": 3,
126 | "id": "f7a34ec3",
127 | "metadata": {
128 | "execution": {
129 | "iopub.execute_input": "2023-08-18T06:57:01.334346Z",
130 | "iopub.status.busy": "2023-08-18T06:57:01.333603Z",
131 | "iopub.status.idle": "2023-08-18T06:57:01.340473Z",
132 | "shell.execute_reply": "2023-08-18T06:57:01.339676Z"
133 | },
134 | "origin_pos": 19,
135 | "tab": [
136 | "pytorch"
137 | ]
138 | },
139 | "outputs": [
140 | {
141 | "data": {
142 | "text/plain": [
143 | "tensor([[ 0.0669, 0.2202, -0.0912, -0.0064, 0.1474, -0.0577, -0.3006, 0.1256,\n",
144 | " -0.0280, 0.4040],\n",
145 | " [ 0.0545, 0.2591, -0.0297, 0.1141, 0.1887, 0.0094, -0.2686, 0.0732,\n",
146 | " -0.0135, 0.3865]], grad_fn=<AddmmBackward0>)"
147 | ]
148 | },
149 | "execution_count": 3,
150 | "metadata": {},
151 | "output_type": "execute_result"
152 | }
153 | ],
154 | "source": [
155 | "net = MLP()\n",
156 | "net(X)"
157 | ]
158 | },
159 | {
160 | "cell_type": "markdown",
161 | "id": "584a9ee2",
162 | "metadata": {
163 | "slideshow": {
164 | "slide_type": "slide"
165 | }
166 | },
167 | "source": [
168 | "顺序块"
169 | ]
170 | },
171 | {
172 | "cell_type": "code",
173 | "execution_count": 5,
174 | "id": "9672de9a",
175 | "metadata": {
176 | "execution": {
177 | "iopub.execute_input": "2023-08-18T06:57:01.353302Z",
178 | "iopub.status.busy": "2023-08-18T06:57:01.352727Z",
179 | "iopub.status.idle": "2023-08-18T06:57:01.360268Z",
180 | "shell.execute_reply": "2023-08-18T06:57:01.359462Z"
181 | },
182 | "origin_pos": 31,
183 | "tab": [
184 | "pytorch"
185 | ]
186 | },
187 | "outputs": [
188 | {
189 | "data": {
190 | "text/plain": [
191 | "tensor([[ 2.2759e-01, -4.7003e-02, 4.2846e-01, -1.2546e-01, 1.5296e-01,\n",
192 | " 1.8972e-01, 9.7048e-02, 4.5479e-04, -3.7986e-02, 6.4842e-02],\n",
193 | " [ 2.7825e-01, -9.7517e-02, 4.8541e-01, -2.4519e-01, -8.4580e-02,\n",
194 | " 2.8538e-01, 3.6861e-02, 2.9411e-02, -1.0612e-01, 1.2620e-01]],\n",
195 | " grad_fn=<AddmmBackward0>)"
196 | ]
197 | },
198 | "execution_count": 5,
199 | "metadata": {},
200 | "output_type": "execute_result"
201 | }
202 | ],
203 | "source": [
204 | "class MySequential(nn.Module):  # minimal re-implementation of nn.Sequential\n",
205 | "    def __init__(self, *args):\n",
206 | "        super().__init__()\n",
207 | "        for idx, module in enumerate(args):\n",
208 | "            self.add_module(str(idx), module)  # public API; registers the child under its position index\n",
209 | "\n",
210 | "    def forward(self, X):\n",
211 | "        for block in self._modules.values():  # insertion order; children() would skip a module passed twice\n",
212 | "            X = block(X)\n",
213 | "        return X\n",
214 | "\n",
215 | "net = MySequential(nn.Linear(20, 256), nn.ReLU(), nn.Linear(256, 10))\n",
216 | "net(X)"
217 | ]
218 | },
219 | {
220 | "cell_type": "markdown",
221 | "id": "3ce57d60",
222 | "metadata": {
223 | "slideshow": {
224 | "slide_type": "slide"
225 | }
226 | },
227 | "source": [
228 | "在前向传播函数中执行代码"
229 | ]
230 | },
231 | {
232 | "cell_type": "code",
233 | "execution_count": 7,
234 | "id": "00ebc567",
235 | "metadata": {
236 | "execution": {
237 | "iopub.execute_input": "2023-08-18T06:57:01.373508Z",
238 | "iopub.status.busy": "2023-08-18T06:57:01.372789Z",
239 | "iopub.status.idle": "2023-08-18T06:57:01.380049Z",
240 | "shell.execute_reply": "2023-08-18T06:57:01.379025Z"
241 | },
242 | "origin_pos": 40,
243 | "tab": [
244 | "pytorch"
245 | ]
246 | },
247 | "outputs": [
248 | {
249 | "data": {
250 | "text/plain": [
251 | "tensor(0.1862, grad_fn=<SumBackward0>)"
252 | ]
253 | },
254 | "execution_count": 7,
255 | "metadata": {},
256 | "output_type": "execute_result"
257 | }
258 | ],
259 | "source": [
260 | "class FixedHiddenMLP(nn.Module):  # MLP with a constant random weight and Python control flow in forward\n",
261 | "    def __init__(self):\n",
262 | "        super().__init__()\n",
263 | "        self.register_buffer('rand_weight', torch.rand((20, 20)), persistent=False)  # constant, untrained; follows .to(device), stays out of state_dict\n",
264 | "        self.linear = nn.Linear(20, 20)\n",
265 | "\n",
266 | "    def forward(self, X):\n",
267 | "        X = self.linear(X)\n",
268 | "        X = F.relu(torch.mm(X, self.rand_weight) + 1)\n",
269 | "        X = self.linear(X)  # same layer reused, so both calls share parameters\n",
270 | "        while X.abs().sum() > 1:  # halve until the sum of absolute values is at most 1\n",
271 | "            X /= 2\n",
272 | "        return X.sum()\n",
273 | "\n",
274 | "net = FixedHiddenMLP()\n",
275 | "net(X)"
276 | ]
277 | },
278 | {
279 | "cell_type": "markdown",
280 | "id": "053d1e5a",
281 | "metadata": {
282 | "slideshow": {
283 | "slide_type": "slide"
284 | }
285 | },
286 | "source": [
287 | "混合搭配各种组合块的方法"
288 | ]
289 | },
290 | {
291 | "cell_type": "code",
292 | "execution_count": 8,
293 | "id": "6ca3b399",
294 | "metadata": {
295 | "execution": {
296 | "iopub.execute_input": "2023-08-18T06:57:01.384091Z",
297 | "iopub.status.busy": "2023-08-18T06:57:01.383236Z",
298 | "iopub.status.idle": "2023-08-18T06:57:01.394649Z",
299 | "shell.execute_reply": "2023-08-18T06:57:01.393535Z"
300 | },
301 | "origin_pos": 43,
302 | "tab": [
303 | "pytorch"
304 | ]
305 | },
306 | "outputs": [
307 | {
308 | "data": {
309 | "text/plain": [
310 | "tensor(0.2183, grad_fn=<SumBackward0>)"
311 | ]
312 | },
313 | "execution_count": 8,
314 | "metadata": {},
315 | "output_type": "execute_result"
316 | }
317 | ],
318 | "source": [
319 | "class NestMLP(nn.Module):  # Sequential sub-network (20 -> 64 -> 32) followed by a linear head (32 -> 16)\n",
320 | "    def __init__(self):\n",
321 | "        super().__init__()\n",
322 | "        self.net = nn.Sequential(nn.Linear(20, 64), nn.ReLU(),\n",
323 | "                                 nn.Linear(64, 32), nn.ReLU())\n",
324 | "        self.linear = nn.Linear(32, 16)\n",
325 | "\n",
326 | "    def forward(self, X):\n",
327 | "        return self.linear(self.net(X))\n",
328 | "\n",
329 | "chimera = nn.Sequential(NestMLP(), nn.Linear(16, 20), FixedHiddenMLP())\n",
330 | "chimera(X)"
331 | ]
332 | }
333 | ],
334 | "metadata": {
335 | "celltoolbar": "Slideshow",
336 | "language_info": {
337 | "name": "python"
338 | },
339 | "required_libs": [],
340 | "rise": {
341 | "autolaunch": true,
342 | "enable_chalkboard": true,
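343 | "overlay": "<div class='my-top-right'><img height=80px src='http://d2l.ai/_static/logo-with-text.png'/></div><div class='my-top-left'></div>",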
344 | "scroll": true
345 | }
346 | },
347 | "nbformat": 4,
348 | "nbformat_minor": 5
349 | }
--------------------------------------------------------------------------------
/chapter_deep-learning-computation/parameters.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "id": "23850d90",
6 | "metadata": {
7 | "slideshow": {
8 | "slide_type": "-"
9 | }
10 | },
11 | "source": [
12 | "# 参数管理\n",
13 | "\n",
14 | "我们首先看一下具有单隐藏层的多层感知机"
15 | ]
16 | },
17 | {
18 | "cell_type": "code",
19 | "execution_count": 1,
20 | "id": "ab7ef7a0",
21 | "metadata": {
22 | "execution": {
23 | "iopub.execute_input": "2023-08-18T07:01:09.649068Z",
24 | "iopub.status.busy": "2023-08-18T07:01:09.648305Z",
25 | "iopub.status.idle": "2023-08-18T07:01:10.928992Z",
26 | "shell.execute_reply": "2023-08-18T07:01:10.927959Z"
27 | },
28 | "origin_pos": 2,
29 | "tab": [
30 | "pytorch"
31 | ]
32 | },
33 | "outputs": [
34 | {
35 | "data": {
36 | "text/plain": [
37 | "tensor([[-0.0970],\n",
38 | " [-0.0827]], grad_fn=<AddmmBackward0>)"
39 | ]
40 | },
41 | "execution_count": 1,
42 | "metadata": {},
43 | "output_type": "execute_result"
44 | }
45 | ],
46 | "source": [
47 | "import torch\n",
48 | "from torch import nn\n",
49 | "\n",
50 | "net = nn.Sequential(nn.Linear(4, 8), nn.ReLU(), nn.Linear(8, 1))\n",
51 | "X = torch.rand(size=(2, 4))\n",
52 | "net(X)"
53 | ]
54 | },
55 | {
56 | "cell_type": "markdown",
57 | "id": "2dbaff55",
58 | "metadata": {
59 | "slideshow": {
60 | "slide_type": "slide"
61 | }
62 | },
63 | "source": [
64 | "参数访问"
65 | ]
66 | },
67 | {
68 | "cell_type": "code",
69 | "execution_count": 2,
70 | "id": "5e2fff9a",
71 | "metadata": {
72 | "execution": {
73 | "iopub.execute_input": "2023-08-18T07:01:10.933865Z",
74 | "iopub.status.busy": "2023-08-18T07:01:10.933267Z",
75 | "iopub.status.idle": "2023-08-18T07:01:10.939922Z",
76 | "shell.execute_reply": "2023-08-18T07:01:10.938931Z"
77 | },
78 | "origin_pos": 7,
79 | "tab": [
80 | "pytorch"
81 | ]
82 | },
83 | "outputs": [
84 | {
85 | "name": "stdout",
86 | "output_type": "stream",
87 | "text": [
88 | "OrderedDict([('weight', tensor([[-0.0427, -0.2939, -0.1894, 0.0220, -0.1709, -0.1522, -0.0334, -0.2263]])), ('bias', tensor([0.0887]))])\n"
89 | ]
90 | }
91 | ],
92 | "source": [
93 | "print(net[2].state_dict())"
94 | ]
95 | },
96 | {
97 | "cell_type": "markdown",
98 | "id": "f4e174dc",
99 | "metadata": {
100 | "slideshow": {
101 | "slide_type": "slide"
102 | }
103 | },
104 | "source": [
105 | "目标参数"
106 | ]
107 | },
108 | {
109 | "cell_type": "code",
110 | "execution_count": 3,
111 | "id": "d0682fff",
112 | "metadata": {
113 | "execution": {
114 | "iopub.execute_input": "2023-08-18T07:01:10.945104Z",
115 | "iopub.status.busy": "2023-08-18T07:01:10.944250Z",
116 | "iopub.status.idle": "2023-08-18T07:01:10.951764Z",
117 | "shell.execute_reply": "2023-08-18T07:01:10.950790Z"
118 | },
119 | "origin_pos": 11,
120 | "tab": [
121 | "pytorch"
122 | ]
123 | },
124 | "outputs": [
125 | {
126 | "name": "stdout",
127 | "output_type": "stream",
128 | "text": [
129 | "<class 'torch.nn.parameter.Parameter'>\n",
130 | "Parameter containing:\n",
131 | "tensor([0.0887], requires_grad=True)\n",
132 | "tensor([0.0887])\n"
133 | ]
134 | }
135 | ],
136 | "source": [
137 | "print(type(net[2].bias))\n",
138 | "print(net[2].bias)\n",
139 | "print(net[2].bias.data)"
140 | ]
141 | },
142 | {
143 | "cell_type": "code",
144 | "execution_count": 4,
145 | "id": "3cf4d55b",
146 | "metadata": {
147 | "execution": {
148 | "iopub.execute_input": "2023-08-18T07:01:10.956378Z",
149 | "iopub.status.busy": "2023-08-18T07:01:10.955542Z",
150 | "iopub.status.idle": "2023-08-18T07:01:10.961810Z",
151 | "shell.execute_reply": "2023-08-18T07:01:10.960767Z"
152 | },
153 | "origin_pos": 16,
154 | "tab": [
155 | "pytorch"
156 | ]
157 | },
158 | "outputs": [
159 | {
160 | "data": {
161 | "text/plain": [
162 | "True"
163 | ]
164 | },
165 | "execution_count": 4,
166 | "metadata": {},
167 | "output_type": "execute_result"
168 | }
169 | ],
170 | "source": [
171 | "net[2].weight.grad is None  # identity test is the idiomatic None check"
172 | ]
173 | },
174 | {
175 | "cell_type": "markdown",
176 | "id": "170b54ab",
177 | "metadata": {
178 | "slideshow": {
179 | "slide_type": "slide"
180 | }
181 | },
182 | "source": [
183 | "一次性访问所有参数"
184 | ]
185 | },
186 | {
187 | "cell_type": "code",
188 | "execution_count": 5,
189 | "id": "916939ce",
190 | "metadata": {
191 | "execution": {
192 | "iopub.execute_input": "2023-08-18T07:01:10.966725Z",
193 | "iopub.status.busy": "2023-08-18T07:01:10.965969Z",
194 | "iopub.status.idle": "2023-08-18T07:01:10.972600Z",
195 | "shell.execute_reply": "2023-08-18T07:01:10.971655Z"
196 | },
197 | "origin_pos": 19,
198 | "tab": [
199 | "pytorch"
200 | ]
201 | },
202 | "outputs": [
203 | {
204 | "name": "stdout",
205 | "output_type": "stream",
206 | "text": [
207 | "('weight', torch.Size([8, 4])) ('bias', torch.Size([8]))\n",
208 | "('0.weight', torch.Size([8, 4])) ('0.bias', torch.Size([8])) ('2.weight', torch.Size([1, 8])) ('2.bias', torch.Size([1]))\n"
209 | ]
210 | }
211 | ],
212 | "source": [
213 | "print(*[(name, param.shape) for name, param in net[0].named_parameters()])\n",
214 | "print(*[(name, param.shape) for name, param in net.named_parameters()])"
215 | ]
216 | },
217 | {
218 | "cell_type": "code",
219 | "execution_count": 6,
220 | "id": "116207ef",
221 | "metadata": {
222 | "execution": {
223 | "iopub.execute_input": "2023-08-18T07:01:10.977269Z",
224 | "iopub.status.busy": "2023-08-18T07:01:10.976623Z",
225 | "iopub.status.idle": "2023-08-18T07:01:10.983222Z",
226 | "shell.execute_reply": "2023-08-18T07:01:10.982309Z"
227 | },
228 | "origin_pos": 23,
229 | "tab": [
230 | "pytorch"
231 | ]
232 | },
233 | "outputs": [
234 | {
235 | "data": {
236 | "text/plain": [
237 | "tensor([0.0887])"
238 | ]
239 | },
240 | "execution_count": 6,
241 | "metadata": {},
242 | "output_type": "execute_result"
243 | }
244 | ],
245 | "source": [
246 | "net.state_dict()['2.bias'].data"
247 | ]
248 | },
249 | {
250 | "cell_type": "markdown",
251 | "id": "707279d0",
252 | "metadata": {
253 | "slideshow": {
254 | "slide_type": "slide"
255 | }
256 | },
257 | "source": [
258 | "从嵌套块收集参数"
259 | ]
260 | },
261 | {
262 | "cell_type": "code",
263 | "execution_count": 7,
264 | "id": "712e31fd",
265 | "metadata": {
266 | "execution": {
267 | "iopub.execute_input": "2023-08-18T07:01:10.988088Z",
268 | "iopub.status.busy": "2023-08-18T07:01:10.987352Z",
269 | "iopub.status.idle": "2023-08-18T07:01:10.998245Z",
270 | "shell.execute_reply": "2023-08-18T07:01:10.997197Z"
271 | },
272 | "origin_pos": 28,
273 | "tab": [
274 | "pytorch"
275 | ]
276 | },
277 | "outputs": [
278 | {
279 | "data": {
280 | "text/plain": [
281 | "tensor([[0.2596],\n",
282 | " [0.2596]], grad_fn=<AddmmBackward0>)"
283 | ]
284 | },
285 | "execution_count": 7,
286 | "metadata": {},
287 | "output_type": "execute_result"
288 | }
289 | ],
290 | "source": [
291 | "def block1():  # one 4 -> 8 -> 4 block with ReLU after each linear layer\n",
292 | "    return nn.Sequential(nn.Linear(4, 8), nn.ReLU(),\n",
293 | "                         nn.Linear(8, 4), nn.ReLU())\n",
294 | "\n",
295 | "def block2():  # chain four block1() instances inside one Sequential\n",
296 | "    net = nn.Sequential()\n",
297 | "    for i in range(4):\n",
298 | "        net.add_module(f'block {i}', block1())  # each call to block1() builds a new sub-network\n",
299 | "    return net\n",
300 | "\n",
301 | "rgnet = nn.Sequential(block2(), nn.Linear(4, 1))\n",
302 | "rgnet(X)"
303 | ]
304 | },
305 | {
306 | "cell_type": "markdown",
307 | "id": "df7a2644",
308 | "metadata": {
309 | "slideshow": {
310 | "slide_type": "slide"
311 | }
312 | },
313 | "source": [
314 | "设计了网络后,我们看看它是如何工作的"
315 | ]
316 | },
317 | {
318 | "cell_type": "code",
319 | "execution_count": 8,
320 | "id": "c7d7717d",
321 | "metadata": {
322 | "execution": {
323 | "iopub.execute_input": "2023-08-18T07:01:11.002889Z",
324 | "iopub.status.busy": "2023-08-18T07:01:11.002264Z",
325 | "iopub.status.idle": "2023-08-18T07:01:11.007643Z",
326 | "shell.execute_reply": "2023-08-18T07:01:11.006464Z"
327 | },
328 | "origin_pos": 33,
329 | "tab": [
330 | "pytorch"
331 | ]
332 | },
333 | "outputs": [
334 | {
335 | "name": "stdout",
336 | "output_type": "stream",
337 | "text": [
338 | "Sequential(\n",
339 | " (0): Sequential(\n",
340 | " (block 0): Sequential(\n",
341 | " (0): Linear(in_features=4, out_features=8, bias=True)\n",
342 | " (1): ReLU()\n",
343 | " (2): Linear(in_features=8, out_features=4, bias=True)\n",
344 | " (3): ReLU()\n",
345 | " )\n",
346 | " (block 1): Sequential(\n",
347 | " (0): Linear(in_features=4, out_features=8, bias=True)\n",
348 | " (1): ReLU()\n",
349 | " (2): Linear(in_features=8, out_features=4, bias=True)\n",
350 | " (3): ReLU()\n",
351 | " )\n",
352 | " (block 2): Sequential(\n",
353 | " (0): Linear(in_features=4, out_features=8, bias=True)\n",
354 | " (1): ReLU()\n",
355 | " (2): Linear(in_features=8, out_features=4, bias=True)\n",
356 | " (3): ReLU()\n",
357 | " )\n",
358 | " (block 3): Sequential(\n",
359 | " (0): Linear(in_features=4, out_features=8, bias=True)\n",
360 | " (1): ReLU()\n",
361 | " (2): Linear(in_features=8, out_features=4, bias=True)\n",
362 | " (3): ReLU()\n",
363 | " )\n",
364 | " )\n",
365 | " (1): Linear(in_features=4, out_features=1, bias=True)\n",
366 | ")\n"
367 | ]
368 | }
369 | ],
370 | "source": [
371 | "print(rgnet)"
372 | ]
373 | },
374 | {
375 | "cell_type": "code",
376 | "execution_count": 9,
377 | "id": "939ba4d3",
378 | "metadata": {
379 | "execution": {
380 | "iopub.execute_input": "2023-08-18T07:01:11.012522Z",
381 | "iopub.status.busy": "2023-08-18T07:01:11.011839Z",
382 | "iopub.status.idle": "2023-08-18T07:01:11.018508Z",
383 | "shell.execute_reply": "2023-08-18T07:01:11.017590Z"
384 | },
385 | "origin_pos": 37,
386 | "tab": [
387 | "pytorch"
388 | ]
389 | },
390 | "outputs": [
391 | {
392 | "data": {
393 | "text/plain": [
394 | "tensor([ 0.1999, -0.4073, -0.1200, -0.2033, -0.1573, 0.3546, -0.2141, -0.2483])"
395 | ]
396 | },
397 | "execution_count": 9,
398 | "metadata": {},
399 | "output_type": "execute_result"
400 | }
401 | ],
402 | "source": [
403 | "rgnet[0][1][0].bias.data"
404 | ]
405 | },
406 | {
407 | "cell_type": "markdown",
408 | "id": "77b45fbb",
409 | "metadata": {
410 | "slideshow": {
411 | "slide_type": "slide"
412 | }
413 | },
414 | "source": [
415 | "内置初始化"
416 | ]
417 | },
418 | {
419 | "cell_type": "code",
420 | "execution_count": 10,
421 | "id": "2f00d5e7",
422 | "metadata": {
423 | "execution": {
424 | "iopub.execute_input": "2023-08-18T07:01:11.023955Z",
425 | "iopub.status.busy": "2023-08-18T07:01:11.023046Z",
426 | "iopub.status.idle": "2023-08-18T07:01:11.033287Z",
427 | "shell.execute_reply": "2023-08-18T07:01:11.032096Z"
428 | },
429 | "origin_pos": 47,
430 | "tab": [
431 | "pytorch"
432 | ]
433 | },
434 | "outputs": [
435 | {
436 | "data": {
437 | "text/plain": [
438 | "(tensor([-0.0214, -0.0015, -0.0100, -0.0058]), tensor(0.))"
439 | ]
440 | },
441 | "execution_count": 10,
442 | "metadata": {},
443 | "output_type": "execute_result"
444 | }
445 | ],
446 | "source": [
447 | "def init_normal(m):  # for Linear layers: weights ~ N(0, 0.01^2), biases = 0\n",
448 | "    if isinstance(m, nn.Linear):  # isinstance also covers subclasses of nn.Linear\n",
449 | "        nn.init.normal_(m.weight, mean=0, std=0.01)\n",
450 | "        nn.init.zeros_(m.bias)\n",
451 | "net.apply(init_normal)\n",
452 | "net[0].weight.data[0], net[0].bias.data[0]"
453 | ]
454 | },
455 | {
456 | "cell_type": "code",
457 | "execution_count": 11,
458 | "id": "49ee306c",
459 | "metadata": {
460 | "execution": {
461 | "iopub.execute_input": "2023-08-18T07:01:11.038321Z",
462 | "iopub.status.busy": "2023-08-18T07:01:11.037607Z",
463 | "iopub.status.idle": "2023-08-18T07:01:11.049009Z",
464 | "shell.execute_reply": "2023-08-18T07:01:11.047793Z"
465 | },
466 | "origin_pos": 52,
467 | "tab": [
468 | "pytorch"
469 | ]
470 | },
471 | "outputs": [
472 | {
473 | "data": {
474 | "text/plain": [
475 | "(tensor([1., 1., 1., 1.]), tensor(0.))"
476 | ]
477 | },
478 | "execution_count": 11,
479 | "metadata": {},
480 | "output_type": "execute_result"
481 | }
482 | ],
483 | "source": [
484 | "def init_constant(m):  # for Linear layers: every weight = 1, biases = 0\n",
485 | "    if isinstance(m, nn.Linear):  # isinstance also covers subclasses of nn.Linear\n",
486 | "        nn.init.constant_(m.weight, 1)\n",
487 | "        nn.init.zeros_(m.bias)\n",
488 | "net.apply(init_constant)\n",
489 | "net[0].weight.data[0], net[0].bias.data[0]"
490 | ]
491 | },
492 | {
493 | "cell_type": "markdown",
494 | "id": "478059aa",
495 | "metadata": {
496 | "slideshow": {
497 | "slide_type": "slide"
498 | }
499 | },
500 | "source": [
501 | "对某些块应用不同的初始化方法"
502 | ]
503 | },
504 | {
505 | "cell_type": "code",
506 | "execution_count": 12,
507 | "id": "1a90ffaa",
508 | "metadata": {
509 | "execution": {
510 | "iopub.execute_input": "2023-08-18T07:01:11.054335Z",
511 | "iopub.status.busy": "2023-08-18T07:01:11.053550Z",
512 | "iopub.status.idle": "2023-08-18T07:01:11.063215Z",
513 | "shell.execute_reply": "2023-08-18T07:01:11.062244Z"
514 | },
515 | "origin_pos": 57,
516 | "tab": [
517 | "pytorch"
518 | ]
519 | },
520 | "outputs": [
521 | {
522 | "name": "stdout",
523 | "output_type": "stream",
524 | "text": [
525 | "tensor([ 0.5236, 0.0516, -0.3236, 0.3794])\n",
526 | "tensor([[42., 42., 42., 42., 42., 42., 42., 42.]])\n"
527 | ]
528 | }
529 | ],
530 | "source": [
531 | "def init_xavier(m):\n",
532 | " if type(m) == nn.Linear:\n",
533 | " nn.init.xavier_uniform_(m.weight)\n",
534 | "def init_42(m):\n",
535 | " if type(m) == nn.Linear:\n",
536 | " nn.init.constant_(m.weight, 42)\n",
537 | "\n",
538 | "net[0].apply(init_xavier)\n",
539 | "net[2].apply(init_42)\n",
540 | "print(net[0].weight.data[0])\n",
541 | "print(net[2].weight.data)"
542 | ]
543 | },
544 | {
545 | "cell_type": "markdown",
546 | "id": "0a70ae16",
547 | "metadata": {
548 | "slideshow": {
549 | "slide_type": "slide"
550 | }
551 | },
552 | "source": [
553 | "自定义初始化"
554 | ]
555 | },
556 | {
557 | "cell_type": "code",
558 | "execution_count": 13,
559 | "id": "9166f6e3",
560 | "metadata": {
561 | "execution": {
562 | "iopub.execute_input": "2023-08-18T07:01:11.068164Z",
563 | "iopub.status.busy": "2023-08-18T07:01:11.067460Z",
564 | "iopub.status.idle": "2023-08-18T07:01:11.079228Z",
565 | "shell.execute_reply": "2023-08-18T07:01:11.078069Z"
566 | },
567 | "origin_pos": 66,
568 | "tab": [
569 | "pytorch"
570 | ]
571 | },
572 | "outputs": [
573 | {
574 | "name": "stdout",
575 | "output_type": "stream",
576 | "text": [
577 | "Init weight torch.Size([8, 4])\n",
578 | "Init weight torch.Size([1, 8])\n"
579 | ]
580 | },
581 | {
582 | "data": {
583 | "text/plain": [
584 | "tensor([[5.4079, 9.3334, 5.0616, 8.3095],\n",
585 | " [0.0000, 7.2788, -0.0000, -0.0000]], grad_fn=<SliceBackward0>)"
586 | ]
587 | },
588 | "execution_count": 13,
589 | "metadata": {},
590 | "output_type": "execute_result"
591 | }
592 | ],
593 | "source": [
594 | "def my_init(m):\n",
595 | " if type(m) == nn.Linear:\n",
596 | " print(\"Init\", *[(name, param.shape)\n",
597 | " for name, param in m.named_parameters()][0])\n",
598 | " nn.init.uniform_(m.weight, -10, 10)\n",
599 | " m.weight.data *= m.weight.data.abs() >= 5\n",
600 | "\n",
601 | "net.apply(my_init)\n",
602 | "net[0].weight[:2]"
603 | ]
604 | },
605 | {
606 | "cell_type": "code",
607 | "execution_count": 14,
608 | "id": "5b9af1f8",
609 | "metadata": {
610 | "execution": {
611 | "iopub.execute_input": "2023-08-18T07:01:11.084158Z",
612 | "iopub.status.busy": "2023-08-18T07:01:11.083416Z",
613 | "iopub.status.idle": "2023-08-18T07:01:11.092672Z",
614 | "shell.execute_reply": "2023-08-18T07:01:11.091537Z"
615 | },
616 | "origin_pos": 71,
617 | "tab": [
618 | "pytorch"
619 | ]
620 | },
621 | "outputs": [
622 | {
623 | "data": {
624 | "text/plain": [
625 | "tensor([42.0000, 10.3334, 6.0616, 9.3095])"
626 | ]
627 | },
628 | "execution_count": 14,
629 | "metadata": {},
630 | "output_type": "execute_result"
631 | }
632 | ],
633 | "source": [
634 | "net[0].weight.data[:] += 1\n",
635 | "net[0].weight.data[0, 0] = 42\n",
636 | "net[0].weight.data[0]"
637 | ]
638 | },
639 | {
640 | "cell_type": "markdown",
641 | "id": "9031168e",
642 | "metadata": {
643 | "slideshow": {
644 | "slide_type": "slide"
645 | }
646 | },
647 | "source": [
648 | "参数绑定"
649 | ]
650 | },
651 | {
652 | "cell_type": "code",
653 | "execution_count": 15,
654 | "id": "69660fa7",
655 | "metadata": {
656 | "execution": {
657 | "iopub.execute_input": "2023-08-18T07:01:11.097767Z",
658 | "iopub.status.busy": "2023-08-18T07:01:11.096948Z",
659 | "iopub.status.idle": "2023-08-18T07:01:11.108904Z",
660 | "shell.execute_reply": "2023-08-18T07:01:11.107763Z"
661 | },
662 | "origin_pos": 77,
663 | "tab": [
664 | "pytorch"
665 | ]
666 | },
667 | "outputs": [
668 | {
669 | "name": "stdout",
670 | "output_type": "stream",
671 | "text": [
672 | "tensor([True, True, True, True, True, True, True, True])\n",
673 | "tensor([True, True, True, True, True, True, True, True])\n"
674 | ]
675 | }
676 | ],
677 | "source": [
678 | "shared = nn.Linear(8, 8)\n",
679 | "net = nn.Sequential(nn.Linear(4, 8), nn.ReLU(),\n",
680 | " shared, nn.ReLU(),\n",
681 | " shared, nn.ReLU(),\n",
682 | " nn.Linear(8, 1))\n",
683 | "net(X)\n",
684 | "print(net[2].weight.data[0] == net[4].weight.data[0])\n",
685 | "net[2].weight.data[0, 0] = 100\n",
686 | "print(net[2].weight.data[0] == net[4].weight.data[0])"
687 | ]
688 | }
689 | ],
690 | "metadata": {
691 | "celltoolbar": "Slideshow",
692 | "language_info": {
693 | "name": "python"
694 | },
695 | "required_libs": [],
696 | "rise": {
697 | "autolaunch": true,
698 | "enable_chalkboard": true,
699 | "overlay": "<div class='my-top-right'></div><div class='my-top-left'></div>",
700 | "scroll": true
701 | }
702 | },
703 | "nbformat": 4,
704 | "nbformat_minor": 5
705 | }
--------------------------------------------------------------------------------
/chapter_deep-learning-computation/read-write.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "id": "3c764167",
6 | "metadata": {
7 | "slideshow": {
8 | "slide_type": "-"
9 | }
10 | },
11 | "source": [
12 | "# 读写文件\n",
13 | "\n",
14 | "加载和保存张量"
15 | ]
16 | },
17 | {
18 | "cell_type": "code",
19 | "execution_count": 2,
20 | "id": "1ab53461",
21 | "metadata": {
22 | "execution": {
23 | "iopub.execute_input": "2023-08-18T06:56:43.733002Z",
24 | "iopub.status.busy": "2023-08-18T06:56:43.732347Z",
25 | "iopub.status.idle": "2023-08-18T06:56:43.741208Z",
26 | "shell.execute_reply": "2023-08-18T06:56:43.740416Z"
27 | },
28 | "origin_pos": 7,
29 | "tab": [
30 | "pytorch"
31 | ]
32 | },
33 | "outputs": [
34 | {
35 | "data": {
36 | "text/plain": [
37 | "tensor([0, 1, 2, 3])"
38 | ]
39 | },
40 | "execution_count": 2,
41 | "metadata": {},
42 | "output_type": "execute_result"
43 | }
44 | ],
45 | "source": [
46 | "import torch\n",
47 | "from torch import nn\n",
48 | "from torch.nn import functional as F\n",
49 | "\n",
50 | "x = torch.arange(4)\n",
51 | "torch.save(x, 'x-file')\n",
52 | "\n",
53 | "x2 = torch.load('x-file')\n",
54 | "x2"
55 | ]
56 | },
57 | {
58 | "cell_type": "markdown",
59 | "id": "6c8adb8b",
60 | "metadata": {
61 | "slideshow": {
62 | "slide_type": "slide"
63 | }
64 | },
65 | "source": [
66 | "存储一个张量列表,然后把它们读回内存"
67 | ]
68 | },
69 | {
70 | "cell_type": "code",
71 | "execution_count": 3,
72 | "id": "81027fe1",
73 | "metadata": {
74 | "execution": {
75 | "iopub.execute_input": "2023-08-18T06:56:43.744676Z",
76 | "iopub.status.busy": "2023-08-18T06:56:43.744140Z",
77 | "iopub.status.idle": "2023-08-18T06:56:43.751376Z",
78 | "shell.execute_reply": "2023-08-18T06:56:43.750630Z"
79 | },
80 | "origin_pos": 12,
81 | "tab": [
82 | "pytorch"
83 | ]
84 | },
85 | "outputs": [
86 | {
87 | "data": {
88 | "text/plain": [
89 | "(tensor([0, 1, 2, 3]), tensor([0., 0., 0., 0.]))"
90 | ]
91 | },
92 | "execution_count": 3,
93 | "metadata": {},
94 | "output_type": "execute_result"
95 | }
96 | ],
97 | "source": [
98 | "y = torch.zeros(4)\n",
99 | "torch.save([x, y],'x-files')\n",
100 | "x2, y2 = torch.load('x-files')\n",
101 | "(x2, y2)"
102 | ]
103 | },
104 | {
105 | "cell_type": "markdown",
106 | "id": "47678e7f",
107 | "metadata": {
108 | "slideshow": {
109 | "slide_type": "-"
110 | }
111 | },
112 | "source": [
113 | "写入或读取从字符串映射到张量的字典"
114 | ]
115 | },
116 | {
117 | "cell_type": "code",
118 | "execution_count": 4,
119 | "id": "fde1cb33",
120 | "metadata": {
121 | "execution": {
122 | "iopub.execute_input": "2023-08-18T06:56:43.754777Z",
123 | "iopub.status.busy": "2023-08-18T06:56:43.754313Z",
124 | "iopub.status.idle": "2023-08-18T06:56:43.761150Z",
125 | "shell.execute_reply": "2023-08-18T06:56:43.760369Z"
126 | },
127 | "origin_pos": 17,
128 | "tab": [
129 | "pytorch"
130 | ]
131 | },
132 | "outputs": [
133 | {
134 | "data": {
135 | "text/plain": [
136 | "{'x': tensor([0, 1, 2, 3]), 'y': tensor([0., 0., 0., 0.])}"
137 | ]
138 | },
139 | "execution_count": 4,
140 | "metadata": {},
141 | "output_type": "execute_result"
142 | }
143 | ],
144 | "source": [
145 | "mydict = {'x': x, 'y': y}\n",
146 | "torch.save(mydict, 'mydict')\n",
147 | "mydict2 = torch.load('mydict')\n",
148 | "mydict2"
149 | ]
150 | },
151 | {
152 | "cell_type": "markdown",
153 | "id": "9cc9655d",
154 | "metadata": {
155 | "slideshow": {
156 | "slide_type": "slide"
157 | }
158 | },
159 | "source": [
160 | "加载和保存模型参数"
161 | ]
162 | },
163 | {
164 | "cell_type": "code",
165 | "execution_count": 5,
166 | "id": "2672b5c2",
167 | "metadata": {
168 | "execution": {
169 | "iopub.execute_input": "2023-08-18T06:56:43.764609Z",
170 | "iopub.status.busy": "2023-08-18T06:56:43.764090Z",
171 | "iopub.status.idle": "2023-08-18T06:56:43.773070Z",
172 | "shell.execute_reply": "2023-08-18T06:56:43.772277Z"
173 | },
174 | "origin_pos": 22,
175 | "tab": [
176 | "pytorch"
177 | ]
178 | },
179 | "outputs": [],
180 | "source": [
181 | "class MLP(nn.Module):\n",
182 | " def __init__(self):\n",
183 | " super().__init__()\n",
184 | " self.hidden = nn.Linear(20, 256)\n",
185 | " self.output = nn.Linear(256, 10)\n",
186 | "\n",
187 | " def forward(self, x):\n",
188 | " return self.output(F.relu(self.hidden(x)))\n",
189 | "\n",
190 | "net = MLP()\n",
191 | "X = torch.randn(size=(2, 20))\n",
192 | "Y = net(X)"
193 | ]
194 | },
195 | {
196 | "cell_type": "markdown",
197 | "id": "72d77012",
198 | "metadata": {
199 | "slideshow": {
200 | "slide_type": "slide"
201 | }
202 | },
203 | "source": [
204 | "将模型的参数存储在一个叫做“mlp.params”的文件中"
205 | ]
206 | },
207 | {
208 | "cell_type": "code",
209 | "execution_count": 6,
210 | "id": "a53c1315",
211 | "metadata": {
212 | "execution": {
213 | "iopub.execute_input": "2023-08-18T06:56:43.776452Z",
214 | "iopub.status.busy": "2023-08-18T06:56:43.775942Z",
215 | "iopub.status.idle": "2023-08-18T06:56:43.780387Z",
216 | "shell.execute_reply": "2023-08-18T06:56:43.779636Z"
217 | },
218 | "origin_pos": 27,
219 | "tab": [
220 | "pytorch"
221 | ]
222 | },
223 | "outputs": [],
224 | "source": [
225 | "torch.save(net.state_dict(), 'mlp.params')"
226 | ]
227 | },
228 | {
229 | "cell_type": "markdown",
230 | "id": "647cbcef",
231 | "metadata": {
232 | "slideshow": {
233 | "slide_type": "slide"
234 | }
235 | },
236 | "source": [
237 | "实例化了原始多层感知机模型的一个备份。\n",
238 | "直接读取文件中存储的参数"
239 | ]
240 | },
241 | {
242 | "cell_type": "code",
243 | "execution_count": 7,
244 | "id": "da5e1b3f",
245 | "metadata": {
246 | "execution": {
247 | "iopub.execute_input": "2023-08-18T06:56:43.783850Z",
248 | "iopub.status.busy": "2023-08-18T06:56:43.783240Z",
249 | "iopub.status.idle": "2023-08-18T06:56:43.789905Z",
250 | "shell.execute_reply": "2023-08-18T06:56:43.789164Z"
251 | },
252 | "origin_pos": 32,
253 | "tab": [
254 | "pytorch"
255 | ]
256 | },
257 | "outputs": [
258 | {
259 | "data": {
260 | "text/plain": [
261 | "MLP(\n",
262 | " (hidden): Linear(in_features=20, out_features=256, bias=True)\n",
263 | " (output): Linear(in_features=256, out_features=10, bias=True)\n",
264 | ")"
265 | ]
266 | },
267 | "execution_count": 7,
268 | "metadata": {},
269 | "output_type": "execute_result"
270 | }
271 | ],
272 | "source": [
273 | "clone = MLP()\n",
274 | "clone.load_state_dict(torch.load('mlp.params'))\n",
275 | "clone.eval()"
276 | ]
277 | },
278 | {
279 | "cell_type": "code",
280 | "execution_count": 8,
281 | "id": "a25ba1f1",
282 | "metadata": {
283 | "execution": {
284 | "iopub.execute_input": "2023-08-18T06:56:43.793400Z",
285 | "iopub.status.busy": "2023-08-18T06:56:43.792788Z",
286 | "iopub.status.idle": "2023-08-18T06:56:43.798329Z",
287 | "shell.execute_reply": "2023-08-18T06:56:43.797576Z"
288 | },
289 | "origin_pos": 37,
290 | "tab": [
291 | "pytorch"
292 | ]
293 | },
294 | "outputs": [
295 | {
296 | "data": {
297 | "text/plain": [
298 | "tensor([[True, True, True, True, True, True, True, True, True, True],\n",
299 | " [True, True, True, True, True, True, True, True, True, True]])"
300 | ]
301 | },
302 | "execution_count": 8,
303 | "metadata": {},
304 | "output_type": "execute_result"
305 | }
306 | ],
307 | "source": [
308 | "Y_clone = clone(X)\n",
309 | "Y_clone == Y"
310 | ]
311 | }
312 | ],
313 | "metadata": {
314 | "celltoolbar": "Slideshow",
315 | "language_info": {
316 | "name": "python"
317 | },
318 | "required_libs": [],
319 | "rise": {
320 | "autolaunch": true,
321 | "enable_chalkboard": true,
322 | "overlay": "<div class='my-top-right'></div><div class='my-top-left'></div>",
323 | "scroll": true
324 | }
325 | },
326 | "nbformat": 4,
327 | "nbformat_minor": 5
328 | }
--------------------------------------------------------------------------------
/chapter_deep-learning-computation/rise.css:
--------------------------------------------------------------------------------
1 |
2 | div.text_cell_render.rendered_html {
3 | padding: 0.35em 0.1em;
4 | }
5 |
6 | div.code_cell {
7 | font-size: 120%;
8 | }
9 |
10 | div.my-top-right {
11 | position: absolute;
12 | right: 5%;
13 | top: 1em;
14 | font-size: 2em;
15 | }
16 |
17 | div.my-top-left {
18 | position: absolute;
19 | left: 5%;
20 | top: 1em;
21 | font-size: 2em;
22 | }
23 |
--------------------------------------------------------------------------------
/chapter_deep-learning-computation/use-gpu.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "id": "b867430b",
6 | "metadata": {
7 | "slideshow": {
8 | "slide_type": "-"
9 | }
10 | },
11 | "source": [
12 | "# GPU\n",
13 | "\n",
14 | "查看显卡信息"
15 | ]
16 | },
17 | {
18 | "cell_type": "code",
19 | "execution_count": 1,
20 | "id": "369d9baa",
21 | "metadata": {
22 | "execution": {
23 | "iopub.execute_input": "2023-08-18T06:58:06.499888Z",
24 | "iopub.status.busy": "2023-08-18T06:58:06.499324Z",
25 | "iopub.status.idle": "2023-08-18T06:58:06.859541Z",
26 | "shell.execute_reply": "2023-08-18T06:58:06.858210Z"
27 | },
28 | "origin_pos": 1,
29 | "tab": [
30 | "pytorch"
31 | ]
32 | },
33 | "outputs": [
34 | {
35 | "name": "stdout",
36 | "output_type": "stream",
37 | "text": [
38 | "Fri Aug 18 06:58:06 2023 \r\n",
39 | "+-----------------------------------------------------------------------------+\r\n",
40 | "| NVIDIA-SMI 470.161.03 Driver Version: 470.161.03 CUDA Version: 11.7 |\r\n",
41 | "|-------------------------------+----------------------+----------------------+\r\n",
42 | "| GPU Name Persistence-M| Bus-Id Disp.A | Volatile Uncorr. ECC |\r\n",
43 | "| Fan Temp Perf Pwr:Usage/Cap| Memory-Usage | GPU-Util Compute M. |\r\n",
44 | "| | | MIG M. |\r\n",
45 | "|===============================+======================+======================|\r\n",
46 | "| 0 Tesla V100-SXM2... Off | 00000000:00:1B.0 Off | 0 |\r\n",
47 | "| N/A 41C P0 42W / 300W | 0MiB / 16160MiB | 0% Default |\r\n",
48 | "| | | N/A |\r\n",
49 | "+-------------------------------+----------------------+----------------------+\r\n"
50 | ]
51 | },
52 | {
53 | "name": "stdout",
54 | "output_type": "stream",
55 | "text": [
56 | "| 1 Tesla V100-SXM2... Off | 00000000:00:1C.0 Off | 0 |\r\n",
57 | "| N/A 44C P0 113W / 300W | 1456MiB / 16160MiB | 53% Default |\r\n",
58 | "| | | N/A |\r\n",
59 | "+-------------------------------+----------------------+----------------------+\r\n",
60 | "| 2 Tesla V100-SXM2... Off | 00000000:00:1D.0 Off | 0 |\r\n",
61 | "| N/A 43C P0 120W / 300W | 1358MiB / 16160MiB | 55% Default |\r\n",
62 | "| | | N/A |\r\n",
63 | "+-------------------------------+----------------------+----------------------+\r\n",
64 | "| 3 Tesla V100-SXM2... Off | 00000000:00:1E.0 Off | 0 |\r\n",
65 | "| N/A 42C P0 47W / 300W | 0MiB / 16160MiB | 0% Default |\r\n",
66 | "| | | N/A |\r\n",
67 | "+-------------------------------+----------------------+----------------------+\r\n",
68 | " \r\n",
69 | "+-----------------------------------------------------------------------------+\r\n",
70 | "| Processes: |\r\n",
71 | "| GPU GI CI PID Type Process name GPU Memory |\r\n",
72 | "| ID ID Usage |\r\n",
73 | "|=============================================================================|\r\n",
74 | "+-----------------------------------------------------------------------------+\r\n"
75 | ]
76 | }
77 | ],
78 | "source": [
79 | "!nvidia-smi"
80 | ]
81 | },
82 | {
83 | "cell_type": "markdown",
84 | "id": "19db9813",
85 | "metadata": {
86 | "slideshow": {
87 | "slide_type": "slide"
88 | }
89 | },
90 | "source": [
91 | "计算设备"
92 | ]
93 | },
94 | {
95 | "cell_type": "code",
96 | "execution_count": 2,
97 | "id": "9f69ad46",
98 | "metadata": {
99 | "execution": {
100 | "iopub.execute_input": "2023-08-18T06:58:06.865430Z",
101 | "iopub.status.busy": "2023-08-18T06:58:06.864979Z",
102 | "iopub.status.idle": "2023-08-18T06:58:07.970615Z",
103 | "shell.execute_reply": "2023-08-18T06:58:07.969801Z"
104 | },
105 | "origin_pos": 10,
106 | "tab": [
107 | "pytorch"
108 | ]
109 | },
110 | "outputs": [
111 | {
112 | "data": {
113 | "text/plain": [
114 | "(device(type='cpu'), device(type='cuda'), device(type='cuda', index=1))"
115 | ]
116 | },
117 | "execution_count": 2,
118 | "metadata": {},
119 | "output_type": "execute_result"
120 | }
121 | ],
122 | "source": [
123 | "import torch\n",
124 | "from torch import nn\n",
125 | "\n",
126 | "torch.device('cpu'), torch.device('cuda'), torch.device('cuda:1')"
127 | ]
128 | },
129 | {
130 | "cell_type": "markdown",
131 | "id": "1e0f86ba",
132 | "metadata": {
133 | "slideshow": {
134 | "slide_type": "-"
135 | }
136 | },
137 | "source": [
138 | "查询可用GPU的数量"
139 | ]
140 | },
141 | {
142 | "cell_type": "code",
143 | "execution_count": 3,
144 | "id": "c29151b0",
145 | "metadata": {
146 | "execution": {
147 | "iopub.execute_input": "2023-08-18T06:58:07.974568Z",
148 | "iopub.status.busy": "2023-08-18T06:58:07.973917Z",
149 | "iopub.status.idle": "2023-08-18T06:58:07.979097Z",
150 | "shell.execute_reply": "2023-08-18T06:58:07.978337Z"
151 | },
152 | "origin_pos": 15,
153 | "tab": [
154 | "pytorch"
155 | ]
156 | },
157 | "outputs": [
158 | {
159 | "data": {
160 | "text/plain": [
161 | "2"
162 | ]
163 | },
164 | "execution_count": 3,
165 | "metadata": {},
166 | "output_type": "execute_result"
167 | }
168 | ],
169 | "source": [
170 | "torch.cuda.device_count()"
171 | ]
172 | },
173 | {
174 | "cell_type": "markdown",
175 | "id": "add6b576",
176 | "metadata": {
177 | "slideshow": {
178 | "slide_type": "slide"
179 | }
180 | },
181 | "source": [
182 | "这两个函数允许我们在不存在所需所有GPU的情况下运行代码"
183 | ]
184 | },
185 | {
186 | "cell_type": "code",
187 | "execution_count": 4,
188 | "id": "cda0ab76",
189 | "metadata": {
190 | "execution": {
191 | "iopub.execute_input": "2023-08-18T06:58:07.983261Z",
192 | "iopub.status.busy": "2023-08-18T06:58:07.982604Z",
193 | "iopub.status.idle": "2023-08-18T06:58:07.990309Z",
194 | "shell.execute_reply": "2023-08-18T06:58:07.989541Z"
195 | },
196 | "origin_pos": 20,
197 | "tab": [
198 | "pytorch"
199 | ]
200 | },
201 | "outputs": [
202 | {
203 | "data": {
204 | "text/plain": [
205 | "(device(type='cuda', index=0),\n",
206 | " device(type='cpu'),\n",
207 | " [device(type='cuda', index=0), device(type='cuda', index=1)])"
208 | ]
209 | },
210 | "execution_count": 4,
211 | "metadata": {},
212 | "output_type": "execute_result"
213 | }
214 | ],
215 | "source": [
216 | "def try_gpu(i=0): \n",
217 | " \"\"\"如果存在,则返回gpu(i),否则返回cpu()\"\"\"\n",
218 | " if torch.cuda.device_count() >= i + 1:\n",
219 | " return torch.device(f'cuda:{i}')\n",
220 | " return torch.device('cpu')\n",
221 | "\n",
222 | "def try_all_gpus(): \n",
223 | " \"\"\"返回所有可用的GPU,如果没有GPU,则返回[cpu(),]\"\"\"\n",
224 | " devices = [torch.device(f'cuda:{i}')\n",
225 | " for i in range(torch.cuda.device_count())]\n",
226 | " return devices if devices else [torch.device('cpu')]\n",
227 | "\n",
228 | "try_gpu(), try_gpu(10), try_all_gpus()"
229 | ]
230 | },
231 | {
232 | "cell_type": "markdown",
233 | "id": "013f4e4b",
234 | "metadata": {
235 | "slideshow": {
236 | "slide_type": "slide"
237 | }
238 | },
239 | "source": [
240 | "查询张量所在的设备"
241 | ]
242 | },
243 | {
244 | "cell_type": "code",
245 | "execution_count": 5,
246 | "id": "f6ab0f26",
247 | "metadata": {
248 | "execution": {
249 | "iopub.execute_input": "2023-08-18T06:58:07.994741Z",
250 | "iopub.status.busy": "2023-08-18T06:58:07.994126Z",
251 | "iopub.status.idle": "2023-08-18T06:58:07.999439Z",
252 | "shell.execute_reply": "2023-08-18T06:58:07.998673Z"
253 | },
254 | "origin_pos": 25,
255 | "tab": [
256 | "pytorch"
257 | ]
258 | },
259 | "outputs": [
260 | {
261 | "data": {
262 | "text/plain": [
263 | "device(type='cpu')"
264 | ]
265 | },
266 | "execution_count": 5,
267 | "metadata": {},
268 | "output_type": "execute_result"
269 | }
270 | ],
271 | "source": [
272 | "x = torch.tensor([1, 2, 3])\n",
273 | "x.device"
274 | ]
275 | },
276 | {
277 | "cell_type": "markdown",
278 | "id": "9404d10b",
279 | "metadata": {
280 | "slideshow": {
281 | "slide_type": "slide"
282 | }
283 | },
284 | "source": [
285 | "存储在GPU上"
286 | ]
287 | },
288 | {
289 | "cell_type": "code",
290 | "execution_count": 6,
291 | "id": "a67dbf2f",
292 | "metadata": {
293 | "execution": {
294 | "iopub.execute_input": "2023-08-18T06:58:08.004162Z",
295 | "iopub.status.busy": "2023-08-18T06:58:08.003541Z",
296 | "iopub.status.idle": "2023-08-18T06:58:09.277879Z",
297 | "shell.execute_reply": "2023-08-18T06:58:09.277008Z"
298 | },
299 | "origin_pos": 30,
300 | "tab": [
301 | "pytorch"
302 | ]
303 | },
304 | "outputs": [
305 | {
306 | "data": {
307 | "text/plain": [
308 | "tensor([[1., 1., 1.],\n",
309 | " [1., 1., 1.]], device='cuda:0')"
310 | ]
311 | },
312 | "execution_count": 6,
313 | "metadata": {},
314 | "output_type": "execute_result"
315 | }
316 | ],
317 | "source": [
318 | "X = torch.ones(2, 3, device=try_gpu())\n",
319 | "X"
320 | ]
321 | },
322 | {
323 | "cell_type": "markdown",
324 | "id": "5bf59dc4",
325 | "metadata": {
326 | "slideshow": {
327 | "slide_type": "-"
328 | }
329 | },
330 | "source": [
331 | "在第二个GPU上创建一个随机张量"
332 | ]
333 | },
334 | {
335 | "cell_type": "code",
336 | "execution_count": 7,
337 | "id": "7c0d4a84",
338 | "metadata": {
339 | "execution": {
340 | "iopub.execute_input": "2023-08-18T06:58:09.282814Z",
341 | "iopub.status.busy": "2023-08-18T06:58:09.282230Z",
342 | "iopub.status.idle": "2023-08-18T06:58:10.279046Z",
343 | "shell.execute_reply": "2023-08-18T06:58:10.278227Z"
344 | },
345 | "origin_pos": 35,
346 | "tab": [
347 | "pytorch"
348 | ]
349 | },
350 | "outputs": [
351 | {
352 | "data": {
353 | "text/plain": [
354 | "tensor([[0.4860, 0.1285, 0.0440],\n",
355 | " [0.9743, 0.4159, 0.9979]], device='cuda:1')"
356 | ]
357 | },
358 | "execution_count": 7,
359 | "metadata": {},
360 | "output_type": "execute_result"
361 | }
362 | ],
363 | "source": [
364 | "Y = torch.rand(2, 3, device=try_gpu(1))\n",
365 | "Y"
366 | ]
367 | },
368 | {
369 | "cell_type": "markdown",
370 | "id": "397b13bf",
371 | "metadata": {
372 | "slideshow": {
373 | "slide_type": "slide"
374 | }
375 | },
376 | "source": [
377 | "要计算`X + Y`,我们需要决定在哪里执行这个操作"
378 | ]
379 | },
380 | {
381 | "cell_type": "code",
382 | "execution_count": 8,
383 | "id": "9e700cd2",
384 | "metadata": {
385 | "execution": {
386 | "iopub.execute_input": "2023-08-18T06:58:10.284097Z",
387 | "iopub.status.busy": "2023-08-18T06:58:10.283529Z",
388 | "iopub.status.idle": "2023-08-18T06:58:10.290795Z",
389 | "shell.execute_reply": "2023-08-18T06:58:10.290007Z"
390 | },
391 | "origin_pos": 40,
392 | "tab": [
393 | "pytorch"
394 | ]
395 | },
396 | "outputs": [
397 | {
398 | "name": "stdout",
399 | "output_type": "stream",
400 | "text": [
401 | "tensor([[1., 1., 1.],\n",
402 | " [1., 1., 1.]], device='cuda:0')\n",
403 | "tensor([[1., 1., 1.],\n",
404 | " [1., 1., 1.]], device='cuda:1')\n"
405 | ]
406 | }
407 | ],
408 | "source": [
409 | "Z = X.cuda(1)\n",
410 | "print(X)\n",
411 | "print(Z)"
412 | ]
413 | },
414 | {
415 | "cell_type": "markdown",
416 | "id": "a8407698",
417 | "metadata": {
418 | "slideshow": {
419 | "slide_type": "slide"
420 | }
421 | },
422 | "source": [
423 | "现在数据在同一个GPU上(`Z`和`Y`都在),我们可以将它们相加"
424 | ]
425 | },
426 | {
427 | "cell_type": "code",
428 | "execution_count": 9,
429 | "id": "b2f04f35",
430 | "metadata": {
431 | "execution": {
432 | "iopub.execute_input": "2023-08-18T06:58:10.295377Z",
433 | "iopub.status.busy": "2023-08-18T06:58:10.294845Z",
434 | "iopub.status.idle": "2023-08-18T06:58:10.301122Z",
435 | "shell.execute_reply": "2023-08-18T06:58:10.300297Z"
436 | },
437 | "origin_pos": 43,
438 | "tab": [
439 | "pytorch"
440 | ]
441 | },
442 | "outputs": [
443 | {
444 | "data": {
445 | "text/plain": [
446 | "tensor([[1.4860, 1.1285, 1.0440],\n",
447 | " [1.9743, 1.4159, 1.9979]], device='cuda:1')"
448 | ]
449 | },
450 | "execution_count": 9,
451 | "metadata": {},
452 | "output_type": "execute_result"
453 | }
454 | ],
455 | "source": [
456 | "Y + Z"
457 | ]
458 | },
459 | {
460 | "cell_type": "code",
461 | "execution_count": 10,
462 | "id": "d6b95aa1",
463 | "metadata": {
464 | "execution": {
465 | "iopub.execute_input": "2023-08-18T06:58:10.305143Z",
466 | "iopub.status.busy": "2023-08-18T06:58:10.304592Z",
467 | "iopub.status.idle": "2023-08-18T06:58:10.309707Z",
468 | "shell.execute_reply": "2023-08-18T06:58:10.308894Z"
469 | },
470 | "origin_pos": 48,
471 | "tab": [
472 | "pytorch"
473 | ]
474 | },
475 | "outputs": [
476 | {
477 | "data": {
478 | "text/plain": [
479 | "True"
480 | ]
481 | },
482 | "execution_count": 10,
483 | "metadata": {},
484 | "output_type": "execute_result"
485 | }
486 | ],
487 | "source": [
488 | "Z.cuda(1) is Z"
489 | ]
490 | },
491 | {
492 | "cell_type": "markdown",
493 | "id": "86e67482",
494 | "metadata": {
495 | "slideshow": {
496 | "slide_type": "slide"
497 | }
498 | },
499 | "source": [
500 | "神经网络与GPU"
501 | ]
502 | },
503 | {
504 | "cell_type": "code",
505 | "execution_count": 12,
506 | "id": "955f7f67",
507 | "metadata": {
508 | "execution": {
509 | "iopub.execute_input": "2023-08-18T06:58:10.340989Z",
510 | "iopub.status.busy": "2023-08-18T06:58:10.340312Z",
511 | "iopub.status.idle": "2023-08-18T06:58:10.930969Z",
512 | "shell.execute_reply": "2023-08-18T06:58:10.930143Z"
513 | },
514 | "origin_pos": 56,
515 | "tab": [
516 | "pytorch"
517 | ]
518 | },
519 | "outputs": [
520 | {
521 | "data": {
522 | "text/plain": [
523 | "tensor([[-0.4275],\n",
524 | " [-0.4275]], device='cuda:0', grad_fn=<AddmmBackward0>)"
525 | ]
526 | },
527 | "execution_count": 12,
528 | "metadata": {},
529 | "output_type": "execute_result"
530 | }
531 | ],
532 | "source": [
533 | "net = nn.Sequential(nn.Linear(3, 1))\n",
534 | "net = net.to(device=try_gpu())\n",
535 | "\n",
536 | "net(X)"
537 | ]
538 | },
539 | {
540 | "cell_type": "markdown",
541 | "id": "b1ae89a4",
542 | "metadata": {
543 | "slideshow": {
544 | "slide_type": "-"
545 | }
546 | },
547 | "source": [
548 | "确认模型参数存储在同一个GPU上"
549 | ]
550 | },
551 | {
552 | "cell_type": "code",
553 | "execution_count": 13,
554 | "id": "bd727993",
555 | "metadata": {
556 | "execution": {
557 | "iopub.execute_input": "2023-08-18T06:58:10.935087Z",
558 | "iopub.status.busy": "2023-08-18T06:58:10.934497Z",
559 | "iopub.status.idle": "2023-08-18T06:58:10.939740Z",
560 | "shell.execute_reply": "2023-08-18T06:58:10.938974Z"
561 | },
562 | "origin_pos": 59,
563 | "tab": [
564 | "pytorch"
565 | ]
566 | },
567 | "outputs": [
568 | {
569 | "data": {
570 | "text/plain": [
571 | "device(type='cuda', index=0)"
572 | ]
573 | },
574 | "execution_count": 13,
575 | "metadata": {},
576 | "output_type": "execute_result"
577 | }
578 | ],
579 | "source": [
580 | "net[0].weight.data.device"
581 | ]
582 | }
583 | ],
584 | "metadata": {
585 | "celltoolbar": "Slideshow",
586 | "language_info": {
587 | "name": "python"
588 | },
589 | "required_libs": [],
590 | "rise": {
591 | "autolaunch": true,
592 | "enable_chalkboard": true,
593 | "overlay": "<div class='my-top-right'></div><div class='my-top-left'></div>",
594 | "scroll": true
595 | }
596 | },
597 | "nbformat": 4,
598 | "nbformat_minor": 5
599 | }
--------------------------------------------------------------------------------
/chapter_linear-networks/linear-regression-concise.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "id": "a5b9ec6e",
6 | "metadata": {
7 | "slideshow": {
8 | "slide_type": "slide"
9 | }
10 | },
11 | "source": [
12 | "# 线性回归的简洁实现\n",
13 | "\n",
14 | "通过使用深度学习框架来简洁地实现线性回归模型。\n",
15 | "\n",
16 | "生成数据集"
17 | ]
18 | },
19 | {
20 | "cell_type": "code",
21 | "execution_count": 2,
22 | "id": "c26b741f",
23 | "metadata": {
24 | "execution": {
25 | "iopub.execute_input": "2023-08-18T07:01:54.616404Z",
26 | "iopub.status.busy": "2023-08-18T07:01:54.615685Z",
27 | "iopub.status.idle": "2023-08-18T07:01:54.643472Z",
28 | "shell.execute_reply": "2023-08-18T07:01:54.642512Z"
29 | },
30 | "origin_pos": 5,
31 | "tab": [
32 | "pytorch"
33 | ]
34 | },
35 | "outputs": [],
36 | "source": [
37 | "import numpy as np\n",
38 | "import torch\n",
39 | "from torch.utils import data\n",
40 | "from d2l import torch as d2l\n",
41 | "\n",
42 | "true_w = torch.tensor([2, -3.4])\n",
43 | "true_b = 4.2\n",
44 | "features, labels = d2l.synthetic_data(true_w, true_b, 1000)"
45 | ]
46 | },
47 | {
48 | "cell_type": "markdown",
49 | "id": "25eda004",
50 | "metadata": {
51 | "slideshow": {
52 | "slide_type": "slide"
53 | }
54 | },
55 | "source": [
56 | "调用框架中现有的API来读取数据"
57 | ]
58 | },
59 | {
60 | "cell_type": "code",
61 | "execution_count": 5,
62 | "id": "7c6919b8",
63 | "metadata": {
64 | "execution": {
65 | "iopub.execute_input": "2023-08-18T07:01:54.665574Z",
66 | "iopub.status.busy": "2023-08-18T07:01:54.664999Z",
67 | "iopub.status.idle": "2023-08-18T07:01:54.673523Z",
68 | "shell.execute_reply": "2023-08-18T07:01:54.672688Z"
69 | },
70 | "origin_pos": 13,
71 | "tab": [
72 | "pytorch"
73 | ]
74 | },
75 | "outputs": [
76 | {
77 | "data": {
78 | "text/plain": [
79 | "[tensor([[-1.3116, -0.3062],\n",
80 | " [-1.5653, 0.4830],\n",
81 | " [-0.8893, -0.9466],\n",
82 | " [-1.2417, 1.6891],\n",
83 | " [-0.7148, 0.1376],\n",
84 | " [-0.2162, -0.6122],\n",
85 | " [ 2.4048, -0.3211],\n",
86 | " [-0.1516, 0.4997],\n",
87 | " [ 1.5298, -0.2291],\n",
88 | " [ 1.3895, 1.2602]]),\n",
89 | " tensor([[ 2.6073],\n",
90 | " [-0.5787],\n",
91 | " [ 5.6339],\n",
92 | " [-4.0211],\n",
93 | " [ 2.3117],\n",
94 | " [ 5.8492],\n",
95 | " [10.0926],\n",
96 | " [ 2.1932],\n",
97 | " [ 8.0441],\n",
98 | " [ 2.6943]])]"
99 | ]
100 | },
101 | "execution_count": 5,
102 | "metadata": {},
103 | "output_type": "execute_result"
104 | }
105 | ],
106 | "source": [
107 | "def load_array(data_arrays, batch_size, is_train=True): \n",
108 | " \"\"\"构造一个PyTorch数据迭代器\"\"\"\n",
109 | " dataset = data.TensorDataset(*data_arrays)\n",
110 | " return data.DataLoader(dataset, batch_size, shuffle=is_train)\n",
111 | "\n",
112 | "batch_size = 10\n",
113 | "data_iter = load_array((features, labels), batch_size)\n",
114 | "\n",
115 | "next(iter(data_iter))"
116 | ]
117 | },
118 | {
119 | "cell_type": "markdown",
120 | "id": "b4c6012b",
121 | "metadata": {
122 | "slideshow": {
123 | "slide_type": "slide"
124 | }
125 | },
126 | "source": [
127 | "使用框架的预定义好的层"
128 | ]
129 | },
130 | {
131 | "cell_type": "code",
132 | "execution_count": 6,
133 | "id": "85c54a1a",
134 | "metadata": {
135 | "execution": {
136 | "iopub.execute_input": "2023-08-18T07:01:54.677177Z",
137 | "iopub.status.busy": "2023-08-18T07:01:54.676580Z",
138 | "iopub.status.idle": "2023-08-18T07:01:54.680914Z",
139 | "shell.execute_reply": "2023-08-18T07:01:54.680130Z"
140 | },
141 | "origin_pos": 20,
142 | "tab": [
143 | "pytorch"
144 | ]
145 | },
146 | "outputs": [],
147 | "source": [
148 | "from torch import nn\n",
149 | "\n",
150 | "net = nn.Sequential(nn.Linear(2, 1))"
151 | ]
152 | },
153 | {
154 | "cell_type": "markdown",
155 | "id": "9bf96a4d",
156 | "metadata": {
157 | "slideshow": {
158 | "slide_type": "-"
159 | }
160 | },
161 | "source": [
162 | "初始化模型参数"
163 | ]
164 | },
165 | {
166 | "cell_type": "code",
167 | "execution_count": 7,
168 | "id": "31716c55",
169 | "metadata": {
170 | "execution": {
171 | "iopub.execute_input": "2023-08-18T07:01:54.684561Z",
172 | "iopub.status.busy": "2023-08-18T07:01:54.684036Z",
173 | "iopub.status.idle": "2023-08-18T07:01:54.690673Z",
174 | "shell.execute_reply": "2023-08-18T07:01:54.689754Z"
175 | },
176 | "origin_pos": 29,
177 | "tab": [
178 | "pytorch"
179 | ]
180 | },
181 | "outputs": [
182 | {
183 | "data": {
184 | "text/plain": [
185 | "tensor([0.])"
186 | ]
187 | },
188 | "execution_count": 7,
189 | "metadata": {},
190 | "output_type": "execute_result"
191 | }
192 | ],
193 | "source": [
194 | "net[0].weight.data.normal_(0, 0.01)\n",
195 | "net[0].bias.data.fill_(0)"
196 | ]
197 | },
198 | {
199 | "cell_type": "markdown",
200 | "id": "11c01887",
201 | "metadata": {
202 | "slideshow": {
203 | "slide_type": "slide"
204 | }
205 | },
206 | "source": [
207 | "计算均方误差使用的是`MSELoss`类,也称为平方$L_2$范数"
208 | ]
209 | },
210 | {
211 | "cell_type": "code",
212 | "execution_count": 8,
213 | "id": "19a417ac",
214 | "metadata": {
215 | "execution": {
216 | "iopub.execute_input": "2023-08-18T07:01:54.695575Z",
217 | "iopub.status.busy": "2023-08-18T07:01:54.694922Z",
218 | "iopub.status.idle": "2023-08-18T07:01:54.699373Z",
219 | "shell.execute_reply": "2023-08-18T07:01:54.698348Z"
220 | },
221 | "origin_pos": 41,
222 | "tab": [
223 | "pytorch"
224 | ]
225 | },
226 | "outputs": [],
227 | "source": [
228 | "loss = nn.MSELoss()"
229 | ]
230 | },
231 | {
232 | "cell_type": "markdown",
233 | "id": "68315d47",
234 | "metadata": {
235 | "slideshow": {
236 | "slide_type": "-"
237 | }
238 | },
239 | "source": [
240 | "实例化一个`SGD`实例"
241 | ]
242 | },
243 | {
244 | "cell_type": "code",
245 | "execution_count": 9,
246 | "id": "1ae0989f",
247 | "metadata": {
248 | "execution": {
249 | "iopub.execute_input": "2023-08-18T07:01:54.703905Z",
250 | "iopub.status.busy": "2023-08-18T07:01:54.703368Z",
251 | "iopub.status.idle": "2023-08-18T07:01:54.708081Z",
252 | "shell.execute_reply": "2023-08-18T07:01:54.706987Z"
253 | },
254 | "origin_pos": 50,
255 | "tab": [
256 | "pytorch"
257 | ]
258 | },
259 | "outputs": [],
260 | "source": [
261 | "trainer = torch.optim.SGD(net.parameters(), lr=0.03)"
262 | ]
263 | },
264 | {
265 | "cell_type": "markdown",
266 | "id": "d5991157",
267 | "metadata": {
268 | "slideshow": {
269 | "slide_type": "slide"
270 | }
271 | },
272 | "source": [
273 | "训练过程代码与我们从零开始实现时所做的非常相似"
274 | ]
275 | },
276 | {
277 | "cell_type": "code",
278 | "execution_count": 10,
279 | "id": "1270d706",
280 | "metadata": {
281 | "execution": {
282 | "iopub.execute_input": "2023-08-18T07:01:54.712705Z",
283 | "iopub.status.busy": "2023-08-18T07:01:54.712113Z",
284 | "iopub.status.idle": "2023-08-18T07:01:54.922720Z",
285 | "shell.execute_reply": "2023-08-18T07:01:54.921580Z"
286 | },
287 | "origin_pos": 55,
288 | "tab": [
289 | "pytorch"
290 | ]
291 | },
292 | "outputs": [
293 | {
294 | "name": "stdout",
295 | "output_type": "stream",
296 | "text": [
297 | "epoch 1, loss 0.000248\n",
298 | "epoch 2, loss 0.000103\n",
299 | "epoch 3, loss 0.000103\n"
300 | ]
301 | }
302 | ],
303 | "source": [
304 | "num_epochs = 3\n",
305 | "for epoch in range(num_epochs):\n",
306 | " for X, y in data_iter:\n",
307 | " l = loss(net(X) ,y)\n",
308 | " trainer.zero_grad()\n",
309 | " l.backward()\n",
310 | " trainer.step()\n",
311 | " l = loss(net(features), labels)\n",
312 | " print(f'epoch {epoch + 1}, loss {l:f}')"
313 | ]
314 | },
315 | {
316 | "cell_type": "markdown",
317 | "id": "7c8e4f61",
318 | "metadata": {
319 | "slideshow": {
320 | "slide_type": "slide"
321 | }
322 | },
323 | "source": [
324 | "比较生成数据集的真实参数和通过有限数据训练获得的模型参数"
325 | ]
326 | },
327 | {
328 | "cell_type": "code",
329 | "execution_count": 11,
330 | "id": "aa7cef5a",
331 | "metadata": {
332 | "execution": {
333 | "iopub.execute_input": "2023-08-18T07:01:54.927464Z",
334 | "iopub.status.busy": "2023-08-18T07:01:54.927072Z",
335 | "iopub.status.idle": "2023-08-18T07:01:54.935672Z",
336 | "shell.execute_reply": "2023-08-18T07:01:54.934585Z"
337 | },
338 | "origin_pos": 60,
339 | "tab": [
340 | "pytorch"
341 | ]
342 | },
343 | "outputs": [
344 | {
345 | "name": "stdout",
346 | "output_type": "stream",
347 | "text": [
348 | "w的估计误差: tensor([-0.0010, -0.0003])\n",
349 | "b的估计误差: tensor([-0.0003])\n"
350 | ]
351 | }
352 | ],
353 | "source": [
354 | "w = net[0].weight.data\n",
355 | "print('w的估计误差:', true_w - w.reshape(true_w.shape))\n",
356 | "b = net[0].bias.data\n",
357 | "print('b的估计误差:', true_b - b)"
358 | ]
359 | }
360 | ],
361 | "metadata": {
362 | "celltoolbar": "Slideshow",
363 | "language_info": {
364 | "name": "python"
365 | },
366 | "required_libs": [],
367 | "rise": {
368 | "autolaunch": true,
369 | "enable_chalkboard": true,
370 | "overlay": "<div class='my-top-right'></div><div class='my-top-left'></div>",
371 | "scroll": true
372 | }
373 | },
374 | "nbformat": 4,
375 | "nbformat_minor": 5
376 | }
--------------------------------------------------------------------------------
/chapter_linear-networks/rise.css:
--------------------------------------------------------------------------------
1 |
2 | div.text_cell_render.rendered_html {
3 | padding: 0.35em 0.1em;
4 | }
5 |
6 | div.code_cell {
7 | font-size: 120%;
8 | }
9 |
10 | div.my-top-right {
11 | position: absolute;
12 | right: 5%;
13 | top: 1em;
14 | font-size: 2em;
15 | }
16 |
17 | div.my-top-left {
18 | position: absolute;
19 | left: 5%;
20 | top: 1em;
21 | font-size: 2em;
22 | }
23 |
--------------------------------------------------------------------------------
/chapter_multilayer-perceptrons/rise.css:
--------------------------------------------------------------------------------
1 |
2 | div.text_cell_render.rendered_html {
3 | padding: 0.35em 0.1em;
4 | }
5 |
6 | div.code_cell {
7 | font-size: 120%;
8 | }
9 |
10 | div.my-top-right {
11 | position: absolute;
12 | right: 5%;
13 | top: 1em;
14 | font-size: 2em;
15 | }
16 |
17 | div.my-top-left {
18 | position: absolute;
19 | left: 5%;
20 | top: 1em;
21 | font-size: 2em;
22 | }
23 |
--------------------------------------------------------------------------------
/chapter_natural-language-processing-applications/natural-language-inference-and-dataset.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "id": "e030be85",
6 | "metadata": {
7 | "slideshow": {
8 | "slide_type": "slide"
9 | }
10 | },
11 | "source": [
12 | "# 自然语言推断与数据集\n",
13 | "\n",
14 | "斯坦福自然语言推断语料库(Stanford Natural Language Inference,SNLI)"
15 | ]
16 | },
17 | {
18 | "cell_type": "code",
19 | "execution_count": 1,
20 | "id": "85ccbfd4",
21 | "metadata": {
22 | "execution": {
23 | "iopub.execute_input": "2023-08-18T07:06:00.201212Z",
24 | "iopub.status.busy": "2023-08-18T07:06:00.200144Z",
25 | "iopub.status.idle": "2023-08-18T07:06:09.370822Z",
26 | "shell.execute_reply": "2023-08-18T07:06:09.368591Z"
27 | },
28 | "origin_pos": 2,
29 | "tab": [
30 | "pytorch"
31 | ]
32 | },
33 | "outputs": [],
34 | "source": [
35 | "import os\n",
36 | "import re\n",
37 | "import torch\n",
38 | "from torch import nn\n",
39 | "from d2l import torch as d2l\n",
40 | "\n",
41 | "d2l.DATA_HUB['SNLI'] = (\n",
42 | " 'https://nlp.stanford.edu/projects/snli/snli_1.0.zip',\n",
43 | " '9fcde07509c7e87ec61c640c1b2753d9041758e4')\n",
44 | "\n",
45 | "data_dir = d2l.download_extract('SNLI')"
46 | ]
47 | },
48 | {
49 | "cell_type": "markdown",
50 | "id": "4b6dcd15",
51 | "metadata": {
52 | "slideshow": {
53 | "slide_type": "slide"
54 | }
55 | },
56 | "source": [
57 | "读取数据集"
58 | ]
59 | },
60 | {
61 | "cell_type": "code",
62 | "execution_count": 2,
63 | "id": "fa839f80",
64 | "metadata": {
65 | "execution": {
66 | "iopub.execute_input": "2023-08-18T07:06:09.377922Z",
67 | "iopub.status.busy": "2023-08-18T07:06:09.377380Z",
68 | "iopub.status.idle": "2023-08-18T07:06:09.392203Z",
69 | "shell.execute_reply": "2023-08-18T07:06:09.390984Z"
70 | },
71 | "origin_pos": 5,
72 | "tab": [
73 | "pytorch"
74 | ]
75 | },
76 | "outputs": [],
77 | "source": [
78 | "def read_snli(data_dir, is_train):\n",
79 | " \"\"\"将SNLI数据集解析为前提、假设和标签\"\"\"\n",
80 | " def extract_text(s):\n",
81 | " s = re.sub('\\\\(', '', s)\n",
82 | " s = re.sub('\\\\)', '', s)\n",
83 | " s = re.sub('\\\\s{2,}', ' ', s)\n",
84 | " return s.strip()\n",
85 | " label_set = {'entailment': 0, 'contradiction': 1, 'neutral': 2}\n",
86 | " file_name = os.path.join(data_dir, 'snli_1.0_train.txt'\n",
87 | " if is_train else 'snli_1.0_test.txt')\n",
88 | " with open(file_name, 'r') as f:\n",
89 | " rows = [row.split('\\t') for row in f.readlines()[1:]]\n",
90 | " premises = [extract_text(row[1]) for row in rows if row[0] in label_set]\n",
91 | " hypotheses = [extract_text(row[2]) for row in rows if row[0] \\\n",
92 | " in label_set]\n",
93 | " labels = [label_set[row[0]] for row in rows if row[0] in label_set]\n",
94 | " return premises, hypotheses, labels"
95 | ]
96 | },
97 | {
98 | "cell_type": "markdown",
99 | "id": "ab3a4dc3",
100 | "metadata": {
101 | "slideshow": {
102 | "slide_type": "slide"
103 | }
104 | },
105 | "source": [
106 | "打印前3对"
107 | ]
108 | },
109 | {
110 | "cell_type": "code",
111 | "execution_count": 3,
112 | "id": "19101f9e",
113 | "metadata": {
114 | "execution": {
115 | "iopub.execute_input": "2023-08-18T07:06:09.397297Z",
116 | "iopub.status.busy": "2023-08-18T07:06:09.396407Z",
117 | "iopub.status.idle": "2023-08-18T07:06:23.206512Z",
118 | "shell.execute_reply": "2023-08-18T07:06:23.205574Z"
119 | },
120 | "origin_pos": 7,
121 | "tab": [
122 | "pytorch"
123 | ]
124 | },
125 | "outputs": [
126 | {
127 | "name": "stdout",
128 | "output_type": "stream",
129 | "text": [
130 | "前提: A person on a horse jumps over a broken down airplane .\n",
131 | "假设: A person is training his horse for a competition .\n",
132 | "标签: 2\n",
133 | "前提: A person on a horse jumps over a broken down airplane .\n",
134 | "假设: A person is at a diner , ordering an omelette .\n",
135 | "标签: 1\n",
136 | "前提: A person on a horse jumps over a broken down airplane .\n",
137 | "假设: A person is outdoors , on a horse .\n",
138 | "标签: 0\n"
139 | ]
140 | }
141 | ],
142 | "source": [
143 | "train_data = read_snli(data_dir, is_train=True)\n",
144 | "for x0, x1, y in zip(train_data[0][:3], train_data[1][:3], train_data[2][:3]):\n",
145 | " print('前提:', x0)\n",
146 | " print('假设:', x1)\n",
147 | " print('标签:', y)"
148 | ]
149 | },
150 | {
151 | "cell_type": "markdown",
152 | "id": "ffb6b0b0",
153 | "metadata": {
154 | "slideshow": {
155 | "slide_type": "slide"
156 | }
157 | },
158 | "source": [
159 | "标签“蕴涵”“矛盾”和“中性”是平衡的"
160 | ]
161 | },
162 | {
163 | "cell_type": "code",
164 | "execution_count": 4,
165 | "id": "972ca3d1",
166 | "metadata": {
167 | "execution": {
168 | "iopub.execute_input": "2023-08-18T07:06:23.210300Z",
169 | "iopub.status.busy": "2023-08-18T07:06:23.209728Z",
170 | "iopub.status.idle": "2023-08-18T07:06:23.531128Z",
171 | "shell.execute_reply": "2023-08-18T07:06:23.530246Z"
172 | },
173 | "origin_pos": 9,
174 | "tab": [
175 | "pytorch"
176 | ]
177 | },
178 | "outputs": [
179 | {
180 | "name": "stdout",
181 | "output_type": "stream",
182 | "text": [
183 | "[183416, 183187, 182764]\n",
184 | "[3368, 3237, 3219]\n"
185 | ]
186 | }
187 | ],
188 | "source": [
189 | "test_data = read_snli(data_dir, is_train=False)\n",
190 | "for data in [train_data, test_data]:\n",
191 | " print([[row for row in data[2]].count(i) for i in range(3)])"
192 | ]
193 | },
194 | {
195 | "cell_type": "markdown",
196 | "id": "8fe876db",
197 | "metadata": {
198 | "slideshow": {
199 | "slide_type": "slide"
200 | }
201 | },
202 | "source": [
203 | "定义用于加载数据集的类"
204 | ]
205 | },
206 | {
207 | "cell_type": "code",
208 | "execution_count": 5,
209 | "id": "b8b15f65",
210 | "metadata": {
211 | "execution": {
212 | "iopub.execute_input": "2023-08-18T07:06:23.534933Z",
213 | "iopub.status.busy": "2023-08-18T07:06:23.534365Z",
214 | "iopub.status.idle": "2023-08-18T07:06:23.542550Z",
215 | "shell.execute_reply": "2023-08-18T07:06:23.541714Z"
216 | },
217 | "origin_pos": 12,
218 | "tab": [
219 | "pytorch"
220 | ]
221 | },
222 | "outputs": [],
223 | "source": [
224 | "class SNLIDataset(torch.utils.data.Dataset):\n",
225 | " \"\"\"用于加载SNLI数据集的自定义数据集\"\"\"\n",
226 | " def __init__(self, dataset, num_steps, vocab=None):\n",
227 | " self.num_steps = num_steps\n",
228 | " all_premise_tokens = d2l.tokenize(dataset[0])\n",
229 | " all_hypothesis_tokens = d2l.tokenize(dataset[1])\n",
230 | " if vocab is None:\n",
231 | " self.vocab = d2l.Vocab(all_premise_tokens + \\\n",
232 | " all_hypothesis_tokens, min_freq=5, reserved_tokens=['<pad>'])\n",
233 | " else:\n",
234 | " self.vocab = vocab\n",
235 | " self.premises = self._pad(all_premise_tokens)\n",
236 | " self.hypotheses = self._pad(all_hypothesis_tokens)\n",
237 | " self.labels = torch.tensor(dataset[2])\n",
238 | " print('read ' + str(len(self.premises)) + ' examples')\n",
239 | "\n",
240 | " def _pad(self, lines):\n",
241 | " return torch.tensor([d2l.truncate_pad(\n",
242 | " self.vocab[line], self.num_steps, self.vocab['<pad>'])\n",
243 | " for line in lines])\n",
244 | "\n",
245 | " def __getitem__(self, idx):\n",
246 | " return (self.premises[idx], self.hypotheses[idx]), self.labels[idx]\n",
247 | "\n",
248 | " def __len__(self):\n",
249 | " return len(self.premises)"
250 | ]
251 | },
252 | {
253 | "cell_type": "markdown",
254 | "id": "f4ab0616",
255 | "metadata": {
256 | "slideshow": {
257 | "slide_type": "slide"
258 | }
259 | },
260 | "source": [
261 | "整合代码"
262 | ]
263 | },
264 | {
265 | "cell_type": "code",
266 | "execution_count": 7,
267 | "id": "08d0c755",
268 | "metadata": {
269 | "execution": {
270 | "iopub.execute_input": "2023-08-18T07:06:23.554839Z",
271 | "iopub.status.busy": "2023-08-18T07:06:23.554288Z",
272 | "iopub.status.idle": "2023-08-18T07:07:02.488484Z",
273 | "shell.execute_reply": "2023-08-18T07:07:02.487658Z"
274 | },
275 | "origin_pos": 19,
276 | "tab": [
277 | "pytorch"
278 | ]
279 | },
280 | "outputs": [
281 | {
282 | "name": "stdout",
283 | "output_type": "stream",
284 | "text": [
285 | "read 549367 examples\n"
286 | ]
287 | },
288 | {
289 | "name": "stdout",
290 | "output_type": "stream",
291 | "text": [
292 | "read 9824 examples\n"
293 | ]
294 | },
295 | {
296 | "data": {
297 | "text/plain": [
298 | "18678"
299 | ]
300 | },
301 | "execution_count": 7,
302 | "metadata": {},
303 | "output_type": "execute_result"
304 | }
305 | ],
306 | "source": [
307 | "def load_data_snli(batch_size, num_steps=50):\n",
308 | " \"\"\"下载SNLI数据集并返回数据迭代器和词表\"\"\"\n",
309 | " num_workers = d2l.get_dataloader_workers()\n",
310 | " data_dir = d2l.download_extract('SNLI')\n",
311 | " train_data = read_snli(data_dir, True)\n",
312 | " test_data = read_snli(data_dir, False)\n",
313 | " train_set = SNLIDataset(train_data, num_steps)\n",
314 | " test_set = SNLIDataset(test_data, num_steps, train_set.vocab)\n",
315 | " train_iter = torch.utils.data.DataLoader(train_set, batch_size,\n",
316 | " shuffle=True,\n",
317 | " num_workers=num_workers)\n",
318 | " test_iter = torch.utils.data.DataLoader(test_set, batch_size,\n",
319 | " shuffle=False,\n",
320 | " num_workers=num_workers)\n",
321 | " return train_iter, test_iter, train_set.vocab\n",
322 | "\n",
323 | "train_iter, test_iter, vocab = load_data_snli(128, 50)\n",
324 | "len(vocab)"
325 | ]
326 | },
327 | {
328 | "cell_type": "code",
329 | "execution_count": 8,
330 | "id": "d7411a33",
331 | "metadata": {
332 | "execution": {
333 | "iopub.execute_input": "2023-08-18T07:07:02.492220Z",
334 | "iopub.status.busy": "2023-08-18T07:07:02.491909Z",
335 | "iopub.status.idle": "2023-08-18T07:07:02.966465Z",
336 | "shell.execute_reply": "2023-08-18T07:07:02.965137Z"
337 | },
338 | "origin_pos": 21,
339 | "tab": [
340 | "pytorch"
341 | ]
342 | },
343 | "outputs": [
344 | {
345 | "name": "stdout",
346 | "output_type": "stream",
347 | "text": [
348 | "torch.Size([128, 50])\n",
349 | "torch.Size([128, 50])\n",
350 | "torch.Size([128])\n"
351 | ]
352 | }
353 | ],
354 | "source": [
355 | "for X, Y in train_iter:\n",
356 | " print(X[0].shape)\n",
357 | " print(X[1].shape)\n",
358 | " print(Y.shape)\n",
359 | " break"
360 | ]
361 | }
362 | ],
363 | "metadata": {
364 | "celltoolbar": "Slideshow",
365 | "language_info": {
366 | "name": "python"
367 | },
368 | "required_libs": [],
369 | "rise": {
370 | "autolaunch": true,
371 | "enable_chalkboard": true,
372 | "overlay": "<div class='my-top-right'></div><div class='my-top-left'></div>",
373 | "scroll": true
374 | }
375 | },
376 | "nbformat": 4,
377 | "nbformat_minor": 5
378 | }
--------------------------------------------------------------------------------
/chapter_natural-language-processing-applications/rise.css:
--------------------------------------------------------------------------------
1 |
2 | div.text_cell_render.rendered_html {
3 | padding: 0.35em 0.1em;
4 | }
5 |
6 | div.code_cell {
7 | font-size: 120%;
8 | }
9 |
10 | div.my-top-right {
11 | position: absolute;
12 | right: 5%;
13 | top: 1em;
14 | font-size: 2em;
15 | }
16 |
17 | div.my-top-left {
18 | position: absolute;
19 | left: 5%;
20 | top: 1em;
21 | font-size: 2em;
22 | }
23 |
--------------------------------------------------------------------------------
/chapter_preliminaries/autograd.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "id": "807b7862",
6 | "metadata": {
7 | "slideshow": {
8 | "slide_type": "-"
9 | }
10 | },
11 | "source": [
12 | "# 自动微分\n",
13 | "\n",
14 | "假设我们想对函数$y=2\\mathbf{x}^{\\top}\\mathbf{x}$关于列向量$\\mathbf{x}$求导"
15 | ]
16 | },
17 | {
18 | "cell_type": "code",
19 | "execution_count": 1,
20 | "id": "98cd8a9e",
21 | "metadata": {
22 | "execution": {
23 | "iopub.execute_input": "2023-08-18T07:07:31.627945Z",
24 | "iopub.status.busy": "2023-08-18T07:07:31.627424Z",
25 | "iopub.status.idle": "2023-08-18T07:07:32.686372Z",
26 | "shell.execute_reply": "2023-08-18T07:07:32.685559Z"
27 | },
28 | "origin_pos": 2,
29 | "tab": [
30 | "pytorch"
31 | ]
32 | },
33 | "outputs": [
34 | {
35 | "data": {
36 | "text/plain": [
37 | "tensor([0., 1., 2., 3.])"
38 | ]
39 | },
40 | "execution_count": 1,
41 | "metadata": {},
42 | "output_type": "execute_result"
43 | }
44 | ],
45 | "source": [
46 | "import torch\n",
47 | "\n",
48 | "x = torch.arange(4.0)\n",
49 | "x"
50 | ]
51 | },
52 | {
53 | "cell_type": "markdown",
54 | "id": "4bd68367",
55 | "metadata": {
56 | "slideshow": {
57 | "slide_type": "slide"
58 | }
59 | },
60 | "source": [
61 | "在我们计算$y$关于$\\mathbf{x}$的梯度之前,需要一个地方来存储梯度"
62 | ]
63 | },
64 | {
65 | "cell_type": "code",
66 | "execution_count": 2,
67 | "id": "e27a5df4",
68 | "metadata": {
69 | "execution": {
70 | "iopub.execute_input": "2023-08-18T07:07:32.690633Z",
71 | "iopub.status.busy": "2023-08-18T07:07:32.689882Z",
72 | "iopub.status.idle": "2023-08-18T07:07:32.694159Z",
73 | "shell.execute_reply": "2023-08-18T07:07:32.693367Z"
74 | },
75 | "origin_pos": 7,
76 | "tab": [
77 | "pytorch"
78 | ]
79 | },
80 | "outputs": [],
81 | "source": [
82 | "x.requires_grad_(True)\n",
83 | "x.grad"
84 | ]
85 | },
86 | {
87 | "cell_type": "markdown",
88 | "id": "aeab4090",
89 | "metadata": {
90 | "slideshow": {
91 | "slide_type": "-"
92 | }
93 | },
94 | "source": [
95 | "现在计算$y$"
96 | ]
97 | },
98 | {
99 | "cell_type": "code",
100 | "execution_count": 3,
101 | "id": "4c3f80b7",
102 | "metadata": {
103 | "execution": {
104 | "iopub.execute_input": "2023-08-18T07:07:32.698006Z",
105 | "iopub.status.busy": "2023-08-18T07:07:32.697167Z",
106 | "iopub.status.idle": "2023-08-18T07:07:32.705385Z",
107 | "shell.execute_reply": "2023-08-18T07:07:32.704593Z"
108 | },
109 | "origin_pos": 12,
110 | "tab": [
111 | "pytorch"
112 | ]
113 | },
114 | "outputs": [
115 | {
116 | "data": {
117 | "text/plain": [
118 | "tensor(28., grad_fn=<MulBackward0>)"
119 | ]
120 | },
121 | "execution_count": 3,
122 | "metadata": {},
123 | "output_type": "execute_result"
124 | }
125 | ],
126 | "source": [
127 | "y = 2 * torch.dot(x, x)\n",
128 | "y"
129 | ]
130 | },
131 | {
132 | "cell_type": "markdown",
133 | "id": "c6ffaf8b",
134 | "metadata": {
135 | "slideshow": {
136 | "slide_type": "slide"
137 | }
138 | },
139 | "source": [
140 | "通过调用反向传播函数来自动计算`y`关于`x`每个分量的梯度"
141 | ]
142 | },
143 | {
144 | "cell_type": "code",
145 | "execution_count": 4,
146 | "id": "a1c3a419",
147 | "metadata": {
148 | "execution": {
149 | "iopub.execute_input": "2023-08-18T07:07:32.708698Z",
150 | "iopub.status.busy": "2023-08-18T07:07:32.708196Z",
151 | "iopub.status.idle": "2023-08-18T07:07:32.713924Z",
152 | "shell.execute_reply": "2023-08-18T07:07:32.713091Z"
153 | },
154 | "origin_pos": 17,
155 | "tab": [
156 | "pytorch"
157 | ]
158 | },
159 | "outputs": [
160 | {
161 | "data": {
162 | "text/plain": [
163 | "tensor([ 0., 4., 8., 12.])"
164 | ]
165 | },
166 | "execution_count": 4,
167 | "metadata": {},
168 | "output_type": "execute_result"
169 | }
170 | ],
171 | "source": [
172 | "y.backward()\n",
173 | "x.grad"
174 | ]
175 | },
176 | {
177 | "cell_type": "code",
178 | "execution_count": 5,
179 | "id": "b8493d0a",
180 | "metadata": {
181 | "execution": {
182 | "iopub.execute_input": "2023-08-18T07:07:32.718858Z",
183 | "iopub.status.busy": "2023-08-18T07:07:32.718156Z",
184 | "iopub.status.idle": "2023-08-18T07:07:32.724091Z",
185 | "shell.execute_reply": "2023-08-18T07:07:32.723104Z"
186 | },
187 | "origin_pos": 22,
188 | "tab": [
189 | "pytorch"
190 | ]
191 | },
192 | "outputs": [
193 | {
194 | "data": {
195 | "text/plain": [
196 | "tensor([True, True, True, True])"
197 | ]
198 | },
199 | "execution_count": 5,
200 | "metadata": {},
201 | "output_type": "execute_result"
202 | }
203 | ],
204 | "source": [
205 | "x.grad == 4 * x"
206 | ]
207 | },
208 | {
209 | "cell_type": "markdown",
210 | "id": "dcb2d3a8",
211 | "metadata": {
212 | "slideshow": {
213 | "slide_type": "slide"
214 | }
215 | },
216 | "source": [
217 | "现在计算`x`的另一个函数"
218 | ]
219 | },
220 | {
221 | "cell_type": "code",
222 | "execution_count": 6,
223 | "id": "f2fcd392",
224 | "metadata": {
225 | "execution": {
226 | "iopub.execute_input": "2023-08-18T07:07:32.729368Z",
227 | "iopub.status.busy": "2023-08-18T07:07:32.728433Z",
228 | "iopub.status.idle": "2023-08-18T07:07:32.736493Z",
229 | "shell.execute_reply": "2023-08-18T07:07:32.735715Z"
230 | },
231 | "origin_pos": 27,
232 | "tab": [
233 | "pytorch"
234 | ]
235 | },
236 | "outputs": [
237 | {
238 | "data": {
239 | "text/plain": [
240 | "tensor([1., 1., 1., 1.])"
241 | ]
242 | },
243 | "execution_count": 6,
244 | "metadata": {},
245 | "output_type": "execute_result"
246 | }
247 | ],
248 | "source": [
249 | "x.grad.zero_()\n",
250 | "y = x.sum()\n",
251 | "y.backward()\n",
252 | "x.grad"
253 | ]
254 | },
255 | {
256 | "cell_type": "markdown",
257 | "id": "6ab86a5c",
258 | "metadata": {
259 | "slideshow": {
260 | "slide_type": "slide"
261 | }
262 | },
263 | "source": [
264 | "深度学习中,我们的目的不是计算微分矩阵,而是单独计算批量中每个样本的偏导数之和"
266 | ]
267 | },
268 | {
269 | "cell_type": "code",
270 | "execution_count": 7,
271 | "id": "f4e62a5d",
272 | "metadata": {
273 | "execution": {
274 | "iopub.execute_input": "2023-08-18T07:07:32.740109Z",
275 | "iopub.status.busy": "2023-08-18T07:07:32.739419Z",
276 | "iopub.status.idle": "2023-08-18T07:07:32.745803Z",
277 | "shell.execute_reply": "2023-08-18T07:07:32.744893Z"
278 | },
279 | "origin_pos": 32,
280 | "tab": [
281 | "pytorch"
282 | ]
283 | },
284 | "outputs": [
285 | {
286 | "data": {
287 | "text/plain": [
288 | "tensor([0., 2., 4., 6.])"
289 | ]
290 | },
291 | "execution_count": 7,
292 | "metadata": {},
293 | "output_type": "execute_result"
294 | }
295 | ],
296 | "source": [
297 | "x.grad.zero_()\n",
298 | "y = x * x\n",
299 | "y.sum().backward()\n",
300 | "x.grad"
301 | ]
302 | },
303 | {
304 | "cell_type": "markdown",
305 | "id": "aba72343",
306 | "metadata": {
307 | "slideshow": {
308 | "slide_type": "slide"
309 | }
310 | },
311 | "source": [
312 | "将某些计算移动到记录的计算图之外"
313 | ]
314 | },
315 | {
316 | "cell_type": "code",
317 | "execution_count": 8,
318 | "id": "8dab493d",
319 | "metadata": {
320 | "execution": {
321 | "iopub.execute_input": "2023-08-18T07:07:32.749398Z",
322 | "iopub.status.busy": "2023-08-18T07:07:32.748759Z",
323 | "iopub.status.idle": "2023-08-18T07:07:32.755280Z",
324 | "shell.execute_reply": "2023-08-18T07:07:32.754543Z"
325 | },
326 | "origin_pos": 37,
327 | "tab": [
328 | "pytorch"
329 | ]
330 | },
331 | "outputs": [
332 | {
333 | "data": {
334 | "text/plain": [
335 | "tensor([True, True, True, True])"
336 | ]
337 | },
338 | "execution_count": 8,
339 | "metadata": {},
340 | "output_type": "execute_result"
341 | }
342 | ],
343 | "source": [
344 | "x.grad.zero_()\n",
345 | "y = x * x\n",
346 | "u = y.detach()\n",
347 | "z = u * x\n",
348 | "\n",
349 | "z.sum().backward()\n",
350 | "x.grad == u"
351 | ]
352 | },
353 | {
354 | "cell_type": "code",
355 | "execution_count": 9,
356 | "id": "271a9b3a",
357 | "metadata": {
358 | "execution": {
359 | "iopub.execute_input": "2023-08-18T07:07:32.759344Z",
360 | "iopub.status.busy": "2023-08-18T07:07:32.758633Z",
361 | "iopub.status.idle": "2023-08-18T07:07:32.764663Z",
362 | "shell.execute_reply": "2023-08-18T07:07:32.763922Z"
363 | },
364 | "origin_pos": 42,
365 | "tab": [
366 | "pytorch"
367 | ]
368 | },
369 | "outputs": [
370 | {
371 | "data": {
372 | "text/plain": [
373 | "tensor([True, True, True, True])"
374 | ]
375 | },
376 | "execution_count": 9,
377 | "metadata": {},
378 | "output_type": "execute_result"
379 | }
380 | ],
381 | "source": [
382 | "x.grad.zero_()\n",
383 | "y.sum().backward()\n",
384 | "x.grad == 2 * x"
385 | ]
386 | },
387 | {
388 | "cell_type": "markdown",
389 | "id": "491b3462",
390 | "metadata": {
391 | "slideshow": {
392 | "slide_type": "slide"
393 | }
394 | },
395 | "source": [
396 | "即使构建函数的计算图需要通过Python控制流(例如,条件、循环或任意函数调用),我们仍然可以计算得到的变量的梯度"
397 | ]
398 | },
399 | {
400 | "cell_type": "code",
401 | "execution_count": 12,
402 | "id": "2595bdc0",
403 | "metadata": {
404 | "execution": {
405 | "iopub.execute_input": "2023-08-18T07:07:32.785728Z",
406 | "iopub.status.busy": "2023-08-18T07:07:32.785179Z",
407 | "iopub.status.idle": "2023-08-18T07:07:32.790672Z",
408 | "shell.execute_reply": "2023-08-18T07:07:32.789892Z"
409 | },
410 | "origin_pos": 57,
411 | "tab": [
412 | "pytorch"
413 | ]
414 | },
415 | "outputs": [
416 | {
417 | "data": {
418 | "text/plain": [
419 | "tensor(True)"
420 | ]
421 | },
422 | "execution_count": 12,
423 | "metadata": {},
424 | "output_type": "execute_result"
425 | }
426 | ],
427 | "source": [
428 | "def f(a):\n",
429 | " b = a * 2\n",
430 | " while b.norm() < 1000:\n",
431 | " b = b * 2\n",
432 | " if b.sum() > 0:\n",
433 | " c = b\n",
434 | " else:\n",
435 | " c = 100 * b\n",
436 | " return c\n",
437 | "\n",
438 | "a = torch.randn(size=(), requires_grad=True)\n",
439 | "d = f(a)\n",
440 | "d.backward()\n",
441 | "\n",
442 | "a.grad == d / a"
443 | ]
444 | }
445 | ],
446 | "metadata": {
447 | "celltoolbar": "Slideshow",
448 | "language_info": {
449 | "name": "python"
450 | },
451 | "required_libs": [],
452 | "rise": {
453 | "autolaunch": true,
454 | "enable_chalkboard": true,
455 | "overlay": "<div class='my-top-right'></div><div class='my-top-left'></div>",
456 | "scroll": true
457 | }
458 | },
459 | "nbformat": 4,
460 | "nbformat_minor": 5
461 | }
--------------------------------------------------------------------------------
/chapter_preliminaries/lookup-api.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "id": "a9a80d6d",
6 | "metadata": {
7 | "slideshow": {
8 | "slide_type": "-"
9 | }
10 | },
11 | "source": [
12 | "# 查阅文档\n",
13 | "\n"
14 | ]
15 | },
16 | {
17 | "cell_type": "markdown",
18 | "id": "0be06f74",
19 | "metadata": {
20 | "slideshow": {
21 | "slide_type": "-"
22 | }
23 | },
24 | "source": [
25 | "查询随机数生成模块中的所有属性"
26 | ]
27 | },
28 | {
29 | "cell_type": "code",
30 | "execution_count": 1,
31 | "id": "8f7f4d63",
32 | "metadata": {
33 | "execution": {
34 | "iopub.execute_input": "2023-08-18T07:05:30.519062Z",
35 | "iopub.status.busy": "2023-08-18T07:05:30.518501Z",
36 | "iopub.status.idle": "2023-08-18T07:05:31.469749Z",
37 | "shell.execute_reply": "2023-08-18T07:05:31.468858Z"
38 | },
39 | "origin_pos": 6,
40 | "tab": [
41 | "pytorch"
42 | ]
43 | },
44 | "outputs": [
45 | {
46 | "name": "stdout",
47 | "output_type": "stream",
48 | "text": [
49 | "['AbsTransform', 'AffineTransform', 'Bernoulli', 'Beta', 'Binomial', 'CatTransform', 'Categorical', 'Cauchy', 'Chi2', 'ComposeTransform', 'ContinuousBernoulli', 'CorrCholeskyTransform', 'CumulativeDistributionTransform', 'Dirichlet', 'Distribution', 'ExpTransform', 'Exponential', 'ExponentialFamily', 'FisherSnedecor', 'Gamma', 'Geometric', 'Gumbel', 'HalfCauchy', 'HalfNormal', 'Independent', 'IndependentTransform', 'Kumaraswamy', 'LKJCholesky', 'Laplace', 'LogNormal', 'LogisticNormal', 'LowRankMultivariateNormal', 'LowerCholeskyTransform', 'MixtureSameFamily', 'Multinomial', 'MultivariateNormal', 'NegativeBinomial', 'Normal', 'OneHotCategorical', 'OneHotCategoricalStraightThrough', 'Pareto', 'Poisson', 'PowerTransform', 'RelaxedBernoulli', 'RelaxedOneHotCategorical', 'ReshapeTransform', 'SigmoidTransform', 'SoftmaxTransform', 'SoftplusTransform', 'StackTransform', 'StickBreakingTransform', 'StudentT', 'TanhTransform', 'Transform', 'TransformedDistribution', 'Uniform', 'VonMises', 'Weibull', 'Wishart', '__all__', '__builtins__', '__cached__', '__doc__', '__file__', '__loader__', '__name__', '__package__', '__path__', '__spec__', 'bernoulli', 'beta', 'biject_to', 'binomial', 'categorical', 'cauchy', 'chi2', 'constraint_registry', 'constraints', 'continuous_bernoulli', 'dirichlet', 'distribution', 'exp_family', 'exponential', 'fishersnedecor', 'gamma', 'geometric', 'gumbel', 'half_cauchy', 'half_normal', 'identity_transform', 'independent', 'kl', 'kl_divergence', 'kumaraswamy', 'laplace', 'lkj_cholesky', 'log_normal', 'logistic_normal', 'lowrank_multivariate_normal', 'mixture_same_family', 'multinomial', 'multivariate_normal', 'negative_binomial', 'normal', 'one_hot_categorical', 'pareto', 'poisson', 'register_kl', 'relaxed_bernoulli', 'relaxed_categorical', 'studentT', 'transform_to', 'transformed_distribution', 'transforms', 'uniform', 'utils', 'von_mises', 'weibull', 'wishart']\n"
50 | ]
51 | }
52 | ],
53 | "source": [
54 | "import torch\n",
55 | "\n",
56 | "print(dir(torch.distributions))"
57 | ]
58 | },
59 | {
60 | "cell_type": "markdown",
61 | "id": "4b506160",
62 | "metadata": {
63 | "slideshow": {
64 | "slide_type": "slide"
65 | }
66 | },
67 | "source": [
68 | "查看张量`ones`函数的用法"
69 | ]
70 | },
71 | {
72 | "cell_type": "code",
73 | "execution_count": 2,
74 | "id": "a16494ed",
75 | "metadata": {
76 | "execution": {
77 | "iopub.execute_input": "2023-08-18T07:05:31.473606Z",
78 | "iopub.status.busy": "2023-08-18T07:05:31.472946Z",
79 | "iopub.status.idle": "2023-08-18T07:05:31.477780Z",
80 | "shell.execute_reply": "2023-08-18T07:05:31.476938Z"
81 | },
82 | "origin_pos": 11,
83 | "tab": [
84 | "pytorch"
85 | ]
86 | },
87 | "outputs": [
88 | {
89 | "name": "stdout",
90 | "output_type": "stream",
91 | "text": [
92 | "Help on built-in function ones in module torch:\n",
93 | "\n",
94 | "ones(...)\n",
95 | " ones(*size, *, out=None, dtype=None, layout=torch.strided, device=None, requires_grad=False) -> Tensor\n",
96 | " \n",
97 | " Returns a tensor filled with the scalar value `1`, with the shape defined\n",
98 | " by the variable argument :attr:`size`.\n",
99 | " \n",
100 | " Args:\n",
101 | " size (int...): a sequence of integers defining the shape of the output tensor.\n",
102 | " Can be a variable number of arguments or a collection like a list or tuple.\n",
103 | " \n",
104 | " Keyword arguments:\n",
105 | " out (Tensor, optional): the output tensor.\n",
106 | " dtype (:class:`torch.dtype`, optional): the desired data type of returned tensor.\n",
107 | " Default: if ``None``, uses a global default (see :func:`torch.set_default_tensor_type`).\n",
108 | " layout (:class:`torch.layout`, optional): the desired layout of returned Tensor.\n",
109 | " Default: ``torch.strided``.\n",
110 | " device (:class:`torch.device`, optional): the desired device of returned tensor.\n",
111 | " Default: if ``None``, uses the current device for the default tensor type\n",
112 | " (see :func:`torch.set_default_tensor_type`). :attr:`device` will be the CPU\n",
113 | " for CPU tensor types and the current CUDA device for CUDA tensor types.\n",
114 | " requires_grad (bool, optional): If autograd should record operations on the\n",
115 | " returned tensor. Default: ``False``.\n",
116 | " \n",
117 | " Example::\n",
118 | " \n",
119 | " >>> torch.ones(2, 3)\n",
120 | " tensor([[ 1., 1., 1.],\n",
121 | " [ 1., 1., 1.]])\n",
122 | " \n",
123 | " >>> torch.ones(5)\n",
124 | " tensor([ 1., 1., 1., 1., 1.])\n",
125 | "\n"
126 | ]
127 | }
128 | ],
129 | "source": [
130 | "help(torch.ones)"
131 | ]
132 | },
133 | {
134 | "cell_type": "markdown",
135 | "id": "5bb71da8",
136 | "metadata": {
137 | "slideshow": {
138 | "slide_type": "slide"
139 | }
140 | },
141 | "source": [
142 | "运行一个快速测试"
143 | ]
144 | },
145 | {
146 | "cell_type": "code",
147 | "execution_count": 3,
148 | "id": "7870b2f5",
149 | "metadata": {
150 | "execution": {
151 | "iopub.execute_input": "2023-08-18T07:05:31.481310Z",
152 | "iopub.status.busy": "2023-08-18T07:05:31.480685Z",
153 | "iopub.status.idle": "2023-08-18T07:05:31.490398Z",
154 | "shell.execute_reply": "2023-08-18T07:05:31.489581Z"
155 | },
156 | "origin_pos": 16,
157 | "tab": [
158 | "pytorch"
159 | ]
160 | },
161 | "outputs": [
162 | {
163 | "data": {
164 | "text/plain": [
165 | "tensor([1., 1., 1., 1.])"
166 | ]
167 | },
168 | "execution_count": 3,
169 | "metadata": {},
170 | "output_type": "execute_result"
171 | }
172 | ],
173 | "source": [
174 | "torch.ones(4)"
175 | ]
176 | }
177 | ],
178 | "metadata": {
179 | "celltoolbar": "Slideshow",
180 | "language_info": {
181 | "name": "python"
182 | },
183 | "required_libs": [],
184 | "rise": {
185 | "autolaunch": true,
186 | "enable_chalkboard": true,
187 | "overlay": "\n",
188 | "scroll": true
189 | }
190 | },
191 | "nbformat": 4,
192 | "nbformat_minor": 5
193 | }
--------------------------------------------------------------------------------
/chapter_preliminaries/ndarray.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "id": "ee4089f8",
6 | "metadata": {
7 | "slideshow": {
8 | "slide_type": "-"
9 | }
10 | },
11 | "source": [
12 | "# 数据操作\n",
13 | "\n"
14 | ]
15 | },
16 | {
17 | "cell_type": "markdown",
18 | "id": "52ec48cf",
19 | "metadata": {
20 | "slideshow": {
21 | "slide_type": "-"
22 | }
23 | },
24 | "source": [
25 | "首先,我们导入`torch`。请注意,虽然它被称为PyTorch,但是代码中使用`torch`而不是`pytorch`"
26 | ]
27 | },
28 | {
29 | "cell_type": "code",
30 | "execution_count": 1,
31 | "id": "278e6d3f",
32 | "metadata": {
33 | "execution": {
34 | "iopub.execute_input": "2023-08-18T07:05:01.545874Z",
35 | "iopub.status.busy": "2023-08-18T07:05:01.545147Z",
36 | "iopub.status.idle": "2023-08-18T07:05:02.992816Z",
37 | "shell.execute_reply": "2023-08-18T07:05:02.991719Z"
38 | },
39 | "origin_pos": 5,
40 | "tab": [
41 | "pytorch"
42 | ]
43 | },
44 | "outputs": [],
45 | "source": [
46 | "import torch"
47 | ]
48 | },
49 | {
50 | "cell_type": "markdown",
51 | "id": "8f89b9c0",
52 | "metadata": {
53 | "slideshow": {
54 | "slide_type": "slide"
55 | }
56 | },
57 | "source": [
58 | "张量表示一个由数值组成的数组,这个数组可能有多个维度"
59 | ]
60 | },
61 | {
62 | "cell_type": "code",
63 | "execution_count": 2,
64 | "id": "b1700627",
65 | "metadata": {
66 | "execution": {
67 | "iopub.execute_input": "2023-08-18T07:05:02.997386Z",
68 | "iopub.status.busy": "2023-08-18T07:05:02.996970Z",
69 | "iopub.status.idle": "2023-08-18T07:05:03.007632Z",
70 | "shell.execute_reply": "2023-08-18T07:05:03.006483Z"
71 | },
72 | "origin_pos": 13,
73 | "tab": [
74 | "pytorch"
75 | ]
76 | },
77 | "outputs": [
78 | {
79 | "data": {
80 | "text/plain": [
81 | "tensor([ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11])"
82 | ]
83 | },
84 | "execution_count": 2,
85 | "metadata": {},
86 | "output_type": "execute_result"
87 | }
88 | ],
89 | "source": [
90 | "x = torch.arange(12)\n",
91 | "x"
92 | ]
93 | },
94 | {
95 | "cell_type": "markdown",
96 | "id": "8ed94c74",
97 | "metadata": {
98 | "slideshow": {
99 | "slide_type": "slide"
100 | }
101 | },
102 | "source": [
103 | "可以通过张量的`shape`属性来访问张量(沿每个轴的长度)的*形状*,\n",
104 | "并通过`numel()`方法获取张量中元素的总数"
105 | ]
106 | },
107 | {
108 | "cell_type": "code",
109 | "execution_count": 3,
110 | "id": "b86b6572",
111 | "metadata": {
112 | "execution": {
113 | "iopub.execute_input": "2023-08-18T07:05:03.011628Z",
114 | "iopub.status.busy": "2023-08-18T07:05:03.011110Z",
115 | "iopub.status.idle": "2023-08-18T07:05:03.017191Z",
116 | "shell.execute_reply": "2023-08-18T07:05:03.016193Z"
117 | },
118 | "origin_pos": 17,
119 | "tab": [
120 | "pytorch"
121 | ]
122 | },
123 | "outputs": [
124 | {
125 | "data": {
126 | "text/plain": [
127 | "torch.Size([12])"
128 | ]
129 | },
130 | "execution_count": 3,
131 | "metadata": {},
132 | "output_type": "execute_result"
133 | }
134 | ],
135 | "source": [
136 | "x.shape"
137 | ]
138 | },
139 | {
140 | "cell_type": "code",
141 | "execution_count": 4,
142 | "id": "b8b69ca9",
143 | "metadata": {
144 | "execution": {
145 | "iopub.execute_input": "2023-08-18T07:05:03.020938Z",
146 | "iopub.status.busy": "2023-08-18T07:05:03.020483Z",
147 | "iopub.status.idle": "2023-08-18T07:05:03.026998Z",
148 | "shell.execute_reply": "2023-08-18T07:05:03.025752Z"
149 | },
150 | "origin_pos": 20,
151 | "tab": [
152 | "pytorch"
153 | ]
154 | },
155 | "outputs": [
156 | {
157 | "data": {
158 | "text/plain": [
159 | "12"
160 | ]
161 | },
162 | "execution_count": 4,
163 | "metadata": {},
164 | "output_type": "execute_result"
165 | }
166 | ],
167 | "source": [
168 | "x.numel()"
169 | ]
170 | },
171 | {
172 | "cell_type": "markdown",
173 | "id": "588600c4",
174 | "metadata": {
175 | "slideshow": {
176 | "slide_type": "slide"
177 | }
178 | },
179 | "source": [
180 | "要想改变一个张量的形状而不改变元素数量和元素值,可以调用`reshape`函数"
181 | ]
182 | },
183 | {
184 | "cell_type": "code",
185 | "execution_count": 5,
186 | "id": "0f294243",
187 | "metadata": {
188 | "execution": {
189 | "iopub.execute_input": "2023-08-18T07:05:03.031842Z",
190 | "iopub.status.busy": "2023-08-18T07:05:03.031448Z",
191 | "iopub.status.idle": "2023-08-18T07:05:03.039288Z",
192 | "shell.execute_reply": "2023-08-18T07:05:03.038227Z"
193 | },
194 | "origin_pos": 24,
195 | "tab": [
196 | "pytorch"
197 | ]
198 | },
199 | "outputs": [
200 | {
201 | "data": {
202 | "text/plain": [
203 | "tensor([[ 0, 1, 2, 3],\n",
204 | " [ 4, 5, 6, 7],\n",
205 | " [ 8, 9, 10, 11]])"
206 | ]
207 | },
208 | "execution_count": 5,
209 | "metadata": {},
210 | "output_type": "execute_result"
211 | }
212 | ],
213 | "source": [
214 | "X = x.reshape(3, 4)\n",
215 | "X"
216 | ]
217 | },
218 | {
219 | "cell_type": "markdown",
220 | "id": "403fad02",
221 | "metadata": {
222 | "slideshow": {
223 | "slide_type": "slide"
224 | }
225 | },
226 | "source": [
227 | "使用全0、全1、其他常量,或者从特定分布中随机采样的数字来初始化张量"
228 | ]
229 | },
230 | {
231 | "cell_type": "code",
232 | "execution_count": 6,
233 | "id": "b23c3056",
234 | "metadata": {
235 | "execution": {
236 | "iopub.execute_input": "2023-08-18T07:05:03.044733Z",
237 | "iopub.status.busy": "2023-08-18T07:05:03.043866Z",
238 | "iopub.status.idle": "2023-08-18T07:05:03.052195Z",
239 | "shell.execute_reply": "2023-08-18T07:05:03.051146Z"
240 | },
241 | "origin_pos": 29,
242 | "tab": [
243 | "pytorch"
244 | ]
245 | },
246 | "outputs": [
247 | {
248 | "data": {
249 | "text/plain": [
250 | "tensor([[[0., 0., 0., 0.],\n",
251 | " [0., 0., 0., 0.],\n",
252 | " [0., 0., 0., 0.]],\n",
253 | "\n",
254 | " [[0., 0., 0., 0.],\n",
255 | " [0., 0., 0., 0.],\n",
256 | " [0., 0., 0., 0.]]])"
257 | ]
258 | },
259 | "execution_count": 6,
260 | "metadata": {},
261 | "output_type": "execute_result"
262 | }
263 | ],
264 | "source": [
265 | "torch.zeros((2, 3, 4))"
266 | ]
267 | },
268 | {
269 | "cell_type": "code",
270 | "execution_count": 7,
271 | "id": "25981960",
272 | "metadata": {
273 | "execution": {
274 | "iopub.execute_input": "2023-08-18T07:05:03.057264Z",
275 | "iopub.status.busy": "2023-08-18T07:05:03.056578Z",
276 | "iopub.status.idle": "2023-08-18T07:05:03.064973Z",
277 | "shell.execute_reply": "2023-08-18T07:05:03.063853Z"
278 | },
279 | "origin_pos": 34,
280 | "tab": [
281 | "pytorch"
282 | ]
283 | },
284 | "outputs": [
285 | {
286 | "data": {
287 | "text/plain": [
288 | "tensor([[[1., 1., 1., 1.],\n",
289 | " [1., 1., 1., 1.],\n",
290 | " [1., 1., 1., 1.]],\n",
291 | "\n",
292 | " [[1., 1., 1., 1.],\n",
293 | " [1., 1., 1., 1.],\n",
294 | " [1., 1., 1., 1.]]])"
295 | ]
296 | },
297 | "execution_count": 7,
298 | "metadata": {},
299 | "output_type": "execute_result"
300 | }
301 | ],
302 | "source": [
303 | "torch.ones((2, 3, 4))"
304 | ]
305 | },
306 | {
307 | "cell_type": "code",
308 | "execution_count": 8,
309 | "id": "2493f09a",
310 | "metadata": {
311 | "execution": {
312 | "iopub.execute_input": "2023-08-18T07:05:03.069946Z",
313 | "iopub.status.busy": "2023-08-18T07:05:03.069231Z",
314 | "iopub.status.idle": "2023-08-18T07:05:03.077304Z",
315 | "shell.execute_reply": "2023-08-18T07:05:03.076139Z"
316 | },
317 | "origin_pos": 39,
318 | "tab": [
319 | "pytorch"
320 | ]
321 | },
322 | "outputs": [
323 | {
324 | "data": {
325 | "text/plain": [
326 | "tensor([[-0.0135, 0.0665, 0.0912, 0.3212],\n",
327 | " [ 1.4653, 0.1843, -1.6995, -0.3036],\n",
328 | " [ 1.7646, 1.0450, 0.2457, -0.7732]])"
329 | ]
330 | },
331 | "execution_count": 8,
332 | "metadata": {},
333 | "output_type": "execute_result"
334 | }
335 | ],
336 | "source": [
337 | "torch.randn(3, 4)"
338 | ]
339 | },
340 | {
341 | "cell_type": "markdown",
342 | "id": "211d5b8e",
343 | "metadata": {
344 | "slideshow": {
345 | "slide_type": "slide"
346 | }
347 | },
348 | "source": [
349 | "通过提供包含数值的Python列表(或嵌套列表),来为所需张量中的每个元素赋予确定值"
350 | ]
351 | },
352 | {
353 | "cell_type": "code",
354 | "execution_count": 9,
355 | "id": "708be494",
356 | "metadata": {
357 | "execution": {
358 | "iopub.execute_input": "2023-08-18T07:05:03.082360Z",
359 | "iopub.status.busy": "2023-08-18T07:05:03.081424Z",
360 | "iopub.status.idle": "2023-08-18T07:05:03.090148Z",
361 | "shell.execute_reply": "2023-08-18T07:05:03.088973Z"
362 | },
363 | "origin_pos": 44,
364 | "tab": [
365 | "pytorch"
366 | ]
367 | },
368 | "outputs": [
369 | {
370 | "data": {
371 | "text/plain": [
372 | "tensor([[2, 1, 4, 3],\n",
373 | " [1, 2, 3, 4],\n",
374 | " [4, 3, 2, 1]])"
375 | ]
376 | },
377 | "execution_count": 9,
378 | "metadata": {},
379 | "output_type": "execute_result"
380 | }
381 | ],
382 | "source": [
383 | "torch.tensor([[2, 1, 4, 3], [1, 2, 3, 4], [4, 3, 2, 1]])"
384 | ]
385 | },
386 | {
387 | "cell_type": "markdown",
388 | "id": "14185106",
389 | "metadata": {
390 | "slideshow": {
391 | "slide_type": "slide"
392 | }
393 | },
394 | "source": [
395 | "常见的标准算术运算符(`+`、`-`、`*`、`/`和`**`)都可以被升级为按元素运算"
396 | ]
397 | },
398 | {
399 | "cell_type": "code",
400 | "execution_count": 10,
401 | "id": "99b28553",
402 | "metadata": {
403 | "execution": {
404 | "iopub.execute_input": "2023-08-18T07:05:03.095504Z",
405 | "iopub.status.busy": "2023-08-18T07:05:03.094688Z",
406 | "iopub.status.idle": "2023-08-18T07:05:03.106084Z",
407 | "shell.execute_reply": "2023-08-18T07:05:03.104976Z"
408 | },
409 | "origin_pos": 49,
410 | "tab": [
411 | "pytorch"
412 | ]
413 | },
414 | "outputs": [
415 | {
416 | "data": {
417 | "text/plain": [
418 | "(tensor([ 3., 4., 6., 10.]),\n",
419 | " tensor([-1., 0., 2., 6.]),\n",
420 | " tensor([ 2., 4., 8., 16.]),\n",
421 | " tensor([0.5000, 1.0000, 2.0000, 4.0000]),\n",
422 | " tensor([ 1., 4., 16., 64.]))"
423 | ]
424 | },
425 | "execution_count": 10,
426 | "metadata": {},
427 | "output_type": "execute_result"
428 | }
429 | ],
430 | "source": [
431 | "x = torch.tensor([1.0, 2, 4, 8])\n",
432 | "y = torch.tensor([2, 2, 2, 2])\n",
433 | "x + y, x - y, x * y, x / y, x ** y"
434 | ]
435 | },
436 | {
437 | "cell_type": "markdown",
438 | "id": "7e770d36",
439 | "metadata": {
440 | "slideshow": {
441 | "slide_type": "-"
442 | }
443 | },
444 | "source": [
445 | "“按元素”方式可以应用更多的计算"
446 | ]
447 | },
448 | {
449 | "cell_type": "code",
450 | "execution_count": 11,
451 | "id": "ef07c995",
452 | "metadata": {
453 | "execution": {
454 | "iopub.execute_input": "2023-08-18T07:05:03.110973Z",
455 | "iopub.status.busy": "2023-08-18T07:05:03.110221Z",
456 | "iopub.status.idle": "2023-08-18T07:05:03.120389Z",
457 | "shell.execute_reply": "2023-08-18T07:05:03.119471Z"
458 | },
459 | "origin_pos": 54,
460 | "tab": [
461 | "pytorch"
462 | ]
463 | },
464 | "outputs": [
465 | {
466 | "data": {
467 | "text/plain": [
468 | "tensor([2.7183e+00, 7.3891e+00, 5.4598e+01, 2.9810e+03])"
469 | ]
470 | },
471 | "execution_count": 11,
472 | "metadata": {},
473 | "output_type": "execute_result"
474 | }
475 | ],
476 | "source": [
477 | "torch.exp(x)"
478 | ]
479 | },
480 | {
481 | "cell_type": "markdown",
482 | "id": "e8a09ecd",
483 | "metadata": {
484 | "slideshow": {
485 | "slide_type": "slide"
486 | }
487 | },
488 | "source": [
489 | "我们也可以把多个张量*连结*(concatenate)在一起"
490 | ]
491 | },
492 | {
493 | "cell_type": "code",
494 | "execution_count": 12,
495 | "id": "a583b891",
496 | "metadata": {
497 | "execution": {
498 | "iopub.execute_input": "2023-08-18T07:05:03.125263Z",
499 | "iopub.status.busy": "2023-08-18T07:05:03.124477Z",
500 | "iopub.status.idle": "2023-08-18T07:05:03.136328Z",
501 | "shell.execute_reply": "2023-08-18T07:05:03.135199Z"
502 | },
503 | "origin_pos": 59,
504 | "tab": [
505 | "pytorch"
506 | ]
507 | },
508 | "outputs": [
509 | {
510 | "data": {
511 | "text/plain": [
512 | "(tensor([[ 0., 1., 2., 3.],\n",
513 | " [ 4., 5., 6., 7.],\n",
514 | " [ 8., 9., 10., 11.],\n",
515 | " [ 2., 1., 4., 3.],\n",
516 | " [ 1., 2., 3., 4.],\n",
517 | " [ 4., 3., 2., 1.]]),\n",
518 | " tensor([[ 0., 1., 2., 3., 2., 1., 4., 3.],\n",
519 | " [ 4., 5., 6., 7., 1., 2., 3., 4.],\n",
520 | " [ 8., 9., 10., 11., 4., 3., 2., 1.]]))"
521 | ]
522 | },
523 | "execution_count": 12,
524 | "metadata": {},
525 | "output_type": "execute_result"
526 | }
527 | ],
528 | "source": [
529 | "X = torch.arange(12, dtype=torch.float32).reshape((3,4))\n",
530 | "Y = torch.tensor([[2.0, 1, 4, 3], [1, 2, 3, 4], [4, 3, 2, 1]])\n",
531 | "torch.cat((X, Y), dim=0), torch.cat((X, Y), dim=1)"
532 | ]
533 | },
534 | {
535 | "cell_type": "markdown",
536 | "id": "fac225a4",
537 | "metadata": {
538 | "slideshow": {
539 | "slide_type": "slide"
540 | }
541 | },
542 | "source": [
543 | "通过*逻辑运算符*构建二元张量"
544 | ]
545 | },
546 | {
547 | "cell_type": "code",
548 | "execution_count": 13,
549 | "id": "6405ec63",
550 | "metadata": {
551 | "execution": {
552 | "iopub.execute_input": "2023-08-18T07:05:03.141449Z",
553 | "iopub.status.busy": "2023-08-18T07:05:03.140776Z",
554 | "iopub.status.idle": "2023-08-18T07:05:03.148692Z",
555 | "shell.execute_reply": "2023-08-18T07:05:03.147491Z"
556 | },
557 | "origin_pos": 63,
558 | "tab": [
559 | "pytorch"
560 | ]
561 | },
562 | "outputs": [
563 | {
564 | "data": {
565 | "text/plain": [
566 | "tensor([[False, True, False, True],\n",
567 | " [False, False, False, False],\n",
568 | " [False, False, False, False]])"
569 | ]
570 | },
571 | "execution_count": 13,
572 | "metadata": {},
573 | "output_type": "execute_result"
574 | }
575 | ],
576 | "source": [
577 | "X == Y"
578 | ]
579 | },
580 | {
581 | "cell_type": "markdown",
582 | "id": "ab37c495",
583 | "metadata": {
584 | "slideshow": {
585 | "slide_type": "slide"
586 | }
587 | },
588 | "source": [
589 | "对张量中的所有元素进行求和,会产生一个单元素张量"
590 | ]
591 | },
592 | {
593 | "cell_type": "code",
594 | "execution_count": 14,
595 | "id": "a13cb291",
596 | "metadata": {
597 | "execution": {
598 | "iopub.execute_input": "2023-08-18T07:05:03.153907Z",
599 | "iopub.status.busy": "2023-08-18T07:05:03.152814Z",
600 | "iopub.status.idle": "2023-08-18T07:05:03.160277Z",
601 | "shell.execute_reply": "2023-08-18T07:05:03.159188Z"
602 | },
603 | "origin_pos": 65,
604 | "tab": [
605 | "pytorch"
606 | ]
607 | },
608 | "outputs": [
609 | {
610 | "data": {
611 | "text/plain": [
612 | "tensor(66.)"
613 | ]
614 | },
615 | "execution_count": 14,
616 | "metadata": {},
617 | "output_type": "execute_result"
618 | }
619 | ],
620 | "source": [
621 | "X.sum()"
622 | ]
623 | },
624 | {
625 | "cell_type": "markdown",
626 | "id": "63f0408f",
627 | "metadata": {
628 | "slideshow": {
629 | "slide_type": "slide"
630 | }
631 | },
632 | "source": [
633 | "即使形状不同,我们仍然可以通过调用\n",
634 | "*广播机制*(broadcasting mechanism)来执行按元素操作"
635 | ]
636 | },
637 | {
638 | "cell_type": "code",
639 | "execution_count": 15,
640 | "id": "a1de79a2",
641 | "metadata": {
642 | "execution": {
643 | "iopub.execute_input": "2023-08-18T07:05:03.165305Z",
644 | "iopub.status.busy": "2023-08-18T07:05:03.164274Z",
645 | "iopub.status.idle": "2023-08-18T07:05:03.172771Z",
646 | "shell.execute_reply": "2023-08-18T07:05:03.171692Z"
647 | },
648 | "origin_pos": 69,
649 | "tab": [
650 | "pytorch"
651 | ]
652 | },
653 | "outputs": [
654 | {
655 | "data": {
656 | "text/plain": [
657 | "(tensor([[0],\n",
658 | " [1],\n",
659 | " [2]]),\n",
660 | " tensor([[0, 1]]))"
661 | ]
662 | },
663 | "execution_count": 15,
664 | "metadata": {},
665 | "output_type": "execute_result"
666 | }
667 | ],
668 | "source": [
669 | "a = torch.arange(3).reshape((3, 1))\n",
670 | "b = torch.arange(2).reshape((1, 2))\n",
671 | "a, b"
672 | ]
673 | },
674 | {
675 | "cell_type": "code",
676 | "execution_count": 16,
677 | "id": "4d8904b1",
678 | "metadata": {
679 | "execution": {
680 | "iopub.execute_input": "2023-08-18T07:05:03.177900Z",
681 | "iopub.status.busy": "2023-08-18T07:05:03.176935Z",
682 | "iopub.status.idle": "2023-08-18T07:05:03.184212Z",
683 | "shell.execute_reply": "2023-08-18T07:05:03.183156Z"
684 | },
685 | "origin_pos": 73,
686 | "tab": [
687 | "pytorch"
688 | ]
689 | },
690 | "outputs": [
691 | {
692 | "data": {
693 | "text/plain": [
694 | "tensor([[0, 1],\n",
695 | " [1, 2],\n",
696 | " [2, 3]])"
697 | ]
698 | },
699 | "execution_count": 16,
700 | "metadata": {},
701 | "output_type": "execute_result"
702 | }
703 | ],
704 | "source": [
705 | "a + b"
706 | ]
707 | },
708 | {
709 | "cell_type": "markdown",
710 | "id": "16a2cb7b",
711 | "metadata": {
712 | "slideshow": {
713 | "slide_type": "slide"
714 | }
715 | },
716 | "source": [
717 | "可以用`[-1]`选择最后一个元素,可以用`[1:3]`选择第二个和第三个元素"
718 | ]
719 | },
720 | {
721 | "cell_type": "code",
722 | "execution_count": 17,
723 | "id": "b62b00c7",
724 | "metadata": {
725 | "execution": {
726 | "iopub.execute_input": "2023-08-18T07:05:03.189786Z",
727 | "iopub.status.busy": "2023-08-18T07:05:03.188961Z",
728 | "iopub.status.idle": "2023-08-18T07:05:03.197712Z",
729 | "shell.execute_reply": "2023-08-18T07:05:03.196559Z"
730 | },
731 | "origin_pos": 75,
732 | "tab": [
733 | "pytorch"
734 | ]
735 | },
736 | "outputs": [
737 | {
738 | "data": {
739 | "text/plain": [
740 | "(tensor([ 8., 9., 10., 11.]),\n",
741 | " tensor([[ 4., 5., 6., 7.],\n",
742 | " [ 8., 9., 10., 11.]]))"
743 | ]
744 | },
745 | "execution_count": 17,
746 | "metadata": {},
747 | "output_type": "execute_result"
748 | }
749 | ],
750 | "source": [
751 | "X[-1], X[1:3]"
752 | ]
753 | },
754 | {
755 | "cell_type": "markdown",
756 | "id": "9d9f38fe",
757 | "metadata": {
758 | "slideshow": {
759 | "slide_type": "slide"
760 | }
761 | },
762 | "source": [
763 | "除读取外,我们还可以通过指定索引来将元素写入矩阵"
764 | ]
765 | },
766 | {
767 | "cell_type": "code",
768 | "execution_count": 18,
769 | "id": "56a8261a",
770 | "metadata": {
771 | "execution": {
772 | "iopub.execute_input": "2023-08-18T07:05:03.203157Z",
773 | "iopub.status.busy": "2023-08-18T07:05:03.202390Z",
774 | "iopub.status.idle": "2023-08-18T07:05:03.210176Z",
775 | "shell.execute_reply": "2023-08-18T07:05:03.209097Z"
776 | },
777 | "origin_pos": 78,
778 | "tab": [
779 | "pytorch"
780 | ]
781 | },
782 | "outputs": [
783 | {
784 | "data": {
785 | "text/plain": [
786 | "tensor([[ 0., 1., 2., 3.],\n",
787 | " [ 4., 5., 9., 7.],\n",
788 | " [ 8., 9., 10., 11.]])"
789 | ]
790 | },
791 | "execution_count": 18,
792 | "metadata": {},
793 | "output_type": "execute_result"
794 | }
795 | ],
796 | "source": [
797 | "X[1, 2] = 9\n",
798 | "X"
799 | ]
800 | },
801 | {
802 | "cell_type": "markdown",
803 | "id": "70b1b082",
804 | "metadata": {
805 | "slideshow": {
806 | "slide_type": "slide"
807 | }
808 | },
809 | "source": [
810 | "为多个元素赋值相同的值,我们只需要索引所有元素,然后为它们赋值"
811 | ]
812 | },
813 | {
814 | "cell_type": "code",
815 | "execution_count": 19,
816 | "id": "bd48bae9",
817 | "metadata": {
818 | "execution": {
819 | "iopub.execute_input": "2023-08-18T07:05:03.214118Z",
820 | "iopub.status.busy": "2023-08-18T07:05:03.213430Z",
821 | "iopub.status.idle": "2023-08-18T07:05:03.221215Z",
822 | "shell.execute_reply": "2023-08-18T07:05:03.220084Z"
823 | },
824 | "origin_pos": 81,
825 | "tab": [
826 | "pytorch"
827 | ]
828 | },
829 | "outputs": [
830 | {
831 | "data": {
832 | "text/plain": [
833 | "tensor([[12., 12., 12., 12.],\n",
834 | " [12., 12., 12., 12.],\n",
835 | " [ 8., 9., 10., 11.]])"
836 | ]
837 | },
838 | "execution_count": 19,
839 | "metadata": {},
840 | "output_type": "execute_result"
841 | }
842 | ],
843 | "source": [
844 | "X[0:2, :] = 12\n",
845 | "X"
846 | ]
847 | },
848 | {
849 | "cell_type": "markdown",
850 | "id": "9360fd66",
851 | "metadata": {
852 | "slideshow": {
853 | "slide_type": "slide"
854 | }
855 | },
856 | "source": [
857 | "运行一些操作可能会导致为新结果分配内存"
858 | ]
859 | },
860 | {
861 | "cell_type": "code",
862 | "execution_count": 20,
863 | "id": "6bcd6d07",
864 | "metadata": {
865 | "execution": {
866 | "iopub.execute_input": "2023-08-18T07:05:03.225106Z",
867 | "iopub.status.busy": "2023-08-18T07:05:03.224353Z",
868 | "iopub.status.idle": "2023-08-18T07:05:03.231715Z",
869 | "shell.execute_reply": "2023-08-18T07:05:03.230626Z"
870 | },
871 | "origin_pos": 84,
872 | "tab": [
873 | "pytorch"
874 | ]
875 | },
876 | "outputs": [
877 | {
878 | "data": {
879 | "text/plain": [
880 | "False"
881 | ]
882 | },
883 | "execution_count": 20,
884 | "metadata": {},
885 | "output_type": "execute_result"
886 | }
887 | ],
888 | "source": [
889 | "before = id(Y)\n",
890 | "Y = Y + X\n",
891 | "id(Y) == before"
892 | ]
893 | },
894 | {
895 | "cell_type": "markdown",
896 | "id": "1764d724",
897 | "metadata": {
898 | "slideshow": {
899 | "slide_type": "-"
900 | }
901 | },
902 | "source": [
903 | "执行原地操作"
904 | ]
905 | },
906 | {
907 | "cell_type": "code",
908 | "execution_count": 21,
909 | "id": "13b7fdf6",
910 | "metadata": {
911 | "execution": {
912 | "iopub.execute_input": "2023-08-18T07:05:03.236933Z",
913 | "iopub.status.busy": "2023-08-18T07:05:03.236016Z",
914 | "iopub.status.idle": "2023-08-18T07:05:03.243252Z",
915 | "shell.execute_reply": "2023-08-18T07:05:03.242153Z"
916 | },
917 | "origin_pos": 89,
918 | "tab": [
919 | "pytorch"
920 | ]
921 | },
922 | "outputs": [
923 | {
924 | "name": "stdout",
925 | "output_type": "stream",
926 | "text": [
927 | "id(Z): 140327634811696\n",
928 | "id(Z): 140327634811696\n"
929 | ]
930 | }
931 | ],
932 | "source": [
933 | "Z = torch.zeros_like(Y)\n",
934 | "print('id(Z):', id(Z))\n",
935 | "Z[:] = X + Y\n",
936 | "print('id(Z):', id(Z))"
937 | ]
938 | },
939 | {
940 | "cell_type": "markdown",
941 | "id": "08b6232e",
942 | "metadata": {
943 | "slideshow": {
944 | "slide_type": "slide"
945 | }
946 | },
947 | "source": [
948 | "如果在后续计算中没有重复使用`X`,\n",
949 | "我们也可以使用`X[:] = X + Y`或`X += Y`来减少操作的内存开销"
950 | ]
951 | },
952 | {
953 | "cell_type": "code",
954 | "execution_count": 22,
955 | "id": "c8a97d75",
956 | "metadata": {
957 | "execution": {
958 | "iopub.execute_input": "2023-08-18T07:05:03.248290Z",
959 | "iopub.status.busy": "2023-08-18T07:05:03.247521Z",
960 | "iopub.status.idle": "2023-08-18T07:05:03.255046Z",
961 | "shell.execute_reply": "2023-08-18T07:05:03.253935Z"
962 | },
963 | "origin_pos": 94,
964 | "tab": [
965 | "pytorch"
966 | ]
967 | },
968 | "outputs": [
969 | {
970 | "data": {
971 | "text/plain": [
972 | "True"
973 | ]
974 | },
975 | "execution_count": 22,
976 | "metadata": {},
977 | "output_type": "execute_result"
978 | }
979 | ],
980 | "source": [
981 | "before = id(X)\n",
982 | "X += Y\n",
983 | "id(X) == before"
984 | ]
985 | },
986 | {
987 | "cell_type": "markdown",
988 | "id": "62d8bda0",
989 | "metadata": {
990 | "slideshow": {
991 | "slide_type": "slide"
992 | }
993 | },
994 | "source": [
995 | "转换为NumPy张量(`ndarray`)"
996 | ]
997 | },
998 | {
999 | "cell_type": "code",
1000 | "execution_count": 23,
1001 | "id": "7386f580",
1002 | "metadata": {
1003 | "execution": {
1004 | "iopub.execute_input": "2023-08-18T07:05:03.259655Z",
1005 | "iopub.status.busy": "2023-08-18T07:05:03.259273Z",
1006 | "iopub.status.idle": "2023-08-18T07:05:03.266501Z",
1007 | "shell.execute_reply": "2023-08-18T07:05:03.265738Z"
1008 | },
1009 | "origin_pos": 100,
1010 | "tab": [
1011 | "pytorch"
1012 | ]
1013 | },
1014 | "outputs": [
1015 | {
1016 | "data": {
1017 | "text/plain": [
1018 | "(numpy.ndarray, torch.Tensor)"
1019 | ]
1020 | },
1021 | "execution_count": 23,
1022 | "metadata": {},
1023 | "output_type": "execute_result"
1024 | }
1025 | ],
1026 | "source": [
1027 | "A = X.numpy()\n",
1028 | "B = torch.tensor(A)\n",
1029 | "type(A), type(B)"
1030 | ]
1031 | },
1032 | {
1033 | "cell_type": "markdown",
1034 | "id": "bd310838",
1035 | "metadata": {
1036 | "slideshow": {
1037 | "slide_type": "-"
1038 | }
1039 | },
1040 | "source": [
1041 | "将大小为1的张量转换为Python标量"
1042 | ]
1043 | },
1044 | {
1045 | "cell_type": "code",
1046 | "execution_count": 24,
1047 | "id": "10a429bd",
1048 | "metadata": {
1049 | "execution": {
1050 | "iopub.execute_input": "2023-08-18T07:05:03.270566Z",
1051 | "iopub.status.busy": "2023-08-18T07:05:03.270102Z",
1052 | "iopub.status.idle": "2023-08-18T07:05:03.276982Z",
1053 | "shell.execute_reply": "2023-08-18T07:05:03.276051Z"
1054 | },
1055 | "origin_pos": 105,
1056 | "tab": [
1057 | "pytorch"
1058 | ]
1059 | },
1060 | "outputs": [
1061 | {
1062 | "data": {
1063 | "text/plain": [
1064 | "(tensor([3.5000]), 3.5, 3.5, 3)"
1065 | ]
1066 | },
1067 | "execution_count": 24,
1068 | "metadata": {},
1069 | "output_type": "execute_result"
1070 | }
1071 | ],
1072 | "source": [
1073 | "a = torch.tensor([3.5])\n",
1074 | "a, a.item(), float(a), int(a)"
1075 | ]
1076 | }
1077 | ],
1078 | "metadata": {
1079 | "celltoolbar": "Slideshow",
1080 | "language_info": {
1081 | "name": "python"
1082 | },
1083 | "required_libs": [],
1084 | "rise": {
1085 | "autolaunch": true,
1086 | "enable_chalkboard": true,
1087 | "overlay": "\n",
1088 | "scroll": true
1089 | }
1090 | },
1091 | "nbformat": 4,
1092 | "nbformat_minor": 5
1093 | }
--------------------------------------------------------------------------------
/chapter_preliminaries/pandas.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "id": "bbe8a716",
6 | "metadata": {
7 | "slideshow": {
8 | "slide_type": "-"
9 | }
10 | },
11 | "source": [
12 | "# 数据预处理\n",
13 | "\n",
14 | "创建一个人工数据集,并存储在CSV(逗号分隔值)文件中"
15 | ]
16 | },
17 | {
18 | "cell_type": "code",
19 | "execution_count": 1,
20 | "id": "ee72fd16",
21 | "metadata": {
22 | "execution": {
23 | "iopub.execute_input": "2023-08-18T07:03:38.903209Z",
24 | "iopub.status.busy": "2023-08-18T07:03:38.902351Z",
25 | "iopub.status.idle": "2023-08-18T07:03:38.918117Z",
26 | "shell.execute_reply": "2023-08-18T07:03:38.916775Z"
27 | },
28 | "origin_pos": 1,
29 | "tab": [
30 | "pytorch"
31 | ]
32 | },
33 | "outputs": [],
34 | "source": [
35 | "import os\n",
36 | "\n",
37 | "os.makedirs(os.path.join('..', 'data'), exist_ok=True)\n",
38 | "data_file = os.path.join('..', 'data', 'house_tiny.csv')\n",
39 | "with open(data_file, 'w') as f:\n",
40 | " f.write('NumRooms,Alley,Price\\n')\n",
41 | " f.write('NA,Pave,127500\\n')\n",
42 | " f.write('2,NA,106000\\n')\n",
43 | " f.write('4,NA,178100\\n')\n",
44 | " f.write('NA,NA,140000\\n')"
45 | ]
46 | },
47 | {
48 | "cell_type": "markdown",
49 | "id": "50063800",
50 | "metadata": {
51 | "slideshow": {
52 | "slide_type": "slide"
53 | }
54 | },
55 | "source": [
56 | "从创建的CSV文件中加载原始数据集"
57 | ]
58 | },
59 | {
60 | "cell_type": "code",
61 | "execution_count": 2,
62 | "id": "5fb16e52",
63 | "metadata": {
64 | "execution": {
65 | "iopub.execute_input": "2023-08-18T07:03:38.923957Z",
66 | "iopub.status.busy": "2023-08-18T07:03:38.923101Z",
67 | "iopub.status.idle": "2023-08-18T07:03:39.372116Z",
68 | "shell.execute_reply": "2023-08-18T07:03:39.371151Z"
69 | },
70 | "origin_pos": 3,
71 | "tab": [
72 | "pytorch"
73 | ]
74 | },
75 | "outputs": [
76 | {
77 | "name": "stdout",
78 | "output_type": "stream",
79 | "text": [
80 | " NumRooms Alley Price\n",
81 | "0 NaN Pave 127500\n",
82 | "1 2.0 NaN 106000\n",
83 | "2 4.0 NaN 178100\n",
84 | "3 NaN NaN 140000\n"
85 | ]
86 | }
87 | ],
88 | "source": [
89 | "import pandas as pd\n",
90 | "\n",
91 | "data = pd.read_csv(data_file)\n",
92 | "print(data)"
93 | ]
94 | },
95 | {
96 | "cell_type": "markdown",
97 | "id": "2a0d9ba1",
98 | "metadata": {
99 | "slideshow": {
100 | "slide_type": "slide"
101 | }
102 | },
103 | "source": [
104 | "为了处理缺失的数据,典型的方法包括*插值法*和*删除法*,\n",
105 | "这里,我们将考虑插值法"
106 | ]
107 | },
108 | {
109 | "cell_type": "code",
110 | "execution_count": 3,
111 | "id": "d460a301",
112 | "metadata": {
113 | "execution": {
114 | "iopub.execute_input": "2023-08-18T07:03:39.375828Z",
115 | "iopub.status.busy": "2023-08-18T07:03:39.375535Z",
116 | "iopub.status.idle": "2023-08-18T07:03:39.389220Z",
117 | "shell.execute_reply": "2023-08-18T07:03:39.387998Z"
118 | },
119 | "origin_pos": 5,
120 | "tab": [
121 | "pytorch"
122 | ]
123 | },
124 | "outputs": [
125 | {
126 | "name": "stdout",
127 | "output_type": "stream",
128 | "text": [
129 | " NumRooms Alley\n",
130 | "0 3.0 Pave\n",
131 | "1 2.0 NaN\n",
132 | "2 4.0 NaN\n",
133 | "3 3.0 NaN\n"
134 | ]
135 | }
136 | ],
137 | "source": [
138 | "inputs, outputs = data.iloc[:, 0:2], data.iloc[:, 2]\n",
139 | "inputs = inputs.fillna(inputs.mean(numeric_only=True))\n",
140 | "print(inputs)"
141 | ]
142 | },
143 | {
144 | "cell_type": "markdown",
145 | "id": "13fbca82",
146 | "metadata": {
147 | "slideshow": {
148 | "slide_type": "slide"
149 | }
150 | },
151 | "source": [
152 | "对于`inputs`中的类别值或离散值,我们将“NaN”视为一个类别"
153 | ]
154 | },
155 | {
156 | "cell_type": "code",
157 | "execution_count": 4,
158 | "id": "09ab8738",
159 | "metadata": {
160 | "execution": {
161 | "iopub.execute_input": "2023-08-18T07:03:39.394176Z",
162 | "iopub.status.busy": "2023-08-18T07:03:39.393444Z",
163 | "iopub.status.idle": "2023-08-18T07:03:39.409892Z",
164 | "shell.execute_reply": "2023-08-18T07:03:39.408559Z"
165 | },
166 | "origin_pos": 7,
167 | "tab": [
168 | "pytorch"
169 | ]
170 | },
171 | "outputs": [
172 | {
173 | "name": "stdout",
174 | "output_type": "stream",
175 | "text": [
176 | " NumRooms Alley_Pave Alley_nan\n",
177 | "0 3.0 1 0\n",
178 | "1 2.0 0 1\n",
179 | "2 4.0 0 1\n",
180 | "3 3.0 0 1\n"
181 | ]
182 | }
183 | ],
184 | "source": [
185 | "inputs = pd.get_dummies(inputs, dummy_na=True)\n",
186 | "print(inputs)"
187 | ]
188 | },
189 | {
190 | "cell_type": "markdown",
191 | "id": "56c7800a",
192 | "metadata": {
193 | "slideshow": {
194 | "slide_type": "slide"
195 | }
196 | },
197 | "source": [
198 | "现在`inputs`和`outputs`中的所有条目都是数值类型,它们可以转换为张量格式"
199 | ]
200 | },
201 | {
202 | "cell_type": "code",
203 | "execution_count": 5,
204 | "id": "4f551c6d",
205 | "metadata": {
206 | "execution": {
207 | "iopub.execute_input": "2023-08-18T07:03:39.414531Z",
208 | "iopub.status.busy": "2023-08-18T07:03:39.413831Z",
209 | "iopub.status.idle": "2023-08-18T07:03:40.467689Z",
210 | "shell.execute_reply": "2023-08-18T07:03:40.466637Z"
211 | },
212 | "origin_pos": 10,
213 | "tab": [
214 | "pytorch"
215 | ]
216 | },
217 | "outputs": [
218 | {
219 | "data": {
220 | "text/plain": [
221 | "(tensor([[3., 1., 0.],\n",
222 | " [2., 0., 1.],\n",
223 | " [4., 0., 1.],\n",
224 | " [3., 0., 1.]], dtype=torch.float64),\n",
225 | " tensor([127500., 106000., 178100., 140000.], dtype=torch.float64))"
226 | ]
227 | },
228 | "execution_count": 5,
229 | "metadata": {},
230 | "output_type": "execute_result"
231 | }
232 | ],
233 | "source": [
234 | "import torch\n",
235 | "\n",
236 | "X = torch.tensor(inputs.to_numpy(dtype=float))\n",
237 | "y = torch.tensor(outputs.to_numpy(dtype=float))\n",
238 | "X, y"
239 | ]
240 | }
241 | ],
242 | "metadata": {
243 | "celltoolbar": "Slideshow",
244 | "language_info": {
245 | "name": "python"
246 | },
247 | "required_libs": [],
248 | "rise": {
249 | "autolaunch": true,
250 | "enable_chalkboard": true,
251 | "overlay": "",
252 | "scroll": true
253 | }
254 | },
255 | "nbformat": 4,
256 | "nbformat_minor": 5
257 | }
--------------------------------------------------------------------------------
/chapter_preliminaries/rise.css:
--------------------------------------------------------------------------------
1 |
2 | div.text_cell_render.rendered_html {
3 | padding: 0.35em 0.1em;
4 | }
5 |
6 | div.code_cell {
7 | font-size: 120%;
8 | }
9 |
10 | div.my-top-right {
11 | position: absolute;
12 | right: 5%;
13 | top: 1em;
14 | font-size: 2em;
15 | }
16 |
17 | div.my-top-left {
18 | position: absolute;
19 | left: 5%;
20 | top: 1em;
21 | font-size: 2em;
22 | }
23 |
--------------------------------------------------------------------------------
/chapter_recurrent-modern/encoder-decoder.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "id": "80a4901a",
6 | "metadata": {
7 | "slideshow": {
8 | "slide_type": "-"
9 | }
10 | },
11 | "source": [
12 | "# 编码器-解码器架构\n",
13 | "\n",
14 | "编码器"
15 | ]
16 | },
17 | {
18 | "cell_type": "code",
19 | "execution_count": 1,
20 | "id": "17f77c60",
21 | "metadata": {
22 | "execution": {
23 | "iopub.execute_input": "2023-08-18T07:05:48.406295Z",
24 | "iopub.status.busy": "2023-08-18T07:05:48.405469Z",
25 | "iopub.status.idle": "2023-08-18T07:05:49.653322Z",
26 | "shell.execute_reply": "2023-08-18T07:05:49.651979Z"
27 | },
28 | "origin_pos": 2,
29 | "tab": [
30 | "pytorch"
31 | ]
32 | },
33 | "outputs": [],
34 | "source": [
35 | "from torch import nn\n",
36 | "\n",
37 | "\n",
38 | "class Encoder(nn.Module):\n",
39 | " \"\"\"编码器-解码器架构的基本编码器接口\"\"\"\n",
40 | " def __init__(self, **kwargs):\n",
41 | " super(Encoder, self).__init__(**kwargs)\n",
42 | "\n",
43 | " def forward(self, X, *args):\n",
44 | " raise NotImplementedError"
45 | ]
46 | },
47 | {
48 | "cell_type": "markdown",
49 | "id": "614d0a9c",
50 | "metadata": {
51 | "slideshow": {
52 | "slide_type": "slide"
53 | }
54 | },
55 | "source": [
56 | "解码器"
57 | ]
58 | },
59 | {
60 | "cell_type": "code",
61 | "execution_count": 2,
62 | "id": "5c7a6471",
63 | "metadata": {
64 | "execution": {
65 | "iopub.execute_input": "2023-08-18T07:05:49.659889Z",
66 | "iopub.status.busy": "2023-08-18T07:05:49.659020Z",
67 | "iopub.status.idle": "2023-08-18T07:05:49.666360Z",
68 | "shell.execute_reply": "2023-08-18T07:05:49.665230Z"
69 | },
70 | "origin_pos": 7,
71 | "tab": [
72 | "pytorch"
73 | ]
74 | },
75 | "outputs": [],
76 | "source": [
77 | "class Decoder(nn.Module):\n",
78 | " \"\"\"编码器-解码器架构的基本解码器接口\"\"\"\n",
79 | " def __init__(self, **kwargs):\n",
80 | " super(Decoder, self).__init__(**kwargs)\n",
81 | "\n",
82 | " def init_state(self, enc_outputs, *args):\n",
83 | " raise NotImplementedError\n",
84 | "\n",
85 | " def forward(self, X, state):\n",
86 | " raise NotImplementedError"
87 | ]
88 | },
89 | {
90 | "cell_type": "markdown",
91 | "id": "eae87cdc",
92 | "metadata": {
93 | "slideshow": {
94 | "slide_type": "slide"
95 | }
96 | },
97 | "source": [
98 | "合并编码器和解码器"
99 | ]
100 | },
101 | {
102 | "cell_type": "code",
103 | "execution_count": 3,
104 | "id": "53fb0929",
105 | "metadata": {
106 | "execution": {
107 | "iopub.execute_input": "2023-08-18T07:05:49.671685Z",
108 | "iopub.status.busy": "2023-08-18T07:05:49.670944Z",
109 | "iopub.status.idle": "2023-08-18T07:05:49.678831Z",
110 | "shell.execute_reply": "2023-08-18T07:05:49.677718Z"
111 | },
112 | "origin_pos": 12,
113 | "tab": [
114 | "pytorch"
115 | ]
116 | },
117 | "outputs": [],
118 | "source": [
119 | "class EncoderDecoder(nn.Module):\n",
120 | " \"\"\"编码器-解码器架构的基类\"\"\"\n",
121 | " def __init__(self, encoder, decoder, **kwargs):\n",
122 | " super(EncoderDecoder, self).__init__(**kwargs)\n",
123 | " self.encoder = encoder\n",
124 | " self.decoder = decoder\n",
125 | "\n",
126 | " def forward(self, enc_X, dec_X, *args):\n",
127 | " enc_outputs = self.encoder(enc_X, *args)\n",
128 | " dec_state = self.decoder.init_state(enc_outputs, *args)\n",
129 | " return self.decoder(dec_X, dec_state)"
130 | ]
131 | }
132 | ],
133 | "metadata": {
134 | "celltoolbar": "Slideshow",
135 | "language_info": {
136 | "name": "python"
137 | },
138 | "required_libs": [],
139 | "rise": {
140 | "autolaunch": true,
141 | "enable_chalkboard": true,
142 | "overlay": "",
143 | "scroll": true
144 | }
145 | },
146 | "nbformat": 4,
147 | "nbformat_minor": 5
148 | }
--------------------------------------------------------------------------------
/chapter_recurrent-modern/rise.css:
--------------------------------------------------------------------------------
1 |
2 | div.text_cell_render.rendered_html {
3 | padding: 0.35em 0.1em;
4 | }
5 |
6 | div.code_cell {
7 | font-size: 120%;
8 | }
9 |
10 | div.my-top-right {
11 | position: absolute;
12 | right: 5%;
13 | top: 1em;
14 | font-size: 2em;
15 | }
16 |
17 | div.my-top-left {
18 | position: absolute;
19 | left: 5%;
20 | top: 1em;
21 | font-size: 2em;
22 | }
23 |
--------------------------------------------------------------------------------
/chapter_recurrent-neural-networks/rise.css:
--------------------------------------------------------------------------------
1 |
2 | div.text_cell_render.rendered_html {
3 | padding: 0.35em 0.1em;
4 | }
5 |
6 | div.code_cell {
7 | font-size: 120%;
8 | }
9 |
10 | div.my-top-right {
11 | position: absolute;
12 | right: 5%;
13 | top: 1em;
14 | font-size: 2em;
15 | }
16 |
17 | div.my-top-left {
18 | position: absolute;
19 | left: 5%;
20 | top: 1em;
21 | font-size: 2em;
22 | }
23 |
--------------------------------------------------------------------------------
/chapter_recurrent-neural-networks/text-preprocessing.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "id": "0afcc24a",
6 | "metadata": {
7 | "slideshow": {
8 | "slide_type": "-"
9 | }
10 | },
11 | "source": [
12 | "# 文本预处理\n",
13 | "\n"
14 | ]
15 | },
16 | {
17 | "cell_type": "code",
18 | "execution_count": 1,
19 | "id": "bb8907ca",
20 | "metadata": {
21 | "execution": {
22 | "iopub.execute_input": "2023-08-18T07:02:24.243885Z",
23 | "iopub.status.busy": "2023-08-18T07:02:24.243343Z",
24 | "iopub.status.idle": "2023-08-18T07:02:26.213654Z",
25 | "shell.execute_reply": "2023-08-18T07:02:26.212745Z"
26 | },
27 | "origin_pos": 2,
28 | "tab": [
29 | "pytorch"
30 | ]
31 | },
32 | "outputs": [],
33 | "source": [
34 | "import collections\n",
35 | "import re\n",
36 | "from d2l import torch as d2l"
37 | ]
38 | },
39 | {
40 | "cell_type": "markdown",
41 | "id": "80e119b6",
42 | "metadata": {
43 | "slideshow": {
44 | "slide_type": "-"
45 | }
46 | },
47 | "source": [
48 | "将数据集读取到由多条文本行组成的列表中"
49 | ]
50 | },
51 | {
52 | "cell_type": "code",
53 | "execution_count": 2,
54 | "id": "ac0f9f0d",
55 | "metadata": {
56 | "execution": {
57 | "iopub.execute_input": "2023-08-18T07:02:26.218338Z",
58 | "iopub.status.busy": "2023-08-18T07:02:26.217685Z",
59 | "iopub.status.idle": "2023-08-18T07:02:26.304928Z",
60 | "shell.execute_reply": "2023-08-18T07:02:26.304151Z"
61 | },
62 | "origin_pos": 6,
63 | "tab": [
64 | "pytorch"
65 | ]
66 | },
67 | "outputs": [
68 | {
69 | "name": "stdout",
70 | "output_type": "stream",
71 | "text": [
72 | "Downloading ../data/timemachine.txt from http://d2l-data.s3-accelerate.amazonaws.com/timemachine.txt...\n",
73 | "# 文本总行数: 3221\n",
74 | "the time machine by h g wells\n",
75 | "twinkled and his usually pale face was flushed and animated the\n"
76 | ]
77 | }
78 | ],
79 | "source": [
80 | "d2l.DATA_HUB['time_machine'] = (d2l.DATA_URL + 'timemachine.txt',\n",
81 | " '090b5e7e70c295757f55df93cb0a180b9691891a')\n",
82 | "\n",
83 | "def read_time_machine(): \n",
84 | " \"\"\"将时间机器数据集加载到文本行的列表中\"\"\"\n",
85 | " with open(d2l.download('time_machine'), 'r') as f:\n",
86 | " lines = f.readlines()\n",
87 | " return [re.sub('[^A-Za-z]+', ' ', line).strip().lower() for line in lines]\n",
88 | "\n",
89 | "lines = read_time_machine()\n",
90 | "print(f'# 文本总行数: {len(lines)}')\n",
91 | "print(lines[0])\n",
92 | "print(lines[10])"
93 | ]
94 | },
95 | {
96 | "cell_type": "markdown",
97 | "id": "b899d3e4",
98 | "metadata": {
99 | "slideshow": {
100 | "slide_type": "slide"
101 | }
102 | },
103 | "source": [
104 | "每个文本序列又被拆分成一个词元列表"
105 | ]
106 | },
107 | {
108 | "cell_type": "code",
109 | "execution_count": 3,
110 | "id": "afd6a9df",
111 | "metadata": {
112 | "execution": {
113 | "iopub.execute_input": "2023-08-18T07:02:26.308604Z",
114 | "iopub.status.busy": "2023-08-18T07:02:26.308048Z",
115 | "iopub.status.idle": "2023-08-18T07:02:26.317083Z",
116 | "shell.execute_reply": "2023-08-18T07:02:26.316264Z"
117 | },
118 | "origin_pos": 8,
119 | "tab": [
120 | "pytorch"
121 | ]
122 | },
123 | "outputs": [
124 | {
125 | "name": "stdout",
126 | "output_type": "stream",
127 | "text": [
128 | "['the', 'time', 'machine', 'by', 'h', 'g', 'wells']\n",
129 | "[]\n",
130 | "[]\n",
131 | "[]\n",
132 | "[]\n",
133 | "['i']\n",
134 | "[]\n",
135 | "[]\n",
136 | "['the', 'time', 'traveller', 'for', 'so', 'it', 'will', 'be', 'convenient', 'to', 'speak', 'of', 'him']\n",
137 | "['was', 'expounding', 'a', 'recondite', 'matter', 'to', 'us', 'his', 'grey', 'eyes', 'shone', 'and']\n",
138 | "['twinkled', 'and', 'his', 'usually', 'pale', 'face', 'was', 'flushed', 'and', 'animated', 'the']\n"
139 | ]
140 | }
141 | ],
142 | "source": [
143 | "def tokenize(lines, token='word'): \n",
144 | " \"\"\"将文本行拆分为单词或字符词元\"\"\"\n",
145 | " if token == 'word':\n",
146 | " return [line.split() for line in lines]\n",
147 | " elif token == 'char':\n",
148 | " return [list(line) for line in lines]\n",
149 | " else:\n",
150 | " print('错误:未知词元类型:' + token)\n",
151 | "\n",
152 | "tokens = tokenize(lines)\n",
153 | "for i in range(11):\n",
154 | " print(tokens[i])"
155 | ]
156 | },
157 | {
158 | "cell_type": "markdown",
159 | "id": "4f4bc35b",
160 | "metadata": {
161 | "slideshow": {
162 | "slide_type": "slide"
163 | }
164 | },
165 | "source": [
166 | "构建一个字典,通常也叫做*词表*(vocabulary),\n",
167 | "用来将字符串类型的词元映射到从$0$开始的数字索引中"
168 | ]
169 | },
170 | {
171 | "cell_type": "code",
172 | "execution_count": 4,
173 | "id": "16db7dad",
174 | "metadata": {
175 | "execution": {
176 | "iopub.execute_input": "2023-08-18T07:02:26.320587Z",
177 | "iopub.status.busy": "2023-08-18T07:02:26.320050Z",
178 | "iopub.status.idle": "2023-08-18T07:02:26.330519Z",
179 | "shell.execute_reply": "2023-08-18T07:02:26.329736Z"
180 | },
181 | "origin_pos": 10,
182 | "tab": [
183 | "pytorch"
184 | ]
185 | },
186 | "outputs": [],
187 | "source": [
188 | "class Vocab: \n",
189 | " \"\"\"文本词表\"\"\"\n",
190 | " def __init__(self, tokens=None, min_freq=0, reserved_tokens=None):\n",
191 | " if tokens is None:\n",
192 | " tokens = []\n",
193 | " if reserved_tokens is None:\n",
194 | " reserved_tokens = []\n",
195 | " counter = count_corpus(tokens)\n",
196 | " self._token_freqs = sorted(counter.items(), key=lambda x: x[1],\n",
197 | " reverse=True)\n",
198 | " self.idx_to_token = ['<unk>'] + reserved_tokens\n",
199 | " self.token_to_idx = {token: idx\n",
200 | " for idx, token in enumerate(self.idx_to_token)}\n",
201 | " for token, freq in self._token_freqs:\n",
202 | " if freq < min_freq:\n",
203 | " break\n",
204 | " if token not in self.token_to_idx:\n",
205 | " self.idx_to_token.append(token)\n",
206 | " self.token_to_idx[token] = len(self.idx_to_token) - 1\n",
207 | "\n",
208 | " def __len__(self):\n",
209 | " return len(self.idx_to_token)\n",
210 | "\n",
211 | " def __getitem__(self, tokens):\n",
212 | " if not isinstance(tokens, (list, tuple)):\n",
213 | " return self.token_to_idx.get(tokens, self.unk)\n",
214 | " return [self.__getitem__(token) for token in tokens]\n",
215 | "\n",
216 | " def to_tokens(self, indices):\n",
217 | " if not isinstance(indices, (list, tuple)):\n",
218 | " return self.idx_to_token[indices]\n",
219 | " return [self.idx_to_token[index] for index in indices]\n",
220 | "\n",
221 | " @property\n",
222 | " def unk(self):\n",
223 | " return 0\n",
224 | "\n",
225 | " @property\n",
226 | " def token_freqs(self):\n",
227 | " return self._token_freqs\n",
228 | "\n",
229 | "def count_corpus(tokens): \n",
230 | " \"\"\"统计词元的频率\"\"\"\n",
231 | " if len(tokens) == 0 or isinstance(tokens[0], list):\n",
232 | " tokens = [token for line in tokens for token in line]\n",
233 | " return collections.Counter(tokens)"
234 | ]
235 | },
236 | {
237 | "cell_type": "markdown",
238 | "id": "8bea4a87",
239 | "metadata": {
240 | "slideshow": {
241 | "slide_type": "slide"
242 | }
243 | },
244 | "source": [
245 | "构建词表"
246 | ]
247 | },
248 | {
249 | "cell_type": "code",
250 | "execution_count": 5,
251 | "id": "1501d478",
252 | "metadata": {
253 | "execution": {
254 | "iopub.execute_input": "2023-08-18T07:02:26.333942Z",
255 | "iopub.status.busy": "2023-08-18T07:02:26.333382Z",
256 | "iopub.status.idle": "2023-08-18T07:02:26.346927Z",
257 | "shell.execute_reply": "2023-08-18T07:02:26.346182Z"
258 | },
259 | "origin_pos": 12,
260 | "tab": [
261 | "pytorch"
262 | ]
263 | },
264 | "outputs": [
265 | {
266 | "name": "stdout",
267 | "output_type": "stream",
268 | "text": [
269 | "[('<unk>', 0), ('the', 1), ('i', 2), ('and', 3), ('of', 4), ('a', 5), ('to', 6), ('was', 7), ('in', 8), ('that', 9)]\n"
270 | ]
271 | }
272 | ],
273 | "source": [
274 | "vocab = Vocab(tokens)\n",
275 | "print(list(vocab.token_to_idx.items())[:10])"
276 | ]
277 | },
278 | {
279 | "cell_type": "markdown",
280 | "id": "f1cfdd0c",
281 | "metadata": {
282 | "slideshow": {
283 | "slide_type": "-"
284 | }
285 | },
286 | "source": [
287 | "将每一条文本行转换成一个数字索引列表"
288 | ]
289 | },
290 | {
291 | "cell_type": "code",
292 | "execution_count": 6,
293 | "id": "f0244f09",
294 | "metadata": {
295 | "execution": {
296 | "iopub.execute_input": "2023-08-18T07:02:26.350343Z",
297 | "iopub.status.busy": "2023-08-18T07:02:26.349779Z",
298 | "iopub.status.idle": "2023-08-18T07:02:26.354215Z",
299 | "shell.execute_reply": "2023-08-18T07:02:26.353468Z"
300 | },
301 | "origin_pos": 14,
302 | "tab": [
303 | "pytorch"
304 | ]
305 | },
306 | "outputs": [
307 | {
308 | "name": "stdout",
309 | "output_type": "stream",
310 | "text": [
311 | "文本: ['the', 'time', 'machine', 'by', 'h', 'g', 'wells']\n",
312 | "索引: [1, 19, 50, 40, 2183, 2184, 400]\n",
313 | "文本: ['twinkled', 'and', 'his', 'usually', 'pale', 'face', 'was', 'flushed', 'and', 'animated', 'the']\n",
314 | "索引: [2186, 3, 25, 1044, 362, 113, 7, 1421, 3, 1045, 1]\n"
315 | ]
316 | }
317 | ],
318 | "source": [
319 | "for i in [0, 10]:\n",
320 | " print('文本:', tokens[i])\n",
321 | " print('索引:', vocab[tokens[i]])"
322 | ]
323 | },
324 | {
325 | "cell_type": "markdown",
326 | "id": "b400e092",
327 | "metadata": {
328 | "slideshow": {
329 | "slide_type": "slide"
330 | }
331 | },
332 | "source": [
333 | "将所有功能打包到`load_corpus_time_machine`函数中"
334 | ]
335 | },
336 | {
337 | "cell_type": "code",
338 | "execution_count": 7,
339 | "id": "578ed76f",
340 | "metadata": {
341 | "execution": {
342 | "iopub.execute_input": "2023-08-18T07:02:26.357414Z",
343 | "iopub.status.busy": "2023-08-18T07:02:26.357141Z",
344 | "iopub.status.idle": "2023-08-18T07:02:26.470812Z",
345 | "shell.execute_reply": "2023-08-18T07:02:26.470008Z"
346 | },
347 | "origin_pos": 16,
348 | "tab": [
349 | "pytorch"
350 | ]
351 | },
352 | "outputs": [
353 | {
354 | "data": {
355 | "text/plain": [
356 | "(170580, 28)"
357 | ]
358 | },
359 | "execution_count": 7,
360 | "metadata": {},
361 | "output_type": "execute_result"
362 | }
363 | ],
364 | "source": [
365 | "def load_corpus_time_machine(max_tokens=-1): \n",
366 | " \"\"\"返回时光机器数据集的词元索引列表和词表\"\"\"\n",
367 | " lines = read_time_machine()\n",
368 | " tokens = tokenize(lines, 'char')\n",
369 | " vocab = Vocab(tokens)\n",
370 | " corpus = [vocab[token] for line in tokens for token in line]\n",
371 | " if max_tokens > 0:\n",
372 | " corpus = corpus[:max_tokens]\n",
373 | " return corpus, vocab\n",
374 | "\n",
375 | "corpus, vocab = load_corpus_time_machine()\n",
376 | "len(corpus), len(vocab)"
377 | ]
378 | }
379 | ],
380 | "metadata": {
381 | "celltoolbar": "Slideshow",
382 | "language_info": {
383 | "name": "python"
384 | },
385 | "required_libs": [],
386 | "rise": {
387 | "autolaunch": true,
388 | "enable_chalkboard": true,
389 | "overlay": "",
390 | "scroll": true
391 | }
392 | },
393 | "nbformat": 4,
394 | "nbformat_minor": 5
395 | }
--------------------------------------------------------------------------------