├── README.md ├── chapter_attention-mechanisms ├── attention-scoring-functions.ipynb ├── bahdanau-attention.ipynb ├── multihead-attention.ipynb ├── nadaraya-waston.ipynb ├── rise.css ├── self-attention-and-positional-encoding.ipynb └── transformer.ipynb ├── chapter_computational-performance ├── multiple-gpus-concise.ipynb ├── multiple-gpus.ipynb └── rise.css ├── chapter_computer-vision ├── anchor.ipynb ├── bounding-box.ipynb ├── fcn.ipynb ├── fine-tuning.ipynb ├── image-augmentation.ipynb ├── kaggle-cifar10.ipynb ├── kaggle-dog.ipynb ├── multiscale-object-detection.ipynb ├── neural-style.ipynb ├── object-detection-dataset.ipynb ├── rise.css ├── semantic-segmentation-and-dataset.ipynb ├── ssd.ipynb └── transposed-conv.ipynb ├── chapter_convolutional-modern ├── alexnet.ipynb ├── batch-norm.ipynb ├── densenet.ipynb ├── googlenet.ipynb ├── nin.ipynb ├── resnet.ipynb ├── rise.css └── vgg.ipynb ├── chapter_convolutional-neural-networks ├── channels.ipynb ├── conv-layer.ipynb ├── lenet.ipynb ├── padding-and-strides.ipynb ├── pooling.ipynb └── rise.css ├── chapter_deep-learning-computation ├── custom-layer.ipynb ├── model-construction.ipynb ├── parameters.ipynb ├── read-write.ipynb ├── rise.css └── use-gpu.ipynb ├── chapter_linear-networks ├── image-classification-dataset.ipynb ├── linear-regression-concise.ipynb ├── linear-regression-scratch.ipynb ├── linear-regression.ipynb ├── rise.css ├── softmax-regression-concise.ipynb └── softmax-regression-scratch.ipynb ├── chapter_multilayer-perceptrons ├── dropout.ipynb ├── kaggle-house-price.ipynb ├── mlp-concise.ipynb ├── mlp-scratch.ipynb ├── mlp.ipynb ├── numerical-stability-and-init.ipynb ├── rise.css ├── underfit-overfit.ipynb └── weight-decay.ipynb ├── chapter_natural-language-processing-applications ├── natural-language-inference-and-dataset.ipynb ├── natural-language-inference-bert.ipynb └── rise.css ├── chapter_preliminaries ├── autograd.ipynb ├── calculus.ipynb ├── linear-algebra.ipynb ├── lookup-api.ipynb ├── 
ndarray.ipynb ├── pandas.ipynb └── rise.css ├── chapter_recurrent-modern ├── bi-rnn.ipynb ├── deep-rnn.ipynb ├── encoder-decoder.ipynb ├── gru.ipynb ├── lstm.ipynb ├── machine-translation-and-dataset.ipynb ├── rise.css └── seq2seq.ipynb └── chapter_recurrent-neural-networks ├── language-models-and-dataset.ipynb ├── rise.css ├── rnn-concise.ipynb ├── rnn-scratch.ipynb ├── sequence.ipynb └── text-preprocessing.ipynb /README.md: -------------------------------------------------------------------------------- 1 | # d2l-ai/d2l-zh-pytorch-slides 2 | 3 | This repo contains generated notebook slides. To open them locally, we suggest installing the [rise](https://rise.readthedocs.io/en/stable/) extension. 4 | 5 | You can also preview them in nbviewer: 6 | - [chapter_preliminaries/ndarray.ipynb](https://nbviewer.jupyter.org/format/slides/github/d2l-ai/d2l-zh-pytorch-slides/blob/main/chapter_preliminaries/ndarray.ipynb) 7 | - [chapter_preliminaries/pandas.ipynb](https://nbviewer.jupyter.org/format/slides/github/d2l-ai/d2l-zh-pytorch-slides/blob/main/chapter_preliminaries/pandas.ipynb) 8 | - [chapter_preliminaries/linear-algebra.ipynb](https://nbviewer.jupyter.org/format/slides/github/d2l-ai/d2l-zh-pytorch-slides/blob/main/chapter_preliminaries/linear-algebra.ipynb) 9 | - [chapter_preliminaries/calculus.ipynb](https://nbviewer.jupyter.org/format/slides/github/d2l-ai/d2l-zh-pytorch-slides/blob/main/chapter_preliminaries/calculus.ipynb) 10 | - [chapter_preliminaries/autograd.ipynb](https://nbviewer.jupyter.org/format/slides/github/d2l-ai/d2l-zh-pytorch-slides/blob/main/chapter_preliminaries/autograd.ipynb) 11 | - [chapter_preliminaries/lookup-api.ipynb](https://nbviewer.jupyter.org/format/slides/github/d2l-ai/d2l-zh-pytorch-slides/blob/main/chapter_preliminaries/lookup-api.ipynb) 12 | - [chapter_linear-networks/linear-regression.ipynb](https://nbviewer.jupyter.org/format/slides/github/d2l-ai/d2l-zh-pytorch-slides/blob/main/chapter_linear-networks/linear-regression.ipynb) 13 | - 
[chapter_linear-networks/linear-regression-scratch.ipynb](https://nbviewer.jupyter.org/format/slides/github/d2l-ai/d2l-zh-pytorch-slides/blob/main/chapter_linear-networks/linear-regression-scratch.ipynb) 14 | - [chapter_linear-networks/linear-regression-concise.ipynb](https://nbviewer.jupyter.org/format/slides/github/d2l-ai/d2l-zh-pytorch-slides/blob/main/chapter_linear-networks/linear-regression-concise.ipynb) 15 | - [chapter_linear-networks/image-classification-dataset.ipynb](https://nbviewer.jupyter.org/format/slides/github/d2l-ai/d2l-zh-pytorch-slides/blob/main/chapter_linear-networks/image-classification-dataset.ipynb) 16 | - [chapter_linear-networks/softmax-regression-scratch.ipynb](https://nbviewer.jupyter.org/format/slides/github/d2l-ai/d2l-zh-pytorch-slides/blob/main/chapter_linear-networks/softmax-regression-scratch.ipynb) 17 | - [chapter_linear-networks/softmax-regression-concise.ipynb](https://nbviewer.jupyter.org/format/slides/github/d2l-ai/d2l-zh-pytorch-slides/blob/main/chapter_linear-networks/softmax-regression-concise.ipynb) 18 | - [chapter_multilayer-perceptrons/mlp.ipynb](https://nbviewer.jupyter.org/format/slides/github/d2l-ai/d2l-zh-pytorch-slides/blob/main/chapter_multilayer-perceptrons/mlp.ipynb) 19 | - [chapter_multilayer-perceptrons/mlp-scratch.ipynb](https://nbviewer.jupyter.org/format/slides/github/d2l-ai/d2l-zh-pytorch-slides/blob/main/chapter_multilayer-perceptrons/mlp-scratch.ipynb) 20 | - [chapter_multilayer-perceptrons/mlp-concise.ipynb](https://nbviewer.jupyter.org/format/slides/github/d2l-ai/d2l-zh-pytorch-slides/blob/main/chapter_multilayer-perceptrons/mlp-concise.ipynb) 21 | - [chapter_multilayer-perceptrons/underfit-overfit.ipynb](https://nbviewer.jupyter.org/format/slides/github/d2l-ai/d2l-zh-pytorch-slides/blob/main/chapter_multilayer-perceptrons/underfit-overfit.ipynb) 22 | - 
[chapter_multilayer-perceptrons/weight-decay.ipynb](https://nbviewer.jupyter.org/format/slides/github/d2l-ai/d2l-zh-pytorch-slides/blob/main/chapter_multilayer-perceptrons/weight-decay.ipynb) 23 | - [chapter_multilayer-perceptrons/dropout.ipynb](https://nbviewer.jupyter.org/format/slides/github/d2l-ai/d2l-zh-pytorch-slides/blob/main/chapter_multilayer-perceptrons/dropout.ipynb) 24 | - [chapter_multilayer-perceptrons/numerical-stability-and-init.ipynb](https://nbviewer.jupyter.org/format/slides/github/d2l-ai/d2l-zh-pytorch-slides/blob/main/chapter_multilayer-perceptrons/numerical-stability-and-init.ipynb) 25 | - [chapter_multilayer-perceptrons/kaggle-house-price.ipynb](https://nbviewer.jupyter.org/format/slides/github/d2l-ai/d2l-zh-pytorch-slides/blob/main/chapter_multilayer-perceptrons/kaggle-house-price.ipynb) 26 | - [chapter_deep-learning-computation/model-construction.ipynb](https://nbviewer.jupyter.org/format/slides/github/d2l-ai/d2l-zh-pytorch-slides/blob/main/chapter_deep-learning-computation/model-construction.ipynb) 27 | - [chapter_deep-learning-computation/parameters.ipynb](https://nbviewer.jupyter.org/format/slides/github/d2l-ai/d2l-zh-pytorch-slides/blob/main/chapter_deep-learning-computation/parameters.ipynb) 28 | - [chapter_deep-learning-computation/custom-layer.ipynb](https://nbviewer.jupyter.org/format/slides/github/d2l-ai/d2l-zh-pytorch-slides/blob/main/chapter_deep-learning-computation/custom-layer.ipynb) 29 | - [chapter_deep-learning-computation/read-write.ipynb](https://nbviewer.jupyter.org/format/slides/github/d2l-ai/d2l-zh-pytorch-slides/blob/main/chapter_deep-learning-computation/read-write.ipynb) 30 | - [chapter_deep-learning-computation/use-gpu.ipynb](https://nbviewer.jupyter.org/format/slides/github/d2l-ai/d2l-zh-pytorch-slides/blob/main/chapter_deep-learning-computation/use-gpu.ipynb) 31 | - 
[chapter_convolutional-neural-networks/conv-layer.ipynb](https://nbviewer.jupyter.org/format/slides/github/d2l-ai/d2l-zh-pytorch-slides/blob/main/chapter_convolutional-neural-networks/conv-layer.ipynb) 32 | - [chapter_convolutional-neural-networks/padding-and-strides.ipynb](https://nbviewer.jupyter.org/format/slides/github/d2l-ai/d2l-zh-pytorch-slides/blob/main/chapter_convolutional-neural-networks/padding-and-strides.ipynb) 33 | - [chapter_convolutional-neural-networks/channels.ipynb](https://nbviewer.jupyter.org/format/slides/github/d2l-ai/d2l-zh-pytorch-slides/blob/main/chapter_convolutional-neural-networks/channels.ipynb) 34 | - [chapter_convolutional-neural-networks/pooling.ipynb](https://nbviewer.jupyter.org/format/slides/github/d2l-ai/d2l-zh-pytorch-slides/blob/main/chapter_convolutional-neural-networks/pooling.ipynb) 35 | - [chapter_convolutional-neural-networks/lenet.ipynb](https://nbviewer.jupyter.org/format/slides/github/d2l-ai/d2l-zh-pytorch-slides/blob/main/chapter_convolutional-neural-networks/lenet.ipynb) 36 | - [chapter_convolutional-modern/alexnet.ipynb](https://nbviewer.jupyter.org/format/slides/github/d2l-ai/d2l-zh-pytorch-slides/blob/main/chapter_convolutional-modern/alexnet.ipynb) 37 | - [chapter_convolutional-modern/vgg.ipynb](https://nbviewer.jupyter.org/format/slides/github/d2l-ai/d2l-zh-pytorch-slides/blob/main/chapter_convolutional-modern/vgg.ipynb) 38 | - [chapter_convolutional-modern/nin.ipynb](https://nbviewer.jupyter.org/format/slides/github/d2l-ai/d2l-zh-pytorch-slides/blob/main/chapter_convolutional-modern/nin.ipynb) 39 | - [chapter_convolutional-modern/googlenet.ipynb](https://nbviewer.jupyter.org/format/slides/github/d2l-ai/d2l-zh-pytorch-slides/blob/main/chapter_convolutional-modern/googlenet.ipynb) 40 | - [chapter_convolutional-modern/batch-norm.ipynb](https://nbviewer.jupyter.org/format/slides/github/d2l-ai/d2l-zh-pytorch-slides/blob/main/chapter_convolutional-modern/batch-norm.ipynb) 41 | - 
[chapter_convolutional-modern/resnet.ipynb](https://nbviewer.jupyter.org/format/slides/github/d2l-ai/d2l-zh-pytorch-slides/blob/main/chapter_convolutional-modern/resnet.ipynb) 42 | - [chapter_convolutional-modern/densenet.ipynb](https://nbviewer.jupyter.org/format/slides/github/d2l-ai/d2l-zh-pytorch-slides/blob/main/chapter_convolutional-modern/densenet.ipynb) 43 | - [chapter_recurrent-neural-networks/sequence.ipynb](https://nbviewer.jupyter.org/format/slides/github/d2l-ai/d2l-zh-pytorch-slides/blob/main/chapter_recurrent-neural-networks/sequence.ipynb) 44 | - [chapter_recurrent-neural-networks/text-preprocessing.ipynb](https://nbviewer.jupyter.org/format/slides/github/d2l-ai/d2l-zh-pytorch-slides/blob/main/chapter_recurrent-neural-networks/text-preprocessing.ipynb) 45 | - [chapter_recurrent-neural-networks/language-models-and-dataset.ipynb](https://nbviewer.jupyter.org/format/slides/github/d2l-ai/d2l-zh-pytorch-slides/blob/main/chapter_recurrent-neural-networks/language-models-and-dataset.ipynb) 46 | - [chapter_recurrent-neural-networks/rnn-scratch.ipynb](https://nbviewer.jupyter.org/format/slides/github/d2l-ai/d2l-zh-pytorch-slides/blob/main/chapter_recurrent-neural-networks/rnn-scratch.ipynb) 47 | - [chapter_recurrent-neural-networks/rnn-concise.ipynb](https://nbviewer.jupyter.org/format/slides/github/d2l-ai/d2l-zh-pytorch-slides/blob/main/chapter_recurrent-neural-networks/rnn-concise.ipynb) 48 | - [chapter_recurrent-modern/gru.ipynb](https://nbviewer.jupyter.org/format/slides/github/d2l-ai/d2l-zh-pytorch-slides/blob/main/chapter_recurrent-modern/gru.ipynb) 49 | - [chapter_recurrent-modern/lstm.ipynb](https://nbviewer.jupyter.org/format/slides/github/d2l-ai/d2l-zh-pytorch-slides/blob/main/chapter_recurrent-modern/lstm.ipynb) 50 | - [chapter_recurrent-modern/deep-rnn.ipynb](https://nbviewer.jupyter.org/format/slides/github/d2l-ai/d2l-zh-pytorch-slides/blob/main/chapter_recurrent-modern/deep-rnn.ipynb) 51 | - 
[chapter_recurrent-modern/bi-rnn.ipynb](https://nbviewer.jupyter.org/format/slides/github/d2l-ai/d2l-zh-pytorch-slides/blob/main/chapter_recurrent-modern/bi-rnn.ipynb) 52 | - [chapter_recurrent-modern/machine-translation-and-dataset.ipynb](https://nbviewer.jupyter.org/format/slides/github/d2l-ai/d2l-zh-pytorch-slides/blob/main/chapter_recurrent-modern/machine-translation-and-dataset.ipynb) 53 | - [chapter_recurrent-modern/encoder-decoder.ipynb](https://nbviewer.jupyter.org/format/slides/github/d2l-ai/d2l-zh-pytorch-slides/blob/main/chapter_recurrent-modern/encoder-decoder.ipynb) 54 | - [chapter_recurrent-modern/seq2seq.ipynb](https://nbviewer.jupyter.org/format/slides/github/d2l-ai/d2l-zh-pytorch-slides/blob/main/chapter_recurrent-modern/seq2seq.ipynb) 55 | - [chapter_attention-mechanisms/nadaraya-waston.ipynb](https://nbviewer.jupyter.org/format/slides/github/d2l-ai/d2l-zh-pytorch-slides/blob/main/chapter_attention-mechanisms/nadaraya-waston.ipynb) 56 | - [chapter_attention-mechanisms/attention-scoring-functions.ipynb](https://nbviewer.jupyter.org/format/slides/github/d2l-ai/d2l-zh-pytorch-slides/blob/main/chapter_attention-mechanisms/attention-scoring-functions.ipynb) 57 | - [chapter_attention-mechanisms/bahdanau-attention.ipynb](https://nbviewer.jupyter.org/format/slides/github/d2l-ai/d2l-zh-pytorch-slides/blob/main/chapter_attention-mechanisms/bahdanau-attention.ipynb) 58 | - [chapter_attention-mechanisms/multihead-attention.ipynb](https://nbviewer.jupyter.org/format/slides/github/d2l-ai/d2l-zh-pytorch-slides/blob/main/chapter_attention-mechanisms/multihead-attention.ipynb) 59 | - [chapter_attention-mechanisms/self-attention-and-positional-encoding.ipynb](https://nbviewer.jupyter.org/format/slides/github/d2l-ai/d2l-zh-pytorch-slides/blob/main/chapter_attention-mechanisms/self-attention-and-positional-encoding.ipynb) 60 | - 
[chapter_attention-mechanisms/transformer.ipynb](https://nbviewer.jupyter.org/format/slides/github/d2l-ai/d2l-zh-pytorch-slides/blob/main/chapter_attention-mechanisms/transformer.ipynb) 61 | - [chapter_computational-performance/multiple-gpus.ipynb](https://nbviewer.jupyter.org/format/slides/github/d2l-ai/d2l-zh-pytorch-slides/blob/main/chapter_computational-performance/multiple-gpus.ipynb) 62 | - [chapter_computational-performance/multiple-gpus-concise.ipynb](https://nbviewer.jupyter.org/format/slides/github/d2l-ai/d2l-zh-pytorch-slides/blob/main/chapter_computational-performance/multiple-gpus-concise.ipynb) 63 | - [chapter_computer-vision/image-augmentation.ipynb](https://nbviewer.jupyter.org/format/slides/github/d2l-ai/d2l-zh-pytorch-slides/blob/main/chapter_computer-vision/image-augmentation.ipynb) 64 | - [chapter_computer-vision/fine-tuning.ipynb](https://nbviewer.jupyter.org/format/slides/github/d2l-ai/d2l-zh-pytorch-slides/blob/main/chapter_computer-vision/fine-tuning.ipynb) 65 | - [chapter_computer-vision/bounding-box.ipynb](https://nbviewer.jupyter.org/format/slides/github/d2l-ai/d2l-zh-pytorch-slides/blob/main/chapter_computer-vision/bounding-box.ipynb) 66 | - [chapter_computer-vision/anchor.ipynb](https://nbviewer.jupyter.org/format/slides/github/d2l-ai/d2l-zh-pytorch-slides/blob/main/chapter_computer-vision/anchor.ipynb) 67 | - [chapter_computer-vision/multiscale-object-detection.ipynb](https://nbviewer.jupyter.org/format/slides/github/d2l-ai/d2l-zh-pytorch-slides/blob/main/chapter_computer-vision/multiscale-object-detection.ipynb) 68 | - [chapter_computer-vision/object-detection-dataset.ipynb](https://nbviewer.jupyter.org/format/slides/github/d2l-ai/d2l-zh-pytorch-slides/blob/main/chapter_computer-vision/object-detection-dataset.ipynb) 69 | - [chapter_computer-vision/ssd.ipynb](https://nbviewer.jupyter.org/format/slides/github/d2l-ai/d2l-zh-pytorch-slides/blob/main/chapter_computer-vision/ssd.ipynb) 70 | - 
[chapter_computer-vision/semantic-segmentation-and-dataset.ipynb](https://nbviewer.jupyter.org/format/slides/github/d2l-ai/d2l-zh-pytorch-slides/blob/main/chapter_computer-vision/semantic-segmentation-and-dataset.ipynb) 71 | - [chapter_computer-vision/transposed-conv.ipynb](https://nbviewer.jupyter.org/format/slides/github/d2l-ai/d2l-zh-pytorch-slides/blob/main/chapter_computer-vision/transposed-conv.ipynb) 72 | - [chapter_computer-vision/fcn.ipynb](https://nbviewer.jupyter.org/format/slides/github/d2l-ai/d2l-zh-pytorch-slides/blob/main/chapter_computer-vision/fcn.ipynb) 73 | - [chapter_computer-vision/neural-style.ipynb](https://nbviewer.jupyter.org/format/slides/github/d2l-ai/d2l-zh-pytorch-slides/blob/main/chapter_computer-vision/neural-style.ipynb) 74 | - [chapter_computer-vision/kaggle-cifar10.ipynb](https://nbviewer.jupyter.org/format/slides/github/d2l-ai/d2l-zh-pytorch-slides/blob/main/chapter_computer-vision/kaggle-cifar10.ipynb) 75 | - [chapter_computer-vision/kaggle-dog.ipynb](https://nbviewer.jupyter.org/format/slides/github/d2l-ai/d2l-zh-pytorch-slides/blob/main/chapter_computer-vision/kaggle-dog.ipynb) 76 | - [chapter_natural-language-processing-applications/natural-language-inference-and-dataset.ipynb](https://nbviewer.jupyter.org/format/slides/github/d2l-ai/d2l-zh-pytorch-slides/blob/main/chapter_natural-language-processing-applications/natural-language-inference-and-dataset.ipynb) 77 | - [chapter_natural-language-processing-applications/natural-language-inference-bert.ipynb](https://nbviewer.jupyter.org/format/slides/github/d2l-ai/d2l-zh-pytorch-slides/blob/main/chapter_natural-language-processing-applications/natural-language-inference-bert.ipynb) -------------------------------------------------------------------------------- /chapter_attention-mechanisms/multihead-attention.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "id": "cb4d82f7", 6 | 
"metadata": { 7 | "slideshow": { 8 | "slide_type": "-" 9 | } 10 | }, 11 | "source": [ 12 | "# 多头注意力\n", 13 | "\n" 14 | ] 15 | }, 16 | { 17 | "cell_type": "code", 18 | "execution_count": 1, 19 | "id": "dc55ba33", 20 | "metadata": { 21 | "execution": { 22 | "iopub.execute_input": "2023-08-18T07:01:32.189972Z", 23 | "iopub.status.busy": "2023-08-18T07:01:32.189240Z", 24 | "iopub.status.idle": "2023-08-18T07:01:34.516491Z", 25 | "shell.execute_reply": "2023-08-18T07:01:34.515475Z" 26 | }, 27 | "origin_pos": 2, 28 | "tab": [ 29 | "pytorch" 30 | ] 31 | }, 32 | "outputs": [], 33 | "source": [ 34 | "import math\n", 35 | "import torch\n", 36 | "from torch import nn\n", 37 | "from d2l import torch as d2l" 38 | ] 39 | }, 40 | { 41 | "cell_type": "markdown", 42 | "id": "22964f2f", 43 | "metadata": { 44 | "slideshow": { 45 | "slide_type": "slide" 46 | } 47 | }, 48 | "source": [ 49 | "选择缩放点积注意力作为每一个注意力头" 50 | ] 51 | }, 52 | { 53 | "cell_type": "code", 54 | "execution_count": 2, 55 | "id": "1bb10990", 56 | "metadata": { 57 | "execution": { 58 | "iopub.execute_input": "2023-08-18T07:01:34.521491Z", 59 | "iopub.status.busy": "2023-08-18T07:01:34.521131Z", 60 | "iopub.status.idle": "2023-08-18T07:01:34.530492Z", 61 | "shell.execute_reply": "2023-08-18T07:01:34.529556Z" 62 | }, 63 | "origin_pos": 7, 64 | "tab": [ 65 | "pytorch" 66 | ] 67 | }, 68 | "outputs": [], 69 | "source": [ 70 | "class MultiHeadAttention(nn.Module):\n", 71 | " \"\"\"多头注意力\"\"\"\n", 72 | " def __init__(self, key_size, query_size, value_size, num_hiddens,\n", 73 | " num_heads, dropout, bias=False, **kwargs):\n", 74 | " super(MultiHeadAttention, self).__init__(**kwargs)\n", 75 | " self.num_heads = num_heads\n", 76 | " self.attention = d2l.DotProductAttention(dropout)\n", 77 | " self.W_q = nn.Linear(query_size, num_hiddens, bias=bias)\n", 78 | " self.W_k = nn.Linear(key_size, num_hiddens, bias=bias)\n", 79 | " self.W_v = nn.Linear(value_size, num_hiddens, bias=bias)\n", 80 | " self.W_o = nn.Linear(num_hiddens, 
num_hiddens, bias=bias)\n", 81 | "\n", 82 | " def forward(self, queries, keys, values, valid_lens):\n", 83 | " queries = transpose_qkv(self.W_q(queries), self.num_heads)\n", 84 | " keys = transpose_qkv(self.W_k(keys), self.num_heads)\n", 85 | " values = transpose_qkv(self.W_v(values), self.num_heads)\n", 86 | "\n", 87 | " if valid_lens is not None:\n", 88 | " valid_lens = torch.repeat_interleave(\n", 89 | " valid_lens, repeats=self.num_heads, dim=0)\n", 90 | "\n", 91 | " output = self.attention(queries, keys, values, valid_lens)\n", 92 | "\n", 93 | " output_concat = transpose_output(output, self.num_heads)\n", 94 | " return self.W_o(output_concat)" 95 | ] 96 | }, 97 | { 98 | "cell_type": "markdown", 99 | "id": "d376aca2", 100 | "metadata": { 101 | "slideshow": { 102 | "slide_type": "slide" 103 | } 104 | }, 105 | "source": [ 106 | "使多个头并行计算" 107 | ] 108 | }, 109 | { 110 | "cell_type": "code", 111 | "execution_count": 3, 112 | "id": "b2af5ed8", 113 | "metadata": { 114 | "execution": { 115 | "iopub.execute_input": "2023-08-18T07:01:34.534820Z", 116 | "iopub.status.busy": "2023-08-18T07:01:34.534308Z", 117 | "iopub.status.idle": "2023-08-18T07:01:34.540852Z", 118 | "shell.execute_reply": "2023-08-18T07:01:34.539927Z" 119 | }, 120 | "origin_pos": 12, 121 | "tab": [ 122 | "pytorch" 123 | ] 124 | }, 125 | "outputs": [], 126 | "source": [ 127 | "def transpose_qkv(X, num_heads):\n", 128 | " \"\"\"为了多注意力头的并行计算而变换形状\"\"\"\n", 129 | " X = X.reshape(X.shape[0], X.shape[1], num_heads, -1)\n", 130 | "\n", 131 | " X = X.permute(0, 2, 1, 3)\n", 132 | "\n", 133 | " return X.reshape(-1, X.shape[2], X.shape[3])\n", 134 | "\n", 135 | "\n", 136 | "def transpose_output(X, num_heads):\n", 137 | " \"\"\"逆转transpose_qkv函数的操作\"\"\"\n", 138 | " X = X.reshape(-1, num_heads, X.shape[1], X.shape[2])\n", 139 | " X = X.permute(0, 2, 1, 3)\n", 140 | " return X.reshape(X.shape[0], X.shape[1], -1)" 141 | ] 142 | }, 143 | { 144 | "cell_type": "markdown", 145 | "id": "015e3e67", 146 | "metadata": { 147 
| "slideshow": { 148 | "slide_type": "slide" 149 | } 150 | }, 151 | "source": [ 152 | "测试" 153 | ] 154 | }, 155 | { 156 | "cell_type": "code", 157 | "execution_count": 4, 158 | "id": "d06baadf", 159 | "metadata": { 160 | "execution": { 161 | "iopub.execute_input": "2023-08-18T07:01:34.545405Z", 162 | "iopub.status.busy": "2023-08-18T07:01:34.544605Z", 163 | "iopub.status.idle": "2023-08-18T07:01:34.571251Z", 164 | "shell.execute_reply": "2023-08-18T07:01:34.570476Z" 165 | }, 166 | "origin_pos": 17, 167 | "tab": [ 168 | "pytorch" 169 | ] 170 | }, 171 | "outputs": [ 172 | { 173 | "data": { 174 | "text/plain": [ 175 | "MultiHeadAttention(\n", 176 | " (attention): DotProductAttention(\n", 177 | " (dropout): Dropout(p=0.5, inplace=False)\n", 178 | " )\n", 179 | " (W_q): Linear(in_features=100, out_features=100, bias=False)\n", 180 | " (W_k): Linear(in_features=100, out_features=100, bias=False)\n", 181 | " (W_v): Linear(in_features=100, out_features=100, bias=False)\n", 182 | " (W_o): Linear(in_features=100, out_features=100, bias=False)\n", 183 | ")" 184 | ] 185 | }, 186 | "execution_count": 4, 187 | "metadata": {}, 188 | "output_type": "execute_result" 189 | } 190 | ], 191 | "source": [ 192 | "num_hiddens, num_heads = 100, 5\n", 193 | "attention = MultiHeadAttention(num_hiddens, num_hiddens, num_hiddens,\n", 194 | " num_hiddens, num_heads, 0.5)\n", 195 | "attention.eval()" 196 | ] 197 | }, 198 | { 199 | "cell_type": "code", 200 | "execution_count": 5, 201 | "id": "8da65afc", 202 | "metadata": { 203 | "execution": { 204 | "iopub.execute_input": "2023-08-18T07:01:34.574642Z", 205 | "iopub.status.busy": "2023-08-18T07:01:34.574021Z", 206 | "iopub.status.idle": "2023-08-18T07:01:34.588848Z", 207 | "shell.execute_reply": "2023-08-18T07:01:34.587945Z" 208 | }, 209 | "origin_pos": 20, 210 | "tab": [ 211 | "pytorch" 212 | ] 213 | }, 214 | "outputs": [ 215 | { 216 | "data": { 217 | "text/plain": [ 218 | "torch.Size([2, 4, 100])" 219 | ] 220 | }, 221 | "execution_count": 5, 222 
| "metadata": {}, 223 | "output_type": "execute_result" 224 | } 225 | ], 226 | "source": [ 227 | "batch_size, num_queries = 2, 4\n", 228 | "num_kvpairs, valid_lens = 6, torch.tensor([3, 2])\n", 229 | "X = torch.ones((batch_size, num_queries, num_hiddens))\n", 230 | "Y = torch.ones((batch_size, num_kvpairs, num_hiddens))\n", 231 | "attention(X, Y, Y, valid_lens).shape" 232 | ] 233 | } 234 | ], 235 | "metadata": { 236 | "celltoolbar": "Slideshow", 237 | "language_info": { 238 | "name": "python" 239 | }, 240 | "required_libs": [], 241 | "rise": { 242 | "autolaunch": true, 243 | "enable_chalkboard": true, 244 | "overlay": "
", 245 | "scroll": true 246 | } 247 | }, 248 | "nbformat": 4, 249 | "nbformat_minor": 5 250 | } -------------------------------------------------------------------------------- /chapter_attention-mechanisms/rise.css: -------------------------------------------------------------------------------- 1 | 2 | div.text_cell_render.rendered_html { 3 | padding: 0.35em 0.1em; 4 | } 5 | 6 | div.code_cell { 7 | font-size: 120%; 8 | } 9 | 10 | div.my-top-right { 11 | position: absolute; 12 | right: 5%; 13 | top: 1em; 14 | font-size: 2em; 15 | } 16 | 17 | div.my-top-left { 18 | position: absolute; 19 | left: 5%; 20 | top: 1em; 21 | font-size: 2em; 22 | } 23 | -------------------------------------------------------------------------------- /chapter_computational-performance/rise.css: -------------------------------------------------------------------------------- 1 | 2 | div.text_cell_render.rendered_html { 3 | padding: 0.35em 0.1em; 4 | } 5 | 6 | div.code_cell { 7 | font-size: 120%; 8 | } 9 | 10 | div.my-top-right { 11 | position: absolute; 12 | right: 5%; 13 | top: 1em; 14 | font-size: 2em; 15 | } 16 | 17 | div.my-top-left { 18 | position: absolute; 19 | left: 5%; 20 | top: 1em; 21 | font-size: 2em; 22 | } 23 | -------------------------------------------------------------------------------- /chapter_computer-vision/rise.css: -------------------------------------------------------------------------------- 1 | 2 | div.text_cell_render.rendered_html { 3 | padding: 0.35em 0.1em; 4 | } 5 | 6 | div.code_cell { 7 | font-size: 120%; 8 | } 9 | 10 | div.my-top-right { 11 | position: absolute; 12 | right: 5%; 13 | top: 1em; 14 | font-size: 2em; 15 | } 16 | 17 | div.my-top-left { 18 | position: absolute; 19 | left: 5%; 20 | top: 1em; 21 | font-size: 2em; 22 | } 23 | -------------------------------------------------------------------------------- /chapter_computer-vision/transposed-conv.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | 
"cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "id": "db0f208a", 6 | "metadata": { 7 | "slideshow": { 8 | "slide_type": "-" 9 | } 10 | }, 11 | "source": [ 12 | "# 转置卷积\n", 13 | "\n" 14 | ] 15 | }, 16 | { 17 | "cell_type": "code", 18 | "execution_count": 1, 19 | "id": "1f39b5ef", 20 | "metadata": { 21 | "execution": { 22 | "iopub.execute_input": "2023-08-18T07:05:22.451701Z", 23 | "iopub.status.busy": "2023-08-18T07:05:22.451411Z", 24 | "iopub.status.idle": "2023-08-18T07:05:24.490785Z", 25 | "shell.execute_reply": "2023-08-18T07:05:24.489970Z" 26 | }, 27 | "origin_pos": 2, 28 | "tab": [ 29 | "pytorch" 30 | ] 31 | }, 32 | "outputs": [], 33 | "source": [ 34 | "import torch\n", 35 | "from torch import nn\n", 36 | "from d2l import torch as d2l" 37 | ] 38 | }, 39 | { 40 | "cell_type": "markdown", 41 | "id": "1f3a83f3", 42 | "metadata": { 43 | "slideshow": { 44 | "slide_type": "-" 45 | } 46 | }, 47 | "source": [ 48 | "实现基本的转置卷积运算" 49 | ] 50 | }, 51 | { 52 | "cell_type": "code", 53 | "execution_count": 2, 54 | "id": "e6931d90", 55 | "metadata": { 56 | "execution": { 57 | "iopub.execute_input": "2023-08-18T07:05:24.494981Z", 58 | "iopub.status.busy": "2023-08-18T07:05:24.494307Z", 59 | "iopub.status.idle": "2023-08-18T07:05:24.499745Z", 60 | "shell.execute_reply": "2023-08-18T07:05:24.498885Z" 61 | }, 62 | "origin_pos": 5, 63 | "tab": [ 64 | "pytorch" 65 | ] 66 | }, 67 | "outputs": [], 68 | "source": [ 69 | "def trans_conv(X, K):\n", 70 | " h, w = K.shape\n", 71 | " Y = torch.zeros((X.shape[0] + h - 1, X.shape[1] + w - 1))\n", 72 | " for i in range(X.shape[0]):\n", 73 | " for j in range(X.shape[1]):\n", 74 | " Y[i: i + h, j: j + w] += X[i, j] * K\n", 75 | " return Y" 76 | ] 77 | }, 78 | { 79 | "cell_type": "markdown", 80 | "id": "f3baa22e", 81 | "metadata": { 82 | "slideshow": { 83 | "slide_type": "slide" 84 | } 85 | }, 86 | "source": [ 87 | "验证上述实现输出" 88 | ] 89 | }, 90 | { 91 | "cell_type": "code", 92 | "execution_count": 3, 93 | "id": "a7c6e2fd", 94 | "metadata": { 95 
| "execution": { 96 | "iopub.execute_input": "2023-08-18T07:05:24.503202Z", 97 | "iopub.status.busy": "2023-08-18T07:05:24.502646Z", 98 | "iopub.status.idle": "2023-08-18T07:05:24.531448Z", 99 | "shell.execute_reply": "2023-08-18T07:05:24.530730Z" 100 | }, 101 | "origin_pos": 7, 102 | "tab": [ 103 | "pytorch" 104 | ] 105 | }, 106 | "outputs": [ 107 | { 108 | "data": { 109 | "text/plain": [ 110 | "tensor([[ 0., 0., 1.],\n", 111 | " [ 0., 4., 6.],\n", 112 | " [ 4., 12., 9.]])" 113 | ] 114 | }, 115 | "execution_count": 3, 116 | "metadata": {}, 117 | "output_type": "execute_result" 118 | } 119 | ], 120 | "source": [ 121 | "X = torch.tensor([[0.0, 1.0], [2.0, 3.0]])\n", 122 | "K = torch.tensor([[0.0, 1.0], [2.0, 3.0]])\n", 123 | "trans_conv(X, K)" 124 | ] 125 | }, 126 | { 127 | "cell_type": "markdown", 128 | "id": "9f9dd301", 129 | "metadata": { 130 | "slideshow": { 131 | "slide_type": "slide" 132 | } 133 | }, 134 | "source": [ 135 | "使用高级API获得相同的结果" 136 | ] 137 | }, 138 | { 139 | "cell_type": "code", 140 | "execution_count": 4, 141 | "id": "b9de6d80", 142 | "metadata": { 143 | "execution": { 144 | "iopub.execute_input": "2023-08-18T07:05:24.535386Z", 145 | "iopub.status.busy": "2023-08-18T07:05:24.534826Z", 146 | "iopub.status.idle": "2023-08-18T07:05:24.544484Z", 147 | "shell.execute_reply": "2023-08-18T07:05:24.543747Z" 148 | }, 149 | "origin_pos": 10, 150 | "tab": [ 151 | "pytorch" 152 | ] 153 | }, 154 | "outputs": [ 155 | { 156 | "data": { 157 | "text/plain": [ 158 | "tensor([[[[ 0., 0., 1.],\n", 159 | " [ 0., 4., 6.],\n", 160 | " [ 4., 12., 9.]]]], grad_fn=)" 161 | ] 162 | }, 163 | "execution_count": 4, 164 | "metadata": {}, 165 | "output_type": "execute_result" 166 | } 167 | ], 168 | "source": [ 169 | "X, K = X.reshape(1, 1, 2, 2), K.reshape(1, 1, 2, 2)\n", 170 | "tconv = nn.ConvTranspose2d(1, 1, kernel_size=2, bias=False)\n", 171 | "tconv.weight.data = K\n", 172 | "tconv(X)" 173 | ] 174 | }, 175 | { 176 | "cell_type": "markdown", 177 | "id": "f8811b58", 178 | 
"metadata": { 179 | "slideshow": { 180 | "slide_type": "slide" 181 | } 182 | }, 183 | "source": [ 184 | "填充、步幅和多通道" 185 | ] 186 | }, 187 | { 188 | "cell_type": "code", 189 | "execution_count": 5, 190 | "id": "cd114de1", 191 | "metadata": { 192 | "execution": { 193 | "iopub.execute_input": "2023-08-18T07:05:24.548040Z", 194 | "iopub.status.busy": "2023-08-18T07:05:24.547398Z", 195 | "iopub.status.idle": "2023-08-18T07:05:24.553659Z", 196 | "shell.execute_reply": "2023-08-18T07:05:24.552864Z" 197 | }, 198 | "origin_pos": 14, 199 | "tab": [ 200 | "pytorch" 201 | ] 202 | }, 203 | "outputs": [ 204 | { 205 | "data": { 206 | "text/plain": [ 207 | "tensor([[[[4.]]]], grad_fn=)" 208 | ] 209 | }, 210 | "execution_count": 5, 211 | "metadata": {}, 212 | "output_type": "execute_result" 213 | } 214 | ], 215 | "source": [ 216 | "tconv = nn.ConvTranspose2d(1, 1, kernel_size=2, padding=1, bias=False)\n", 217 | "tconv.weight.data = K\n", 218 | "tconv(X)" 219 | ] 220 | }, 221 | { 222 | "cell_type": "code", 223 | "execution_count": 6, 224 | "id": "48064406", 225 | "metadata": { 226 | "execution": { 227 | "iopub.execute_input": "2023-08-18T07:05:24.557362Z", 228 | "iopub.status.busy": "2023-08-18T07:05:24.556727Z", 229 | "iopub.status.idle": "2023-08-18T07:05:24.563081Z", 230 | "shell.execute_reply": "2023-08-18T07:05:24.562365Z" 231 | }, 232 | "origin_pos": 18, 233 | "tab": [ 234 | "pytorch" 235 | ] 236 | }, 237 | "outputs": [ 238 | { 239 | "data": { 240 | "text/plain": [ 241 | "tensor([[[[0., 0., 0., 1.],\n", 242 | " [0., 0., 2., 3.],\n", 243 | " [0., 2., 0., 3.],\n", 244 | " [4., 6., 6., 9.]]]], grad_fn=)" 245 | ] 246 | }, 247 | "execution_count": 6, 248 | "metadata": {}, 249 | "output_type": "execute_result" 250 | } 251 | ], 252 | "source": [ 253 | "tconv = nn.ConvTranspose2d(1, 1, kernel_size=2, stride=2, bias=False)\n", 254 | "tconv.weight.data = K\n", 255 | "tconv(X)" 256 | ] 257 | }, 258 | { 259 | "cell_type": "code", 260 | "execution_count": 7, 261 | "id": "5e7033d7", 262 | 
"metadata": { 263 | "execution": { 264 | "iopub.execute_input": "2023-08-18T07:05:24.566613Z", 265 | "iopub.status.busy": "2023-08-18T07:05:24.565990Z", 266 | "iopub.status.idle": "2023-08-18T07:05:24.577437Z", 267 | "shell.execute_reply": "2023-08-18T07:05:24.576434Z" 268 | }, 269 | "origin_pos": 22, 270 | "tab": [ 271 | "pytorch" 272 | ] 273 | }, 274 | "outputs": [ 275 | { 276 | "data": { 277 | "text/plain": [ 278 | "True" 279 | ] 280 | }, 281 | "execution_count": 7, 282 | "metadata": {}, 283 | "output_type": "execute_result" 284 | } 285 | ], 286 | "source": [ 287 | "X = torch.rand(size=(1, 10, 16, 16))\n", 288 | "conv = nn.Conv2d(10, 20, kernel_size=5, padding=2, stride=3)\n", 289 | "tconv = nn.ConvTranspose2d(20, 10, kernel_size=5, padding=2, stride=3)\n", 290 | "tconv(conv(X)).shape == X.shape" 291 | ] 292 | }, 293 | { 294 | "cell_type": "markdown", 295 | "id": "12aa0878", 296 | "metadata": { 297 | "slideshow": { 298 | "slide_type": "slide" 299 | } 300 | }, 301 | "source": [ 302 | "与矩阵变换的联系" 303 | ] 304 | }, 305 | { 306 | "cell_type": "code", 307 | "execution_count": 8, 308 | "id": "260d5c6d", 309 | "metadata": { 310 | "execution": { 311 | "iopub.execute_input": "2023-08-18T07:05:24.581485Z", 312 | "iopub.status.busy": "2023-08-18T07:05:24.580866Z", 313 | "iopub.status.idle": "2023-08-18T07:05:24.589179Z", 314 | "shell.execute_reply": "2023-08-18T07:05:24.588233Z" 315 | }, 316 | "origin_pos": 25, 317 | "tab": [ 318 | "pytorch" 319 | ] 320 | }, 321 | "outputs": [ 322 | { 323 | "data": { 324 | "text/plain": [ 325 | "tensor([[27., 37.],\n", 326 | " [57., 67.]])" 327 | ] 328 | }, 329 | "execution_count": 8, 330 | "metadata": {}, 331 | "output_type": "execute_result" 332 | } 333 | ], 334 | "source": [ 335 | "X = torch.arange(9.0).reshape(3, 3)\n", 336 | "K = torch.tensor([[1.0, 2.0], [3.0, 4.0]])\n", 337 | "Y = d2l.corr2d(X, K)\n", 338 | "Y" 339 | ] 340 | }, 341 | { 342 | "cell_type": "code", 343 | "execution_count": 9, 344 | "id": "d9f6ce2b", 345 | "metadata": { 
346 | "execution": { 347 | "iopub.execute_input": "2023-08-18T07:05:24.592769Z", 348 | "iopub.status.busy": "2023-08-18T07:05:24.592164Z", 349 | "iopub.status.idle": "2023-08-18T07:05:24.602392Z", 350 | "shell.execute_reply": "2023-08-18T07:05:24.601439Z" 351 | }, 352 | "origin_pos": 28, 353 | "tab": [ 354 | "pytorch" 355 | ] 356 | }, 357 | "outputs": [ 358 | { 359 | "data": { 360 | "text/plain": [ 361 | "tensor([[1., 2., 0., 3., 4., 0., 0., 0., 0.],\n", 362 | " [0., 1., 2., 0., 3., 4., 0., 0., 0.],\n", 363 | " [0., 0., 0., 1., 2., 0., 3., 4., 0.],\n", 364 | " [0., 0., 0., 0., 1., 2., 0., 3., 4.]])" 365 | ] 366 | }, 367 | "execution_count": 9, 368 | "metadata": {}, 369 | "output_type": "execute_result" 370 | } 371 | ], 372 | "source": [ 373 | "def kernel2matrix(K):\n", 374 | " k, W = torch.zeros(5), torch.zeros((4, 9))\n", 375 | " k[:2], k[3:5] = K[0, :], K[1, :]\n", 376 | " W[0, :5], W[1, 1:6], W[2, 3:8], W[3, 4:] = k, k, k, k\n", 377 | " return W\n", 378 | "\n", 379 | "W = kernel2matrix(K)\n", 380 | "W" 381 | ] 382 | }, 383 | { 384 | "cell_type": "code", 385 | "execution_count": 10, 386 | "id": "1fb803d0", 387 | "metadata": { 388 | "execution": { 389 | "iopub.execute_input": "2023-08-18T07:05:24.606249Z", 390 | "iopub.status.busy": "2023-08-18T07:05:24.605496Z", 391 | "iopub.status.idle": "2023-08-18T07:05:24.612872Z", 392 | "shell.execute_reply": "2023-08-18T07:05:24.611900Z" 393 | }, 394 | "origin_pos": 31, 395 | "tab": [ 396 | "pytorch" 397 | ] 398 | }, 399 | "outputs": [ 400 | { 401 | "data": { 402 | "text/plain": [ 403 | "tensor([[True, True],\n", 404 | " [True, True]])" 405 | ] 406 | }, 407 | "execution_count": 10, 408 | "metadata": {}, 409 | "output_type": "execute_result" 410 | } 411 | ], 412 | "source": [ 413 | "Y == torch.matmul(W, X.reshape(-1)).reshape(2, 2)" 414 | ] 415 | }, 416 | { 417 | "cell_type": "code", 418 | "execution_count": 11, 419 | "id": "f1a55ff1", 420 | "metadata": { 421 | "execution": { 422 | "iopub.execute_input": 
"2023-08-18T07:05:24.616575Z", 423 | "iopub.status.busy": "2023-08-18T07:05:24.615826Z", 424 | "iopub.status.idle": "2023-08-18T07:05:24.623063Z", 425 | "shell.execute_reply": "2023-08-18T07:05:24.622144Z" 426 | }, 427 | "origin_pos": 34, 428 | "tab": [ 429 | "pytorch" 430 | ] 431 | }, 432 | "outputs": [ 433 | { 434 | "data": { 435 | "text/plain": [ 436 | "tensor([[True, True, True],\n", 437 | " [True, True, True],\n", 438 | " [True, True, True]])" 439 | ] 440 | }, 441 | "execution_count": 11, 442 | "metadata": {}, 443 | "output_type": "execute_result" 444 | } 445 | ], 446 | "source": [ 447 | "Z = trans_conv(Y, K)\n", 448 | "Z == torch.matmul(W.T, Y.reshape(-1)).reshape(3, 3)" 449 | ] 450 | } 451 | ], 452 | "metadata": { 453 | "celltoolbar": "Slideshow", 454 | "language_info": { 455 | "name": "python" 456 | }, 457 | "required_libs": [], 458 | "rise": { 459 | "autolaunch": true, 460 | "enable_chalkboard": true, 461 | "overlay": "
", 462 | "scroll": true 463 | } 464 | }, 465 | "nbformat": 4, 466 | "nbformat_minor": 5 467 | } -------------------------------------------------------------------------------- /chapter_convolutional-modern/rise.css: -------------------------------------------------------------------------------- 1 | 2 | div.text_cell_render.rendered_html { 3 | padding: 0.35em 0.1em; 4 | } 5 | 6 | div.code_cell { 7 | font-size: 120%; 8 | } 9 | 10 | div.my-top-right { 11 | position: absolute; 12 | right: 5%; 13 | top: 1em; 14 | font-size: 2em; 15 | } 16 | 17 | div.my-top-left { 18 | position: absolute; 19 | left: 5%; 20 | top: 1em; 21 | font-size: 2em; 22 | } 23 | -------------------------------------------------------------------------------- /chapter_convolutional-neural-networks/channels.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "id": "a0f37efc", 6 | "metadata": { 7 | "slideshow": { 8 | "slide_type": "-" 9 | } 10 | }, 11 | "source": [ 12 | "# 多输入多输出通道\n", 13 | "\n", 14 | "实现一下多输入通道互相关运算" 15 | ] 16 | }, 17 | { 18 | "cell_type": "code", 19 | "execution_count": 2, 20 | "id": "0cff24d4", 21 | "metadata": { 22 | "execution": { 23 | "iopub.execute_input": "2023-08-18T07:02:38.339612Z", 24 | "iopub.status.busy": "2023-08-18T07:02:38.339031Z", 25 | "iopub.status.idle": "2023-08-18T07:02:38.344485Z", 26 | "shell.execute_reply": "2023-08-18T07:02:38.343326Z" 27 | }, 28 | "origin_pos": 4, 29 | "tab": [ 30 | "pytorch" 31 | ] 32 | }, 33 | "outputs": [], 34 | "source": [ 35 | "import torch\n", 36 | "from d2l import torch as d2l\n", 37 | "\n", 38 | "def corr2d_multi_in(X, K):\n", 39 | " return sum(d2l.corr2d(x, k) for x, k in zip(X, K))" 40 | ] 41 | }, 42 | { 43 | "cell_type": "markdown", 44 | "id": "aafb58cc", 45 | "metadata": { 46 | "slideshow": { 47 | "slide_type": "-" 48 | } 49 | }, 50 | "source": [ 51 | "验证互相关运算的输出" 52 | ] 53 | }, 54 | { 55 | "cell_type": "code", 56 | 
"execution_count": 3, 57 | "id": "5a60b8f9", 58 | "metadata": { 59 | "execution": { 60 | "iopub.execute_input": "2023-08-18T07:02:38.347937Z", 61 | "iopub.status.busy": "2023-08-18T07:02:38.347463Z", 62 | "iopub.status.idle": "2023-08-18T07:02:38.380997Z", 63 | "shell.execute_reply": "2023-08-18T07:02:38.379885Z" 64 | }, 65 | "origin_pos": 7, 66 | "tab": [ 67 | "pytorch" 68 | ] 69 | }, 70 | "outputs": [ 71 | { 72 | "data": { 73 | "text/plain": [ 74 | "tensor([[ 56., 72.],\n", 75 | " [104., 120.]])" 76 | ] 77 | }, 78 | "execution_count": 3, 79 | "metadata": {}, 80 | "output_type": "execute_result" 81 | } 82 | ], 83 | "source": [ 84 | "X = torch.tensor([[[0.0, 1.0, 2.0], [3.0, 4.0, 5.0], [6.0, 7.0, 8.0]],\n", 85 | " [[1.0, 2.0, 3.0], [4.0, 5.0, 6.0], [7.0, 8.0, 9.0]]])\n", 86 | "K = torch.tensor([[[0.0, 1.0], [2.0, 3.0]], [[1.0, 2.0], [3.0, 4.0]]])\n", 87 | "\n", 88 | "corr2d_multi_in(X, K)" 89 | ] 90 | }, 91 | { 92 | "cell_type": "markdown", 93 | "id": "4b4c9aa1", 94 | "metadata": { 95 | "slideshow": { 96 | "slide_type": "slide" 97 | } 98 | }, 99 | "source": [ 100 | "计算多个通道的输出的互相关函数" 101 | ] 102 | }, 103 | { 104 | "cell_type": "code", 105 | "execution_count": 5, 106 | "id": "6dde7543", 107 | "metadata": { 108 | "execution": { 109 | "iopub.execute_input": "2023-08-18T07:02:38.392733Z", 110 | "iopub.status.busy": "2023-08-18T07:02:38.392298Z", 111 | "iopub.status.idle": "2023-08-18T07:02:38.399310Z", 112 | "shell.execute_reply": "2023-08-18T07:02:38.398211Z" 113 | }, 114 | "origin_pos": 11, 115 | "tab": [ 116 | "pytorch" 117 | ] 118 | }, 119 | "outputs": [ 120 | { 121 | "data": { 122 | "text/plain": [ 123 | "torch.Size([3, 2, 2, 2])" 124 | ] 125 | }, 126 | "execution_count": 5, 127 | "metadata": {}, 128 | "output_type": "execute_result" 129 | } 130 | ], 131 | "source": [ 132 | "def corr2d_multi_in_out(X, K):\n", 133 | " return torch.stack([corr2d_multi_in(X, k) for k in K], 0)\n", 134 | "\n", 135 | "K = torch.stack((K, K + 1, K + 2), 0)\n", 136 | "K.shape" 137 | ] 138 
| }, 139 | { 140 | "cell_type": "code", 141 | "execution_count": 6, 142 | "id": "86b2b71f", 143 | "metadata": { 144 | "execution": { 145 | "iopub.execute_input": "2023-08-18T07:02:38.403159Z", 146 | "iopub.status.busy": "2023-08-18T07:02:38.402457Z", 147 | "iopub.status.idle": "2023-08-18T07:02:38.410409Z", 148 | "shell.execute_reply": "2023-08-18T07:02:38.409310Z" 149 | }, 150 | "origin_pos": 13, 151 | "tab": [ 152 | "pytorch" 153 | ] 154 | }, 155 | "outputs": [ 156 | { 157 | "data": { 158 | "text/plain": [ 159 | "tensor([[[ 56., 72.],\n", 160 | " [104., 120.]],\n", 161 | "\n", 162 | " [[ 76., 100.],\n", 163 | " [148., 172.]],\n", 164 | "\n", 165 | " [[ 96., 128.],\n", 166 | " [192., 224.]]])" 167 | ] 168 | }, 169 | "execution_count": 6, 170 | "metadata": {}, 171 | "output_type": "execute_result" 172 | } 173 | ], 174 | "source": [ 175 | "corr2d_multi_in_out(X, K)" 176 | ] 177 | }, 178 | { 179 | "cell_type": "markdown", 180 | "id": "cafe51a2", 181 | "metadata": { 182 | "slideshow": { 183 | "slide_type": "slide" 184 | } 185 | }, 186 | "source": [ 187 | "1x1卷积" 188 | ] 189 | }, 190 | { 191 | "cell_type": "code", 192 | "execution_count": 9, 193 | "id": "7250eae2", 194 | "metadata": { 195 | "execution": { 196 | "iopub.execute_input": "2023-08-18T07:02:38.430613Z", 197 | "iopub.status.busy": "2023-08-18T07:02:38.430184Z", 198 | "iopub.status.idle": "2023-08-18T07:02:38.438715Z", 199 | "shell.execute_reply": "2023-08-18T07:02:38.437662Z" 200 | }, 201 | "origin_pos": 19, 202 | "tab": [ 203 | "pytorch" 204 | ] 205 | }, 206 | "outputs": [], 207 | "source": [ 208 | "def corr2d_multi_in_out_1x1(X, K):\n", 209 | " c_i, h, w = X.shape\n", 210 | " c_o = K.shape[0]\n", 211 | " X = X.reshape((c_i, h * w))\n", 212 | " K = K.reshape((c_o, c_i))\n", 213 | " Y = torch.matmul(K, X)\n", 214 | " return Y.reshape((c_o, h, w))\n", 215 | "\n", 216 | "X = torch.normal(0, 1, (3, 3, 3))\n", 217 | "K = torch.normal(0, 1, (2, 3, 1, 1))\n", 218 | "\n", 219 | "Y1 = corr2d_multi_in_out_1x1(X, 
K)\n", 220 | "Y2 = corr2d_multi_in_out(X, K)\n", 221 | "assert float(torch.abs(Y1 - Y2).sum()) < 1e-6" 222 | ] 223 | } 224 | ], 225 | "metadata": { 226 | "celltoolbar": "Slideshow", 227 | "language_info": { 228 | "name": "python" 229 | }, 230 | "required_libs": [], 231 | "rise": { 232 | "autolaunch": true, 233 | "enable_chalkboard": true, 234 | "overlay": "
", 235 | "scroll": true 236 | } 237 | }, 238 | "nbformat": 4, 239 | "nbformat_minor": 5 240 | } -------------------------------------------------------------------------------- /chapter_convolutional-neural-networks/conv-layer.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "id": "50832220", 6 | "metadata": { 7 | "slideshow": { 8 | "slide_type": "-" 9 | } 10 | }, 11 | "source": [ 12 | "# 图像卷积\n", 13 | "\n", 14 | "互相关运算" 15 | ] 16 | }, 17 | { 18 | "cell_type": "code", 19 | "execution_count": 2, 20 | "id": "16abe7ca", 21 | "metadata": { 22 | "execution": { 23 | "iopub.execute_input": "2023-08-18T07:07:28.563668Z", 24 | "iopub.status.busy": "2023-08-18T07:07:28.562986Z", 25 | "iopub.status.idle": "2023-08-18T07:07:28.569424Z", 26 | "shell.execute_reply": "2023-08-18T07:07:28.568319Z" 27 | }, 28 | "origin_pos": 4, 29 | "tab": [ 30 | "pytorch" 31 | ] 32 | }, 33 | "outputs": [], 34 | "source": [ 35 | "import torch\n", 36 | "from torch import nn\n", 37 | "from d2l import torch as d2l\n", 38 | "\n", 39 | "def corr2d(X, K): \n", 40 | " \"\"\"计算二维互相关运算\"\"\"\n", 41 | " h, w = K.shape\n", 42 | " Y = torch.zeros((X.shape[0] - h + 1, X.shape[1] - w + 1))\n", 43 | " for i in range(Y.shape[0]):\n", 44 | " for j in range(Y.shape[1]):\n", 45 | " Y[i, j] = (X[i:i + h, j:j + w] * K).sum()\n", 46 | " return Y" 47 | ] 48 | }, 49 | { 50 | "cell_type": "markdown", 51 | "id": "42171987", 52 | "metadata": { 53 | "slideshow": { 54 | "slide_type": "slide" 55 | } 56 | }, 57 | "source": [ 58 | "验证上述二维互相关运算的输出" 59 | ] 60 | }, 61 | { 62 | "cell_type": "code", 63 | "execution_count": 3, 64 | "id": "6f84e512", 65 | "metadata": { 66 | "execution": { 67 | "iopub.execute_input": "2023-08-18T07:07:28.572958Z", 68 | "iopub.status.busy": "2023-08-18T07:07:28.572449Z", 69 | "iopub.status.idle": "2023-08-18T07:07:28.604854Z", 70 | "shell.execute_reply": "2023-08-18T07:07:28.603813Z" 71 | }, 72 | 
"origin_pos": 7, 73 | "tab": [ 74 | "pytorch" 75 | ] 76 | }, 77 | "outputs": [ 78 | { 79 | "data": { 80 | "text/plain": [ 81 | "tensor([[19., 25.],\n", 82 | " [37., 43.]])" 83 | ] 84 | }, 85 | "execution_count": 3, 86 | "metadata": {}, 87 | "output_type": "execute_result" 88 | } 89 | ], 90 | "source": [ 91 | "X = torch.tensor([[0.0, 1.0, 2.0], [3.0, 4.0, 5.0], [6.0, 7.0, 8.0]])\n", 92 | "K = torch.tensor([[0.0, 1.0], [2.0, 3.0]])\n", 93 | "corr2d(X, K)" 94 | ] 95 | }, 96 | { 97 | "cell_type": "markdown", 98 | "id": "5abc9d97", 99 | "metadata": { 100 | "slideshow": { 101 | "slide_type": "slide" 102 | } 103 | }, 104 | "source": [ 105 | "实现二维卷积层" 106 | ] 107 | }, 108 | { 109 | "cell_type": "code", 110 | "execution_count": 4, 111 | "id": "450def67", 112 | "metadata": { 113 | "execution": { 114 | "iopub.execute_input": "2023-08-18T07:07:28.610672Z", 115 | "iopub.status.busy": "2023-08-18T07:07:28.609819Z", 116 | "iopub.status.idle": "2023-08-18T07:07:28.615602Z", 117 | "shell.execute_reply": "2023-08-18T07:07:28.614632Z" 118 | }, 119 | "origin_pos": 10, 120 | "tab": [ 121 | "pytorch" 122 | ] 123 | }, 124 | "outputs": [], 125 | "source": [ 126 | "class Conv2D(nn.Module):\n", 127 | " def __init__(self, kernel_size):\n", 128 | " super().__init__()\n", 129 | " self.weight = nn.Parameter(torch.rand(kernel_size))\n", 130 | " self.bias = nn.Parameter(torch.zeros(1))\n", 131 | "\n", 132 | " def forward(self, x):\n", 133 | " return corr2d(x, self.weight) + self.bias" 134 | ] 135 | }, 136 | { 137 | "cell_type": "markdown", 138 | "id": "c5b49b95", 139 | "metadata": { 140 | "slideshow": { 141 | "slide_type": "slide" 142 | } 143 | }, 144 | "source": [ 145 | "卷积层的一个简单应用:\n", 146 | "检测图像中不同颜色的边缘" 147 | ] 148 | }, 149 | { 150 | "cell_type": "code", 151 | "execution_count": 5, 152 | "id": "dee1bc79", 153 | "metadata": { 154 | "execution": { 155 | "iopub.execute_input": "2023-08-18T07:07:28.620077Z", 156 | "iopub.status.busy": "2023-08-18T07:07:28.619277Z", 157 | "iopub.status.idle": 
"2023-08-18T07:07:28.626719Z", 158 | "shell.execute_reply": "2023-08-18T07:07:28.625746Z" 159 | }, 160 | "origin_pos": 14, 161 | "tab": [ 162 | "pytorch" 163 | ] 164 | }, 165 | "outputs": [ 166 | { 167 | "data": { 168 | "text/plain": [ 169 | "tensor([[1., 1., 0., 0., 0., 0., 1., 1.],\n", 170 | " [1., 1., 0., 0., 0., 0., 1., 1.],\n", 171 | " [1., 1., 0., 0., 0., 0., 1., 1.],\n", 172 | " [1., 1., 0., 0., 0., 0., 1., 1.],\n", 173 | " [1., 1., 0., 0., 0., 0., 1., 1.],\n", 174 | " [1., 1., 0., 0., 0., 0., 1., 1.]])" 175 | ] 176 | }, 177 | "execution_count": 5, 178 | "metadata": {}, 179 | "output_type": "execute_result" 180 | } 181 | ], 182 | "source": [ 183 | "X = torch.ones((6, 8))\n", 184 | "X[:, 2:6] = 0\n", 185 | "X" 186 | ] 187 | }, 188 | { 189 | "cell_type": "code", 190 | "execution_count": 6, 191 | "id": "d042bda0", 192 | "metadata": { 193 | "execution": { 194 | "iopub.execute_input": "2023-08-18T07:07:28.630101Z", 195 | "iopub.status.busy": "2023-08-18T07:07:28.629606Z", 196 | "iopub.status.idle": "2023-08-18T07:07:28.634133Z", 197 | "shell.execute_reply": "2023-08-18T07:07:28.633165Z" 198 | }, 199 | "origin_pos": 17, 200 | "tab": [ 201 | "pytorch" 202 | ] 203 | }, 204 | "outputs": [], 205 | "source": [ 206 | "K = torch.tensor([[1.0, -1.0]])" 207 | ] 208 | }, 209 | { 210 | "cell_type": "markdown", 211 | "id": "02307562", 212 | "metadata": { 213 | "slideshow": { 214 | "slide_type": "slide" 215 | } 216 | }, 217 | "source": [ 218 | "输出`Y`中的1代表从白色到黑色的边缘,-1代表从黑色到白色的边缘" 219 | ] 220 | }, 221 | { 222 | "cell_type": "code", 223 | "execution_count": 7, 224 | "id": "36de9e2a", 225 | "metadata": { 226 | "execution": { 227 | "iopub.execute_input": "2023-08-18T07:07:28.639056Z", 228 | "iopub.status.busy": "2023-08-18T07:07:28.638505Z", 229 | "iopub.status.idle": "2023-08-18T07:07:28.646532Z", 230 | "shell.execute_reply": "2023-08-18T07:07:28.645509Z" 231 | }, 232 | "origin_pos": 19, 233 | "tab": [ 234 | "pytorch" 235 | ] 236 | }, 237 | "outputs": [ 238 | { 239 | "data": { 240 
| "text/plain": [ 241 | "tensor([[ 0., 1., 0., 0., 0., -1., 0.],\n", 242 | " [ 0., 1., 0., 0., 0., -1., 0.],\n", 243 | " [ 0., 1., 0., 0., 0., -1., 0.],\n", 244 | " [ 0., 1., 0., 0., 0., -1., 0.],\n", 245 | " [ 0., 1., 0., 0., 0., -1., 0.],\n", 246 | " [ 0., 1., 0., 0., 0., -1., 0.]])" 247 | ] 248 | }, 249 | "execution_count": 7, 250 | "metadata": {}, 251 | "output_type": "execute_result" 252 | } 253 | ], 254 | "source": [ 255 | "Y = corr2d(X, K)\n", 256 | "Y" 257 | ] 258 | }, 259 | { 260 | "cell_type": "markdown", 261 | "id": "f8fc54d3", 262 | "metadata": { 263 | "slideshow": { 264 | "slide_type": "slide" 265 | } 266 | }, 267 | "source": [ 268 | "卷积核`K`只可以检测垂直边缘" 269 | ] 270 | }, 271 | { 272 | "cell_type": "code", 273 | "execution_count": 8, 274 | "id": "0a754b2d", 275 | "metadata": { 276 | "execution": { 277 | "iopub.execute_input": "2023-08-18T07:07:28.651371Z", 278 | "iopub.status.busy": "2023-08-18T07:07:28.650819Z", 279 | "iopub.status.idle": "2023-08-18T07:07:28.658419Z", 280 | "shell.execute_reply": "2023-08-18T07:07:28.657436Z" 281 | }, 282 | "origin_pos": 21, 283 | "tab": [ 284 | "pytorch" 285 | ] 286 | }, 287 | "outputs": [ 288 | { 289 | "data": { 290 | "text/plain": [ 291 | "tensor([[0., 0., 0., 0., 0.],\n", 292 | " [0., 0., 0., 0., 0.],\n", 293 | " [0., 0., 0., 0., 0.],\n", 294 | " [0., 0., 0., 0., 0.],\n", 295 | " [0., 0., 0., 0., 0.],\n", 296 | " [0., 0., 0., 0., 0.],\n", 297 | " [0., 0., 0., 0., 0.],\n", 298 | " [0., 0., 0., 0., 0.]])" 299 | ] 300 | }, 301 | "execution_count": 8, 302 | "metadata": {}, 303 | "output_type": "execute_result" 304 | } 305 | ], 306 | "source": [ 307 | "corr2d(X.t(), K)" 308 | ] 309 | }, 310 | { 311 | "cell_type": "markdown", 312 | "id": "d3c3d2e5", 313 | "metadata": { 314 | "slideshow": { 315 | "slide_type": "slide" 316 | } 317 | }, 318 | "source": [ 319 | "学习由`X`生成`Y`的卷积核" 320 | ] 321 | }, 322 | { 323 | "cell_type": "code", 324 | "execution_count": 9, 325 | "id": "2b423578", 326 | "metadata": { 327 | "execution": { 328 | 
"iopub.execute_input": "2023-08-18T07:07:28.662260Z", 329 | "iopub.status.busy": "2023-08-18T07:07:28.661527Z", 330 | "iopub.status.idle": "2023-08-18T07:07:28.681412Z", 331 | "shell.execute_reply": "2023-08-18T07:07:28.680192Z" 332 | }, 333 | "origin_pos": 24, 334 | "tab": [ 335 | "pytorch" 336 | ] 337 | }, 338 | "outputs": [ 339 | { 340 | "name": "stdout", 341 | "output_type": "stream", 342 | "text": [ 343 | "epoch 2, loss 6.422\n", 344 | "epoch 4, loss 1.225\n", 345 | "epoch 6, loss 0.266\n", 346 | "epoch 8, loss 0.070\n", 347 | "epoch 10, loss 0.022\n" 348 | ] 349 | } 350 | ], 351 | "source": [ 352 | "conv2d = nn.Conv2d(1,1, kernel_size=(1, 2), bias=False)\n", 353 | "\n", 354 | "X = X.reshape((1, 1, 6, 8))\n", 355 | "Y = Y.reshape((1, 1, 6, 7))\n", 356 | "lr = 3e-2\n", 357 | "\n", 358 | "for i in range(10):\n", 359 | " Y_hat = conv2d(X)\n", 360 | " l = (Y_hat - Y) ** 2\n", 361 | " conv2d.zero_grad()\n", 362 | " l.sum().backward()\n", 363 | " conv2d.weight.data[:] -= lr * conv2d.weight.grad\n", 364 | " if (i + 1) % 2 == 0:\n", 365 | " print(f'epoch {i+1}, loss {l.sum():.3f}')" 366 | ] 367 | }, 368 | { 369 | "cell_type": "markdown", 370 | "id": "292614cd", 371 | "metadata": { 372 | "slideshow": { 373 | "slide_type": "slide" 374 | } 375 | }, 376 | "source": [ 377 | "所学的卷积核的权重张量" 378 | ] 379 | }, 380 | { 381 | "cell_type": "code", 382 | "execution_count": 10, 383 | "id": "b40515e8", 384 | "metadata": { 385 | "execution": { 386 | "iopub.execute_input": "2023-08-18T07:07:28.684721Z", 387 | "iopub.status.busy": "2023-08-18T07:07:28.684428Z", 388 | "iopub.status.idle": "2023-08-18T07:07:28.691507Z", 389 | "shell.execute_reply": "2023-08-18T07:07:28.690512Z" 390 | }, 391 | "origin_pos": 29, 392 | "tab": [ 393 | "pytorch" 394 | ] 395 | }, 396 | "outputs": [ 397 | { 398 | "data": { 399 | "text/plain": [ 400 | "tensor([[ 1.0010, -0.9739]])" 401 | ] 402 | }, 403 | "execution_count": 10, 404 | "metadata": {}, 405 | "output_type": "execute_result" 406 | } 407 | ], 408 | 
"source": [ 409 | "conv2d.weight.data.reshape((1, 2))" 410 | ] 411 | } 412 | ], 413 | "metadata": { 414 | "celltoolbar": "Slideshow", 415 | "language_info": { 416 | "name": "python" 417 | }, 418 | "required_libs": [], 419 | "rise": { 420 | "autolaunch": true, 421 | "enable_chalkboard": true, 422 | "overlay": "
", 423 | "scroll": true 424 | } 425 | }, 426 | "nbformat": 4, 427 | "nbformat_minor": 5 428 | } -------------------------------------------------------------------------------- /chapter_convolutional-neural-networks/padding-and-strides.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "id": "7d4404d4", 6 | "metadata": { 7 | "slideshow": { 8 | "slide_type": "-" 9 | } 10 | }, 11 | "source": [ 12 | "# 填充和步幅\n", 13 | "\n", 14 | "在所有侧边填充1个像素" 15 | ] 16 | }, 17 | { 18 | "cell_type": "code", 19 | "execution_count": 1, 20 | "id": "ee25ca28", 21 | "metadata": { 22 | "execution": { 23 | "iopub.execute_input": "2023-08-18T07:00:27.440657Z", 24 | "iopub.status.busy": "2023-08-18T07:00:27.439788Z", 25 | "iopub.status.idle": "2023-08-18T07:00:28.396461Z", 26 | "shell.execute_reply": "2023-08-18T07:00:28.395508Z" 27 | }, 28 | "origin_pos": 2, 29 | "tab": [ 30 | "pytorch" 31 | ] 32 | }, 33 | "outputs": [ 34 | { 35 | "data": { 36 | "text/plain": [ 37 | "torch.Size([8, 8])" 38 | ] 39 | }, 40 | "execution_count": 1, 41 | "metadata": {}, 42 | "output_type": "execute_result" 43 | } 44 | ], 45 | "source": [ 46 | "import torch\n", 47 | "from torch import nn\n", 48 | "\n", 49 | "\n", 50 | "def comp_conv2d(conv2d, X):\n", 51 | " X = X.reshape((1, 1) + X.shape)\n", 52 | " Y = conv2d(X)\n", 53 | " return Y.reshape(Y.shape[2:])\n", 54 | "\n", 55 | "conv2d = nn.Conv2d(1, 1, kernel_size=3, padding=1)\n", 56 | "X = torch.rand(size=(8, 8))\n", 57 | "comp_conv2d(conv2d, X).shape" 58 | ] 59 | }, 60 | { 61 | "cell_type": "markdown", 62 | "id": "04e04824", 63 | "metadata": { 64 | "slideshow": { 65 | "slide_type": "slide" 66 | } 67 | }, 68 | "source": [ 69 | "填充不同的高度和宽度" 70 | ] 71 | }, 72 | { 73 | "cell_type": "code", 74 | "execution_count": 2, 75 | "id": "5dadebb1", 76 | "metadata": { 77 | "execution": { 78 | "iopub.execute_input": "2023-08-18T07:00:28.400923Z", 79 | "iopub.status.busy": 
"2023-08-18T07:00:28.400085Z", 80 | "iopub.status.idle": "2023-08-18T07:00:28.406887Z", 81 | "shell.execute_reply": "2023-08-18T07:00:28.406085Z" 82 | }, 83 | "origin_pos": 7, 84 | "tab": [ 85 | "pytorch" 86 | ] 87 | }, 88 | "outputs": [ 89 | { 90 | "data": { 91 | "text/plain": [ 92 | "torch.Size([8, 8])" 93 | ] 94 | }, 95 | "execution_count": 2, 96 | "metadata": {}, 97 | "output_type": "execute_result" 98 | } 99 | ], 100 | "source": [ 101 | "conv2d = nn.Conv2d(1, 1, kernel_size=(5, 3), padding=(2, 1))\n", 102 | "comp_conv2d(conv2d, X).shape" 103 | ] 104 | }, 105 | { 106 | "cell_type": "markdown", 107 | "id": "01e7aa78", 108 | "metadata": { 109 | "slideshow": { 110 | "slide_type": "slide" 111 | } 112 | }, 113 | "source": [ 114 | "将高度和宽度的步幅设置为2" 115 | ] 116 | }, 117 | { 118 | "cell_type": "code", 119 | "execution_count": 3, 120 | "id": "7b6ac278", 121 | "metadata": { 122 | "execution": { 123 | "iopub.execute_input": "2023-08-18T07:00:28.410395Z", 124 | "iopub.status.busy": "2023-08-18T07:00:28.410090Z", 125 | "iopub.status.idle": "2023-08-18T07:00:28.416621Z", 126 | "shell.execute_reply": "2023-08-18T07:00:28.415848Z" 127 | }, 128 | "origin_pos": 12, 129 | "tab": [ 130 | "pytorch" 131 | ] 132 | }, 133 | "outputs": [ 134 | { 135 | "data": { 136 | "text/plain": [ 137 | "torch.Size([4, 4])" 138 | ] 139 | }, 140 | "execution_count": 3, 141 | "metadata": {}, 142 | "output_type": "execute_result" 143 | } 144 | ], 145 | "source": [ 146 | "conv2d = nn.Conv2d(1, 1, kernel_size=3, padding=1, stride=2)\n", 147 | "comp_conv2d(conv2d, X).shape" 148 | ] 149 | }, 150 | { 151 | "cell_type": "markdown", 152 | "id": "53265c61", 153 | "metadata": { 154 | "slideshow": { 155 | "slide_type": "-" 156 | } 157 | }, 158 | "source": [ 159 | "一个稍微复杂的例子" 160 | ] 161 | }, 162 | { 163 | "cell_type": "code", 164 | "execution_count": 4, 165 | "id": "6f1c0e6c", 166 | "metadata": { 167 | "execution": { 168 | "iopub.execute_input": "2023-08-18T07:00:28.422070Z", 169 | "iopub.status.busy": 
"2023-08-18T07:00:28.421461Z", 170 | "iopub.status.idle": "2023-08-18T07:00:28.429200Z", 171 | "shell.execute_reply": "2023-08-18T07:00:28.427969Z" 172 | }, 173 | "origin_pos": 17, 174 | "tab": [ 175 | "pytorch" 176 | ] 177 | }, 178 | "outputs": [ 179 | { 180 | "data": { 181 | "text/plain": [ 182 | "torch.Size([2, 2])" 183 | ] 184 | }, 185 | "execution_count": 4, 186 | "metadata": {}, 187 | "output_type": "execute_result" 188 | } 189 | ], 190 | "source": [ 191 | "conv2d = nn.Conv2d(1, 1, kernel_size=(3, 5), padding=(0, 1), stride=(3, 4))\n", 192 | "comp_conv2d(conv2d, X).shape" 193 | ] 194 | } 195 | ], 196 | "metadata": { 197 | "celltoolbar": "Slideshow", 198 | "language_info": { 199 | "name": "python" 200 | }, 201 | "required_libs": [], 202 | "rise": { 203 | "autolaunch": true, 204 | "enable_chalkboard": true, 205 | "overlay": "
", 206 | "scroll": true 207 | } 208 | }, 209 | "nbformat": 4, 210 | "nbformat_minor": 5 211 | } -------------------------------------------------------------------------------- /chapter_convolutional-neural-networks/pooling.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "id": "a3a254cb", 6 | "metadata": { 7 | "slideshow": { 8 | "slide_type": "-" 9 | } 10 | }, 11 | "source": [ 12 | "# 汇聚层\n", 13 | "\n", 14 | "实现汇聚层的前向传播" 15 | ] 16 | }, 17 | { 18 | "cell_type": "code", 19 | "execution_count": 2, 20 | "id": "fe35adac", 21 | "metadata": { 22 | "execution": { 23 | "iopub.execute_input": "2023-08-18T07:02:20.228639Z", 24 | "iopub.status.busy": "2023-08-18T07:02:20.227964Z", 25 | "iopub.status.idle": "2023-08-18T07:02:20.234155Z", 26 | "shell.execute_reply": "2023-08-18T07:02:20.233266Z" 27 | }, 28 | "origin_pos": 4, 29 | "tab": [ 30 | "pytorch" 31 | ] 32 | }, 33 | "outputs": [], 34 | "source": [ 35 | "import torch\n", 36 | "from torch import nn\n", 37 | "from d2l import torch as d2l\n", 38 | "\n", 39 | "def pool2d(X, pool_size, mode='max'):\n", 40 | " p_h, p_w = pool_size\n", 41 | " Y = torch.zeros((X.shape[0] - p_h + 1, X.shape[1] - p_w + 1))\n", 42 | " for i in range(Y.shape[0]):\n", 43 | " for j in range(Y.shape[1]):\n", 44 | " if mode == 'max':\n", 45 | " Y[i, j] = X[i: i + p_h, j: j + p_w].max()\n", 46 | " elif mode == 'avg':\n", 47 | " Y[i, j] = X[i: i + p_h, j: j + p_w].mean()\n", 48 | " return Y" 49 | ] 50 | }, 51 | { 52 | "cell_type": "markdown", 53 | "id": "0cee43f1", 54 | "metadata": { 55 | "slideshow": { 56 | "slide_type": "slide" 57 | } 58 | }, 59 | "source": [ 60 | "验证二维最大汇聚层的输出" 61 | ] 62 | }, 63 | { 64 | "cell_type": "code", 65 | "execution_count": 3, 66 | "id": "3a781c85", 67 | "metadata": { 68 | "execution": { 69 | "iopub.execute_input": "2023-08-18T07:02:20.237767Z", 70 | "iopub.status.busy": "2023-08-18T07:02:20.237211Z", 71 | "iopub.status.idle": 
"2023-08-18T07:02:20.268065Z", 72 | "shell.execute_reply": "2023-08-18T07:02:20.267212Z" 73 | }, 74 | "origin_pos": 7, 75 | "tab": [ 76 | "pytorch" 77 | ] 78 | }, 79 | "outputs": [ 80 | { 81 | "data": { 82 | "text/plain": [ 83 | "tensor([[4., 5.],\n", 84 | " [7., 8.]])" 85 | ] 86 | }, 87 | "execution_count": 3, 88 | "metadata": {}, 89 | "output_type": "execute_result" 90 | } 91 | ], 92 | "source": [ 93 | "X = torch.tensor([[0.0, 1.0, 2.0], [3.0, 4.0, 5.0], [6.0, 7.0, 8.0]])\n", 94 | "pool2d(X, (2, 2))" 95 | ] 96 | }, 97 | { 98 | "cell_type": "markdown", 99 | "id": "2a7b5d65", 100 | "metadata": { 101 | "slideshow": { 102 | "slide_type": "-" 103 | } 104 | }, 105 | "source": [ 106 | "验证平均汇聚层" 107 | ] 108 | }, 109 | { 110 | "cell_type": "code", 111 | "execution_count": 4, 112 | "id": "4f9a1ffd", 113 | "metadata": { 114 | "execution": { 115 | "iopub.execute_input": "2023-08-18T07:02:20.272001Z", 116 | "iopub.status.busy": "2023-08-18T07:02:20.271411Z", 117 | "iopub.status.idle": "2023-08-18T07:02:20.277849Z", 118 | "shell.execute_reply": "2023-08-18T07:02:20.276928Z" 119 | }, 120 | "origin_pos": 9, 121 | "tab": [ 122 | "pytorch" 123 | ] 124 | }, 125 | "outputs": [ 126 | { 127 | "data": { 128 | "text/plain": [ 129 | "tensor([[2., 3.],\n", 130 | " [5., 6.]])" 131 | ] 132 | }, 133 | "execution_count": 4, 134 | "metadata": {}, 135 | "output_type": "execute_result" 136 | } 137 | ], 138 | "source": [ 139 | "pool2d(X, (2, 2), 'avg')" 140 | ] 141 | }, 142 | { 143 | "cell_type": "markdown", 144 | "id": "941b8124", 145 | "metadata": { 146 | "slideshow": { 147 | "slide_type": "slide" 148 | } 149 | }, 150 | "source": [ 151 | "填充和步幅" 152 | ] 153 | }, 154 | { 155 | "cell_type": "code", 156 | "execution_count": 5, 157 | "id": "140d08f5", 158 | "metadata": { 159 | "execution": { 160 | "iopub.execute_input": "2023-08-18T07:02:20.281458Z", 161 | "iopub.status.busy": "2023-08-18T07:02:20.280874Z", 162 | "iopub.status.idle": "2023-08-18T07:02:20.287391Z", 163 | "shell.execute_reply": 
"2023-08-18T07:02:20.286578Z" 164 | }, 165 | "origin_pos": 12, 166 | "tab": [ 167 | "pytorch" 168 | ] 169 | }, 170 | "outputs": [ 171 | { 172 | "data": { 173 | "text/plain": [ 174 | "tensor([[[[ 0., 1., 2., 3.],\n", 175 | " [ 4., 5., 6., 7.],\n", 176 | " [ 8., 9., 10., 11.],\n", 177 | " [12., 13., 14., 15.]]]])" 178 | ] 179 | }, 180 | "execution_count": 5, 181 | "metadata": {}, 182 | "output_type": "execute_result" 183 | } 184 | ], 185 | "source": [ 186 | "X = torch.arange(16, dtype=torch.float32).reshape((1, 1, 4, 4))\n", 187 | "X" 188 | ] 189 | }, 190 | { 191 | "cell_type": "markdown", 192 | "id": "43710341", 193 | "metadata": { 194 | "slideshow": { 195 | "slide_type": "-" 196 | } 197 | }, 198 | "source": [ 199 | "深度学习框架中的步幅与汇聚窗口的大小相同" 200 | ] 201 | }, 202 | { 203 | "cell_type": "code", 204 | "execution_count": 6, 205 | "id": "a3cc01e3", 206 | "metadata": { 207 | "execution": { 208 | "iopub.execute_input": "2023-08-18T07:02:20.291052Z", 209 | "iopub.status.busy": "2023-08-18T07:02:20.290402Z", 210 | "iopub.status.idle": "2023-08-18T07:02:20.296276Z", 211 | "shell.execute_reply": "2023-08-18T07:02:20.295476Z" 212 | }, 213 | "origin_pos": 17, 214 | "tab": [ 215 | "pytorch" 216 | ] 217 | }, 218 | "outputs": [ 219 | { 220 | "data": { 221 | "text/plain": [ 222 | "tensor([[[[10.]]]])" 223 | ] 224 | }, 225 | "execution_count": 6, 226 | "metadata": {}, 227 | "output_type": "execute_result" 228 | } 229 | ], 230 | "source": [ 231 | "pool2d = nn.MaxPool2d(3)\n", 232 | "pool2d(X)" 233 | ] 234 | }, 235 | { 236 | "cell_type": "markdown", 237 | "id": "4b86f339", 238 | "metadata": { 239 | "slideshow": { 240 | "slide_type": "slide" 241 | } 242 | }, 243 | "source": [ 244 | "填充和步幅可以手动设定" 245 | ] 246 | }, 247 | { 248 | "cell_type": "code", 249 | "execution_count": 7, 250 | "id": "9c247428", 251 | "metadata": { 252 | "execution": { 253 | "iopub.execute_input": "2023-08-18T07:02:20.299965Z", 254 | "iopub.status.busy": "2023-08-18T07:02:20.299310Z", 255 | "iopub.status.idle": 
"2023-08-18T07:02:20.307455Z", 256 | "shell.execute_reply": "2023-08-18T07:02:20.306477Z" 257 | }, 258 | "origin_pos": 22, 259 | "tab": [ 260 | "pytorch" 261 | ] 262 | }, 263 | "outputs": [ 264 | { 265 | "data": { 266 | "text/plain": [ 267 | "tensor([[[[ 5., 7.],\n", 268 | " [13., 15.]]]])" 269 | ] 270 | }, 271 | "execution_count": 7, 272 | "metadata": {}, 273 | "output_type": "execute_result" 274 | } 275 | ], 276 | "source": [ 277 | "pool2d = nn.MaxPool2d(3, padding=1, stride=2)\n", 278 | "pool2d(X)" 279 | ] 280 | }, 281 | { 282 | "cell_type": "markdown", 283 | "id": "7295d3e3", 284 | "metadata": { 285 | "slideshow": { 286 | "slide_type": "-" 287 | } 288 | }, 289 | "source": [ 290 | "设定一个任意大小的矩形汇聚窗口,并分别设定填充和步幅的高度和宽度" 291 | ] 292 | }, 293 | { 294 | "cell_type": "code", 295 | "execution_count": 8, 296 | "id": "7c169b2f", 297 | "metadata": { 298 | "execution": { 299 | "iopub.execute_input": "2023-08-18T07:02:20.311794Z", 300 | "iopub.status.busy": "2023-08-18T07:02:20.311492Z", 301 | "iopub.status.idle": "2023-08-18T07:02:20.320399Z", 302 | "shell.execute_reply": "2023-08-18T07:02:20.319108Z" 303 | }, 304 | "origin_pos": 30, 305 | "tab": [ 306 | "pytorch" 307 | ] 308 | }, 309 | "outputs": [ 310 | { 311 | "data": { 312 | "text/plain": [ 313 | "tensor([[[[ 5., 7.],\n", 314 | " [13., 15.]]]])" 315 | ] 316 | }, 317 | "execution_count": 8, 318 | "metadata": {}, 319 | "output_type": "execute_result" 320 | } 321 | ], 322 | "source": [ 323 | "pool2d = nn.MaxPool2d((2, 3), stride=(2, 3), padding=(0, 1))\n", 324 | "pool2d(X)" 325 | ] 326 | }, 327 | { 328 | "cell_type": "markdown", 329 | "id": "daa999f2", 330 | "metadata": { 331 | "slideshow": { 332 | "slide_type": "slide" 333 | } 334 | }, 335 | "source": [ 336 | "汇聚层在每个输入通道上单独运算" 337 | ] 338 | }, 339 | { 340 | "cell_type": "code", 341 | "execution_count": 9, 342 | "id": "c0a30a7f", 343 | "metadata": { 344 | "execution": { 345 | "iopub.execute_input": "2023-08-18T07:02:20.325617Z", 346 | "iopub.status.busy": 
"2023-08-18T07:02:20.324879Z", 347 | "iopub.status.idle": "2023-08-18T07:02:20.335303Z", 348 | "shell.execute_reply": "2023-08-18T07:02:20.334055Z" 349 | }, 350 | "origin_pos": 35, 351 | "tab": [ 352 | "pytorch" 353 | ] 354 | }, 355 | "outputs": [ 356 | { 357 | "data": { 358 | "text/plain": [ 359 | "tensor([[[[ 0., 1., 2., 3.],\n", 360 | " [ 4., 5., 6., 7.],\n", 361 | " [ 8., 9., 10., 11.],\n", 362 | " [12., 13., 14., 15.]],\n", 363 | "\n", 364 | " [[ 1., 2., 3., 4.],\n", 365 | " [ 5., 6., 7., 8.],\n", 366 | " [ 9., 10., 11., 12.],\n", 367 | " [13., 14., 15., 16.]]]])" 368 | ] 369 | }, 370 | "execution_count": 9, 371 | "metadata": {}, 372 | "output_type": "execute_result" 373 | } 374 | ], 375 | "source": [ 376 | "X = torch.cat((X, X + 1), 1)\n", 377 | "X" 378 | ] 379 | }, 380 | { 381 | "cell_type": "code", 382 | "execution_count": 10, 383 | "id": "e534c8f3", 384 | "metadata": { 385 | "execution": { 386 | "iopub.execute_input": "2023-08-18T07:02:20.340529Z", 387 | "iopub.status.busy": "2023-08-18T07:02:20.339767Z", 388 | "iopub.status.idle": "2023-08-18T07:02:20.349365Z", 389 | "shell.execute_reply": "2023-08-18T07:02:20.348159Z" 390 | }, 391 | "origin_pos": 39, 392 | "tab": [ 393 | "pytorch" 394 | ] 395 | }, 396 | "outputs": [ 397 | { 398 | "data": { 399 | "text/plain": [ 400 | "tensor([[[[ 5., 7.],\n", 401 | " [13., 15.]],\n", 402 | "\n", 403 | " [[ 6., 8.],\n", 404 | " [14., 16.]]]])" 405 | ] 406 | }, 407 | "execution_count": 10, 408 | "metadata": {}, 409 | "output_type": "execute_result" 410 | } 411 | ], 412 | "source": [ 413 | "pool2d = nn.MaxPool2d(3, padding=1, stride=2)\n", 414 | "pool2d(X)" 415 | ] 416 | } 417 | ], 418 | "metadata": { 419 | "celltoolbar": "Slideshow", 420 | "language_info": { 421 | "name": "python" 422 | }, 423 | "required_libs": [], 424 | "rise": { 425 | "autolaunch": true, 426 | "enable_chalkboard": true, 427 | "overlay": "
", 428 | "scroll": true 429 | } 430 | }, 431 | "nbformat": 4, 432 | "nbformat_minor": 5 433 | } -------------------------------------------------------------------------------- /chapter_convolutional-neural-networks/rise.css: -------------------------------------------------------------------------------- 1 | 2 | div.text_cell_render.rendered_html { 3 | padding: 0.35em 0.1em; 4 | } 5 | 6 | div.code_cell { 7 | font-size: 120%; 8 | } 9 | 10 | div.my-top-right { 11 | position: absolute; 12 | right: 5%; 13 | top: 1em; 14 | font-size: 2em; 15 | } 16 | 17 | div.my-top-left { 18 | position: absolute; 19 | left: 5%; 20 | top: 1em; 21 | font-size: 2em; 22 | } 23 | -------------------------------------------------------------------------------- /chapter_deep-learning-computation/custom-layer.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "id": "e91e1a31", 6 | "metadata": { 7 | "slideshow": { 8 | "slide_type": "-" 9 | } 10 | }, 11 | "source": [ 12 | "# 自定义层\n", 13 | "\n", 14 | "构造一个没有任何参数的自定义层" 15 | ] 16 | }, 17 | { 18 | "cell_type": "code", 19 | "execution_count": 2, 20 | "id": "dec68045", 21 | "metadata": { 22 | "execution": { 23 | "iopub.execute_input": "2023-08-18T07:07:17.497408Z", 24 | "iopub.status.busy": "2023-08-18T07:07:17.497077Z", 25 | "iopub.status.idle": "2023-08-18T07:07:17.508357Z", 26 | "shell.execute_reply": "2023-08-18T07:07:17.507175Z" 27 | }, 28 | "origin_pos": 7, 29 | "tab": [ 30 | "pytorch" 31 | ] 32 | }, 33 | "outputs": [ 34 | { 35 | "data": { 36 | "text/plain": [ 37 | "tensor([-2., -1., 0., 1., 2.])" 38 | ] 39 | }, 40 | "execution_count": 2, 41 | "metadata": {}, 42 | "output_type": "execute_result" 43 | } 44 | ], 45 | "source": [ 46 | "import torch\n", 47 | "import torch.nn.functional as F\n", 48 | "from torch import nn\n", 49 | "\n", 50 | "\n", 51 | "class CenteredLayer(nn.Module):\n", 52 | " def __init__(self):\n", 53 | " super().__init__()\n", 54 | 
"\n", 55 | " def forward(self, X):\n", 56 | " return X - X.mean()\n", 57 | "\n", 58 | "layer = CenteredLayer()\n", 59 | "layer(torch.FloatTensor([1, 2, 3, 4, 5]))" 60 | ] 61 | }, 62 | { 63 | "cell_type": "markdown", 64 | "id": "05f8a223", 65 | "metadata": { 66 | "slideshow": { 67 | "slide_type": "slide" 68 | } 69 | }, 70 | "source": [ 71 | "将层作为组件合并到更复杂的模型中" 72 | ] 73 | }, 74 | { 75 | "cell_type": "code", 76 | "execution_count": 4, 77 | "id": "6ab302a0", 78 | "metadata": { 79 | "execution": { 80 | "iopub.execute_input": "2023-08-18T07:07:17.523517Z", 81 | "iopub.status.busy": "2023-08-18T07:07:17.523140Z", 82 | "iopub.status.idle": "2023-08-18T07:07:17.534718Z", 83 | "shell.execute_reply": "2023-08-18T07:07:17.533593Z" 84 | }, 85 | "origin_pos": 16, 86 | "tab": [ 87 | "pytorch" 88 | ] 89 | }, 90 | "outputs": [ 91 | { 92 | "data": { 93 | "text/plain": [ 94 | "tensor(7.4506e-09, grad_fn=)" 95 | ] 96 | }, 97 | "execution_count": 4, 98 | "metadata": {}, 99 | "output_type": "execute_result" 100 | } 101 | ], 102 | "source": [ 103 | "net = nn.Sequential(nn.Linear(8, 128), CenteredLayer())\n", 104 | "\n", 105 | "Y = net(torch.rand(4, 8))\n", 106 | "Y.mean()" 107 | ] 108 | }, 109 | { 110 | "cell_type": "markdown", 111 | "id": "568d0d74", 112 | "metadata": { 113 | "slideshow": { 114 | "slide_type": "slide" 115 | } 116 | }, 117 | "source": [ 118 | "带参数的层" 119 | ] 120 | }, 121 | { 122 | "cell_type": "code", 123 | "execution_count": 6, 124 | "id": "4490005a", 125 | "metadata": { 126 | "execution": { 127 | "iopub.execute_input": "2023-08-18T07:07:17.550522Z", 128 | "iopub.status.busy": "2023-08-18T07:07:17.550152Z", 129 | "iopub.status.idle": "2023-08-18T07:07:17.558364Z", 130 | "shell.execute_reply": "2023-08-18T07:07:17.557338Z" 131 | }, 132 | "origin_pos": 28, 133 | "tab": [ 134 | "pytorch" 135 | ] 136 | }, 137 | "outputs": [ 138 | { 139 | "data": { 140 | "text/plain": [ 141 | "Parameter containing:\n", 142 | "tensor([[ 0.1775, -1.4539, 0.3972],\n", 143 | " [-0.1339, 0.5273, 
1.3041],\n", 144 | " [-0.3327, -0.2337, -0.6334],\n", 145 | " [ 1.2076, -0.3937, 0.6851],\n", 146 | " [-0.4716, 0.0894, -0.9195]], requires_grad=True)" 147 | ] 148 | }, 149 | "execution_count": 6, 150 | "metadata": {}, 151 | "output_type": "execute_result" 152 | } 153 | ], 154 | "source": [ 155 | "class MyLinear(nn.Module):\n", 156 | " def __init__(self, in_units, units):\n", 157 | " super().__init__()\n", 158 | " self.weight = nn.Parameter(torch.randn(in_units, units))\n", 159 | " self.bias = nn.Parameter(torch.randn(units,))\n", 160 | " def forward(self, X):\n", 161 | " linear = torch.matmul(X, self.weight.data) + self.bias.data\n", 162 | " return F.relu(linear)\n", 163 | "\n", 164 | "linear = MyLinear(5, 3)\n", 165 | "linear.weight" 166 | ] 167 | }, 168 | { 169 | "cell_type": "markdown", 170 | "id": "9c8d6fca", 171 | "metadata": { 172 | "slideshow": { 173 | "slide_type": "slide" 174 | } 175 | }, 176 | "source": [ 177 | "使用自定义层直接执行前向传播计算" 178 | ] 179 | }, 180 | { 181 | "cell_type": "code", 182 | "execution_count": 7, 183 | "id": "25f2aabf", 184 | "metadata": { 185 | "execution": { 186 | "iopub.execute_input": "2023-08-18T07:07:17.562706Z", 187 | "iopub.status.busy": "2023-08-18T07:07:17.562337Z", 188 | "iopub.status.idle": "2023-08-18T07:07:17.570015Z", 189 | "shell.execute_reply": "2023-08-18T07:07:17.568916Z" 190 | }, 191 | "origin_pos": 32, 192 | "tab": [ 193 | "pytorch" 194 | ] 195 | }, 196 | "outputs": [ 197 | { 198 | "data": { 199 | "text/plain": [ 200 | "tensor([[0., 0., 0.],\n", 201 | " [0., 0., 0.]])" 202 | ] 203 | }, 204 | "execution_count": 7, 205 | "metadata": {}, 206 | "output_type": "execute_result" 207 | } 208 | ], 209 | "source": [ 210 | "linear(torch.rand(2, 5))" 211 | ] 212 | }, 213 | { 214 | "cell_type": "markdown", 215 | "id": "393a154e", 216 | "metadata": { 217 | "slideshow": { 218 | "slide_type": "-" 219 | } 220 | }, 221 | "source": [ 222 | "使用自定义层构建模型" 223 | ] 224 | }, 225 | { 226 | "cell_type": "code", 227 | "execution_count": 8, 228 | 
"id": "fb2953e8", 229 | "metadata": { 230 | "execution": { 231 | "iopub.execute_input": "2023-08-18T07:07:17.574378Z", 232 | "iopub.status.busy": "2023-08-18T07:07:17.574000Z", 233 | "iopub.status.idle": "2023-08-18T07:07:17.582792Z", 234 | "shell.execute_reply": "2023-08-18T07:07:17.581735Z" 235 | }, 236 | "origin_pos": 37, 237 | "tab": [ 238 | "pytorch" 239 | ] 240 | }, 241 | "outputs": [ 242 | { 243 | "data": { 244 | "text/plain": [ 245 | "tensor([[0.],\n", 246 | " [0.]])" 247 | ] 248 | }, 249 | "execution_count": 8, 250 | "metadata": {}, 251 | "output_type": "execute_result" 252 | } 253 | ], 254 | "source": [ 255 | "net = nn.Sequential(MyLinear(64, 8), MyLinear(8, 1))\n", 256 | "net(torch.rand(2, 64))" 257 | ] 258 | } 259 | ], 260 | "metadata": { 261 | "celltoolbar": "Slideshow", 262 | "language_info": { 263 | "name": "python" 264 | }, 265 | "required_libs": [], 266 | "rise": { 267 | "autolaunch": true, 268 | "enable_chalkboard": true, 269 | "overlay": "
", 270 | "scroll": true 271 | } 272 | }, 273 | "nbformat": 4, 274 | "nbformat_minor": 5 275 | } -------------------------------------------------------------------------------- /chapter_deep-learning-computation/model-construction.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "id": "8d628b46", 6 | "metadata": { 7 | "slideshow": { 8 | "slide_type": "-" 9 | } 10 | }, 11 | "source": [ 12 | "# 层和块\n", 13 | "\n", 14 | "我们先回顾一下多层感知机" 15 | ] 16 | }, 17 | { 18 | "cell_type": "code", 19 | "execution_count": 1, 20 | "id": "9895e279", 21 | "metadata": { 22 | "execution": { 23 | "iopub.execute_input": "2023-08-18T06:57:00.244437Z", 24 | "iopub.status.busy": "2023-08-18T06:57:00.243813Z", 25 | "iopub.status.idle": "2023-08-18T06:57:01.320999Z", 26 | "shell.execute_reply": "2023-08-18T06:57:01.320186Z" 27 | }, 28 | "origin_pos": 2, 29 | "tab": [ 30 | "pytorch" 31 | ] 32 | }, 33 | "outputs": [ 34 | { 35 | "data": { 36 | "text/plain": [ 37 | "tensor([[ 0.0343, 0.0264, 0.2505, -0.0243, 0.0945, 0.0012, -0.0141, 0.0666,\n", 38 | " -0.0547, -0.0667],\n", 39 | " [ 0.0772, -0.0274, 0.2638, -0.0191, 0.0394, -0.0324, 0.0102, 0.0707,\n", 40 | " -0.1481, -0.1031]], grad_fn=)" 41 | ] 42 | }, 43 | "execution_count": 1, 44 | "metadata": {}, 45 | "output_type": "execute_result" 46 | } 47 | ], 48 | "source": [ 49 | "import torch\n", 50 | "from torch import nn\n", 51 | "from torch.nn import functional as F\n", 52 | "\n", 53 | "net = nn.Sequential(nn.Linear(20, 256), nn.ReLU(), nn.Linear(256, 10))\n", 54 | "\n", 55 | "X = torch.rand(2, 20)\n", 56 | "net(X)" 57 | ] 58 | }, 59 | { 60 | "cell_type": "markdown", 61 | "id": "389483a7", 62 | "metadata": { 63 | "slideshow": { 64 | "slide_type": "-" 65 | } 66 | }, 67 | "source": [ 68 | "`nn.Sequential`定义了一种特殊的`Module`" 69 | ] 70 | }, 71 | { 72 | "cell_type": "markdown", 73 | "id": "c9adf2a5", 74 | "metadata": { 75 | "slideshow": { 76 | "slide_type": 
"slide" 77 | } 78 | }, 79 | "source": [ 80 | "自定义块" 81 | ] 82 | }, 83 | { 84 | "cell_type": "code", 85 | "execution_count": 2, 86 | "id": "876df867", 87 | "metadata": { 88 | "execution": { 89 | "iopub.execute_input": "2023-08-18T06:57:01.325541Z", 90 | "iopub.status.busy": "2023-08-18T06:57:01.324828Z", 91 | "iopub.status.idle": "2023-08-18T06:57:01.330411Z", 92 | "shell.execute_reply": "2023-08-18T06:57:01.329591Z" 93 | }, 94 | "origin_pos": 14, 95 | "tab": [ 96 | "pytorch" 97 | ] 98 | }, 99 | "outputs": [], 100 | "source": [ 101 | "class MLP(nn.Module):\n", 102 | " def __init__(self):\n", 103 | " super().__init__()\n", 104 | " self.hidden = nn.Linear(20, 256)\n", 105 | " self.out = nn.Linear(256, 10)\n", 106 | "\n", 107 | " def forward(self, X):\n", 108 | " return self.out(F.relu(self.hidden(X)))" 109 | ] 110 | }, 111 | { 112 | "cell_type": "markdown", 113 | "id": "d63bddd3", 114 | "metadata": { 115 | "slideshow": { 116 | "slide_type": "slide" 117 | } 118 | }, 119 | "source": [ 120 | "实例化多层感知机的层,然后在每次调用前向传播函数时调用这些层" 121 | ] 122 | }, 123 | { 124 | "cell_type": "code", 125 | "execution_count": 3, 126 | "id": "f7a34ec3", 127 | "metadata": { 128 | "execution": { 129 | "iopub.execute_input": "2023-08-18T06:57:01.334346Z", 130 | "iopub.status.busy": "2023-08-18T06:57:01.333603Z", 131 | "iopub.status.idle": "2023-08-18T06:57:01.340473Z", 132 | "shell.execute_reply": "2023-08-18T06:57:01.339676Z" 133 | }, 134 | "origin_pos": 19, 135 | "tab": [ 136 | "pytorch" 137 | ] 138 | }, 139 | "outputs": [ 140 | { 141 | "data": { 142 | "text/plain": [ 143 | "tensor([[ 0.0669, 0.2202, -0.0912, -0.0064, 0.1474, -0.0577, -0.3006, 0.1256,\n", 144 | " -0.0280, 0.4040],\n", 145 | " [ 0.0545, 0.2591, -0.0297, 0.1141, 0.1887, 0.0094, -0.2686, 0.0732,\n", 146 | " -0.0135, 0.3865]], grad_fn=)" 147 | ] 148 | }, 149 | "execution_count": 3, 150 | "metadata": {}, 151 | "output_type": "execute_result" 152 | } 153 | ], 154 | "source": [ 155 | "net = MLP()\n", 156 | "net(X)" 157 | ] 158 | }, 159 | { 
160 | "cell_type": "markdown", 161 | "id": "584a9ee2", 162 | "metadata": { 163 | "slideshow": { 164 | "slide_type": "slide" 165 | } 166 | }, 167 | "source": [ 168 | "顺序块" 169 | ] 170 | }, 171 | { 172 | "cell_type": "code", 173 | "execution_count": 5, 174 | "id": "9672de9a", 175 | "metadata": { 176 | "execution": { 177 | "iopub.execute_input": "2023-08-18T06:57:01.353302Z", 178 | "iopub.status.busy": "2023-08-18T06:57:01.352727Z", 179 | "iopub.status.idle": "2023-08-18T06:57:01.360268Z", 180 | "shell.execute_reply": "2023-08-18T06:57:01.359462Z" 181 | }, 182 | "origin_pos": 31, 183 | "tab": [ 184 | "pytorch" 185 | ] 186 | }, 187 | "outputs": [ 188 | { 189 | "data": { 190 | "text/plain": [ 191 | "tensor([[ 2.2759e-01, -4.7003e-02, 4.2846e-01, -1.2546e-01, 1.5296e-01,\n", 192 | " 1.8972e-01, 9.7048e-02, 4.5479e-04, -3.7986e-02, 6.4842e-02],\n", 193 | " [ 2.7825e-01, -9.7517e-02, 4.8541e-01, -2.4519e-01, -8.4580e-02,\n", 194 | " 2.8538e-01, 3.6861e-02, 2.9411e-02, -1.0612e-01, 1.2620e-01]],\n", 195 | " grad_fn=)" 196 | ] 197 | }, 198 | "execution_count": 5, 199 | "metadata": {}, 200 | "output_type": "execute_result" 201 | } 202 | ], 203 | "source": [ 204 | "class MySequential(nn.Module):\n", 205 | " def __init__(self, *args):\n", 206 | " super().__init__()\n", 207 | " for idx, module in enumerate(args):\n", 208 | " self._modules[str(idx)] = module\n", 209 | "\n", 210 | " def forward(self, X):\n", 211 | " for block in self._modules.values():\n", 212 | " X = block(X)\n", 213 | " return X\n", 214 | "\n", 215 | "net = MySequential(nn.Linear(20, 256), nn.ReLU(), nn.Linear(256, 10))\n", 216 | "net(X)" 217 | ] 218 | }, 219 | { 220 | "cell_type": "markdown", 221 | "id": "3ce57d60", 222 | "metadata": { 223 | "slideshow": { 224 | "slide_type": "slide" 225 | } 226 | }, 227 | "source": [ 228 | "在前向传播函数中执行代码" 229 | ] 230 | }, 231 | { 232 | "cell_type": "code", 233 | "execution_count": 7, 234 | "id": "00ebc567", 235 | "metadata": { 236 | "execution": { 237 | "iopub.execute_input": 
"2023-08-18T06:57:01.373508Z", 238 | "iopub.status.busy": "2023-08-18T06:57:01.372789Z", 239 | "iopub.status.idle": "2023-08-18T06:57:01.380049Z", 240 | "shell.execute_reply": "2023-08-18T06:57:01.379025Z" 241 | }, 242 | "origin_pos": 40, 243 | "tab": [ 244 | "pytorch" 245 | ] 246 | }, 247 | "outputs": [ 248 | { 249 | "data": { 250 | "text/plain": [ 251 | "tensor(0.1862, grad_fn=)" 252 | ] 253 | }, 254 | "execution_count": 7, 255 | "metadata": {}, 256 | "output_type": "execute_result" 257 | } 258 | ], 259 | "source": [ 260 | "class FixedHiddenMLP(nn.Module):\n", 261 | " def __init__(self):\n", 262 | " super().__init__()\n", 263 | " self.rand_weight = torch.rand((20, 20), requires_grad=False)\n", 264 | " self.linear = nn.Linear(20, 20)\n", 265 | "\n", 266 | " def forward(self, X):\n", 267 | " X = self.linear(X)\n", 268 | " X = F.relu(torch.mm(X, self.rand_weight) + 1)\n", 269 | " X = self.linear(X)\n", 270 | " while X.abs().sum() > 1:\n", 271 | " X /= 2\n", 272 | " return X.sum()\n", 273 | "\n", 274 | "net = FixedHiddenMLP()\n", 275 | "net(X)" 276 | ] 277 | }, 278 | { 279 | "cell_type": "markdown", 280 | "id": "053d1e5a", 281 | "metadata": { 282 | "slideshow": { 283 | "slide_type": "slide" 284 | } 285 | }, 286 | "source": [ 287 | "混合搭配各种组合块的方法" 288 | ] 289 | }, 290 | { 291 | "cell_type": "code", 292 | "execution_count": 8, 293 | "id": "6ca3b399", 294 | "metadata": { 295 | "execution": { 296 | "iopub.execute_input": "2023-08-18T06:57:01.384091Z", 297 | "iopub.status.busy": "2023-08-18T06:57:01.383236Z", 298 | "iopub.status.idle": "2023-08-18T06:57:01.394649Z", 299 | "shell.execute_reply": "2023-08-18T06:57:01.393535Z" 300 | }, 301 | "origin_pos": 43, 302 | "tab": [ 303 | "pytorch" 304 | ] 305 | }, 306 | "outputs": [ 307 | { 308 | "data": { 309 | "text/plain": [ 310 | "tensor(0.2183, grad_fn=)" 311 | ] 312 | }, 313 | "execution_count": 8, 314 | "metadata": {}, 315 | "output_type": "execute_result" 316 | } 317 | ], 318 | "source": [ 319 | "class NestMLP(nn.Module):\n", 
320 | " def __init__(self):\n", 321 | " super().__init__()\n", 322 | " self.net = nn.Sequential(nn.Linear(20, 64), nn.ReLU(),\n", 323 | " nn.Linear(64, 32), nn.ReLU())\n", 324 | " self.linear = nn.Linear(32, 16)\n", 325 | "\n", 326 | " def forward(self, X):\n", 327 | " return self.linear(self.net(X))\n", 328 | "\n", 329 | "chimera = nn.Sequential(NestMLP(), nn.Linear(16, 20), FixedHiddenMLP())\n", 330 | "chimera(X)" 331 | ] 332 | } 333 | ], 334 | "metadata": { 335 | "celltoolbar": "Slideshow", 336 | "language_info": { 337 | "name": "python" 338 | }, 339 | "required_libs": [], 340 | "rise": { 341 | "autolaunch": true, 342 | "enable_chalkboard": true, 343 | "overlay": "
", 344 | "scroll": true 345 | } 346 | }, 347 | "nbformat": 4, 348 | "nbformat_minor": 5 349 | } -------------------------------------------------------------------------------- /chapter_deep-learning-computation/parameters.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "id": "23850d90", 6 | "metadata": { 7 | "slideshow": { 8 | "slide_type": "-" 9 | } 10 | }, 11 | "source": [ 12 | "# 参数管理\n", 13 | "\n", 14 | "我们首先看一下具有单隐藏层的多层感知机" 15 | ] 16 | }, 17 | { 18 | "cell_type": "code", 19 | "execution_count": 1, 20 | "id": "ab7ef7a0", 21 | "metadata": { 22 | "execution": { 23 | "iopub.execute_input": "2023-08-18T07:01:09.649068Z", 24 | "iopub.status.busy": "2023-08-18T07:01:09.648305Z", 25 | "iopub.status.idle": "2023-08-18T07:01:10.928992Z", 26 | "shell.execute_reply": "2023-08-18T07:01:10.927959Z" 27 | }, 28 | "origin_pos": 2, 29 | "tab": [ 30 | "pytorch" 31 | ] 32 | }, 33 | "outputs": [ 34 | { 35 | "data": { 36 | "text/plain": [ 37 | "tensor([[-0.0970],\n", 38 | " [-0.0827]], grad_fn=)" 39 | ] 40 | }, 41 | "execution_count": 1, 42 | "metadata": {}, 43 | "output_type": "execute_result" 44 | } 45 | ], 46 | "source": [ 47 | "import torch\n", 48 | "from torch import nn\n", 49 | "\n", 50 | "net = nn.Sequential(nn.Linear(4, 8), nn.ReLU(), nn.Linear(8, 1))\n", 51 | "X = torch.rand(size=(2, 4))\n", 52 | "net(X)" 53 | ] 54 | }, 55 | { 56 | "cell_type": "markdown", 57 | "id": "2dbaff55", 58 | "metadata": { 59 | "slideshow": { 60 | "slide_type": "slide" 61 | } 62 | }, 63 | "source": [ 64 | "参数访问" 65 | ] 66 | }, 67 | { 68 | "cell_type": "code", 69 | "execution_count": 2, 70 | "id": "5e2fff9a", 71 | "metadata": { 72 | "execution": { 73 | "iopub.execute_input": "2023-08-18T07:01:10.933865Z", 74 | "iopub.status.busy": "2023-08-18T07:01:10.933267Z", 75 | "iopub.status.idle": "2023-08-18T07:01:10.939922Z", 76 | "shell.execute_reply": "2023-08-18T07:01:10.938931Z" 77 | }, 78 | "origin_pos": 
7, 79 | "tab": [ 80 | "pytorch" 81 | ] 82 | }, 83 | "outputs": [ 84 | { 85 | "name": "stdout", 86 | "output_type": "stream", 87 | "text": [ 88 | "OrderedDict([('weight', tensor([[-0.0427, -0.2939, -0.1894, 0.0220, -0.1709, -0.1522, -0.0334, -0.2263]])), ('bias', tensor([0.0887]))])\n" 89 | ] 90 | } 91 | ], 92 | "source": [ 93 | "print(net[2].state_dict())" 94 | ] 95 | }, 96 | { 97 | "cell_type": "markdown", 98 | "id": "f4e174dc", 99 | "metadata": { 100 | "slideshow": { 101 | "slide_type": "slide" 102 | } 103 | }, 104 | "source": [ 105 | "目标参数" 106 | ] 107 | }, 108 | { 109 | "cell_type": "code", 110 | "execution_count": 3, 111 | "id": "d0682fff", 112 | "metadata": { 113 | "execution": { 114 | "iopub.execute_input": "2023-08-18T07:01:10.945104Z", 115 | "iopub.status.busy": "2023-08-18T07:01:10.944250Z", 116 | "iopub.status.idle": "2023-08-18T07:01:10.951764Z", 117 | "shell.execute_reply": "2023-08-18T07:01:10.950790Z" 118 | }, 119 | "origin_pos": 11, 120 | "tab": [ 121 | "pytorch" 122 | ] 123 | }, 124 | "outputs": [ 125 | { 126 | "name": "stdout", 127 | "output_type": "stream", 128 | "text": [ 129 | "\n", 130 | "Parameter containing:\n", 131 | "tensor([0.0887], requires_grad=True)\n", 132 | "tensor([0.0887])\n" 133 | ] 134 | } 135 | ], 136 | "source": [ 137 | "print(type(net[2].bias))\n", 138 | "print(net[2].bias)\n", 139 | "print(net[2].bias.data)" 140 | ] 141 | }, 142 | { 143 | "cell_type": "code", 144 | "execution_count": 4, 145 | "id": "3cf4d55b", 146 | "metadata": { 147 | "execution": { 148 | "iopub.execute_input": "2023-08-18T07:01:10.956378Z", 149 | "iopub.status.busy": "2023-08-18T07:01:10.955542Z", 150 | "iopub.status.idle": "2023-08-18T07:01:10.961810Z", 151 | "shell.execute_reply": "2023-08-18T07:01:10.960767Z" 152 | }, 153 | "origin_pos": 16, 154 | "tab": [ 155 | "pytorch" 156 | ] 157 | }, 158 | "outputs": [ 159 | { 160 | "data": { 161 | "text/plain": [ 162 | "True" 163 | ] 164 | }, 165 | "execution_count": 4, 166 | "metadata": {}, 167 | "output_type": 
"execute_result" 168 | } 169 | ], 170 | "source": [ 171 | "net[2].weight.grad == None" 172 | ] 173 | }, 174 | { 175 | "cell_type": "markdown", 176 | "id": "170b54ab", 177 | "metadata": { 178 | "slideshow": { 179 | "slide_type": "slide" 180 | } 181 | }, 182 | "source": [ 183 | "一次性访问所有参数" 184 | ] 185 | }, 186 | { 187 | "cell_type": "code", 188 | "execution_count": 5, 189 | "id": "916939ce", 190 | "metadata": { 191 | "execution": { 192 | "iopub.execute_input": "2023-08-18T07:01:10.966725Z", 193 | "iopub.status.busy": "2023-08-18T07:01:10.965969Z", 194 | "iopub.status.idle": "2023-08-18T07:01:10.972600Z", 195 | "shell.execute_reply": "2023-08-18T07:01:10.971655Z" 196 | }, 197 | "origin_pos": 19, 198 | "tab": [ 199 | "pytorch" 200 | ] 201 | }, 202 | "outputs": [ 203 | { 204 | "name": "stdout", 205 | "output_type": "stream", 206 | "text": [ 207 | "('weight', torch.Size([8, 4])) ('bias', torch.Size([8]))\n", 208 | "('0.weight', torch.Size([8, 4])) ('0.bias', torch.Size([8])) ('2.weight', torch.Size([1, 8])) ('2.bias', torch.Size([1]))\n" 209 | ] 210 | } 211 | ], 212 | "source": [ 213 | "print(*[(name, param.shape) for name, param in net[0].named_parameters()])\n", 214 | "print(*[(name, param.shape) for name, param in net.named_parameters()])" 215 | ] 216 | }, 217 | { 218 | "cell_type": "code", 219 | "execution_count": 6, 220 | "id": "116207ef", 221 | "metadata": { 222 | "execution": { 223 | "iopub.execute_input": "2023-08-18T07:01:10.977269Z", 224 | "iopub.status.busy": "2023-08-18T07:01:10.976623Z", 225 | "iopub.status.idle": "2023-08-18T07:01:10.983222Z", 226 | "shell.execute_reply": "2023-08-18T07:01:10.982309Z" 227 | }, 228 | "origin_pos": 23, 229 | "tab": [ 230 | "pytorch" 231 | ] 232 | }, 233 | "outputs": [ 234 | { 235 | "data": { 236 | "text/plain": [ 237 | "tensor([0.0887])" 238 | ] 239 | }, 240 | "execution_count": 6, 241 | "metadata": {}, 242 | "output_type": "execute_result" 243 | } 244 | ], 245 | "source": [ 246 | "net.state_dict()['2.bias'].data" 247 | ] 248 
| }, 249 | { 250 | "cell_type": "markdown", 251 | "id": "707279d0", 252 | "metadata": { 253 | "slideshow": { 254 | "slide_type": "slide" 255 | } 256 | }, 257 | "source": [ 258 | "从嵌套块收集参数" 259 | ] 260 | }, 261 | { 262 | "cell_type": "code", 263 | "execution_count": 7, 264 | "id": "712e31fd", 265 | "metadata": { 266 | "execution": { 267 | "iopub.execute_input": "2023-08-18T07:01:10.988088Z", 268 | "iopub.status.busy": "2023-08-18T07:01:10.987352Z", 269 | "iopub.status.idle": "2023-08-18T07:01:10.998245Z", 270 | "shell.execute_reply": "2023-08-18T07:01:10.997197Z" 271 | }, 272 | "origin_pos": 28, 273 | "tab": [ 274 | "pytorch" 275 | ] 276 | }, 277 | "outputs": [ 278 | { 279 | "data": { 280 | "text/plain": [ 281 | "tensor([[0.2596],\n", 282 | " [0.2596]], grad_fn=)" 283 | ] 284 | }, 285 | "execution_count": 7, 286 | "metadata": {}, 287 | "output_type": "execute_result" 288 | } 289 | ], 290 | "source": [ 291 | "def block1():\n", 292 | " return nn.Sequential(nn.Linear(4, 8), nn.ReLU(),\n", 293 | " nn.Linear(8, 4), nn.ReLU())\n", 294 | "\n", 295 | "def block2():\n", 296 | " net = nn.Sequential()\n", 297 | " for i in range(4):\n", 298 | " net.add_module(f'block {i}', block1())\n", 299 | " return net\n", 300 | "\n", 301 | "rgnet = nn.Sequential(block2(), nn.Linear(4, 1))\n", 302 | "rgnet(X)" 303 | ] 304 | }, 305 | { 306 | "cell_type": "markdown", 307 | "id": "df7a2644", 308 | "metadata": { 309 | "slideshow": { 310 | "slide_type": "slide" 311 | } 312 | }, 313 | "source": [ 314 | "设计了网络后,我们看看它是如何工作的" 315 | ] 316 | }, 317 | { 318 | "cell_type": "code", 319 | "execution_count": 8, 320 | "id": "c7d7717d", 321 | "metadata": { 322 | "execution": { 323 | "iopub.execute_input": "2023-08-18T07:01:11.002889Z", 324 | "iopub.status.busy": "2023-08-18T07:01:11.002264Z", 325 | "iopub.status.idle": "2023-08-18T07:01:11.007643Z", 326 | "shell.execute_reply": "2023-08-18T07:01:11.006464Z" 327 | }, 328 | "origin_pos": 33, 329 | "tab": [ 330 | "pytorch" 331 | ] 332 | }, 333 | "outputs": [ 334 
| { 335 | "name": "stdout", 336 | "output_type": "stream", 337 | "text": [ 338 | "Sequential(\n", 339 | " (0): Sequential(\n", 340 | " (block 0): Sequential(\n", 341 | " (0): Linear(in_features=4, out_features=8, bias=True)\n", 342 | " (1): ReLU()\n", 343 | " (2): Linear(in_features=8, out_features=4, bias=True)\n", 344 | " (3): ReLU()\n", 345 | " )\n", 346 | " (block 1): Sequential(\n", 347 | " (0): Linear(in_features=4, out_features=8, bias=True)\n", 348 | " (1): ReLU()\n", 349 | " (2): Linear(in_features=8, out_features=4, bias=True)\n", 350 | " (3): ReLU()\n", 351 | " )\n", 352 | " (block 2): Sequential(\n", 353 | " (0): Linear(in_features=4, out_features=8, bias=True)\n", 354 | " (1): ReLU()\n", 355 | " (2): Linear(in_features=8, out_features=4, bias=True)\n", 356 | " (3): ReLU()\n", 357 | " )\n", 358 | " (block 3): Sequential(\n", 359 | " (0): Linear(in_features=4, out_features=8, bias=True)\n", 360 | " (1): ReLU()\n", 361 | " (2): Linear(in_features=8, out_features=4, bias=True)\n", 362 | " (3): ReLU()\n", 363 | " )\n", 364 | " )\n", 365 | " (1): Linear(in_features=4, out_features=1, bias=True)\n", 366 | ")\n" 367 | ] 368 | } 369 | ], 370 | "source": [ 371 | "print(rgnet)" 372 | ] 373 | }, 374 | { 375 | "cell_type": "code", 376 | "execution_count": 9, 377 | "id": "939ba4d3", 378 | "metadata": { 379 | "execution": { 380 | "iopub.execute_input": "2023-08-18T07:01:11.012522Z", 381 | "iopub.status.busy": "2023-08-18T07:01:11.011839Z", 382 | "iopub.status.idle": "2023-08-18T07:01:11.018508Z", 383 | "shell.execute_reply": "2023-08-18T07:01:11.017590Z" 384 | }, 385 | "origin_pos": 37, 386 | "tab": [ 387 | "pytorch" 388 | ] 389 | }, 390 | "outputs": [ 391 | { 392 | "data": { 393 | "text/plain": [ 394 | "tensor([ 0.1999, -0.4073, -0.1200, -0.2033, -0.1573, 0.3546, -0.2141, -0.2483])" 395 | ] 396 | }, 397 | "execution_count": 9, 398 | "metadata": {}, 399 | "output_type": "execute_result" 400 | } 401 | ], 402 | "source": [ 403 | "rgnet[0][1][0].bias.data" 404 | ] 405 | 
}, 406 | { 407 | "cell_type": "markdown", 408 | "id": "77b45fbb", 409 | "metadata": { 410 | "slideshow": { 411 | "slide_type": "slide" 412 | } 413 | }, 414 | "source": [ 415 | "内置初始化" 416 | ] 417 | }, 418 | { 419 | "cell_type": "code", 420 | "execution_count": 10, 421 | "id": "2f00d5e7", 422 | "metadata": { 423 | "execution": { 424 | "iopub.execute_input": "2023-08-18T07:01:11.023955Z", 425 | "iopub.status.busy": "2023-08-18T07:01:11.023046Z", 426 | "iopub.status.idle": "2023-08-18T07:01:11.033287Z", 427 | "shell.execute_reply": "2023-08-18T07:01:11.032096Z" 428 | }, 429 | "origin_pos": 47, 430 | "tab": [ 431 | "pytorch" 432 | ] 433 | }, 434 | "outputs": [ 435 | { 436 | "data": { 437 | "text/plain": [ 438 | "(tensor([-0.0214, -0.0015, -0.0100, -0.0058]), tensor(0.))" 439 | ] 440 | }, 441 | "execution_count": 10, 442 | "metadata": {}, 443 | "output_type": "execute_result" 444 | } 445 | ], 446 | "source": [ 447 | "def init_normal(m):\n", 448 | " if type(m) == nn.Linear:\n", 449 | " nn.init.normal_(m.weight, mean=0, std=0.01)\n", 450 | " nn.init.zeros_(m.bias)\n", 451 | "net.apply(init_normal)\n", 452 | "net[0].weight.data[0], net[0].bias.data[0]" 453 | ] 454 | }, 455 | { 456 | "cell_type": "code", 457 | "execution_count": 11, 458 | "id": "49ee306c", 459 | "metadata": { 460 | "execution": { 461 | "iopub.execute_input": "2023-08-18T07:01:11.038321Z", 462 | "iopub.status.busy": "2023-08-18T07:01:11.037607Z", 463 | "iopub.status.idle": "2023-08-18T07:01:11.049009Z", 464 | "shell.execute_reply": "2023-08-18T07:01:11.047793Z" 465 | }, 466 | "origin_pos": 52, 467 | "tab": [ 468 | "pytorch" 469 | ] 470 | }, 471 | "outputs": [ 472 | { 473 | "data": { 474 | "text/plain": [ 475 | "(tensor([1., 1., 1., 1.]), tensor(0.))" 476 | ] 477 | }, 478 | "execution_count": 11, 479 | "metadata": {}, 480 | "output_type": "execute_result" 481 | } 482 | ], 483 | "source": [ 484 | "def init_constant(m):\n", 485 | " if type(m) == nn.Linear:\n", 486 | " nn.init.constant_(m.weight, 1)\n", 487 | " 
nn.init.zeros_(m.bias)\n", 488 | "net.apply(init_constant)\n", 489 | "net[0].weight.data[0], net[0].bias.data[0]" 490 | ] 491 | }, 492 | { 493 | "cell_type": "markdown", 494 | "id": "478059aa", 495 | "metadata": { 496 | "slideshow": { 497 | "slide_type": "slide" 498 | } 499 | }, 500 | "source": [ 501 | "对某些块应用不同的初始化方法" 502 | ] 503 | }, 504 | { 505 | "cell_type": "code", 506 | "execution_count": 12, 507 | "id": "1a90ffaa", 508 | "metadata": { 509 | "execution": { 510 | "iopub.execute_input": "2023-08-18T07:01:11.054335Z", 511 | "iopub.status.busy": "2023-08-18T07:01:11.053550Z", 512 | "iopub.status.idle": "2023-08-18T07:01:11.063215Z", 513 | "shell.execute_reply": "2023-08-18T07:01:11.062244Z" 514 | }, 515 | "origin_pos": 57, 516 | "tab": [ 517 | "pytorch" 518 | ] 519 | }, 520 | "outputs": [ 521 | { 522 | "name": "stdout", 523 | "output_type": "stream", 524 | "text": [ 525 | "tensor([ 0.5236, 0.0516, -0.3236, 0.3794])\n", 526 | "tensor([[42., 42., 42., 42., 42., 42., 42., 42.]])\n" 527 | ] 528 | } 529 | ], 530 | "source": [ 531 | "def init_xavier(m):\n", 532 | " if type(m) == nn.Linear:\n", 533 | " nn.init.xavier_uniform_(m.weight)\n", 534 | "def init_42(m):\n", 535 | " if type(m) == nn.Linear:\n", 536 | " nn.init.constant_(m.weight, 42)\n", 537 | "\n", 538 | "net[0].apply(init_xavier)\n", 539 | "net[2].apply(init_42)\n", 540 | "print(net[0].weight.data[0])\n", 541 | "print(net[2].weight.data)" 542 | ] 543 | }, 544 | { 545 | "cell_type": "markdown", 546 | "id": "0a70ae16", 547 | "metadata": { 548 | "slideshow": { 549 | "slide_type": "slide" 550 | } 551 | }, 552 | "source": [ 553 | "自定义初始化" 554 | ] 555 | }, 556 | { 557 | "cell_type": "code", 558 | "execution_count": 13, 559 | "id": "9166f6e3", 560 | "metadata": { 561 | "execution": { 562 | "iopub.execute_input": "2023-08-18T07:01:11.068164Z", 563 | "iopub.status.busy": "2023-08-18T07:01:11.067460Z", 564 | "iopub.status.idle": "2023-08-18T07:01:11.079228Z", 565 | "shell.execute_reply": "2023-08-18T07:01:11.078069Z" 
566 | }, 567 | "origin_pos": 66, 568 | "tab": [ 569 | "pytorch" 570 | ] 571 | }, 572 | "outputs": [ 573 | { 574 | "name": "stdout", 575 | "output_type": "stream", 576 | "text": [ 577 | "Init weight torch.Size([8, 4])\n", 578 | "Init weight torch.Size([1, 8])\n" 579 | ] 580 | }, 581 | { 582 | "data": { 583 | "text/plain": [ 584 | "tensor([[5.4079, 9.3334, 5.0616, 8.3095],\n", 585 | " [0.0000, 7.2788, -0.0000, -0.0000]], grad_fn=<SliceBackward0>)" 586 | ] 587 | }, 588 | "execution_count": 13, 589 | "metadata": {}, 590 | "output_type": "execute_result" 591 | } 592 | ], 593 | "source": [ 594 | "def my_init(m):\n", 595 | " if type(m) == nn.Linear:\n", 596 | " print(\"Init\", *[(name, param.shape)\n", 597 | " for name, param in m.named_parameters()][0])\n", 598 | " nn.init.uniform_(m.weight, -10, 10)\n", 599 | " m.weight.data *= m.weight.data.abs() >= 5\n", 600 | "\n", 601 | "net.apply(my_init)\n", 602 | "net[0].weight[:2]" 603 | ] 604 | }, 605 | { 606 | "cell_type": "code", 607 | "execution_count": 14, 608 | "id": "5b9af1f8", 609 | "metadata": { 610 | "execution": { 611 | "iopub.execute_input": "2023-08-18T07:01:11.084158Z", 612 | "iopub.status.busy": "2023-08-18T07:01:11.083416Z", 613 | "iopub.status.idle": "2023-08-18T07:01:11.092672Z", 614 | "shell.execute_reply": "2023-08-18T07:01:11.091537Z" 615 | }, 616 | "origin_pos": 71, 617 | "tab": [ 618 | "pytorch" 619 | ] 620 | }, 621 | "outputs": [ 622 | { 623 | "data": { 624 | "text/plain": [ 625 | "tensor([42.0000, 10.3334, 6.0616, 9.3095])" 626 | ] 627 | }, 628 | "execution_count": 14, 629 | "metadata": {}, 630 | "output_type": "execute_result" 631 | } 632 | ], 633 | "source": [ 634 | "net[0].weight.data[:] += 1\n", 635 | "net[0].weight.data[0, 0] = 42\n", 636 | "net[0].weight.data[0]" 637 | ] 638 | }, 639 | { 640 | "cell_type": "markdown", 641 | "id": "9031168e", 642 | "metadata": { 643 | "slideshow": { 644 | "slide_type": "slide" 645 | } 646 | }, 647 | "source": [ 648 | "参数绑定" 649 | ] 650 | }, 651 | { 652 | "cell_type": "code", 653 | 
"execution_count": 15, 654 | "id": "69660fa7", 655 | "metadata": { 656 | "execution": { 657 | "iopub.execute_input": "2023-08-18T07:01:11.097767Z", 658 | "iopub.status.busy": "2023-08-18T07:01:11.096948Z", 659 | "iopub.status.idle": "2023-08-18T07:01:11.108904Z", 660 | "shell.execute_reply": "2023-08-18T07:01:11.107763Z" 661 | }, 662 | "origin_pos": 77, 663 | "tab": [ 664 | "pytorch" 665 | ] 666 | }, 667 | "outputs": [ 668 | { 669 | "name": "stdout", 670 | "output_type": "stream", 671 | "text": [ 672 | "tensor([True, True, True, True, True, True, True, True])\n", 673 | "tensor([True, True, True, True, True, True, True, True])\n" 674 | ] 675 | } 676 | ], 677 | "source": [ 678 | "shared = nn.Linear(8, 8)\n", 679 | "net = nn.Sequential(nn.Linear(4, 8), nn.ReLU(),\n", 680 | " shared, nn.ReLU(),\n", 681 | " shared, nn.ReLU(),\n", 682 | " nn.Linear(8, 1))\n", 683 | "net(X)\n", 684 | "print(net[2].weight.data[0] == net[4].weight.data[0])\n", 685 | "net[2].weight.data[0, 0] = 100\n", 686 | "print(net[2].weight.data[0] == net[4].weight.data[0])" 687 | ] 688 | } 689 | ], 690 | "metadata": { 691 | "celltoolbar": "Slideshow", 692 | "language_info": { 693 | "name": "python" 694 | }, 695 | "required_libs": [], 696 | "rise": { 697 | "autolaunch": true, 698 | "enable_chalkboard": true, 699 | "overlay": "
", 700 | "scroll": true 701 | } 702 | }, 703 | "nbformat": 4, 704 | "nbformat_minor": 5 705 | } -------------------------------------------------------------------------------- /chapter_deep-learning-computation/read-write.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "id": "3c764167", 6 | "metadata": { 7 | "slideshow": { 8 | "slide_type": "-" 9 | } 10 | }, 11 | "source": [ 12 | "# 读写文件\n", 13 | "\n", 14 | "加载和保存张量" 15 | ] 16 | }, 17 | { 18 | "cell_type": "code", 19 | "execution_count": 2, 20 | "id": "1ab53461", 21 | "metadata": { 22 | "execution": { 23 | "iopub.execute_input": "2023-08-18T06:56:43.733002Z", 24 | "iopub.status.busy": "2023-08-18T06:56:43.732347Z", 25 | "iopub.status.idle": "2023-08-18T06:56:43.741208Z", 26 | "shell.execute_reply": "2023-08-18T06:56:43.740416Z" 27 | }, 28 | "origin_pos": 7, 29 | "tab": [ 30 | "pytorch" 31 | ] 32 | }, 33 | "outputs": [ 34 | { 35 | "data": { 36 | "text/plain": [ 37 | "tensor([0, 1, 2, 3])" 38 | ] 39 | }, 40 | "execution_count": 2, 41 | "metadata": {}, 42 | "output_type": "execute_result" 43 | } 44 | ], 45 | "source": [ 46 | "import torch\n", 47 | "from torch import nn\n", 48 | "from torch.nn import functional as F\n", 49 | "\n", 50 | "x = torch.arange(4)\n", 51 | "torch.save(x, 'x-file')\n", 52 | "\n", 53 | "x2 = torch.load('x-file')\n", 54 | "x2" 55 | ] 56 | }, 57 | { 58 | "cell_type": "markdown", 59 | "id": "6c8adb8b", 60 | "metadata": { 61 | "slideshow": { 62 | "slide_type": "slide" 63 | } 64 | }, 65 | "source": [ 66 | "存储一个张量列表,然后把它们读回内存" 67 | ] 68 | }, 69 | { 70 | "cell_type": "code", 71 | "execution_count": 3, 72 | "id": "81027fe1", 73 | "metadata": { 74 | "execution": { 75 | "iopub.execute_input": "2023-08-18T06:56:43.744676Z", 76 | "iopub.status.busy": "2023-08-18T06:56:43.744140Z", 77 | "iopub.status.idle": "2023-08-18T06:56:43.751376Z", 78 | "shell.execute_reply": "2023-08-18T06:56:43.750630Z" 79 | }, 80 | 
"origin_pos": 12, 81 | "tab": [ 82 | "pytorch" 83 | ] 84 | }, 85 | "outputs": [ 86 | { 87 | "data": { 88 | "text/plain": [ 89 | "(tensor([0, 1, 2, 3]), tensor([0., 0., 0., 0.]))" 90 | ] 91 | }, 92 | "execution_count": 3, 93 | "metadata": {}, 94 | "output_type": "execute_result" 95 | } 96 | ], 97 | "source": [ 98 | "y = torch.zeros(4)\n", 99 | "torch.save([x, y],'x-files')\n", 100 | "x2, y2 = torch.load('x-files')\n", 101 | "(x2, y2)" 102 | ] 103 | }, 104 | { 105 | "cell_type": "markdown", 106 | "id": "47678e7f", 107 | "metadata": { 108 | "slideshow": { 109 | "slide_type": "-" 110 | } 111 | }, 112 | "source": [ 113 | "写入或读取从字符串映射到张量的字典" 114 | ] 115 | }, 116 | { 117 | "cell_type": "code", 118 | "execution_count": 4, 119 | "id": "fde1cb33", 120 | "metadata": { 121 | "execution": { 122 | "iopub.execute_input": "2023-08-18T06:56:43.754777Z", 123 | "iopub.status.busy": "2023-08-18T06:56:43.754313Z", 124 | "iopub.status.idle": "2023-08-18T06:56:43.761150Z", 125 | "shell.execute_reply": "2023-08-18T06:56:43.760369Z" 126 | }, 127 | "origin_pos": 17, 128 | "tab": [ 129 | "pytorch" 130 | ] 131 | }, 132 | "outputs": [ 133 | { 134 | "data": { 135 | "text/plain": [ 136 | "{'x': tensor([0, 1, 2, 3]), 'y': tensor([0., 0., 0., 0.])}" 137 | ] 138 | }, 139 | "execution_count": 4, 140 | "metadata": {}, 141 | "output_type": "execute_result" 142 | } 143 | ], 144 | "source": [ 145 | "mydict = {'x': x, 'y': y}\n", 146 | "torch.save(mydict, 'mydict')\n", 147 | "mydict2 = torch.load('mydict')\n", 148 | "mydict2" 149 | ] 150 | }, 151 | { 152 | "cell_type": "markdown", 153 | "id": "9cc9655d", 154 | "metadata": { 155 | "slideshow": { 156 | "slide_type": "slide" 157 | } 158 | }, 159 | "source": [ 160 | "加载和保存模型参数" 161 | ] 162 | }, 163 | { 164 | "cell_type": "code", 165 | "execution_count": 5, 166 | "id": "2672b5c2", 167 | "metadata": { 168 | "execution": { 169 | "iopub.execute_input": "2023-08-18T06:56:43.764609Z", 170 | "iopub.status.busy": "2023-08-18T06:56:43.764090Z", 171 | 
"iopub.status.idle": "2023-08-18T06:56:43.773070Z", 172 | "shell.execute_reply": "2023-08-18T06:56:43.772277Z" 173 | }, 174 | "origin_pos": 22, 175 | "tab": [ 176 | "pytorch" 177 | ] 178 | }, 179 | "outputs": [], 180 | "source": [ 181 | "class MLP(nn.Module):\n", 182 | " def __init__(self):\n", 183 | " super().__init__()\n", 184 | " self.hidden = nn.Linear(20, 256)\n", 185 | " self.output = nn.Linear(256, 10)\n", 186 | "\n", 187 | " def forward(self, x):\n", 188 | " return self.output(F.relu(self.hidden(x)))\n", 189 | "\n", 190 | "net = MLP()\n", 191 | "X = torch.randn(size=(2, 20))\n", 192 | "Y = net(X)" 193 | ] 194 | }, 195 | { 196 | "cell_type": "markdown", 197 | "id": "72d77012", 198 | "metadata": { 199 | "slideshow": { 200 | "slide_type": "slide" 201 | } 202 | }, 203 | "source": [ 204 | "将模型的参数存储在一个叫做“mlp.params”的文件中" 205 | ] 206 | }, 207 | { 208 | "cell_type": "code", 209 | "execution_count": 6, 210 | "id": "a53c1315", 211 | "metadata": { 212 | "execution": { 213 | "iopub.execute_input": "2023-08-18T06:56:43.776452Z", 214 | "iopub.status.busy": "2023-08-18T06:56:43.775942Z", 215 | "iopub.status.idle": "2023-08-18T06:56:43.780387Z", 216 | "shell.execute_reply": "2023-08-18T06:56:43.779636Z" 217 | }, 218 | "origin_pos": 27, 219 | "tab": [ 220 | "pytorch" 221 | ] 222 | }, 223 | "outputs": [], 224 | "source": [ 225 | "torch.save(net.state_dict(), 'mlp.params')" 226 | ] 227 | }, 228 | { 229 | "cell_type": "markdown", 230 | "id": "647cbcef", 231 | "metadata": { 232 | "slideshow": { 233 | "slide_type": "slide" 234 | } 235 | }, 236 | "source": [ 237 | "实例化了原始多层感知机模型的一个备份。\n", 238 | "直接读取文件中存储的参数" 239 | ] 240 | }, 241 | { 242 | "cell_type": "code", 243 | "execution_count": 7, 244 | "id": "da5e1b3f", 245 | "metadata": { 246 | "execution": { 247 | "iopub.execute_input": "2023-08-18T06:56:43.783850Z", 248 | "iopub.status.busy": "2023-08-18T06:56:43.783240Z", 249 | "iopub.status.idle": "2023-08-18T06:56:43.789905Z", 250 | "shell.execute_reply": 
"2023-08-18T06:56:43.789164Z" 251 | }, 252 | "origin_pos": 32, 253 | "tab": [ 254 | "pytorch" 255 | ] 256 | }, 257 | "outputs": [ 258 | { 259 | "data": { 260 | "text/plain": [ 261 | "MLP(\n", 262 | " (hidden): Linear(in_features=20, out_features=256, bias=True)\n", 263 | " (output): Linear(in_features=256, out_features=10, bias=True)\n", 264 | ")" 265 | ] 266 | }, 267 | "execution_count": 7, 268 | "metadata": {}, 269 | "output_type": "execute_result" 270 | } 271 | ], 272 | "source": [ 273 | "clone = MLP()\n", 274 | "clone.load_state_dict(torch.load('mlp.params'))\n", 275 | "clone.eval()" 276 | ] 277 | }, 278 | { 279 | "cell_type": "code", 280 | "execution_count": 8, 281 | "id": "a25ba1f1", 282 | "metadata": { 283 | "execution": { 284 | "iopub.execute_input": "2023-08-18T06:56:43.793400Z", 285 | "iopub.status.busy": "2023-08-18T06:56:43.792788Z", 286 | "iopub.status.idle": "2023-08-18T06:56:43.798329Z", 287 | "shell.execute_reply": "2023-08-18T06:56:43.797576Z" 288 | }, 289 | "origin_pos": 37, 290 | "tab": [ 291 | "pytorch" 292 | ] 293 | }, 294 | "outputs": [ 295 | { 296 | "data": { 297 | "text/plain": [ 298 | "tensor([[True, True, True, True, True, True, True, True, True, True],\n", 299 | " [True, True, True, True, True, True, True, True, True, True]])" 300 | ] 301 | }, 302 | "execution_count": 8, 303 | "metadata": {}, 304 | "output_type": "execute_result" 305 | } 306 | ], 307 | "source": [ 308 | "Y_clone = clone(X)\n", 309 | "Y_clone == Y" 310 | ] 311 | } 312 | ], 313 | "metadata": { 314 | "celltoolbar": "Slideshow", 315 | "language_info": { 316 | "name": "python" 317 | }, 318 | "required_libs": [], 319 | "rise": { 320 | "autolaunch": true, 321 | "enable_chalkboard": true, 322 | "overlay": "
", 323 | "scroll": true 324 | } 325 | }, 326 | "nbformat": 4, 327 | "nbformat_minor": 5 328 | } -------------------------------------------------------------------------------- /chapter_deep-learning-computation/rise.css: -------------------------------------------------------------------------------- 1 | 2 | div.text_cell_render.rendered_html { 3 | padding: 0.35em 0.1em; 4 | } 5 | 6 | div.code_cell { 7 | font-size: 120%; 8 | } 9 | 10 | div.my-top-right { 11 | position: absolute; 12 | right: 5%; 13 | top: 1em; 14 | font-size: 2em; 15 | } 16 | 17 | div.my-top-left { 18 | position: absolute; 19 | left: 5%; 20 | top: 1em; 21 | font-size: 2em; 22 | } 23 | -------------------------------------------------------------------------------- /chapter_deep-learning-computation/use-gpu.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "id": "b867430b", 6 | "metadata": { 7 | "slideshow": { 8 | "slide_type": "-" 9 | } 10 | }, 11 | "source": [ 12 | "# GPU\n", 13 | "\n", 14 | "查看显卡信息" 15 | ] 16 | }, 17 | { 18 | "cell_type": "code", 19 | "execution_count": 1, 20 | "id": "369d9baa", 21 | "metadata": { 22 | "execution": { 23 | "iopub.execute_input": "2023-08-18T06:58:06.499888Z", 24 | "iopub.status.busy": "2023-08-18T06:58:06.499324Z", 25 | "iopub.status.idle": "2023-08-18T06:58:06.859541Z", 26 | "shell.execute_reply": "2023-08-18T06:58:06.858210Z" 27 | }, 28 | "origin_pos": 1, 29 | "tab": [ 30 | "pytorch" 31 | ] 32 | }, 33 | "outputs": [ 34 | { 35 | "name": "stdout", 36 | "output_type": "stream", 37 | "text": [ 38 | "Fri Aug 18 06:58:06 2023 \r\n", 39 | "+-----------------------------------------------------------------------------+\r\n", 40 | "| NVIDIA-SMI 470.161.03 Driver Version: 470.161.03 CUDA Version: 11.7 |\r\n", 41 | "|-------------------------------+----------------------+----------------------+\r\n", 42 | "| GPU Name Persistence-M| Bus-Id Disp.A | Volatile Uncorr. 
ECC |\r\n", 43 | "| Fan Temp Perf Pwr:Usage/Cap| Memory-Usage | GPU-Util Compute M. |\r\n", 44 | "| | | MIG M. |\r\n", 45 | "|===============================+======================+======================|\r\n", 46 | "| 0 Tesla V100-SXM2... Off | 00000000:00:1B.0 Off | 0 |\r\n", 47 | "| N/A 41C P0 42W / 300W | 0MiB / 16160MiB | 0% Default |\r\n", 48 | "| | | N/A |\r\n", 49 | "+-------------------------------+----------------------+----------------------+\r\n" 50 | ] 51 | }, 52 | { 53 | "name": "stdout", 54 | "output_type": "stream", 55 | "text": [ 56 | "| 1 Tesla V100-SXM2... Off | 00000000:00:1C.0 Off | 0 |\r\n", 57 | "| N/A 44C P0 113W / 300W | 1456MiB / 16160MiB | 53% Default |\r\n", 58 | "| | | N/A |\r\n", 59 | "+-------------------------------+----------------------+----------------------+\r\n", 60 | "| 2 Tesla V100-SXM2... Off | 00000000:00:1D.0 Off | 0 |\r\n", 61 | "| N/A 43C P0 120W / 300W | 1358MiB / 16160MiB | 55% Default |\r\n", 62 | "| | | N/A |\r\n", 63 | "+-------------------------------+----------------------+----------------------+\r\n", 64 | "| 3 Tesla V100-SXM2... 
Off | 00000000:00:1E.0 Off | 0 |\r\n", 65 | "| N/A 42C P0 47W / 300W | 0MiB / 16160MiB | 0% Default |\r\n", 66 | "| | | N/A |\r\n", 67 | "+-------------------------------+----------------------+----------------------+\r\n", 68 | " \r\n", 69 | "+-----------------------------------------------------------------------------+\r\n", 70 | "| Processes: |\r\n", 71 | "| GPU GI CI PID Type Process name GPU Memory |\r\n", 72 | "| ID ID Usage |\r\n", 73 | "|=============================================================================|\r\n", 74 | "+-----------------------------------------------------------------------------+\r\n" 75 | ] 76 | } 77 | ], 78 | "source": [ 79 | "!nvidia-smi" 80 | ] 81 | }, 82 | { 83 | "cell_type": "markdown", 84 | "id": "19db9813", 85 | "metadata": { 86 | "slideshow": { 87 | "slide_type": "slide" 88 | } 89 | }, 90 | "source": [ 91 | "计算设备" 92 | ] 93 | }, 94 | { 95 | "cell_type": "code", 96 | "execution_count": 2, 97 | "id": "9f69ad46", 98 | "metadata": { 99 | "execution": { 100 | "iopub.execute_input": "2023-08-18T06:58:06.865430Z", 101 | "iopub.status.busy": "2023-08-18T06:58:06.864979Z", 102 | "iopub.status.idle": "2023-08-18T06:58:07.970615Z", 103 | "shell.execute_reply": "2023-08-18T06:58:07.969801Z" 104 | }, 105 | "origin_pos": 10, 106 | "tab": [ 107 | "pytorch" 108 | ] 109 | }, 110 | "outputs": [ 111 | { 112 | "data": { 113 | "text/plain": [ 114 | "(device(type='cpu'), device(type='cuda'), device(type='cuda', index=1))" 115 | ] 116 | }, 117 | "execution_count": 2, 118 | "metadata": {}, 119 | "output_type": "execute_result" 120 | } 121 | ], 122 | "source": [ 123 | "import torch\n", 124 | "from torch import nn\n", 125 | "\n", 126 | "torch.device('cpu'), torch.device('cuda'), torch.device('cuda:1')" 127 | ] 128 | }, 129 | { 130 | "cell_type": "markdown", 131 | "id": "1e0f86ba", 132 | "metadata": { 133 | "slideshow": { 134 | "slide_type": "-" 135 | } 136 | }, 137 | "source": [ 138 | "查询可用gpu的数量" 139 | ] 140 | }, 141 | { 142 | "cell_type": 
"code", 143 | "execution_count": 3, 144 | "id": "c29151b0", 145 | "metadata": { 146 | "execution": { 147 | "iopub.execute_input": "2023-08-18T06:58:07.974568Z", 148 | "iopub.status.busy": "2023-08-18T06:58:07.973917Z", 149 | "iopub.status.idle": "2023-08-18T06:58:07.979097Z", 150 | "shell.execute_reply": "2023-08-18T06:58:07.978337Z" 151 | }, 152 | "origin_pos": 15, 153 | "tab": [ 154 | "pytorch" 155 | ] 156 | }, 157 | "outputs": [ 158 | { 159 | "data": { 160 | "text/plain": [ 161 | "2" 162 | ] 163 | }, 164 | "execution_count": 3, 165 | "metadata": {}, 166 | "output_type": "execute_result" 167 | } 168 | ], 169 | "source": [ 170 | "torch.cuda.device_count()" 171 | ] 172 | }, 173 | { 174 | "cell_type": "markdown", 175 | "id": "add6b576", 176 | "metadata": { 177 | "slideshow": { 178 | "slide_type": "slide" 179 | } 180 | }, 181 | "source": [ 182 | "这两个函数允许我们在不存在所需所有GPU的情况下运行代码" 183 | ] 184 | }, 185 | { 186 | "cell_type": "code", 187 | "execution_count": 4, 188 | "id": "cda0ab76", 189 | "metadata": { 190 | "execution": { 191 | "iopub.execute_input": "2023-08-18T06:58:07.983261Z", 192 | "iopub.status.busy": "2023-08-18T06:58:07.982604Z", 193 | "iopub.status.idle": "2023-08-18T06:58:07.990309Z", 194 | "shell.execute_reply": "2023-08-18T06:58:07.989541Z" 195 | }, 196 | "origin_pos": 20, 197 | "tab": [ 198 | "pytorch" 199 | ] 200 | }, 201 | "outputs": [ 202 | { 203 | "data": { 204 | "text/plain": [ 205 | "(device(type='cuda', index=0),\n", 206 | " device(type='cpu'),\n", 207 | " [device(type='cuda', index=0), device(type='cuda', index=1)])" 208 | ] 209 | }, 210 | "execution_count": 4, 211 | "metadata": {}, 212 | "output_type": "execute_result" 213 | } 214 | ], 215 | "source": [ 216 | "def try_gpu(i=0): \n", 217 | " \"\"\"如果存在,则返回gpu(i),否则返回cpu()\"\"\"\n", 218 | " if torch.cuda.device_count() >= i + 1:\n", 219 | " return torch.device(f'cuda:{i}')\n", 220 | " return torch.device('cpu')\n", 221 | "\n", 222 | "def try_all_gpus(): \n", 223 | " 
\"\"\"返回所有可用的GPU,如果没有GPU,则返回[cpu(),]\"\"\"\n", 224 | " devices = [torch.device(f'cuda:{i}')\n", 225 | " for i in range(torch.cuda.device_count())]\n", 226 | " return devices if devices else [torch.device('cpu')]\n", 227 | "\n", 228 | "try_gpu(), try_gpu(10), try_all_gpus()" 229 | ] 230 | }, 231 | { 232 | "cell_type": "markdown", 233 | "id": "013f4e4b", 234 | "metadata": { 235 | "slideshow": { 236 | "slide_type": "slide" 237 | } 238 | }, 239 | "source": [ 240 | "查询张量所在的设备" 241 | ] 242 | }, 243 | { 244 | "cell_type": "code", 245 | "execution_count": 5, 246 | "id": "f6ab0f26", 247 | "metadata": { 248 | "execution": { 249 | "iopub.execute_input": "2023-08-18T06:58:07.994741Z", 250 | "iopub.status.busy": "2023-08-18T06:58:07.994126Z", 251 | "iopub.status.idle": "2023-08-18T06:58:07.999439Z", 252 | "shell.execute_reply": "2023-08-18T06:58:07.998673Z" 253 | }, 254 | "origin_pos": 25, 255 | "tab": [ 256 | "pytorch" 257 | ] 258 | }, 259 | "outputs": [ 260 | { 261 | "data": { 262 | "text/plain": [ 263 | "device(type='cpu')" 264 | ] 265 | }, 266 | "execution_count": 5, 267 | "metadata": {}, 268 | "output_type": "execute_result" 269 | } 270 | ], 271 | "source": [ 272 | "x = torch.tensor([1, 2, 3])\n", 273 | "x.device" 274 | ] 275 | }, 276 | { 277 | "cell_type": "markdown", 278 | "id": "9404d10b", 279 | "metadata": { 280 | "slideshow": { 281 | "slide_type": "slide" 282 | } 283 | }, 284 | "source": [ 285 | "存储在GPU上" 286 | ] 287 | }, 288 | { 289 | "cell_type": "code", 290 | "execution_count": 6, 291 | "id": "a67dbf2f", 292 | "metadata": { 293 | "execution": { 294 | "iopub.execute_input": "2023-08-18T06:58:08.004162Z", 295 | "iopub.status.busy": "2023-08-18T06:58:08.003541Z", 296 | "iopub.status.idle": "2023-08-18T06:58:09.277879Z", 297 | "shell.execute_reply": "2023-08-18T06:58:09.277008Z" 298 | }, 299 | "origin_pos": 30, 300 | "tab": [ 301 | "pytorch" 302 | ] 303 | }, 304 | "outputs": [ 305 | { 306 | "data": { 307 | "text/plain": [ 308 | "tensor([[1., 1., 1.],\n", 309 | " [1., 
1., 1.]], device='cuda:0')" 310 | ] 311 | }, 312 | "execution_count": 6, 313 | "metadata": {}, 314 | "output_type": "execute_result" 315 | } 316 | ], 317 | "source": [ 318 | "X = torch.ones(2, 3, device=try_gpu())\n", 319 | "X" 320 | ] 321 | }, 322 | { 323 | "cell_type": "markdown", 324 | "id": "5bf59dc4", 325 | "metadata": { 326 | "slideshow": { 327 | "slide_type": "-" 328 | } 329 | }, 330 | "source": [ 331 | "在第二个GPU上创建一个随机张量" 332 | ] 333 | }, 334 | { 335 | "cell_type": "code", 336 | "execution_count": 7, 337 | "id": "7c0d4a84", 338 | "metadata": { 339 | "execution": { 340 | "iopub.execute_input": "2023-08-18T06:58:09.282814Z", 341 | "iopub.status.busy": "2023-08-18T06:58:09.282230Z", 342 | "iopub.status.idle": "2023-08-18T06:58:10.279046Z", 343 | "shell.execute_reply": "2023-08-18T06:58:10.278227Z" 344 | }, 345 | "origin_pos": 35, 346 | "tab": [ 347 | "pytorch" 348 | ] 349 | }, 350 | "outputs": [ 351 | { 352 | "data": { 353 | "text/plain": [ 354 | "tensor([[0.4860, 0.1285, 0.0440],\n", 355 | " [0.9743, 0.4159, 0.9979]], device='cuda:1')" 356 | ] 357 | }, 358 | "execution_count": 7, 359 | "metadata": {}, 360 | "output_type": "execute_result" 361 | } 362 | ], 363 | "source": [ 364 | "Y = torch.rand(2, 3, device=try_gpu(1))\n", 365 | "Y" 366 | ] 367 | }, 368 | { 369 | "cell_type": "markdown", 370 | "id": "397b13bf", 371 | "metadata": { 372 | "slideshow": { 373 | "slide_type": "slide" 374 | } 375 | }, 376 | "source": [ 377 | "要计算`X + Y`,我们需要决定在哪里执行这个操作" 378 | ] 379 | }, 380 | { 381 | "cell_type": "code", 382 | "execution_count": 8, 383 | "id": "9e700cd2", 384 | "metadata": { 385 | "execution": { 386 | "iopub.execute_input": "2023-08-18T06:58:10.284097Z", 387 | "iopub.status.busy": "2023-08-18T06:58:10.283529Z", 388 | "iopub.status.idle": "2023-08-18T06:58:10.290795Z", 389 | "shell.execute_reply": "2023-08-18T06:58:10.290007Z" 390 | }, 391 | "origin_pos": 40, 392 | "tab": [ 393 | "pytorch" 394 | ] 395 | }, 396 | "outputs": [ 397 | { 398 | "name": "stdout", 399 | 
"output_type": "stream", 400 | "text": [ 401 | "tensor([[1., 1., 1.],\n", 402 | " [1., 1., 1.]], device='cuda:0')\n", 403 | "tensor([[1., 1., 1.],\n", 404 | " [1., 1., 1.]], device='cuda:1')\n" 405 | ] 406 | } 407 | ], 408 | "source": [ 409 | "Z = X.cuda(1)\n", 410 | "print(X)\n", 411 | "print(Z)" 412 | ] 413 | }, 414 | { 415 | "cell_type": "markdown", 416 | "id": "a8407698", 417 | "metadata": { 418 | "slideshow": { 419 | "slide_type": "slide" 420 | } 421 | }, 422 | "source": [ 423 | "现在数据在同一个GPU上(`Z`和`Y`都在),我们可以将它们相加" 424 | ] 425 | }, 426 | { 427 | "cell_type": "code", 428 | "execution_count": 9, 429 | "id": "b2f04f35", 430 | "metadata": { 431 | "execution": { 432 | "iopub.execute_input": "2023-08-18T06:58:10.295377Z", 433 | "iopub.status.busy": "2023-08-18T06:58:10.294845Z", 434 | "iopub.status.idle": "2023-08-18T06:58:10.301122Z", 435 | "shell.execute_reply": "2023-08-18T06:58:10.300297Z" 436 | }, 437 | "origin_pos": 43, 438 | "tab": [ 439 | "pytorch" 440 | ] 441 | }, 442 | "outputs": [ 443 | { 444 | "data": { 445 | "text/plain": [ 446 | "tensor([[1.4860, 1.1285, 1.0440],\n", 447 | " [1.9743, 1.4159, 1.9979]], device='cuda:1')" 448 | ] 449 | }, 450 | "execution_count": 9, 451 | "metadata": {}, 452 | "output_type": "execute_result" 453 | } 454 | ], 455 | "source": [ 456 | "Y + Z" 457 | ] 458 | }, 459 | { 460 | "cell_type": "code", 461 | "execution_count": 10, 462 | "id": "d6b95aa1", 463 | "metadata": { 464 | "execution": { 465 | "iopub.execute_input": "2023-08-18T06:58:10.305143Z", 466 | "iopub.status.busy": "2023-08-18T06:58:10.304592Z", 467 | "iopub.status.idle": "2023-08-18T06:58:10.309707Z", 468 | "shell.execute_reply": "2023-08-18T06:58:10.308894Z" 469 | }, 470 | "origin_pos": 48, 471 | "tab": [ 472 | "pytorch" 473 | ] 474 | }, 475 | "outputs": [ 476 | { 477 | "data": { 478 | "text/plain": [ 479 | "True" 480 | ] 481 | }, 482 | "execution_count": 10, 483 | "metadata": {}, 484 | "output_type": "execute_result" 485 | } 486 | ], 487 | "source": [ 488 | 
"Z.cuda(1) is Z" 489 | ] 490 | }, 491 | { 492 | "cell_type": "markdown", 493 | "id": "86e67482", 494 | "metadata": { 495 | "slideshow": { 496 | "slide_type": "slide" 497 | } 498 | }, 499 | "source": [ 500 | "神经网络与GPU" 501 | ] 502 | }, 503 | { 504 | "cell_type": "code", 505 | "execution_count": 12, 506 | "id": "955f7f67", 507 | "metadata": { 508 | "execution": { 509 | "iopub.execute_input": "2023-08-18T06:58:10.340989Z", 510 | "iopub.status.busy": "2023-08-18T06:58:10.340312Z", 511 | "iopub.status.idle": "2023-08-18T06:58:10.930969Z", 512 | "shell.execute_reply": "2023-08-18T06:58:10.930143Z" 513 | }, 514 | "origin_pos": 56, 515 | "tab": [ 516 | "pytorch" 517 | ] 518 | }, 519 | "outputs": [ 520 | { 521 | "data": { 522 | "text/plain": [ 523 | "tensor([[-0.4275],\n", 524 | " [-0.4275]], device='cuda:0', grad_fn=<AddmmBackward0>)" 525 | ] 526 | }, 527 | "execution_count": 12, 528 | "metadata": {}, 529 | "output_type": "execute_result" 530 | } 531 | ], 532 | "source": [ 533 | "net = nn.Sequential(nn.Linear(3, 1))\n", 534 | "net = net.to(device=try_gpu())\n", 535 | "\n", 536 | "net(X)" 537 | ] 538 | }, 539 | { 540 | "cell_type": "markdown", 541 | "id": "b1ae89a4", 542 | "metadata": { 543 | "slideshow": { 544 | "slide_type": "-" 545 | } 546 | }, 547 | "source": [ 548 | "确认模型参数存储在同一个GPU上" 549 | ] 550 | }, 551 | { 552 | "cell_type": "code", 553 | "execution_count": 13, 554 | "id": "bd727993", 555 | "metadata": { 556 | "execution": { 557 | "iopub.execute_input": "2023-08-18T06:58:10.935087Z", 558 | "iopub.status.busy": "2023-08-18T06:58:10.934497Z", 559 | "iopub.status.idle": "2023-08-18T06:58:10.939740Z", 560 | "shell.execute_reply": "2023-08-18T06:58:10.938974Z" 561 | }, 562 | "origin_pos": 59, 563 | "tab": [ 564 | "pytorch" 565 | ] 566 | }, 567 | "outputs": [ 568 | { 569 | "data": { 570 | "text/plain": [ 571 | "device(type='cuda', index=0)" 572 | ] 573 | }, 574 | "execution_count": 13, 575 | "metadata": {}, 576 | "output_type": "execute_result" 577 | } 578 | ], 579 | 
"source": [ 580 | 
"net[0].weight.data.device" 581 | ] 582 | } 583 | ], 584 | "metadata": { 585 | "celltoolbar": "Slideshow", 586 | "language_info": { 587 | "name": "python" 588 | }, 589 | "required_libs": [], 590 | "rise": { 591 | "autolaunch": true, 592 | "enable_chalkboard": true, 593 | "overlay": "
", 594 | "scroll": true 595 | } 596 | }, 597 | "nbformat": 4, 598 | "nbformat_minor": 5 599 | } -------------------------------------------------------------------------------- /chapter_linear-networks/linear-regression-concise.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "id": "a5b9ec6e", 6 | "metadata": { 7 | "slideshow": { 8 | "slide_type": "slide" 9 | } 10 | }, 11 | "source": [ 12 | "# 线性回归的简洁实现\n", 13 | "\n", 14 | "通过使用深度学习框架来简洁地实现\n", 15 | "线性回归模型\n", 16 | "生成数据集" 17 | ] 18 | }, 19 | { 20 | "cell_type": "code", 21 | "execution_count": 2, 22 | "id": "c26b741f", 23 | "metadata": { 24 | "execution": { 25 | "iopub.execute_input": "2023-08-18T07:01:54.616404Z", 26 | "iopub.status.busy": "2023-08-18T07:01:54.615685Z", 27 | "iopub.status.idle": "2023-08-18T07:01:54.643472Z", 28 | "shell.execute_reply": "2023-08-18T07:01:54.642512Z" 29 | }, 30 | "origin_pos": 5, 31 | "tab": [ 32 | "pytorch" 33 | ] 34 | }, 35 | "outputs": [], 36 | "source": [ 37 | "import numpy as np\n", 38 | "import torch\n", 39 | "from torch.utils import data\n", 40 | "from d2l import torch as d2l\n", 41 | "\n", 42 | "true_w = torch.tensor([2, -3.4])\n", 43 | "true_b = 4.2\n", 44 | "features, labels = d2l.synthetic_data(true_w, true_b, 1000)" 45 | ] 46 | }, 47 | { 48 | "cell_type": "markdown", 49 | "id": "25eda004", 50 | "metadata": { 51 | "slideshow": { 52 | "slide_type": "slide" 53 | } 54 | }, 55 | "source": [ 56 | "调用框架中现有的API来读取数据" 57 | ] 58 | }, 59 | { 60 | "cell_type": "code", 61 | "execution_count": 5, 62 | "id": "7c6919b8", 63 | "metadata": { 64 | "execution": { 65 | "iopub.execute_input": "2023-08-18T07:01:54.665574Z", 66 | "iopub.status.busy": "2023-08-18T07:01:54.664999Z", 67 | "iopub.status.idle": "2023-08-18T07:01:54.673523Z", 68 | "shell.execute_reply": "2023-08-18T07:01:54.672688Z" 69 | }, 70 | "origin_pos": 13, 71 | "tab": [ 72 | "pytorch" 73 | ] 74 | }, 75 | "outputs": [ 76 | { 77 
| "data": { 78 | "text/plain": [ 79 | "[tensor([[-1.3116, -0.3062],\n", 80 | " [-1.5653, 0.4830],\n", 81 | " [-0.8893, -0.9466],\n", 82 | " [-1.2417, 1.6891],\n", 83 | " [-0.7148, 0.1376],\n", 84 | " [-0.2162, -0.6122],\n", 85 | " [ 2.4048, -0.3211],\n", 86 | " [-0.1516, 0.4997],\n", 87 | " [ 1.5298, -0.2291],\n", 88 | " [ 1.3895, 1.2602]]),\n", 89 | " tensor([[ 2.6073],\n", 90 | " [-0.5787],\n", 91 | " [ 5.6339],\n", 92 | " [-4.0211],\n", 93 | " [ 2.3117],\n", 94 | " [ 5.8492],\n", 95 | " [10.0926],\n", 96 | " [ 2.1932],\n", 97 | " [ 8.0441],\n", 98 | " [ 2.6943]])]" 99 | ] 100 | }, 101 | "execution_count": 5, 102 | "metadata": {}, 103 | "output_type": "execute_result" 104 | } 105 | ], 106 | "source": [ 107 | "def load_array(data_arrays, batch_size, is_train=True): \n", 108 | " \"\"\"构造一个PyTorch数据迭代器\"\"\"\n", 109 | " dataset = data.TensorDataset(*data_arrays)\n", 110 | " return data.DataLoader(dataset, batch_size, shuffle=is_train)\n", 111 | "\n", 112 | "batch_size = 10\n", 113 | "data_iter = load_array((features, labels), batch_size)\n", 114 | "\n", 115 | "next(iter(data_iter))" 116 | ] 117 | }, 118 | { 119 | "cell_type": "markdown", 120 | "id": "b4c6012b", 121 | "metadata": { 122 | "slideshow": { 123 | "slide_type": "slide" 124 | } 125 | }, 126 | "source": [ 127 | "使用框架的预定义好的层" 128 | ] 129 | }, 130 | { 131 | "cell_type": "code", 132 | "execution_count": 6, 133 | "id": "85c54a1a", 134 | "metadata": { 135 | "execution": { 136 | "iopub.execute_input": "2023-08-18T07:01:54.677177Z", 137 | "iopub.status.busy": "2023-08-18T07:01:54.676580Z", 138 | "iopub.status.idle": "2023-08-18T07:01:54.680914Z", 139 | "shell.execute_reply": "2023-08-18T07:01:54.680130Z" 140 | }, 141 | "origin_pos": 20, 142 | "tab": [ 143 | "pytorch" 144 | ] 145 | }, 146 | "outputs": [], 147 | "source": [ 148 | "from torch import nn\n", 149 | "\n", 150 | "net = nn.Sequential(nn.Linear(2, 1))" 151 | ] 152 | }, 153 | { 154 | "cell_type": "markdown", 155 | "id": "9bf96a4d", 156 | "metadata": { 157 | 
"slideshow": { 158 | "slide_type": "-" 159 | } 160 | }, 161 | "source": [ 162 | "初始化模型参数" 163 | ] 164 | }, 165 | { 166 | "cell_type": "code", 167 | "execution_count": 7, 168 | "id": "31716c55", 169 | "metadata": { 170 | "execution": { 171 | "iopub.execute_input": "2023-08-18T07:01:54.684561Z", 172 | "iopub.status.busy": "2023-08-18T07:01:54.684036Z", 173 | "iopub.status.idle": "2023-08-18T07:01:54.690673Z", 174 | "shell.execute_reply": "2023-08-18T07:01:54.689754Z" 175 | }, 176 | "origin_pos": 29, 177 | "tab": [ 178 | "pytorch" 179 | ] 180 | }, 181 | "outputs": [ 182 | { 183 | "data": { 184 | "text/plain": [ 185 | "tensor([0.])" 186 | ] 187 | }, 188 | "execution_count": 7, 189 | "metadata": {}, 190 | "output_type": "execute_result" 191 | } 192 | ], 193 | "source": [ 194 | "net[0].weight.data.normal_(0, 0.01)\n", 195 | "net[0].bias.data.fill_(0)" 196 | ] 197 | }, 198 | { 199 | "cell_type": "markdown", 200 | "id": "11c01887", 201 | "metadata": { 202 | "slideshow": { 203 | "slide_type": "slide" 204 | } 205 | }, 206 | "source": [ 207 | "计算均方误差使用的是`MSELoss`类,也称为平方$L_2$范数" 208 | ] 209 | }, 210 | { 211 | "cell_type": "code", 212 | "execution_count": 8, 213 | "id": "19a417ac", 214 | "metadata": { 215 | "execution": { 216 | "iopub.execute_input": "2023-08-18T07:01:54.695575Z", 217 | "iopub.status.busy": "2023-08-18T07:01:54.694922Z", 218 | "iopub.status.idle": "2023-08-18T07:01:54.699373Z", 219 | "shell.execute_reply": "2023-08-18T07:01:54.698348Z" 220 | }, 221 | "origin_pos": 41, 222 | "tab": [ 223 | "pytorch" 224 | ] 225 | }, 226 | "outputs": [], 227 | "source": [ 228 | "loss = nn.MSELoss()" 229 | ] 230 | }, 231 | { 232 | "cell_type": "markdown", 233 | "id": "68315d47", 234 | "metadata": { 235 | "slideshow": { 236 | "slide_type": "-" 237 | } 238 | }, 239 | "source": [ 240 | "实例化一个`SGD`实例" 241 | ] 242 | }, 243 | { 244 | "cell_type": "code", 245 | "execution_count": 9, 246 | "id": "1ae0989f", 247 | "metadata": { 248 | "execution": { 249 | "iopub.execute_input": 
"2023-08-18T07:01:54.703905Z", 250 | "iopub.status.busy": "2023-08-18T07:01:54.703368Z", 251 | "iopub.status.idle": "2023-08-18T07:01:54.708081Z", 252 | "shell.execute_reply": "2023-08-18T07:01:54.706987Z" 253 | }, 254 | "origin_pos": 50, 255 | "tab": [ 256 | "pytorch" 257 | ] 258 | }, 259 | "outputs": [], 260 | "source": [ 261 | "trainer = torch.optim.SGD(net.parameters(), lr=0.03)" 262 | ] 263 | }, 264 | { 265 | "cell_type": "markdown", 266 | "id": "d5991157", 267 | "metadata": { 268 | "slideshow": { 269 | "slide_type": "slide" 270 | } 271 | }, 272 | "source": [ 273 | "训练过程代码与我们从零开始实现时所做的非常相似" 274 | ] 275 | }, 276 | { 277 | "cell_type": "code", 278 | "execution_count": 10, 279 | "id": "1270d706", 280 | "metadata": { 281 | "execution": { 282 | "iopub.execute_input": "2023-08-18T07:01:54.712705Z", 283 | "iopub.status.busy": "2023-08-18T07:01:54.712113Z", 284 | "iopub.status.idle": "2023-08-18T07:01:54.922720Z", 285 | "shell.execute_reply": "2023-08-18T07:01:54.921580Z" 286 | }, 287 | "origin_pos": 55, 288 | "tab": [ 289 | "pytorch" 290 | ] 291 | }, 292 | "outputs": [ 293 | { 294 | "name": "stdout", 295 | "output_type": "stream", 296 | "text": [ 297 | "epoch 1, loss 0.000248\n", 298 | "epoch 2, loss 0.000103\n", 299 | "epoch 3, loss 0.000103\n" 300 | ] 301 | } 302 | ], 303 | "source": [ 304 | "num_epochs = 3\n", 305 | "for epoch in range(num_epochs):\n", 306 | " for X, y in data_iter:\n", 307 | " l = loss(net(X) ,y)\n", 308 | " trainer.zero_grad()\n", 309 | " l.backward()\n", 310 | " trainer.step()\n", 311 | " l = loss(net(features), labels)\n", 312 | " print(f'epoch {epoch + 1}, loss {l:f}')" 313 | ] 314 | }, 315 | { 316 | "cell_type": "markdown", 317 | "id": "7c8e4f61", 318 | "metadata": { 319 | "slideshow": { 320 | "slide_type": "slide" 321 | } 322 | }, 323 | "source": [ 324 | "比较生成数据集的真实参数和通过有限数据训练获得的模型参数" 325 | ] 326 | }, 327 | { 328 | "cell_type": "code", 329 | "execution_count": 11, 330 | "id": "aa7cef5a", 331 | "metadata": { 332 | "execution": { 333 | 
"iopub.execute_input": "2023-08-18T07:01:54.927464Z", 334 | "iopub.status.busy": "2023-08-18T07:01:54.927072Z", 335 | "iopub.status.idle": "2023-08-18T07:01:54.935672Z", 336 | "shell.execute_reply": "2023-08-18T07:01:54.934585Z" 337 | }, 338 | "origin_pos": 60, 339 | "tab": [ 340 | "pytorch" 341 | ] 342 | }, 343 | "outputs": [ 344 | { 345 | "name": "stdout", 346 | "output_type": "stream", 347 | "text": [ 348 | "w的估计误差: tensor([-0.0010, -0.0003])\n", 349 | "b的估计误差: tensor([-0.0003])\n" 350 | ] 351 | } 352 | ], 353 | "source": [ 354 | "w = net[0].weight.data\n", 355 | "print('w的估计误差:', true_w - w.reshape(true_w.shape))\n", 356 | "b = net[0].bias.data\n", 357 | "print('b的估计误差:', true_b - b)" 358 | ] 359 | } 360 | ], 361 | "metadata": { 362 | "celltoolbar": "Slideshow", 363 | "language_info": { 364 | "name": "python" 365 | }, 366 | "required_libs": [], 367 | "rise": { 368 | "autolaunch": true, 369 | "enable_chalkboard": true, 370 | "overlay": "
", 371 | "scroll": true 372 | } 373 | }, 374 | "nbformat": 4, 375 | "nbformat_minor": 5 376 | } -------------------------------------------------------------------------------- /chapter_linear-networks/rise.css: -------------------------------------------------------------------------------- 1 | 2 | div.text_cell_render.rendered_html { 3 | padding: 0.35em 0.1em; 4 | } 5 | 6 | div.code_cell { 7 | font-size: 120%; 8 | } 9 | 10 | div.my-top-right { 11 | position: absolute; 12 | right: 5%; 13 | top: 1em; 14 | font-size: 2em; 15 | } 16 | 17 | div.my-top-left { 18 | position: absolute; 19 | left: 5%; 20 | top: 1em; 21 | font-size: 2em; 22 | } 23 | -------------------------------------------------------------------------------- /chapter_multilayer-perceptrons/rise.css: -------------------------------------------------------------------------------- 1 | 2 | div.text_cell_render.rendered_html { 3 | padding: 0.35em 0.1em; 4 | } 5 | 6 | div.code_cell { 7 | font-size: 120%; 8 | } 9 | 10 | div.my-top-right { 11 | position: absolute; 12 | right: 5%; 13 | top: 1em; 14 | font-size: 2em; 15 | } 16 | 17 | div.my-top-left { 18 | position: absolute; 19 | left: 5%; 20 | top: 1em; 21 | font-size: 2em; 22 | } 23 | -------------------------------------------------------------------------------- /chapter_natural-language-processing-applications/natural-language-inference-and-dataset.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "id": "e030be85", 6 | "metadata": { 7 | "slideshow": { 8 | "slide_type": "slide" 9 | } 10 | }, 11 | "source": [ 12 | "# 自然语言推断与数据集\n", 13 | "\n", 14 | "斯坦福自然语言推断语料库(Stanford Natural Language Inference,SNLI)" 15 | ] 16 | }, 17 | { 18 | "cell_type": "code", 19 | "execution_count": 1, 20 | "id": "85ccbfd4", 21 | "metadata": { 22 | "execution": { 23 | "iopub.execute_input": "2023-08-18T07:06:00.201212Z", 24 | "iopub.status.busy": "2023-08-18T07:06:00.200144Z", 25 | 
"iopub.status.idle": "2023-08-18T07:06:09.370822Z", 26 | "shell.execute_reply": "2023-08-18T07:06:09.368591Z" 27 | }, 28 | "origin_pos": 2, 29 | "tab": [ 30 | "pytorch" 31 | ] 32 | }, 33 | "outputs": [], 34 | "source": [ 35 | "import os\n", 36 | "import re\n", 37 | "import torch\n", 38 | "from torch import nn\n", 39 | "from d2l import torch as d2l\n", 40 | "\n", 41 | "d2l.DATA_HUB['SNLI'] = (\n", 42 | " 'https://nlp.stanford.edu/projects/snli/snli_1.0.zip',\n", 43 | " '9fcde07509c7e87ec61c640c1b2753d9041758e4')\n", 44 | "\n", 45 | "data_dir = d2l.download_extract('SNLI')" 46 | ] 47 | }, 48 | { 49 | "cell_type": "markdown", 50 | "id": "4b6dcd15", 51 | "metadata": { 52 | "slideshow": { 53 | "slide_type": "slide" 54 | } 55 | }, 56 | "source": [ 57 | "读取数据集" 58 | ] 59 | }, 60 | { 61 | "cell_type": "code", 62 | "execution_count": 2, 63 | "id": "fa839f80", 64 | "metadata": { 65 | "execution": { 66 | "iopub.execute_input": "2023-08-18T07:06:09.377922Z", 67 | "iopub.status.busy": "2023-08-18T07:06:09.377380Z", 68 | "iopub.status.idle": "2023-08-18T07:06:09.392203Z", 69 | "shell.execute_reply": "2023-08-18T07:06:09.390984Z" 70 | }, 71 | "origin_pos": 5, 72 | "tab": [ 73 | "pytorch" 74 | ] 75 | }, 76 | "outputs": [], 77 | "source": [ 78 | "def read_snli(data_dir, is_train):\n", 79 | " \"\"\"将SNLI数据集解析为前提、假设和标签\"\"\"\n", 80 | " def extract_text(s):\n", 81 | " s = re.sub('\\\\(', '', s)\n", 82 | " s = re.sub('\\\\)', '', s)\n", 83 | " s = re.sub('\\\\s{2,}', ' ', s)\n", 84 | " return s.strip()\n", 85 | " label_set = {'entailment': 0, 'contradiction': 1, 'neutral': 2}\n", 86 | " file_name = os.path.join(data_dir, 'snli_1.0_train.txt'\n", 87 | " if is_train else 'snli_1.0_test.txt')\n", 88 | " with open(file_name, 'r') as f:\n", 89 | " rows = [row.split('\\t') for row in f.readlines()[1:]]\n", 90 | " premises = [extract_text(row[1]) for row in rows if row[0] in label_set]\n", 91 | " hypotheses = [extract_text(row[2]) for row in rows if row[0] \\\n", 92 | " in label_set]\n", 93 | 
" labels = [label_set[row[0]] for row in rows if row[0] in label_set]\n", 94 | " return premises, hypotheses, labels" 95 | ] 96 | }, 97 | { 98 | "cell_type": "markdown", 99 | "id": "ab3a4dc3", 100 | "metadata": { 101 | "slideshow": { 102 | "slide_type": "slide" 103 | } 104 | }, 105 | "source": [ 106 | "打印前3对" 107 | ] 108 | }, 109 | { 110 | "cell_type": "code", 111 | "execution_count": 3, 112 | "id": "19101f9e", 113 | "metadata": { 114 | "execution": { 115 | "iopub.execute_input": "2023-08-18T07:06:09.397297Z", 116 | "iopub.status.busy": "2023-08-18T07:06:09.396407Z", 117 | "iopub.status.idle": "2023-08-18T07:06:23.206512Z", 118 | "shell.execute_reply": "2023-08-18T07:06:23.205574Z" 119 | }, 120 | "origin_pos": 7, 121 | "tab": [ 122 | "pytorch" 123 | ] 124 | }, 125 | "outputs": [ 126 | { 127 | "name": "stdout", 128 | "output_type": "stream", 129 | "text": [ 130 | "前提: A person on a horse jumps over a broken down airplane .\n", 131 | "假设: A person is training his horse for a competition .\n", 132 | "标签: 2\n", 133 | "前提: A person on a horse jumps over a broken down airplane .\n", 134 | "假设: A person is at a diner , ordering an omelette .\n", 135 | "标签: 1\n", 136 | "前提: A person on a horse jumps over a broken down airplane .\n", 137 | "假设: A person is outdoors , on a horse .\n", 138 | "标签: 0\n" 139 | ] 140 | } 141 | ], 142 | "source": [ 143 | "train_data = read_snli(data_dir, is_train=True)\n", 144 | "for x0, x1, y in zip(train_data[0][:3], train_data[1][:3], train_data[2][:3]):\n", 145 | " print('前提:', x0)\n", 146 | " print('假设:', x1)\n", 147 | " print('标签:', y)" 148 | ] 149 | }, 150 | { 151 | "cell_type": "markdown", 152 | "id": "ffb6b0b0", 153 | "metadata": { 154 | "slideshow": { 155 | "slide_type": "slide" 156 | } 157 | }, 158 | "source": [ 159 | "标签“蕴涵”“矛盾”和“中性”是平衡的" 160 | ] 161 | }, 162 | { 163 | "cell_type": "code", 164 | "execution_count": 4, 165 | "id": "972ca3d1", 166 | "metadata": { 167 | "execution": { 168 | "iopub.execute_input": 
"2023-08-18T07:06:23.210300Z", 169 | "iopub.status.busy": "2023-08-18T07:06:23.209728Z", 170 | "iopub.status.idle": "2023-08-18T07:06:23.531128Z", 171 | "shell.execute_reply": "2023-08-18T07:06:23.530246Z" 172 | }, 173 | "origin_pos": 9, 174 | "tab": [ 175 | "pytorch" 176 | ] 177 | }, 178 | "outputs": [ 179 | { 180 | "name": "stdout", 181 | "output_type": "stream", 182 | "text": [ 183 | "[183416, 183187, 182764]\n", 184 | "[3368, 3237, 3219]\n" 185 | ] 186 | } 187 | ], 188 | "source": [ 189 | "test_data = read_snli(data_dir, is_train=False)\n", 190 | "for data in [train_data, test_data]:\n", 191 | " print([[row for row in data[2]].count(i) for i in range(3)])" 192 | ] 193 | }, 194 | { 195 | "cell_type": "markdown", 196 | "id": "8fe876db", 197 | "metadata": { 198 | "slideshow": { 199 | "slide_type": "slide" 200 | } 201 | }, 202 | "source": [ 203 | "定义用于加载数据集的类" 204 | ] 205 | }, 206 | { 207 | "cell_type": "code", 208 | "execution_count": 5, 209 | "id": "b8b15f65", 210 | "metadata": { 211 | "execution": { 212 | "iopub.execute_input": "2023-08-18T07:06:23.534933Z", 213 | "iopub.status.busy": "2023-08-18T07:06:23.534365Z", 214 | "iopub.status.idle": "2023-08-18T07:06:23.542550Z", 215 | "shell.execute_reply": "2023-08-18T07:06:23.541714Z" 216 | }, 217 | "origin_pos": 12, 218 | "tab": [ 219 | "pytorch" 220 | ] 221 | }, 222 | "outputs": [], 223 | "source": [ 224 | "class SNLIDataset(torch.utils.data.Dataset):\n", 225 | " \"\"\"用于加载SNLI数据集的自定义数据集\"\"\"\n", 226 | " def __init__(self, dataset, num_steps, vocab=None):\n", 227 | " self.num_steps = num_steps\n", 228 | " all_premise_tokens = d2l.tokenize(dataset[0])\n", 229 | " all_hypothesis_tokens = d2l.tokenize(dataset[1])\n", 230 | " if vocab is None:\n", 231 | " self.vocab = d2l.Vocab(all_premise_tokens + \\\n", 232 | " all_hypothesis_tokens, min_freq=5, reserved_tokens=[''])\n", 233 | " else:\n", 234 | " self.vocab = vocab\n", 235 | " self.premises = self._pad(all_premise_tokens)\n", 236 | " self.hypotheses = 
self._pad(all_hypothesis_tokens)\n", 237 | " self.labels = torch.tensor(dataset[2])\n", 238 | " print('read ' + str(len(self.premises)) + ' examples')\n", 239 | "\n", 240 | " def _pad(self, lines):\n", 241 | " return torch.tensor([d2l.truncate_pad(\n", 242 | " self.vocab[line], self.num_steps, self.vocab[''])\n", 243 | " for line in lines])\n", 244 | "\n", 245 | " def __getitem__(self, idx):\n", 246 | " return (self.premises[idx], self.hypotheses[idx]), self.labels[idx]\n", 247 | "\n", 248 | " def __len__(self):\n", 249 | " return len(self.premises)" 250 | ] 251 | }, 252 | { 253 | "cell_type": "markdown", 254 | "id": "f4ab0616", 255 | "metadata": { 256 | "slideshow": { 257 | "slide_type": "slide" 258 | } 259 | }, 260 | "source": [ 261 | "整合代码" 262 | ] 263 | }, 264 | { 265 | "cell_type": "code", 266 | "execution_count": 7, 267 | "id": "08d0c755", 268 | "metadata": { 269 | "execution": { 270 | "iopub.execute_input": "2023-08-18T07:06:23.554839Z", 271 | "iopub.status.busy": "2023-08-18T07:06:23.554288Z", 272 | "iopub.status.idle": "2023-08-18T07:07:02.488484Z", 273 | "shell.execute_reply": "2023-08-18T07:07:02.487658Z" 274 | }, 275 | "origin_pos": 19, 276 | "tab": [ 277 | "pytorch" 278 | ] 279 | }, 280 | "outputs": [ 281 | { 282 | "name": "stdout", 283 | "output_type": "stream", 284 | "text": [ 285 | "read 549367 examples\n" 286 | ] 287 | }, 288 | { 289 | "name": "stdout", 290 | "output_type": "stream", 291 | "text": [ 292 | "read 9824 examples\n" 293 | ] 294 | }, 295 | { 296 | "data": { 297 | "text/plain": [ 298 | "18678" 299 | ] 300 | }, 301 | "execution_count": 7, 302 | "metadata": {}, 303 | "output_type": "execute_result" 304 | } 305 | ], 306 | "source": [ 307 | "def load_data_snli(batch_size, num_steps=50):\n", 308 | " \"\"\"下载SNLI数据集并返回数据迭代器和词表\"\"\"\n", 309 | " num_workers = d2l.get_dataloader_workers()\n", 310 | " data_dir = d2l.download_extract('SNLI')\n", 311 | " train_data = read_snli(data_dir, True)\n", 312 | " test_data = read_snli(data_dir, False)\n", 
313 | " train_set = SNLIDataset(train_data, num_steps)\n", 314 | " test_set = SNLIDataset(test_data, num_steps, train_set.vocab)\n", 315 | " train_iter = torch.utils.data.DataLoader(train_set, batch_size,\n", 316 | " shuffle=True,\n", 317 | " num_workers=num_workers)\n", 318 | " test_iter = torch.utils.data.DataLoader(test_set, batch_size,\n", 319 | " shuffle=False,\n", 320 | " num_workers=num_workers)\n", 321 | " return train_iter, test_iter, train_set.vocab\n", 322 | "\n", 323 | "train_iter, test_iter, vocab = load_data_snli(128, 50)\n", 324 | "len(vocab)" 325 | ] 326 | }, 327 | { 328 | "cell_type": "code", 329 | "execution_count": 8, 330 | "id": "d7411a33", 331 | "metadata": { 332 | "execution": { 333 | "iopub.execute_input": "2023-08-18T07:07:02.492220Z", 334 | "iopub.status.busy": "2023-08-18T07:07:02.491909Z", 335 | "iopub.status.idle": "2023-08-18T07:07:02.966465Z", 336 | "shell.execute_reply": "2023-08-18T07:07:02.965137Z" 337 | }, 338 | "origin_pos": 21, 339 | "tab": [ 340 | "pytorch" 341 | ] 342 | }, 343 | "outputs": [ 344 | { 345 | "name": "stdout", 346 | "output_type": "stream", 347 | "text": [ 348 | "torch.Size([128, 50])\n", 349 | "torch.Size([128, 50])\n", 350 | "torch.Size([128])\n" 351 | ] 352 | } 353 | ], 354 | "source": [ 355 | "for X, Y in train_iter:\n", 356 | " print(X[0].shape)\n", 357 | " print(X[1].shape)\n", 358 | " print(Y.shape)\n", 359 | " break" 360 | ] 361 | } 362 | ], 363 | "metadata": { 364 | "celltoolbar": "Slideshow", 365 | "language_info": { 366 | "name": "python" 367 | }, 368 | "required_libs": [], 369 | "rise": { 370 | "autolaunch": true, 371 | "enable_chalkboard": true, 372 | "overlay": "
", 373 | "scroll": true 374 | } 375 | }, 376 | "nbformat": 4, 377 | "nbformat_minor": 5 378 | } -------------------------------------------------------------------------------- /chapter_natural-language-processing-applications/rise.css: -------------------------------------------------------------------------------- 1 | 2 | div.text_cell_render.rendered_html { 3 | padding: 0.35em 0.1em; 4 | } 5 | 6 | div.code_cell { 7 | font-size: 120%; 8 | } 9 | 10 | div.my-top-right { 11 | position: absolute; 12 | right: 5%; 13 | top: 1em; 14 | font-size: 2em; 15 | } 16 | 17 | div.my-top-left { 18 | position: absolute; 19 | left: 5%; 20 | top: 1em; 21 | font-size: 2em; 22 | } 23 | -------------------------------------------------------------------------------- /chapter_preliminaries/autograd.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "id": "807b7862", 6 | "metadata": { 7 | "slideshow": { 8 | "slide_type": "-" 9 | } 10 | }, 11 | "source": [ 12 | "# 自动微分\n", 13 | "\n", 14 | "假设我们想对函数$y=2\\mathbf{x}^{\\top}\\mathbf{x}$关于列向量$\\mathbf{x}$求导" 15 | ] 16 | }, 17 | { 18 | "cell_type": "code", 19 | "execution_count": 1, 20 | "id": "98cd8a9e", 21 | "metadata": { 22 | "execution": { 23 | "iopub.execute_input": "2023-08-18T07:07:31.627945Z", 24 | "iopub.status.busy": "2023-08-18T07:07:31.627424Z", 25 | "iopub.status.idle": "2023-08-18T07:07:32.686372Z", 26 | "shell.execute_reply": "2023-08-18T07:07:32.685559Z" 27 | }, 28 | "origin_pos": 2, 29 | "tab": [ 30 | "pytorch" 31 | ] 32 | }, 33 | "outputs": [ 34 | { 35 | "data": { 36 | "text/plain": [ 37 | "tensor([0., 1., 2., 3.])" 38 | ] 39 | }, 40 | "execution_count": 1, 41 | "metadata": {}, 42 | "output_type": "execute_result" 43 | } 44 | ], 45 | "source": [ 46 | "import torch\n", 47 | "\n", 48 | "x = torch.arange(4.0)\n", 49 | "x" 50 | ] 51 | }, 52 | { 53 | "cell_type": "markdown", 54 | "id": "4bd68367", 55 | "metadata": { 56 | "slideshow": { 
57 | "slide_type": "slide" 58 | } 59 | }, 60 | "source": [ 61 | "在我们计算$y$关于$\\mathbf{x}$的梯度之前,需要一个地方来存储梯度" 62 | ] 63 | }, 64 | { 65 | "cell_type": "code", 66 | "execution_count": 2, 67 | "id": "e27a5df4", 68 | "metadata": { 69 | "execution": { 70 | "iopub.execute_input": "2023-08-18T07:07:32.690633Z", 71 | "iopub.status.busy": "2023-08-18T07:07:32.689882Z", 72 | "iopub.status.idle": "2023-08-18T07:07:32.694159Z", 73 | "shell.execute_reply": "2023-08-18T07:07:32.693367Z" 74 | }, 75 | "origin_pos": 7, 76 | "tab": [ 77 | "pytorch" 78 | ] 79 | }, 80 | "outputs": [], 81 | "source": [ 82 | "x.requires_grad_(True)\n", 83 | "x.grad" 84 | ] 85 | }, 86 | { 87 | "cell_type": "markdown", 88 | "id": "aeab4090", 89 | "metadata": { 90 | "slideshow": { 91 | "slide_type": "-" 92 | } 93 | }, 94 | "source": [ 95 | "现在计算$y$" 96 | ] 97 | }, 98 | { 99 | "cell_type": "code", 100 | "execution_count": 3, 101 | "id": "4c3f80b7", 102 | "metadata": { 103 | "execution": { 104 | "iopub.execute_input": "2023-08-18T07:07:32.698006Z", 105 | "iopub.status.busy": "2023-08-18T07:07:32.697167Z", 106 | "iopub.status.idle": "2023-08-18T07:07:32.705385Z", 107 | "shell.execute_reply": "2023-08-18T07:07:32.704593Z" 108 | }, 109 | "origin_pos": 12, 110 | "tab": [ 111 | "pytorch" 112 | ] 113 | }, 114 | "outputs": [ 115 | { 116 | "data": { 117 | "text/plain": [ 118 | "tensor(28., grad_fn=<MulBackward0>)" 119 | ] 120 | }, 121 | "execution_count": 3, 122 | "metadata": {}, 123 | "output_type": "execute_result" 124 | } 125 | ], 126 | "source": [ 127 | "y = 2 * torch.dot(x, x)\n", 128 | "y" 129 | ] 130 | }, 131 | { 132 | "cell_type": "markdown", 133 | "id": "c6ffaf8b", 134 | "metadata": { 135 | "slideshow": { 136 | "slide_type": "slide" 137 | } 138 | }, 139 | "source": [ 140 | "通过调用反向传播函数来自动计算`y`关于`x`每个分量的梯度" 141 | ] 142 | }, 143 | { 144 | "cell_type": "code", 145 | "execution_count": 4, 146 | "id": "a1c3a419", 147 | "metadata": { 148 | "execution": { 149 | "iopub.execute_input": "2023-08-18T07:07:32.708698Z", 150 | 
"iopub.status.busy": "2023-08-18T07:07:32.708196Z", 151 | "iopub.status.idle": "2023-08-18T07:07:32.713924Z", 152 | "shell.execute_reply": "2023-08-18T07:07:32.713091Z" 153 | }, 154 | "origin_pos": 17, 155 | "tab": [ 156 | "pytorch" 157 | ] 158 | }, 159 | "outputs": [ 160 | { 161 | "data": { 162 | "text/plain": [ 163 | "tensor([ 0., 4., 8., 12.])" 164 | ] 165 | }, 166 | "execution_count": 4, 167 | "metadata": {}, 168 | "output_type": "execute_result" 169 | } 170 | ], 171 | "source": [ 172 | "y.backward()\n", 173 | "x.grad" 174 | ] 175 | }, 176 | { 177 | "cell_type": "code", 178 | "execution_count": 5, 179 | "id": "b8493d0a", 180 | "metadata": { 181 | "execution": { 182 | "iopub.execute_input": "2023-08-18T07:07:32.718858Z", 183 | "iopub.status.busy": "2023-08-18T07:07:32.718156Z", 184 | "iopub.status.idle": "2023-08-18T07:07:32.724091Z", 185 | "shell.execute_reply": "2023-08-18T07:07:32.723104Z" 186 | }, 187 | "origin_pos": 22, 188 | "tab": [ 189 | "pytorch" 190 | ] 191 | }, 192 | "outputs": [ 193 | { 194 | "data": { 195 | "text/plain": [ 196 | "tensor([True, True, True, True])" 197 | ] 198 | }, 199 | "execution_count": 5, 200 | "metadata": {}, 201 | "output_type": "execute_result" 202 | } 203 | ], 204 | "source": [ 205 | "x.grad == 4 * x" 206 | ] 207 | }, 208 | { 209 | "cell_type": "markdown", 210 | "id": "dcb2d3a8", 211 | "metadata": { 212 | "slideshow": { 213 | "slide_type": "slide" 214 | } 215 | }, 216 | "source": [ 217 | "现在计算`x`的另一个函数" 218 | ] 219 | }, 220 | { 221 | "cell_type": "code", 222 | "execution_count": 6, 223 | "id": "f2fcd392", 224 | "metadata": { 225 | "execution": { 226 | "iopub.execute_input": "2023-08-18T07:07:32.729368Z", 227 | "iopub.status.busy": "2023-08-18T07:07:32.728433Z", 228 | "iopub.status.idle": "2023-08-18T07:07:32.736493Z", 229 | "shell.execute_reply": "2023-08-18T07:07:32.735715Z" 230 | }, 231 | "origin_pos": 27, 232 | "tab": [ 233 | "pytorch" 234 | ] 235 | }, 236 | "outputs": [ 237 | { 238 | "data": { 239 | "text/plain": [ 240 | 
"tensor([1., 1., 1., 1.])" 241 | ] 242 | }, 243 | "execution_count": 6, 244 | "metadata": {}, 245 | "output_type": "execute_result" 246 | } 247 | ], 248 | "source": [ 249 | "x.grad.zero_()\n", 250 | "y = x.sum()\n", 251 | "y.backward()\n", 252 | "x.grad" 253 | ] 254 | }, 255 | { 256 | "cell_type": "markdown", 257 | "id": "6ab86a5c", 258 | "metadata": { 259 | "slideshow": { 260 | "slide_type": "slide" 261 | } 262 | }, 263 | "source": [ 264 | "深度学习中\n", 265 | ",我们的目的不是计算微分矩阵,而是单独计算批量中每个样本的偏导数之和" 266 | ] 267 | }, 268 | { 269 | "cell_type": "code", 270 | "execution_count": 7, 271 | "id": "f4e62a5d", 272 | "metadata": { 273 | "execution": { 274 | "iopub.execute_input": "2023-08-18T07:07:32.740109Z", 275 | "iopub.status.busy": "2023-08-18T07:07:32.739419Z", 276 | "iopub.status.idle": "2023-08-18T07:07:32.745803Z", 277 | "shell.execute_reply": "2023-08-18T07:07:32.744893Z" 278 | }, 279 | "origin_pos": 32, 280 | "tab": [ 281 | "pytorch" 282 | ] 283 | }, 284 | "outputs": [ 285 | { 286 | "data": { 287 | "text/plain": [ 288 | "tensor([0., 2., 4., 6.])" 289 | ] 290 | }, 291 | "execution_count": 7, 292 | "metadata": {}, 293 | "output_type": "execute_result" 294 | } 295 | ], 296 | "source": [ 297 | "x.grad.zero_()\n", 298 | "y = x * x\n", 299 | "y.sum().backward()\n", 300 | "x.grad" 301 | ] 302 | }, 303 | { 304 | "cell_type": "markdown", 305 | "id": "aba72343", 306 | "metadata": { 307 | "slideshow": { 308 | "slide_type": "slide" 309 | } 310 | }, 311 | "source": [ 312 | "将某些计算移动到记录的计算图之外" 313 | ] 314 | }, 315 | { 316 | "cell_type": "code", 317 | "execution_count": 8, 318 | "id": "8dab493d", 319 | "metadata": { 320 | "execution": { 321 | "iopub.execute_input": "2023-08-18T07:07:32.749398Z", 322 | "iopub.status.busy": "2023-08-18T07:07:32.748759Z", 323 | "iopub.status.idle": "2023-08-18T07:07:32.755280Z", 324 | "shell.execute_reply": "2023-08-18T07:07:32.754543Z" 325 | }, 326 | "origin_pos": 37, 327 | "tab": [ 328 | "pytorch" 329 | ] 330 | }, 331 | "outputs": [ 332 | { 333 | 
"data": { 334 | "text/plain": [ 335 | "tensor([True, True, True, True])" 336 | ] 337 | }, 338 | "execution_count": 8, 339 | "metadata": {}, 340 | "output_type": "execute_result" 341 | } 342 | ], 343 | "source": [ 344 | "x.grad.zero_()\n", 345 | "y = x * x\n", 346 | "u = y.detach()\n", 347 | "z = u * x\n", 348 | "\n", 349 | "z.sum().backward()\n", 350 | "x.grad == u" 351 | ] 352 | }, 353 | { 354 | "cell_type": "code", 355 | "execution_count": 9, 356 | "id": "271a9b3a", 357 | "metadata": { 358 | "execution": { 359 | "iopub.execute_input": "2023-08-18T07:07:32.759344Z", 360 | "iopub.status.busy": "2023-08-18T07:07:32.758633Z", 361 | "iopub.status.idle": "2023-08-18T07:07:32.764663Z", 362 | "shell.execute_reply": "2023-08-18T07:07:32.763922Z" 363 | }, 364 | "origin_pos": 42, 365 | "tab": [ 366 | "pytorch" 367 | ] 368 | }, 369 | "outputs": [ 370 | { 371 | "data": { 372 | "text/plain": [ 373 | "tensor([True, True, True, True])" 374 | ] 375 | }, 376 | "execution_count": 9, 377 | "metadata": {}, 378 | "output_type": "execute_result" 379 | } 380 | ], 381 | "source": [ 382 | "x.grad.zero_()\n", 383 | "y.sum().backward()\n", 384 | "x.grad == 2 * x" 385 | ] 386 | }, 387 | { 388 | "cell_type": "markdown", 389 | "id": "491b3462", 390 | "metadata": { 391 | "slideshow": { 392 | "slide_type": "slide" 393 | } 394 | }, 395 | "source": [ 396 | "即使构建函数的计算图需要通过Python控制流(例如,条件、循环或任意函数调用),我们仍然可以计算得到的变量的梯度" 397 | ] 398 | }, 399 | { 400 | "cell_type": "code", 401 | "execution_count": 12, 402 | "id": "2595bdc0", 403 | "metadata": { 404 | "execution": { 405 | "iopub.execute_input": "2023-08-18T07:07:32.785728Z", 406 | "iopub.status.busy": "2023-08-18T07:07:32.785179Z", 407 | "iopub.status.idle": "2023-08-18T07:07:32.790672Z", 408 | "shell.execute_reply": "2023-08-18T07:07:32.789892Z" 409 | }, 410 | "origin_pos": 57, 411 | "tab": [ 412 | "pytorch" 413 | ] 414 | }, 415 | "outputs": [ 416 | { 417 | "data": { 418 | "text/plain": [ 419 | "tensor(True)" 420 | ] 421 | }, 422 | "execution_count": 12, 
423 | "metadata": {}, 424 | "output_type": "execute_result" 425 | } 426 | ], 427 | "source": [ 428 | "def f(a):\n", 429 | " b = a * 2\n", 430 | " while b.norm() < 1000:\n", 431 | " b = b * 2\n", 432 | " if b.sum() > 0:\n", 433 | " c = b\n", 434 | " else:\n", 435 | " c = 100 * b\n", 436 | " return c\n", 437 | "\n", 438 | "a = torch.randn(size=(), requires_grad=True)\n", 439 | "d = f(a)\n", 440 | "d.backward()\n", 441 | "\n", 442 | "a.grad == d / a" 443 | ] 444 | } 445 | ], 446 | "metadata": { 447 | "celltoolbar": "Slideshow", 448 | "language_info": { 449 | "name": "python" 450 | }, 451 | "required_libs": [], 452 | "rise": { 453 | "autolaunch": true, 454 | "enable_chalkboard": true, 455 | "overlay": "
", 456 | "scroll": true 457 | } 458 | }, 459 | "nbformat": 4, 460 | "nbformat_minor": 5 461 | } -------------------------------------------------------------------------------- /chapter_preliminaries/lookup-api.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "id": "a9a80d6d", 6 | "metadata": { 7 | "slideshow": { 8 | "slide_type": "-" 9 | } 10 | }, 11 | "source": [ 12 | "# 查阅文档\n", 13 | "\n" 14 | ] 15 | }, 16 | { 17 | "cell_type": "markdown", 18 | "id": "0be06f74", 19 | "metadata": { 20 | "slideshow": { 21 | "slide_type": "-" 22 | } 23 | }, 24 | "source": [ 25 | "查询随机数生成模块中的所有属性" 26 | ] 27 | }, 28 | { 29 | "cell_type": "code", 30 | "execution_count": 1, 31 | "id": "8f7f4d63", 32 | "metadata": { 33 | "execution": { 34 | "iopub.execute_input": "2023-08-18T07:05:30.519062Z", 35 | "iopub.status.busy": "2023-08-18T07:05:30.518501Z", 36 | "iopub.status.idle": "2023-08-18T07:05:31.469749Z", 37 | "shell.execute_reply": "2023-08-18T07:05:31.468858Z" 38 | }, 39 | "origin_pos": 6, 40 | "tab": [ 41 | "pytorch" 42 | ] 43 | }, 44 | "outputs": [ 45 | { 46 | "name": "stdout", 47 | "output_type": "stream", 48 | "text": [ 49 | "['AbsTransform', 'AffineTransform', 'Bernoulli', 'Beta', 'Binomial', 'CatTransform', 'Categorical', 'Cauchy', 'Chi2', 'ComposeTransform', 'ContinuousBernoulli', 'CorrCholeskyTransform', 'CumulativeDistributionTransform', 'Dirichlet', 'Distribution', 'ExpTransform', 'Exponential', 'ExponentialFamily', 'FisherSnedecor', 'Gamma', 'Geometric', 'Gumbel', 'HalfCauchy', 'HalfNormal', 'Independent', 'IndependentTransform', 'Kumaraswamy', 'LKJCholesky', 'Laplace', 'LogNormal', 'LogisticNormal', 'LowRankMultivariateNormal', 'LowerCholeskyTransform', 'MixtureSameFamily', 'Multinomial', 'MultivariateNormal', 'NegativeBinomial', 'Normal', 'OneHotCategorical', 'OneHotCategoricalStraightThrough', 'Pareto', 'Poisson', 'PowerTransform', 'RelaxedBernoulli', 
'RelaxedOneHotCategorical', 'ReshapeTransform', 'SigmoidTransform', 'SoftmaxTransform', 'SoftplusTransform', 'StackTransform', 'StickBreakingTransform', 'StudentT', 'TanhTransform', 'Transform', 'TransformedDistribution', 'Uniform', 'VonMises', 'Weibull', 'Wishart', '__all__', '__builtins__', '__cached__', '__doc__', '__file__', '__loader__', '__name__', '__package__', '__path__', '__spec__', 'bernoulli', 'beta', 'biject_to', 'binomial', 'categorical', 'cauchy', 'chi2', 'constraint_registry', 'constraints', 'continuous_bernoulli', 'dirichlet', 'distribution', 'exp_family', 'exponential', 'fishersnedecor', 'gamma', 'geometric', 'gumbel', 'half_cauchy', 'half_normal', 'identity_transform', 'independent', 'kl', 'kl_divergence', 'kumaraswamy', 'laplace', 'lkj_cholesky', 'log_normal', 'logistic_normal', 'lowrank_multivariate_normal', 'mixture_same_family', 'multinomial', 'multivariate_normal', 'negative_binomial', 'normal', 'one_hot_categorical', 'pareto', 'poisson', 'register_kl', 'relaxed_bernoulli', 'relaxed_categorical', 'studentT', 'transform_to', 'transformed_distribution', 'transforms', 'uniform', 'utils', 'von_mises', 'weibull', 'wishart']\n" 50 | ] 51 | } 52 | ], 53 | "source": [ 54 | "import torch\n", 55 | "\n", 56 | "print(dir(torch.distributions))" 57 | ] 58 | }, 59 | { 60 | "cell_type": "markdown", 61 | "id": "4b506160", 62 | "metadata": { 63 | "slideshow": { 64 | "slide_type": "slide" 65 | } 66 | }, 67 | "source": [ 68 | "查看张量`ones`函数的用法" 69 | ] 70 | }, 71 | { 72 | "cell_type": "code", 73 | "execution_count": 2, 74 | "id": "a16494ed", 75 | "metadata": { 76 | "execution": { 77 | "iopub.execute_input": "2023-08-18T07:05:31.473606Z", 78 | "iopub.status.busy": "2023-08-18T07:05:31.472946Z", 79 | "iopub.status.idle": "2023-08-18T07:05:31.477780Z", 80 | "shell.execute_reply": "2023-08-18T07:05:31.476938Z" 81 | }, 82 | "origin_pos": 11, 83 | "tab": [ 84 | "pytorch" 85 | ] 86 | }, 87 | "outputs": [ 88 | { 89 | "name": "stdout", 90 | "output_type": "stream", 91 | 
"text": [ 92 | "Help on built-in function ones in module torch:\n", 93 | "\n", 94 | "ones(...)\n", 95 | " ones(*size, *, out=None, dtype=None, layout=torch.strided, device=None, requires_grad=False) -> Tensor\n", 96 | " \n", 97 | " Returns a tensor filled with the scalar value `1`, with the shape defined\n", 98 | " by the variable argument :attr:`size`.\n", 99 | " \n", 100 | " Args:\n", 101 | " size (int...): a sequence of integers defining the shape of the output tensor.\n", 102 | " Can be a variable number of arguments or a collection like a list or tuple.\n", 103 | " \n", 104 | " Keyword arguments:\n", 105 | " out (Tensor, optional): the output tensor.\n", 106 | " dtype (:class:`torch.dtype`, optional): the desired data type of returned tensor.\n", 107 | " Default: if ``None``, uses a global default (see :func:`torch.set_default_tensor_type`).\n", 108 | " layout (:class:`torch.layout`, optional): the desired layout of returned Tensor.\n", 109 | " Default: ``torch.strided``.\n", 110 | " device (:class:`torch.device`, optional): the desired device of returned tensor.\n", 111 | " Default: if ``None``, uses the current device for the default tensor type\n", 112 | " (see :func:`torch.set_default_tensor_type`). :attr:`device` will be the CPU\n", 113 | " for CPU tensor types and the current CUDA device for CUDA tensor types.\n", 114 | " requires_grad (bool, optional): If autograd should record operations on the\n", 115 | " returned tensor. 
Default: ``False``.\n", 116 | " \n", 117 | " Example::\n", 118 | " \n", 119 | " >>> torch.ones(2, 3)\n", 120 | " tensor([[ 1., 1., 1.],\n", 121 | " [ 1., 1., 1.]])\n", 122 | " \n", 123 | " >>> torch.ones(5)\n", 124 | " tensor([ 1., 1., 1., 1., 1.])\n", 125 | "\n" 126 | ] 127 | } 128 | ], 129 | "source": [ 130 | "help(torch.ones)" 131 | ] 132 | }, 133 | { 134 | "cell_type": "markdown", 135 | "id": "5bb71da8", 136 | "metadata": { 137 | "slideshow": { 138 | "slide_type": "slide" 139 | } 140 | }, 141 | "source": [ 142 | "运行一个快速测试" 143 | ] 144 | }, 145 | { 146 | "cell_type": "code", 147 | "execution_count": 3, 148 | "id": "7870b2f5", 149 | "metadata": { 150 | "execution": { 151 | "iopub.execute_input": "2023-08-18T07:05:31.481310Z", 152 | "iopub.status.busy": "2023-08-18T07:05:31.480685Z", 153 | "iopub.status.idle": "2023-08-18T07:05:31.490398Z", 154 | "shell.execute_reply": "2023-08-18T07:05:31.489581Z" 155 | }, 156 | "origin_pos": 16, 157 | "tab": [ 158 | "pytorch" 159 | ] 160 | }, 161 | "outputs": [ 162 | { 163 | "data": { 164 | "text/plain": [ 165 | "tensor([1., 1., 1., 1.])" 166 | ] 167 | }, 168 | "execution_count": 3, 169 | "metadata": {}, 170 | "output_type": "execute_result" 171 | } 172 | ], 173 | "source": [ 174 | "torch.ones(4)" 175 | ] 176 | } 177 | ], 178 | "metadata": { 179 | "celltoolbar": "Slideshow", 180 | "language_info": { 181 | "name": "python" 182 | }, 183 | "required_libs": [], 184 | "rise": { 185 | "autolaunch": true, 186 | "enable_chalkboard": true, 187 | "overlay": "
", 188 | "scroll": true 189 | } 190 | }, 191 | "nbformat": 4, 192 | "nbformat_minor": 5 193 | } -------------------------------------------------------------------------------- /chapter_preliminaries/ndarray.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "id": "ee4089f8", 6 | "metadata": { 7 | "slideshow": { 8 | "slide_type": "-" 9 | } 10 | }, 11 | "source": [ 12 | "# 数据操作\n", 13 | "\n" 14 | ] 15 | }, 16 | { 17 | "cell_type": "markdown", 18 | "id": "52ec48cf", 19 | "metadata": { 20 | "slideshow": { 21 | "slide_type": "-" 22 | } 23 | }, 24 | "source": [ 25 | "首先,我们导入`torch`。请注意,虽然它被称为PyTorch,但是代码中使用`torch`而不是`pytorch`" 26 | ] 27 | }, 28 | { 29 | "cell_type": "code", 30 | "execution_count": 1, 31 | "id": "278e6d3f", 32 | "metadata": { 33 | "execution": { 34 | "iopub.execute_input": "2023-08-18T07:05:01.545874Z", 35 | "iopub.status.busy": "2023-08-18T07:05:01.545147Z", 36 | "iopub.status.idle": "2023-08-18T07:05:02.992816Z", 37 | "shell.execute_reply": "2023-08-18T07:05:02.991719Z" 38 | }, 39 | "origin_pos": 5, 40 | "tab": [ 41 | "pytorch" 42 | ] 43 | }, 44 | "outputs": [], 45 | "source": [ 46 | "import torch" 47 | ] 48 | }, 49 | { 50 | "cell_type": "markdown", 51 | "id": "8f89b9c0", 52 | "metadata": { 53 | "slideshow": { 54 | "slide_type": "slide" 55 | } 56 | }, 57 | "source": [ 58 | "张量表示一个由数值组成的数组,这个数组可能有多个维度" 59 | ] 60 | }, 61 | { 62 | "cell_type": "code", 63 | "execution_count": 2, 64 | "id": "b1700627", 65 | "metadata": { 66 | "execution": { 67 | "iopub.execute_input": "2023-08-18T07:05:02.997386Z", 68 | "iopub.status.busy": "2023-08-18T07:05:02.996970Z", 69 | "iopub.status.idle": "2023-08-18T07:05:03.007632Z", 70 | "shell.execute_reply": "2023-08-18T07:05:03.006483Z" 71 | }, 72 | "origin_pos": 13, 73 | "tab": [ 74 | "pytorch" 75 | ] 76 | }, 77 | "outputs": [ 78 | { 79 | "data": { 80 | "text/plain": [ 81 | "tensor([ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11])" 82 | ] 
83 | }, 84 | "execution_count": 2, 85 | "metadata": {}, 86 | "output_type": "execute_result" 87 | } 88 | ], 89 | "source": [ 90 | "x = torch.arange(12)\n", 91 | "x" 92 | ] 93 | }, 94 | { 95 | "cell_type": "markdown", 96 | "id": "8ed94c74", 97 | "metadata": { 98 | "slideshow": { 99 | "slide_type": "slide" 100 | } 101 | }, 102 | "source": [ 103 | "可以通过张量的`shape`属性来访问张量(沿每个轴的长度)的*形状*\n", 104 | "和张量中元素的总数" 105 | ] 106 | }, 107 | { 108 | "cell_type": "code", 109 | "execution_count": 3, 110 | "id": "b86b6572", 111 | "metadata": { 112 | "execution": { 113 | "iopub.execute_input": "2023-08-18T07:05:03.011628Z", 114 | "iopub.status.busy": "2023-08-18T07:05:03.011110Z", 115 | "iopub.status.idle": "2023-08-18T07:05:03.017191Z", 116 | "shell.execute_reply": "2023-08-18T07:05:03.016193Z" 117 | }, 118 | "origin_pos": 17, 119 | "tab": [ 120 | "pytorch" 121 | ] 122 | }, 123 | "outputs": [ 124 | { 125 | "data": { 126 | "text/plain": [ 127 | "torch.Size([12])" 128 | ] 129 | }, 130 | "execution_count": 3, 131 | "metadata": {}, 132 | "output_type": "execute_result" 133 | } 134 | ], 135 | "source": [ 136 | "x.shape" 137 | ] 138 | }, 139 | { 140 | "cell_type": "code", 141 | "execution_count": 4, 142 | "id": "b8b69ca9", 143 | "metadata": { 144 | "execution": { 145 | "iopub.execute_input": "2023-08-18T07:05:03.020938Z", 146 | "iopub.status.busy": "2023-08-18T07:05:03.020483Z", 147 | "iopub.status.idle": "2023-08-18T07:05:03.026998Z", 148 | "shell.execute_reply": "2023-08-18T07:05:03.025752Z" 149 | }, 150 | "origin_pos": 20, 151 | "tab": [ 152 | "pytorch" 153 | ] 154 | }, 155 | "outputs": [ 156 | { 157 | "data": { 158 | "text/plain": [ 159 | "12" 160 | ] 161 | }, 162 | "execution_count": 4, 163 | "metadata": {}, 164 | "output_type": "execute_result" 165 | } 166 | ], 167 | "source": [ 168 | "x.numel()" 169 | ] 170 | }, 171 | { 172 | "cell_type": "markdown", 173 | "id": "588600c4", 174 | "metadata": { 175 | "slideshow": { 176 | "slide_type": "slide" 177 | } 178 | }, 179 | "source": [ 180 | 
"要想改变一个张量的形状而不改变元素数量和元素值,可以调用`reshape`函数" 181 | ] 182 | }, 183 | { 184 | "cell_type": "code", 185 | "execution_count": 5, 186 | "id": "0f294243", 187 | "metadata": { 188 | "execution": { 189 | "iopub.execute_input": "2023-08-18T07:05:03.031842Z", 190 | "iopub.status.busy": "2023-08-18T07:05:03.031448Z", 191 | "iopub.status.idle": "2023-08-18T07:05:03.039288Z", 192 | "shell.execute_reply": "2023-08-18T07:05:03.038227Z" 193 | }, 194 | "origin_pos": 24, 195 | "tab": [ 196 | "pytorch" 197 | ] 198 | }, 199 | "outputs": [ 200 | { 201 | "data": { 202 | "text/plain": [ 203 | "tensor([[ 0, 1, 2, 3],\n", 204 | " [ 4, 5, 6, 7],\n", 205 | " [ 8, 9, 10, 11]])" 206 | ] 207 | }, 208 | "execution_count": 5, 209 | "metadata": {}, 210 | "output_type": "execute_result" 211 | } 212 | ], 213 | "source": [ 214 | "X = x.reshape(3, 4)\n", 215 | "X" 216 | ] 217 | }, 218 | { 219 | "cell_type": "markdown", 220 | "id": "403fad02", 221 | "metadata": { 222 | "slideshow": { 223 | "slide_type": "slide" 224 | } 225 | }, 226 | "source": [ 227 | "使用全0、全1、其他常量,或者从特定分布中随机采样的数字" 228 | ] 229 | }, 230 | { 231 | "cell_type": "code", 232 | "execution_count": 6, 233 | "id": "b23c3056", 234 | "metadata": { 235 | "execution": { 236 | "iopub.execute_input": "2023-08-18T07:05:03.044733Z", 237 | "iopub.status.busy": "2023-08-18T07:05:03.043866Z", 238 | "iopub.status.idle": "2023-08-18T07:05:03.052195Z", 239 | "shell.execute_reply": "2023-08-18T07:05:03.051146Z" 240 | }, 241 | "origin_pos": 29, 242 | "tab": [ 243 | "pytorch" 244 | ] 245 | }, 246 | "outputs": [ 247 | { 248 | "data": { 249 | "text/plain": [ 250 | "tensor([[[0., 0., 0., 0.],\n", 251 | " [0., 0., 0., 0.],\n", 252 | " [0., 0., 0., 0.]],\n", 253 | "\n", 254 | " [[0., 0., 0., 0.],\n", 255 | " [0., 0., 0., 0.],\n", 256 | " [0., 0., 0., 0.]]])" 257 | ] 258 | }, 259 | "execution_count": 6, 260 | "metadata": {}, 261 | "output_type": "execute_result" 262 | } 263 | ], 264 | "source": [ 265 | "torch.zeros((2, 3, 4))" 266 | ] 267 | }, 268 | { 269 | "cell_type": 
"code", 270 | "execution_count": 7, 271 | "id": "25981960", 272 | "metadata": { 273 | "execution": { 274 | "iopub.execute_input": "2023-08-18T07:05:03.057264Z", 275 | "iopub.status.busy": "2023-08-18T07:05:03.056578Z", 276 | "iopub.status.idle": "2023-08-18T07:05:03.064973Z", 277 | "shell.execute_reply": "2023-08-18T07:05:03.063853Z" 278 | }, 279 | "origin_pos": 34, 280 | "tab": [ 281 | "pytorch" 282 | ] 283 | }, 284 | "outputs": [ 285 | { 286 | "data": { 287 | "text/plain": [ 288 | "tensor([[[1., 1., 1., 1.],\n", 289 | " [1., 1., 1., 1.],\n", 290 | " [1., 1., 1., 1.]],\n", 291 | "\n", 292 | " [[1., 1., 1., 1.],\n", 293 | " [1., 1., 1., 1.],\n", 294 | " [1., 1., 1., 1.]]])" 295 | ] 296 | }, 297 | "execution_count": 7, 298 | "metadata": {}, 299 | "output_type": "execute_result" 300 | } 301 | ], 302 | "source": [ 303 | "torch.ones((2, 3, 4))" 304 | ] 305 | }, 306 | { 307 | "cell_type": "code", 308 | "execution_count": 8, 309 | "id": "2493f09a", 310 | "metadata": { 311 | "execution": { 312 | "iopub.execute_input": "2023-08-18T07:05:03.069946Z", 313 | "iopub.status.busy": "2023-08-18T07:05:03.069231Z", 314 | "iopub.status.idle": "2023-08-18T07:05:03.077304Z", 315 | "shell.execute_reply": "2023-08-18T07:05:03.076139Z" 316 | }, 317 | "origin_pos": 39, 318 | "tab": [ 319 | "pytorch" 320 | ] 321 | }, 322 | "outputs": [ 323 | { 324 | "data": { 325 | "text/plain": [ 326 | "tensor([[-0.0135, 0.0665, 0.0912, 0.3212],\n", 327 | " [ 1.4653, 0.1843, -1.6995, -0.3036],\n", 328 | " [ 1.7646, 1.0450, 0.2457, -0.7732]])" 329 | ] 330 | }, 331 | "execution_count": 8, 332 | "metadata": {}, 333 | "output_type": "execute_result" 334 | } 335 | ], 336 | "source": [ 337 | "torch.randn(3, 4)" 338 | ] 339 | }, 340 | { 341 | "cell_type": "markdown", 342 | "id": "211d5b8e", 343 | "metadata": { 344 | "slideshow": { 345 | "slide_type": "slide" 346 | } 347 | }, 348 | "source": [ 349 | "通过提供包含数值的Python列表(或嵌套列表),来为所需张量中的每个元素赋予确定值" 350 | ] 351 | }, 352 | { 353 | "cell_type": "code", 354 | 
"execution_count": 9, 355 | "id": "708be494", 356 | "metadata": { 357 | "execution": { 358 | "iopub.execute_input": "2023-08-18T07:05:03.082360Z", 359 | "iopub.status.busy": "2023-08-18T07:05:03.081424Z", 360 | "iopub.status.idle": "2023-08-18T07:05:03.090148Z", 361 | "shell.execute_reply": "2023-08-18T07:05:03.088973Z" 362 | }, 363 | "origin_pos": 44, 364 | "tab": [ 365 | "pytorch" 366 | ] 367 | }, 368 | "outputs": [ 369 | { 370 | "data": { 371 | "text/plain": [ 372 | "tensor([[2, 1, 4, 3],\n", 373 | " [1, 2, 3, 4],\n", 374 | " [4, 3, 2, 1]])" 375 | ] 376 | }, 377 | "execution_count": 9, 378 | "metadata": {}, 379 | "output_type": "execute_result" 380 | } 381 | ], 382 | "source": [ 383 | "torch.tensor([[2, 1, 4, 3], [1, 2, 3, 4], [4, 3, 2, 1]])" 384 | ] 385 | }, 386 | { 387 | "cell_type": "markdown", 388 | "id": "14185106", 389 | "metadata": { 390 | "slideshow": { 391 | "slide_type": "slide" 392 | } 393 | }, 394 | "source": [ 395 | "常见的标准算术运算符(`+`、`-`、`*`、`/`和`**`)都可以被升级为按元素运算" 396 | ] 397 | }, 398 | { 399 | "cell_type": "code", 400 | "execution_count": 10, 401 | "id": "99b28553", 402 | "metadata": { 403 | "execution": { 404 | "iopub.execute_input": "2023-08-18T07:05:03.095504Z", 405 | "iopub.status.busy": "2023-08-18T07:05:03.094688Z", 406 | "iopub.status.idle": "2023-08-18T07:05:03.106084Z", 407 | "shell.execute_reply": "2023-08-18T07:05:03.104976Z" 408 | }, 409 | "origin_pos": 49, 410 | "tab": [ 411 | "pytorch" 412 | ] 413 | }, 414 | "outputs": [ 415 | { 416 | "data": { 417 | "text/plain": [ 418 | "(tensor([ 3., 4., 6., 10.]),\n", 419 | " tensor([-1., 0., 2., 6.]),\n", 420 | " tensor([ 2., 4., 8., 16.]),\n", 421 | " tensor([0.5000, 1.0000, 2.0000, 4.0000]),\n", 422 | " tensor([ 1., 4., 16., 64.]))" 423 | ] 424 | }, 425 | "execution_count": 10, 426 | "metadata": {}, 427 | "output_type": "execute_result" 428 | } 429 | ], 430 | "source": [ 431 | "x = torch.tensor([1.0, 2, 4, 8])\n", 432 | "y = torch.tensor([2, 2, 2, 2])\n", 433 | "x + y, x - y, x * y, x / y, x ** 
y" 434 | ] 435 | }, 436 | { 437 | "cell_type": "markdown", 438 | "id": "7e770d36", 439 | "metadata": { 440 | "slideshow": { 441 | "slide_type": "-" 442 | } 443 | }, 444 | "source": [ 445 | "“按元素”方式可以应用更多的计算" 446 | ] 447 | }, 448 | { 449 | "cell_type": "code", 450 | "execution_count": 11, 451 | "id": "ef07c995", 452 | "metadata": { 453 | "execution": { 454 | "iopub.execute_input": "2023-08-18T07:05:03.110973Z", 455 | "iopub.status.busy": "2023-08-18T07:05:03.110221Z", 456 | "iopub.status.idle": "2023-08-18T07:05:03.120389Z", 457 | "shell.execute_reply": "2023-08-18T07:05:03.119471Z" 458 | }, 459 | "origin_pos": 54, 460 | "tab": [ 461 | "pytorch" 462 | ] 463 | }, 464 | "outputs": [ 465 | { 466 | "data": { 467 | "text/plain": [ 468 | "tensor([2.7183e+00, 7.3891e+00, 5.4598e+01, 2.9810e+03])" 469 | ] 470 | }, 471 | "execution_count": 11, 472 | "metadata": {}, 473 | "output_type": "execute_result" 474 | } 475 | ], 476 | "source": [ 477 | "torch.exp(x)" 478 | ] 479 | }, 480 | { 481 | "cell_type": "markdown", 482 | "id": "e8a09ecd", 483 | "metadata": { 484 | "slideshow": { 485 | "slide_type": "slide" 486 | } 487 | }, 488 | "source": [ 489 | "我们也可以把多个张量*连结*(concatenate)在一起" 490 | ] 491 | }, 492 | { 493 | "cell_type": "code", 494 | "execution_count": 12, 495 | "id": "a583b891", 496 | "metadata": { 497 | "execution": { 498 | "iopub.execute_input": "2023-08-18T07:05:03.125263Z", 499 | "iopub.status.busy": "2023-08-18T07:05:03.124477Z", 500 | "iopub.status.idle": "2023-08-18T07:05:03.136328Z", 501 | "shell.execute_reply": "2023-08-18T07:05:03.135199Z" 502 | }, 503 | "origin_pos": 59, 504 | "tab": [ 505 | "pytorch" 506 | ] 507 | }, 508 | "outputs": [ 509 | { 510 | "data": { 511 | "text/plain": [ 512 | "(tensor([[ 0., 1., 2., 3.],\n", 513 | " [ 4., 5., 6., 7.],\n", 514 | " [ 8., 9., 10., 11.],\n", 515 | " [ 2., 1., 4., 3.],\n", 516 | " [ 1., 2., 3., 4.],\n", 517 | " [ 4., 3., 2., 1.]]),\n", 518 | " tensor([[ 0., 1., 2., 3., 2., 1., 4., 3.],\n", 519 | " [ 4., 5., 6., 7., 1., 2., 
3., 4.],\n", 520 | " [ 8., 9., 10., 11., 4., 3., 2., 1.]]))" 521 | ] 522 | }, 523 | "execution_count": 12, 524 | "metadata": {}, 525 | "output_type": "execute_result" 526 | } 527 | ], 528 | "source": [ 529 | "X = torch.arange(12, dtype=torch.float32).reshape((3,4))\n", 530 | "Y = torch.tensor([[2.0, 1, 4, 3], [1, 2, 3, 4], [4, 3, 2, 1]])\n", 531 | "torch.cat((X, Y), dim=0), torch.cat((X, Y), dim=1)" 532 | ] 533 | }, 534 | { 535 | "cell_type": "markdown", 536 | "id": "fac225a4", 537 | "metadata": { 538 | "slideshow": { 539 | "slide_type": "slide" 540 | } 541 | }, 542 | "source": [ 543 | "通过*逻辑运算符*构建二元张量" 544 | ] 545 | }, 546 | { 547 | "cell_type": "code", 548 | "execution_count": 13, 549 | "id": "6405ec63", 550 | "metadata": { 551 | "execution": { 552 | "iopub.execute_input": "2023-08-18T07:05:03.141449Z", 553 | "iopub.status.busy": "2023-08-18T07:05:03.140776Z", 554 | "iopub.status.idle": "2023-08-18T07:05:03.148692Z", 555 | "shell.execute_reply": "2023-08-18T07:05:03.147491Z" 556 | }, 557 | "origin_pos": 63, 558 | "tab": [ 559 | "pytorch" 560 | ] 561 | }, 562 | "outputs": [ 563 | { 564 | "data": { 565 | "text/plain": [ 566 | "tensor([[False, True, False, True],\n", 567 | " [False, False, False, False],\n", 568 | " [False, False, False, False]])" 569 | ] 570 | }, 571 | "execution_count": 13, 572 | "metadata": {}, 573 | "output_type": "execute_result" 574 | } 575 | ], 576 | "source": [ 577 | "X == Y" 578 | ] 579 | }, 580 | { 581 | "cell_type": "markdown", 582 | "id": "ab37c495", 583 | "metadata": { 584 | "slideshow": { 585 | "slide_type": "slide" 586 | } 587 | }, 588 | "source": [ 589 | "对张量中的所有元素进行求和,会产生一个单元素张量" 590 | ] 591 | }, 592 | { 593 | "cell_type": "code", 594 | "execution_count": 14, 595 | "id": "a13cb291", 596 | "metadata": { 597 | "execution": { 598 | "iopub.execute_input": "2023-08-18T07:05:03.153907Z", 599 | "iopub.status.busy": "2023-08-18T07:05:03.152814Z", 600 | "iopub.status.idle": "2023-08-18T07:05:03.160277Z", 601 | "shell.execute_reply": 
"2023-08-18T07:05:03.159188Z" 602 | }, 603 | "origin_pos": 65, 604 | "tab": [ 605 | "pytorch" 606 | ] 607 | }, 608 | "outputs": [ 609 | { 610 | "data": { 611 | "text/plain": [ 612 | "tensor(66.)" 613 | ] 614 | }, 615 | "execution_count": 14, 616 | "metadata": {}, 617 | "output_type": "execute_result" 618 | } 619 | ], 620 | "source": [ 621 | "X.sum()" 622 | ] 623 | }, 624 | { 625 | "cell_type": "markdown", 626 | "id": "63f0408f", 627 | "metadata": { 628 | "slideshow": { 629 | "slide_type": "slide" 630 | } 631 | }, 632 | "source": [ 633 | "即使形状不同,我们仍然可以通过调用\n", 634 | "*广播机制*(broadcasting mechanism)来执行按元素操作" 635 | ] 636 | }, 637 | { 638 | "cell_type": "code", 639 | "execution_count": 15, 640 | "id": "a1de79a2", 641 | "metadata": { 642 | "execution": { 643 | "iopub.execute_input": "2023-08-18T07:05:03.165305Z", 644 | "iopub.status.busy": "2023-08-18T07:05:03.164274Z", 645 | "iopub.status.idle": "2023-08-18T07:05:03.172771Z", 646 | "shell.execute_reply": "2023-08-18T07:05:03.171692Z" 647 | }, 648 | "origin_pos": 69, 649 | "tab": [ 650 | "pytorch" 651 | ] 652 | }, 653 | "outputs": [ 654 | { 655 | "data": { 656 | "text/plain": [ 657 | "(tensor([[0],\n", 658 | " [1],\n", 659 | " [2]]),\n", 660 | " tensor([[0, 1]]))" 661 | ] 662 | }, 663 | "execution_count": 15, 664 | "metadata": {}, 665 | "output_type": "execute_result" 666 | } 667 | ], 668 | "source": [ 669 | "a = torch.arange(3).reshape((3, 1))\n", 670 | "b = torch.arange(2).reshape((1, 2))\n", 671 | "a, b" 672 | ] 673 | }, 674 | { 675 | "cell_type": "code", 676 | "execution_count": 16, 677 | "id": "4d8904b1", 678 | "metadata": { 679 | "execution": { 680 | "iopub.execute_input": "2023-08-18T07:05:03.177900Z", 681 | "iopub.status.busy": "2023-08-18T07:05:03.176935Z", 682 | "iopub.status.idle": "2023-08-18T07:05:03.184212Z", 683 | "shell.execute_reply": "2023-08-18T07:05:03.183156Z" 684 | }, 685 | "origin_pos": 73, 686 | "tab": [ 687 | "pytorch" 688 | ] 689 | }, 690 | "outputs": [ 691 | { 692 | "data": { 693 | 
"text/plain": [ 694 | "tensor([[0, 1],\n", 695 | " [1, 2],\n", 696 | " [2, 3]])" 697 | ] 698 | }, 699 | "execution_count": 16, 700 | "metadata": {}, 701 | "output_type": "execute_result" 702 | } 703 | ], 704 | "source": [ 705 | "a + b" 706 | ] 707 | }, 708 | { 709 | "cell_type": "markdown", 710 | "id": "16a2cb7b", 711 | "metadata": { 712 | "slideshow": { 713 | "slide_type": "slide" 714 | } 715 | }, 716 | "source": [ 717 | "可以用`[-1]`选择最后一个元素,可以用`[1:3]`选择第二个和第三个元素" 718 | ] 719 | }, 720 | { 721 | "cell_type": "code", 722 | "execution_count": 17, 723 | "id": "b62b00c7", 724 | "metadata": { 725 | "execution": { 726 | "iopub.execute_input": "2023-08-18T07:05:03.189786Z", 727 | "iopub.status.busy": "2023-08-18T07:05:03.188961Z", 728 | "iopub.status.idle": "2023-08-18T07:05:03.197712Z", 729 | "shell.execute_reply": "2023-08-18T07:05:03.196559Z" 730 | }, 731 | "origin_pos": 75, 732 | "tab": [ 733 | "pytorch" 734 | ] 735 | }, 736 | "outputs": [ 737 | { 738 | "data": { 739 | "text/plain": [ 740 | "(tensor([ 8., 9., 10., 11.]),\n", 741 | " tensor([[ 4., 5., 6., 7.],\n", 742 | " [ 8., 9., 10., 11.]]))" 743 | ] 744 | }, 745 | "execution_count": 17, 746 | "metadata": {}, 747 | "output_type": "execute_result" 748 | } 749 | ], 750 | "source": [ 751 | "X[-1], X[1:3]" 752 | ] 753 | }, 754 | { 755 | "cell_type": "markdown", 756 | "id": "9d9f38fe", 757 | "metadata": { 758 | "slideshow": { 759 | "slide_type": "slide" 760 | } 761 | }, 762 | "source": [ 763 | "除读取外,我们还可以通过指定索引来将元素写入矩阵" 764 | ] 765 | }, 766 | { 767 | "cell_type": "code", 768 | "execution_count": 18, 769 | "id": "56a8261a", 770 | "metadata": { 771 | "execution": { 772 | "iopub.execute_input": "2023-08-18T07:05:03.203157Z", 773 | "iopub.status.busy": "2023-08-18T07:05:03.202390Z", 774 | "iopub.status.idle": "2023-08-18T07:05:03.210176Z", 775 | "shell.execute_reply": "2023-08-18T07:05:03.209097Z" 776 | }, 777 | "origin_pos": 78, 778 | "tab": [ 779 | "pytorch" 780 | ] 781 | }, 782 | "outputs": [ 783 | { 784 | "data": { 785 | 
"text/plain": [ 786 | "tensor([[ 0., 1., 2., 3.],\n", 787 | " [ 4., 5., 9., 7.],\n", 788 | " [ 8., 9., 10., 11.]])" 789 | ] 790 | }, 791 | "execution_count": 18, 792 | "metadata": {}, 793 | "output_type": "execute_result" 794 | } 795 | ], 796 | "source": [ 797 | "X[1, 2] = 9\n", 798 | "X" 799 | ] 800 | }, 801 | { 802 | "cell_type": "markdown", 803 | "id": "70b1b082", 804 | "metadata": { 805 | "slideshow": { 806 | "slide_type": "slide" 807 | } 808 | }, 809 | "source": [ 810 | "为多个元素赋值相同的值,我们只需要索引所有元素,然后为它们赋值" 811 | ] 812 | }, 813 | { 814 | "cell_type": "code", 815 | "execution_count": 19, 816 | "id": "bd48bae9", 817 | "metadata": { 818 | "execution": { 819 | "iopub.execute_input": "2023-08-18T07:05:03.214118Z", 820 | "iopub.status.busy": "2023-08-18T07:05:03.213430Z", 821 | "iopub.status.idle": "2023-08-18T07:05:03.221215Z", 822 | "shell.execute_reply": "2023-08-18T07:05:03.220084Z" 823 | }, 824 | "origin_pos": 81, 825 | "tab": [ 826 | "pytorch" 827 | ] 828 | }, 829 | "outputs": [ 830 | { 831 | "data": { 832 | "text/plain": [ 833 | "tensor([[12., 12., 12., 12.],\n", 834 | " [12., 12., 12., 12.],\n", 835 | " [ 8., 9., 10., 11.]])" 836 | ] 837 | }, 838 | "execution_count": 19, 839 | "metadata": {}, 840 | "output_type": "execute_result" 841 | } 842 | ], 843 | "source": [ 844 | "X[0:2, :] = 12\n", 845 | "X" 846 | ] 847 | }, 848 | { 849 | "cell_type": "markdown", 850 | "id": "9360fd66", 851 | "metadata": { 852 | "slideshow": { 853 | "slide_type": "slide" 854 | } 855 | }, 856 | "source": [ 857 | "运行一些操作可能会导致为新结果分配内存" 858 | ] 859 | }, 860 | { 861 | "cell_type": "code", 862 | "execution_count": 20, 863 | "id": "6bcd6d07", 864 | "metadata": { 865 | "execution": { 866 | "iopub.execute_input": "2023-08-18T07:05:03.225106Z", 867 | "iopub.status.busy": "2023-08-18T07:05:03.224353Z", 868 | "iopub.status.idle": "2023-08-18T07:05:03.231715Z", 869 | "shell.execute_reply": "2023-08-18T07:05:03.230626Z" 870 | }, 871 | "origin_pos": 84, 872 | "tab": [ 873 | "pytorch" 874 | ] 875 | }, 
876 | "outputs": [ 877 | { 878 | "data": { 879 | "text/plain": [ 880 | "False" 881 | ] 882 | }, 883 | "execution_count": 20, 884 | "metadata": {}, 885 | "output_type": "execute_result" 886 | } 887 | ], 888 | "source": [ 889 | "before = id(Y)\n", 890 | "Y = Y + X\n", 891 | "id(Y) == before" 892 | ] 893 | }, 894 | { 895 | "cell_type": "markdown", 896 | "id": "1764d724", 897 | "metadata": { 898 | "slideshow": { 899 | "slide_type": "-" 900 | } 901 | }, 902 | "source": [ 903 | "执行原地操作" 904 | ] 905 | }, 906 | { 907 | "cell_type": "code", 908 | "execution_count": 21, 909 | "id": "13b7fdf6", 910 | "metadata": { 911 | "execution": { 912 | "iopub.execute_input": "2023-08-18T07:05:03.236933Z", 913 | "iopub.status.busy": "2023-08-18T07:05:03.236016Z", 914 | "iopub.status.idle": "2023-08-18T07:05:03.243252Z", 915 | "shell.execute_reply": "2023-08-18T07:05:03.242153Z" 916 | }, 917 | "origin_pos": 89, 918 | "tab": [ 919 | "pytorch" 920 | ] 921 | }, 922 | "outputs": [ 923 | { 924 | "name": "stdout", 925 | "output_type": "stream", 926 | "text": [ 927 | "id(Z): 140327634811696\n", 928 | "id(Z): 140327634811696\n" 929 | ] 930 | } 931 | ], 932 | "source": [ 933 | "Z = torch.zeros_like(Y)\n", 934 | "print('id(Z):', id(Z))\n", 935 | "Z[:] = X + Y\n", 936 | "print('id(Z):', id(Z))" 937 | ] 938 | }, 939 | { 940 | "cell_type": "markdown", 941 | "id": "08b6232e", 942 | "metadata": { 943 | "slideshow": { 944 | "slide_type": "slide" 945 | } 946 | }, 947 | "source": [ 948 | "如果在后续计算中没有重复使用`X`,\n", 949 | "我们也可以使用`X[:] = X + Y`或`X += Y`来减少操作的内存开销" 950 | ] 951 | }, 952 | { 953 | "cell_type": "code", 954 | "execution_count": 22, 955 | "id": "c8a97d75", 956 | "metadata": { 957 | "execution": { 958 | "iopub.execute_input": "2023-08-18T07:05:03.248290Z", 959 | "iopub.status.busy": "2023-08-18T07:05:03.247521Z", 960 | "iopub.status.idle": "2023-08-18T07:05:03.255046Z", 961 | "shell.execute_reply": "2023-08-18T07:05:03.253935Z" 962 | }, 963 | "origin_pos": 94, 964 | "tab": [ 965 | "pytorch" 966 | ] 967 
| }, 968 | "outputs": [ 969 | { 970 | "data": { 971 | "text/plain": [ 972 | "True" 973 | ] 974 | }, 975 | "execution_count": 22, 976 | "metadata": {}, 977 | "output_type": "execute_result" 978 | } 979 | ], 980 | "source": [ 981 | "before = id(X)\n", 982 | "X += Y\n", 983 | "id(X) == before" 984 | ] 985 | }, 986 | { 987 | "cell_type": "markdown", 988 | "id": "62d8bda0", 989 | "metadata": { 990 | "slideshow": { 991 | "slide_type": "slide" 992 | } 993 | }, 994 | "source": [ 995 | "转换为NumPy张量(`ndarray`)" 996 | ] 997 | }, 998 | { 999 | "cell_type": "code", 1000 | "execution_count": 23, 1001 | "id": "7386f580", 1002 | "metadata": { 1003 | "execution": { 1004 | "iopub.execute_input": "2023-08-18T07:05:03.259655Z", 1005 | "iopub.status.busy": "2023-08-18T07:05:03.259273Z", 1006 | "iopub.status.idle": "2023-08-18T07:05:03.266501Z", 1007 | "shell.execute_reply": "2023-08-18T07:05:03.265738Z" 1008 | }, 1009 | "origin_pos": 100, 1010 | "tab": [ 1011 | "pytorch" 1012 | ] 1013 | }, 1014 | "outputs": [ 1015 | { 1016 | "data": { 1017 | "text/plain": [ 1018 | "(numpy.ndarray, torch.Tensor)" 1019 | ] 1020 | }, 1021 | "execution_count": 23, 1022 | "metadata": {}, 1023 | "output_type": "execute_result" 1024 | } 1025 | ], 1026 | "source": [ 1027 | "A = X.numpy()\n", 1028 | "B = torch.tensor(A)\n", 1029 | "type(A), type(B)" 1030 | ] 1031 | }, 1032 | { 1033 | "cell_type": "markdown", 1034 | "id": "bd310838", 1035 | "metadata": { 1036 | "slideshow": { 1037 | "slide_type": "-" 1038 | } 1039 | }, 1040 | "source": [ 1041 | "将大小为1的张量转换为Python标量" 1042 | ] 1043 | }, 1044 | { 1045 | "cell_type": "code", 1046 | "execution_count": 24, 1047 | "id": "10a429bd", 1048 | "metadata": { 1049 | "execution": { 1050 | "iopub.execute_input": "2023-08-18T07:05:03.270566Z", 1051 | "iopub.status.busy": "2023-08-18T07:05:03.270102Z", 1052 | "iopub.status.idle": "2023-08-18T07:05:03.276982Z", 1053 | "shell.execute_reply": "2023-08-18T07:05:03.276051Z" 1054 | }, 1055 | "origin_pos": 105, 1056 | "tab": [ 1057 | 
"pytorch" 1058 | ] 1059 | }, 1060 | "outputs": [ 1061 | { 1062 | "data": { 1063 | "text/plain": [ 1064 | "(tensor([3.5000]), 3.5, 3.5, 3)" 1065 | ] 1066 | }, 1067 | "execution_count": 24, 1068 | "metadata": {}, 1069 | "output_type": "execute_result" 1070 | } 1071 | ], 1072 | "source": [ 1073 | "a = torch.tensor([3.5])\n", 1074 | "a, a.item(), float(a), int(a)" 1075 | ] 1076 | } 1077 | ], 1078 | "metadata": { 1079 | "celltoolbar": "Slideshow", 1080 | "language_info": { 1081 | "name": "python" 1082 | }, 1083 | "required_libs": [], 1084 | "rise": { 1085 | "autolaunch": true, 1086 | "enable_chalkboard": true, 1087 | "overlay": "
", 1088 | "scroll": true 1089 | } 1090 | }, 1091 | "nbformat": 4, 1092 | "nbformat_minor": 5 1093 | } -------------------------------------------------------------------------------- /chapter_preliminaries/pandas.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "id": "bbe8a716", 6 | "metadata": { 7 | "slideshow": { 8 | "slide_type": "-" 9 | } 10 | }, 11 | "source": [ 12 | "# 数据预处理\n", 13 | "\n", 14 | "创建一个人工数据集,并存储在CSV(逗号分隔值)文件" 15 | ] 16 | }, 17 | { 18 | "cell_type": "code", 19 | "execution_count": 1, 20 | "id": "ee72fd16", 21 | "metadata": { 22 | "execution": { 23 | "iopub.execute_input": "2023-08-18T07:03:38.903209Z", 24 | "iopub.status.busy": "2023-08-18T07:03:38.902351Z", 25 | "iopub.status.idle": "2023-08-18T07:03:38.918117Z", 26 | "shell.execute_reply": "2023-08-18T07:03:38.916775Z" 27 | }, 28 | "origin_pos": 1, 29 | "tab": [ 30 | "pytorch" 31 | ] 32 | }, 33 | "outputs": [], 34 | "source": [ 35 | "import os\n", 36 | "\n", 37 | "os.makedirs(os.path.join('..', 'data'), exist_ok=True)\n", 38 | "data_file = os.path.join('..', 'data', 'house_tiny.csv')\n", 39 | "with open(data_file, 'w') as f:\n", 40 | " f.write('NumRooms,Alley,Price\\n')\n", 41 | " f.write('NA,Pave,127500\\n')\n", 42 | " f.write('2,NA,106000\\n')\n", 43 | " f.write('4,NA,178100\\n')\n", 44 | " f.write('NA,NA,140000\\n')" 45 | ] 46 | }, 47 | { 48 | "cell_type": "markdown", 49 | "id": "50063800", 50 | "metadata": { 51 | "slideshow": { 52 | "slide_type": "slide" 53 | } 54 | }, 55 | "source": [ 56 | "从创建的CSV文件中加载原始数据集" 57 | ] 58 | }, 59 | { 60 | "cell_type": "code", 61 | "execution_count": 2, 62 | "id": "5fb16e52", 63 | "metadata": { 64 | "execution": { 65 | "iopub.execute_input": "2023-08-18T07:03:38.923957Z", 66 | "iopub.status.busy": "2023-08-18T07:03:38.923101Z", 67 | "iopub.status.idle": "2023-08-18T07:03:39.372116Z", 68 | "shell.execute_reply": "2023-08-18T07:03:39.371151Z" 69 | }, 70 | 
"origin_pos": 3, 71 | "tab": [ 72 | "pytorch" 73 | ] 74 | }, 75 | "outputs": [ 76 | { 77 | "name": "stdout", 78 | "output_type": "stream", 79 | "text": [ 80 | " NumRooms Alley Price\n", 81 | "0 NaN Pave 127500\n", 82 | "1 2.0 NaN 106000\n", 83 | "2 4.0 NaN 178100\n", 84 | "3 NaN NaN 140000\n" 85 | ] 86 | } 87 | ], 88 | "source": [ 89 | "import pandas as pd\n", 90 | "\n", 91 | "data = pd.read_csv(data_file)\n", 92 | "print(data)" 93 | ] 94 | }, 95 | { 96 | "cell_type": "markdown", 97 | "id": "2a0d9ba1", 98 | "metadata": { 99 | "slideshow": { 100 | "slide_type": "slide" 101 | } 102 | }, 103 | "source": [ 104 | "为了处理缺失的数据,典型的方法包括*插值法*和*删除法*,\n", 105 | "这里,我们将考虑插值法" 106 | ] 107 | }, 108 | { 109 | "cell_type": "code", 110 | "execution_count": 3, 111 | "id": "d460a301", 112 | "metadata": { 113 | "execution": { 114 | "iopub.execute_input": "2023-08-18T07:03:39.375828Z", 115 | "iopub.status.busy": "2023-08-18T07:03:39.375535Z", 116 | "iopub.status.idle": "2023-08-18T07:03:39.389220Z", 117 | "shell.execute_reply": "2023-08-18T07:03:39.387998Z" 118 | }, 119 | "origin_pos": 5, 120 | "tab": [ 121 | "pytorch" 122 | ] 123 | }, 124 | "outputs": [ 125 | { 126 | "name": "stdout", 127 | "output_type": "stream", 128 | "text": [ 129 | " NumRooms Alley\n", 130 | "0 3.0 Pave\n", 131 | "1 2.0 NaN\n", 132 | "2 4.0 NaN\n", 133 | "3 3.0 NaN\n" 134 | ] 135 | } 136 | ], 137 | "source": [ 138 | "inputs, outputs = data.iloc[:, 0:2], data.iloc[:, 2]\n", 139 | "inputs = inputs.fillna(inputs.mean())\n", 140 | "print(inputs)" 141 | ] 142 | }, 143 | { 144 | "cell_type": "markdown", 145 | "id": "13fbca82", 146 | "metadata": { 147 | "slideshow": { 148 | "slide_type": "slide" 149 | } 150 | }, 151 | "source": [ 152 | "对于`inputs`中的类别值或离散值,我们将“NaN”视为一个类别" 153 | ] 154 | }, 155 | { 156 | "cell_type": "code", 157 | "execution_count": 4, 158 | "id": "09ab8738", 159 | "metadata": { 160 | "execution": { 161 | "iopub.execute_input": "2023-08-18T07:03:39.394176Z", 162 | "iopub.status.busy": 
"2023-08-18T07:03:39.393444Z", 163 | "iopub.status.idle": "2023-08-18T07:03:39.409892Z", 164 | "shell.execute_reply": "2023-08-18T07:03:39.408559Z" 165 | }, 166 | "origin_pos": 7, 167 | "tab": [ 168 | "pytorch" 169 | ] 170 | }, 171 | "outputs": [ 172 | { 173 | "name": "stdout", 174 | "output_type": "stream", 175 | "text": [ 176 | " NumRooms Alley_Pave Alley_nan\n", 177 | "0 3.0 1 0\n", 178 | "1 2.0 0 1\n", 179 | "2 4.0 0 1\n", 180 | "3 3.0 0 1\n" 181 | ] 182 | } 183 | ], 184 | "source": [ 185 | "inputs = pd.get_dummies(inputs, dummy_na=True)\n", 186 | "print(inputs)" 187 | ] 188 | }, 189 | { 190 | "cell_type": "markdown", 191 | "id": "56c7800a", 192 | "metadata": { 193 | "slideshow": { 194 | "slide_type": "slide" 195 | } 196 | }, 197 | "source": [ 198 | "现在`inputs`和`outputs`中的所有条目都是数值类型,它们可以转换为张量格式" 199 | ] 200 | }, 201 | { 202 | "cell_type": "code", 203 | "execution_count": 5, 204 | "id": "4f551c6d", 205 | "metadata": { 206 | "execution": { 207 | "iopub.execute_input": "2023-08-18T07:03:39.414531Z", 208 | "iopub.status.busy": "2023-08-18T07:03:39.413831Z", 209 | "iopub.status.idle": "2023-08-18T07:03:40.467689Z", 210 | "shell.execute_reply": "2023-08-18T07:03:40.466637Z" 211 | }, 212 | "origin_pos": 10, 213 | "tab": [ 214 | "pytorch" 215 | ] 216 | }, 217 | "outputs": [ 218 | { 219 | "data": { 220 | "text/plain": [ 221 | "(tensor([[3., 1., 0.],\n", 222 | " [2., 0., 1.],\n", 223 | " [4., 0., 1.],\n", 224 | " [3., 0., 1.]], dtype=torch.float64),\n", 225 | " tensor([127500., 106000., 178100., 140000.], dtype=torch.float64))" 226 | ] 227 | }, 228 | "execution_count": 5, 229 | "metadata": {}, 230 | "output_type": "execute_result" 231 | } 232 | ], 233 | "source": [ 234 | "import torch\n", 235 | "\n", 236 | "X = torch.tensor(inputs.to_numpy(dtype=float))\n", 237 | "y = torch.tensor(outputs.to_numpy(dtype=float))\n", 238 | "X, y" 239 | ] 240 | } 241 | ], 242 | "metadata": { 243 | "celltoolbar": "Slideshow", 244 | "language_info": { 245 | "name": "python" 246 | }, 247 | 
"required_libs": [], 248 | "rise": { 249 | "autolaunch": true, 250 | "enable_chalkboard": true, 251 | "overlay": "
", 252 | "scroll": true 253 | } 254 | }, 255 | "nbformat": 4, 256 | "nbformat_minor": 5 257 | } -------------------------------------------------------------------------------- /chapter_preliminaries/rise.css: -------------------------------------------------------------------------------- 1 | 2 | div.text_cell_render.rendered_html { 3 | padding: 0.35em 0.1em; 4 | } 5 | 6 | div.code_cell { 7 | font-size: 120%; 8 | } 9 | 10 | div.my-top-right { 11 | position: absolute; 12 | right: 5%; 13 | top: 1em; 14 | font-size: 2em; 15 | } 16 | 17 | div.my-top-left { 18 | position: absolute; 19 | left: 5%; 20 | top: 1em; 21 | font-size: 2em; 22 | } 23 | -------------------------------------------------------------------------------- /chapter_recurrent-modern/encoder-decoder.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "id": "80a4901a", 6 | "metadata": { 7 | "slideshow": { 8 | "slide_type": "-" 9 | } 10 | }, 11 | "source": [ 12 | "# 编码器-解码器架构\n", 13 | "\n", 14 | "编码器" 15 | ] 16 | }, 17 | { 18 | "cell_type": "code", 19 | "execution_count": 1, 20 | "id": "17f77c60", 21 | "metadata": { 22 | "execution": { 23 | "iopub.execute_input": "2023-08-18T07:05:48.406295Z", 24 | "iopub.status.busy": "2023-08-18T07:05:48.405469Z", 25 | "iopub.status.idle": "2023-08-18T07:05:49.653322Z", 26 | "shell.execute_reply": "2023-08-18T07:05:49.651979Z" 27 | }, 28 | "origin_pos": 2, 29 | "tab": [ 30 | "pytorch" 31 | ] 32 | }, 33 | "outputs": [], 34 | "source": [ 35 | "from torch import nn\n", 36 | "\n", 37 | "\n", 38 | "class Encoder(nn.Module):\n", 39 | " \"\"\"编码器-解码器架构的基本编码器接口\"\"\"\n", 40 | " def __init__(self, **kwargs):\n", 41 | " super(Encoder, self).__init__(**kwargs)\n", 42 | "\n", 43 | " def forward(self, X, *args):\n", 44 | " raise NotImplementedError" 45 | ] 46 | }, 47 | { 48 | "cell_type": "markdown", 49 | "id": "614d0a9c", 50 | "metadata": { 51 | "slideshow": { 52 | "slide_type": "slide" 
53 | } 54 | }, 55 | "source": [ 56 | "解码器" 57 | ] 58 | }, 59 | { 60 | "cell_type": "code", 61 | "execution_count": 2, 62 | "id": "5c7a6471", 63 | "metadata": { 64 | "execution": { 65 | "iopub.execute_input": "2023-08-18T07:05:49.659889Z", 66 | "iopub.status.busy": "2023-08-18T07:05:49.659020Z", 67 | "iopub.status.idle": "2023-08-18T07:05:49.666360Z", 68 | "shell.execute_reply": "2023-08-18T07:05:49.665230Z" 69 | }, 70 | "origin_pos": 7, 71 | "tab": [ 72 | "pytorch" 73 | ] 74 | }, 75 | "outputs": [], 76 | "source": [ 77 | "class Decoder(nn.Module):\n", 78 | " \"\"\"编码器-解码器架构的基本解码器接口\"\"\"\n", 79 | " def __init__(self, **kwargs):\n", 80 | " super(Decoder, self).__init__(**kwargs)\n", 81 | "\n", 82 | " def init_state(self, enc_outputs, *args):\n", 83 | " raise NotImplementedError\n", 84 | "\n", 85 | " def forward(self, X, state):\n", 86 | " raise NotImplementedError" 87 | ] 88 | }, 89 | { 90 | "cell_type": "markdown", 91 | "id": "eae87cdc", 92 | "metadata": { 93 | "slideshow": { 94 | "slide_type": "slide" 95 | } 96 | }, 97 | "source": [ 98 | "合并编码器和解码器" 99 | ] 100 | }, 101 | { 102 | "cell_type": "code", 103 | "execution_count": 3, 104 | "id": "53fb0929", 105 | "metadata": { 106 | "execution": { 107 | "iopub.execute_input": "2023-08-18T07:05:49.671685Z", 108 | "iopub.status.busy": "2023-08-18T07:05:49.670944Z", 109 | "iopub.status.idle": "2023-08-18T07:05:49.678831Z", 110 | "shell.execute_reply": "2023-08-18T07:05:49.677718Z" 111 | }, 112 | "origin_pos": 12, 113 | "tab": [ 114 | "pytorch" 115 | ] 116 | }, 117 | "outputs": [], 118 | "source": [ 119 | "class EncoderDecoder(nn.Module):\n", 120 | " \"\"\"编码器-解码器架构的基类\"\"\"\n", 121 | " def __init__(self, encoder, decoder, **kwargs):\n", 122 | " super(EncoderDecoder, self).__init__(**kwargs)\n", 123 | " self.encoder = encoder\n", 124 | " self.decoder = decoder\n", 125 | "\n", 126 | " def forward(self, enc_X, dec_X, *args):\n", 127 | " enc_outputs = self.encoder(enc_X, *args)\n", 128 | " dec_state = 
self.decoder.init_state(enc_outputs, *args)\n", 129 | " return self.decoder(dec_X, dec_state)" 130 | ] 131 | } 132 | ], 133 | "metadata": { 134 | "celltoolbar": "Slideshow", 135 | "language_info": { 136 | "name": "python" 137 | }, 138 | "required_libs": [], 139 | "rise": { 140 | "autolaunch": true, 141 | "enable_chalkboard": true, 142 | "overlay": "
", 143 | "scroll": true 144 | } 145 | }, 146 | "nbformat": 4, 147 | "nbformat_minor": 5 148 | } -------------------------------------------------------------------------------- /chapter_recurrent-modern/rise.css: -------------------------------------------------------------------------------- 1 | 2 | div.text_cell_render.rendered_html { 3 | padding: 0.35em 0.1em; 4 | } 5 | 6 | div.code_cell { 7 | font-size: 120%; 8 | } 9 | 10 | div.my-top-right { 11 | position: absolute; 12 | right: 5%; 13 | top: 1em; 14 | font-size: 2em; 15 | } 16 | 17 | div.my-top-left { 18 | position: absolute; 19 | left: 5%; 20 | top: 1em; 21 | font-size: 2em; 22 | } 23 | -------------------------------------------------------------------------------- /chapter_recurrent-neural-networks/rise.css: -------------------------------------------------------------------------------- 1 | 2 | div.text_cell_render.rendered_html { 3 | padding: 0.35em 0.1em; 4 | } 5 | 6 | div.code_cell { 7 | font-size: 120%; 8 | } 9 | 10 | div.my-top-right { 11 | position: absolute; 12 | right: 5%; 13 | top: 1em; 14 | font-size: 2em; 15 | } 16 | 17 | div.my-top-left { 18 | position: absolute; 19 | left: 5%; 20 | top: 1em; 21 | font-size: 2em; 22 | } 23 | -------------------------------------------------------------------------------- /chapter_recurrent-neural-networks/text-preprocessing.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "id": "0afcc24a", 6 | "metadata": { 7 | "slideshow": { 8 | "slide_type": "-" 9 | } 10 | }, 11 | "source": [ 12 | "# 文本预处理\n", 13 | "\n" 14 | ] 15 | }, 16 | { 17 | "cell_type": "code", 18 | "execution_count": 1, 19 | "id": "bb8907ca", 20 | "metadata": { 21 | "execution": { 22 | "iopub.execute_input": "2023-08-18T07:02:24.243885Z", 23 | "iopub.status.busy": "2023-08-18T07:02:24.243343Z", 24 | "iopub.status.idle": "2023-08-18T07:02:26.213654Z", 25 | "shell.execute_reply": 
"2023-08-18T07:02:26.212745Z" 26 | }, 27 | "origin_pos": 2, 28 | "tab": [ 29 | "pytorch" 30 | ] 31 | }, 32 | "outputs": [], 33 | "source": [ 34 | "import collections\n", 35 | "import re\n", 36 | "from d2l import torch as d2l" 37 | ] 38 | }, 39 | { 40 | "cell_type": "markdown", 41 | "id": "80e119b6", 42 | "metadata": { 43 | "slideshow": { 44 | "slide_type": "-" 45 | } 46 | }, 47 | "source": [ 48 | "将数据集读取到由多条文本行组成的列表中" 49 | ] 50 | }, 51 | { 52 | "cell_type": "code", 53 | "execution_count": 2, 54 | "id": "ac0f9f0d", 55 | "metadata": { 56 | "execution": { 57 | "iopub.execute_input": "2023-08-18T07:02:26.218338Z", 58 | "iopub.status.busy": "2023-08-18T07:02:26.217685Z", 59 | "iopub.status.idle": "2023-08-18T07:02:26.304928Z", 60 | "shell.execute_reply": "2023-08-18T07:02:26.304151Z" 61 | }, 62 | "origin_pos": 6, 63 | "tab": [ 64 | "pytorch" 65 | ] 66 | }, 67 | "outputs": [ 68 | { 69 | "name": "stdout", 70 | "output_type": "stream", 71 | "text": [ 72 | "Downloading ../data/timemachine.txt from http://d2l-data.s3-accelerate.amazonaws.com/timemachine.txt...\n", 73 | "# 文本总行数: 3221\n", 74 | "the time machine by h g wells\n", 75 | "twinkled and his usually pale face was flushed and animated the\n" 76 | ] 77 | } 78 | ], 79 | "source": [ 80 | "d2l.DATA_HUB['time_machine'] = (d2l.DATA_URL + 'timemachine.txt',\n", 81 | " '090b5e7e70c295757f55df93cb0a180b9691891a')\n", 82 | "\n", 83 | "def read_time_machine(): \n", 84 | " \"\"\"将时间机器数据集加载到文本行的列表中\"\"\"\n", 85 | " with open(d2l.download('time_machine'), 'r') as f:\n", 86 | " lines = f.readlines()\n", 87 | " return [re.sub('[^A-Za-z]+', ' ', line).strip().lower() for line in lines]\n", 88 | "\n", 89 | "lines = read_time_machine()\n", 90 | "print(f'# 文本总行数: {len(lines)}')\n", 91 | "print(lines[0])\n", 92 | "print(lines[10])" 93 | ] 94 | }, 95 | { 96 | "cell_type": "markdown", 97 | "id": "b899d3e4", 98 | "metadata": { 99 | "slideshow": { 100 | "slide_type": "slide" 101 | } 102 | }, 103 | "source": [ 104 | "每个文本序列又被拆分成一个词元列表" 105 | ] 106 | }, 107 | { 108 |
"cell_type": "code", 109 | "execution_count": 3, 110 | "id": "afd6a9df", 111 | "metadata": { 112 | "execution": { 113 | "iopub.execute_input": "2023-08-18T07:02:26.308604Z", 114 | "iopub.status.busy": "2023-08-18T07:02:26.308048Z", 115 | "iopub.status.idle": "2023-08-18T07:02:26.317083Z", 116 | "shell.execute_reply": "2023-08-18T07:02:26.316264Z" 117 | }, 118 | "origin_pos": 8, 119 | "tab": [ 120 | "pytorch" 121 | ] 122 | }, 123 | "outputs": [ 124 | { 125 | "name": "stdout", 126 | "output_type": "stream", 127 | "text": [ 128 | "['the', 'time', 'machine', 'by', 'h', 'g', 'wells']\n", 129 | "[]\n", 130 | "[]\n", 131 | "[]\n", 132 | "[]\n", 133 | "['i']\n", 134 | "[]\n", 135 | "[]\n", 136 | "['the', 'time', 'traveller', 'for', 'so', 'it', 'will', 'be', 'convenient', 'to', 'speak', 'of', 'him']\n", 137 | "['was', 'expounding', 'a', 'recondite', 'matter', 'to', 'us', 'his', 'grey', 'eyes', 'shone', 'and']\n", 138 | "['twinkled', 'and', 'his', 'usually', 'pale', 'face', 'was', 'flushed', 'and', 'animated', 'the']\n" 139 | ] 140 | } 141 | ], 142 | "source": [ 143 | "def tokenize(lines, token='word'): \n", 144 | " \"\"\"将文本行拆分为单词或字符词元\"\"\"\n", 145 | " if token == 'word':\n", 146 | " return [line.split() for line in lines]\n", 147 | " elif token == 'char':\n", 148 | " return [list(line) for line in lines]\n", 149 | " else:\n", 150 | " print('错误:未知词元类型:' + token)\n", 151 | "\n", 152 | "tokens = tokenize(lines)\n", 153 | "for i in range(11):\n", 154 | " print(tokens[i])" 155 | ] 156 | }, 157 | { 158 | "cell_type": "markdown", 159 | "id": "4f4bc35b", 160 | "metadata": { 161 | "slideshow": { 162 | "slide_type": "slide" 163 | } 164 | }, 165 | "source": [ 166 | "构建一个字典,通常也叫做*词表*(vocabulary),\n", 167 | "用来将字符串类型的词元映射到从$0$开始的数字索引中" 168 | ] 169 | }, 170 | { 171 | "cell_type": "code", 172 | "execution_count": 4, 173 | "id": "16db7dad", 174 | "metadata": { 175 | "execution": { 176 | "iopub.execute_input": "2023-08-18T07:02:26.320587Z", 177 | "iopub.status.busy": 
"2023-08-18T07:02:26.320050Z", 178 | "iopub.status.idle": "2023-08-18T07:02:26.330519Z", 179 | "shell.execute_reply": "2023-08-18T07:02:26.329736Z" 180 | }, 181 | "origin_pos": 10, 182 | "tab": [ 183 | "pytorch" 184 | ] 185 | }, 186 | "outputs": [], 187 | "source": [ 188 | "class Vocab: \n", 189 | " \"\"\"文本词表\"\"\"\n", 190 | " def __init__(self, tokens=None, min_freq=0, reserved_tokens=None):\n", 191 | " if tokens is None:\n", 192 | " tokens = []\n", 193 | " if reserved_tokens is None:\n", 194 | " reserved_tokens = []\n", 195 | " counter = count_corpus(tokens)\n", 196 | " self._token_freqs = sorted(counter.items(), key=lambda x: x[1],\n", 197 | " reverse=True)\n", 198 | " self.idx_to_token = ['<unk>'] + reserved_tokens\n", 199 | " self.token_to_idx = {token: idx\n", 200 | " for idx, token in enumerate(self.idx_to_token)}\n", 201 | " for token, freq in self._token_freqs:\n", 202 | " if freq < min_freq:\n", 203 | " break\n", 204 | " if token not in self.token_to_idx:\n", 205 | " self.idx_to_token.append(token)\n", 206 | " self.token_to_idx[token] = len(self.idx_to_token) - 1\n", 207 | "\n", 208 | " def __len__(self):\n", 209 | " return len(self.idx_to_token)\n", 210 | "\n", 211 | " def __getitem__(self, tokens):\n", 212 | " if not isinstance(tokens, (list, tuple)):\n", 213 | " return self.token_to_idx.get(tokens, self.unk)\n", 214 | " return [self.__getitem__(token) for token in tokens]\n", 215 | "\n", 216 | " def to_tokens(self, indices):\n", 217 | " if not isinstance(indices, (list, tuple)):\n", 218 | " return self.idx_to_token[indices]\n", 219 | " return [self.idx_to_token[index] for index in indices]\n", 220 | "\n", 221 | " @property\n", 222 | " def unk(self):\n", 223 | " return 0\n", 224 | "\n", 225 | " @property\n", 226 | " def token_freqs(self):\n", 227 | " return self._token_freqs\n", 228 | "\n", 229 | "def count_corpus(tokens): \n", 230 | " \"\"\"统计词元的频率\"\"\"\n", 231 | " if len(tokens) == 0 or isinstance(tokens[0], list):\n", 232 | " tokens = [token for line in 
tokens for token in line]\n", 233 | " return collections.Counter(tokens)" 234 | ] 235 | }, 236 | { 237 | "cell_type": "markdown", 238 | "id": "8bea4a87", 239 | "metadata": { 240 | "slideshow": { 241 | "slide_type": "slide" 242 | } 243 | }, 244 | "source": [ 245 | "构建词表" 246 | ] 247 | }, 248 | { 249 | "cell_type": "code", 250 | "execution_count": 5, 251 | "id": "1501d478", 252 | "metadata": { 253 | "execution": { 254 | "iopub.execute_input": "2023-08-18T07:02:26.333942Z", 255 | "iopub.status.busy": "2023-08-18T07:02:26.333382Z", 256 | "iopub.status.idle": "2023-08-18T07:02:26.346927Z", 257 | "shell.execute_reply": "2023-08-18T07:02:26.346182Z" 258 | }, 259 | "origin_pos": 12, 260 | "tab": [ 261 | "pytorch" 262 | ] 263 | }, 264 | "outputs": [ 265 | { 266 | "name": "stdout", 267 | "output_type": "stream", 268 | "text": [ 269 | "[('<unk>', 0), ('the', 1), ('i', 2), ('and', 3), ('of', 4), ('a', 5), ('to', 6), ('was', 7), ('in', 8), ('that', 9)]\n" 270 | ] 271 | } 272 | ], 273 | "source": [ 274 | "vocab = Vocab(tokens)\n", 275 | "print(list(vocab.token_to_idx.items())[:10])" 276 | ] 277 | }, 278 | { 279 | "cell_type": "markdown", 280 | "id": "f1cfdd0c", 281 | "metadata": { 282 | "slideshow": { 283 | "slide_type": "-" 284 | } 285 | }, 286 | "source": [ 287 | "将每一条文本行转换成一个数字索引列表" 288 | ] 289 | }, 290 | { 291 | "cell_type": "code", 292 | "execution_count": 6, 293 | "id": "f0244f09", 294 | "metadata": { 295 | "execution": { 296 | "iopub.execute_input": "2023-08-18T07:02:26.350343Z", 297 | "iopub.status.busy": "2023-08-18T07:02:26.349779Z", 298 | "iopub.status.idle": "2023-08-18T07:02:26.354215Z", 299 | "shell.execute_reply": "2023-08-18T07:02:26.353468Z" 300 | }, 301 | "origin_pos": 14, 302 | "tab": [ 303 | "pytorch" 304 | ] 305 | }, 306 | "outputs": [ 307 | { 308 | "name": "stdout", 309 | "output_type": "stream", 310 | "text": [ 311 | "文本: ['the', 'time', 'machine', 'by', 'h', 'g', 'wells']\n", 312 | "索引: [1, 19, 50, 40, 2183, 2184, 400]\n", 313 | "文本: ['twinkled', 'and', 'his', 
'usually', 'pale', 'face', 'was', 'flushed', 'and', 'animated', 'the']\n", 314 | "索引: [2186, 3, 25, 1044, 362, 113, 7, 1421, 3, 1045, 1]\n" 315 | ] 316 | } 317 | ], 318 | "source": [ 319 | "for i in [0, 10]:\n", 320 | " print('文本:', tokens[i])\n", 321 | " print('索引:', vocab[tokens[i]])" 322 | ] 323 | }, 324 | { 325 | "cell_type": "markdown", 326 | "id": "b400e092", 327 | "metadata": { 328 | "slideshow": { 329 | "slide_type": "slide" 330 | } 331 | }, 332 | "source": [ 333 | "将所有功能打包到`load_corpus_time_machine`函数中" 334 | ] 335 | }, 336 | { 337 | "cell_type": "code", 338 | "execution_count": 7, 339 | "id": "578ed76f", 340 | "metadata": { 341 | "execution": { 342 | "iopub.execute_input": "2023-08-18T07:02:26.357414Z", 343 | "iopub.status.busy": "2023-08-18T07:02:26.357141Z", 344 | "iopub.status.idle": "2023-08-18T07:02:26.470812Z", 345 | "shell.execute_reply": "2023-08-18T07:02:26.470008Z" 346 | }, 347 | "origin_pos": 16, 348 | "tab": [ 349 | "pytorch" 350 | ] 351 | }, 352 | "outputs": [ 353 | { 354 | "data": { 355 | "text/plain": [ 356 | "(170580, 28)" 357 | ] 358 | }, 359 | "execution_count": 7, 360 | "metadata": {}, 361 | "output_type": "execute_result" 362 | } 363 | ], 364 | "source": [ 365 | "def load_corpus_time_machine(max_tokens=-1): \n", 366 | " \"\"\"返回时光机器数据集的词元索引列表和词表\"\"\"\n", 367 | " lines = read_time_machine()\n", 368 | " tokens = tokenize(lines, 'char')\n", 369 | " vocab = Vocab(tokens)\n", 370 | " corpus = [vocab[token] for line in tokens for token in line]\n", 371 | " if max_tokens > 0:\n", 372 | " corpus = corpus[:max_tokens]\n", 373 | " return corpus, vocab\n", 374 | "\n", 375 | "corpus, vocab = load_corpus_time_machine()\n", 376 | "len(corpus), len(vocab)" 377 | ] 378 | } 379 | ], 380 | "metadata": { 381 | "celltoolbar": "Slideshow", 382 | "language_info": { 383 | "name": "python" 384 | }, 385 | "required_libs": [], 386 | "rise": { 387 | "autolaunch": true, 388 | "enable_chalkboard": true, 389 | "overlay": "
", 390 | "scroll": true 391 | } 392 | }, 393 | "nbformat": 4, 394 | "nbformat_minor": 5 395 | } --------------------------------------------------------------------------------