├── .gitignore ├── .gitmodules ├── INFO.md ├── Jenkinsfile ├── LICENSE ├── README.md ├── STYLE_GUIDE.md ├── TERMINOLOGY.md ├── chapter_appendix-tools-for-deep-learning ├── aws.md ├── aws_origin.md ├── contributing.md ├── contributing_origin.md ├── d2l.md ├── d2l_origin.md ├── index.md ├── index_origin.md ├── jupyter.md ├── jupyter_origin.md ├── sagemaker.md ├── sagemaker_origin.md ├── selecting-servers-gpus.md └── selecting-servers-gpus_origin.md ├── chapter_attention-mechanisms ├── attention-cues.md ├── attention-cues_origin.md ├── attention-scoring-functions.md ├── attention-scoring-functions_origin.md ├── bahdanau-attention.md ├── bahdanau-attention_origin.md ├── index.md ├── index_origin.md ├── multihead-attention.md ├── multihead-attention_origin.md ├── nadaraya-waston.md ├── nadaraya-waston_origin.md ├── self-attention-and-positional-encoding.md ├── self-attention-and-positional-encoding_origin.md ├── transformer.md └── transformer_origin.md ├── chapter_computational-performance ├── async-computation.md ├── async-computation_origin.md ├── auto-parallelism.md ├── auto-parallelism_origin.md ├── hardware.md ├── hardware_origin.md ├── hybridize.md ├── hybridize_origin.md ├── index.md ├── index_origin.md ├── multiple-gpus-concise.md ├── multiple-gpus-concise_origin.md ├── multiple-gpus.md ├── multiple-gpus_origin.md ├── parameterserver.md └── parameterserver_origin.md ├── chapter_computer-vision ├── anchor.md ├── anchor_origin.md ├── bounding-box.md ├── bounding-box_origin.md ├── fcn.md ├── fcn_origin.md ├── fine-tuning.md ├── fine-tuning_origin.md ├── image-augmentation.md ├── image-augmentation_origin.md ├── index.md ├── index_origin.md ├── kaggle-cifar10.md ├── kaggle-cifar10_origin.md ├── kaggle-dog.md ├── kaggle-dog_origin.md ├── multiscale-object-detection.md ├── multiscale-object-detection_origin.md ├── neural-style.md ├── neural-style_origin.md ├── object-detection-dataset.md ├── object-detection-dataset_origin.md ├── rcnn.md ├── rcnn_origin.md ├── semantic-segmentation-and-dataset.md ├── semantic-segmentation-and-dataset_origin.md ├── ssd.md ├── ssd_origin.md ├── transposed-conv.md └── transposed-conv_origin.md ├── chapter_convolutional-modern ├── alexnet.md ├── alexnet_origin.md ├── batch-norm.md ├── batch-norm_origin.md ├── densenet.md ├── densenet_origin.md ├── googlenet.md ├── googlenet_origin.md ├── index.md ├── index_origin.md ├── nin.md ├── nin_origin.md ├── resnet.md ├── resnet_origin.md ├── vgg.md └── vgg_origin.md ├── chapter_convolutional-neural-networks ├── channels.md ├── channels_origin.md ├── conv-layer.md ├── conv-layer_origin.md ├── index.md ├── index_origin.md ├── lenet.md ├── lenet_origin.md ├── padding-and-strides.md ├── padding-and-strides_origin.md ├── pooling.md ├── pooling_origin.md ├── why-conv.md └── why-conv_origin.md ├── chapter_deep-learning-computation ├── custom-layer.md ├── custom-layer_origin.md ├── deferred-init.md ├── deferred-init_origin.md ├── index.md ├── index_origin.md ├── model-construction.md ├── model-construction_origin.md ├── parameters.md ├── parameters_origin.md ├── read-write.md ├── read-write_origin.md ├── use-gpu.md └── use-gpu_origin.md ├── chapter_installation ├── index.md └── index_origin.md ├── chapter_introduction ├── index.md └── index_origin.md ├── chapter_linear-networks ├── image-classification-dataset.md ├── image-classification-dataset_origin.md ├── index.md ├── index_origin.md ├── linear-regression-concise.md ├── linear-regression-concise_origin.md ├── linear-regression-scratch.md ├── 
linear-regression-scratch_origin.md ├── linear-regression.md ├── linear-regression_origin.md ├── softmax-regression-concise.md ├── softmax-regression-concise_origin.md ├── softmax-regression-scratch.md ├── softmax-regression-scratch_origin.md ├── softmax-regression.md └── softmax-regression_origin.md ├── chapter_multilayer-perceptrons ├── backprop.md ├── backprop_origin.md ├── dropout.md ├── dropout_origin.md ├── environment.md ├── environment_origin.md ├── index.md ├── index_origin.md ├── kaggle-house-price.md ├── kaggle-house-price_origin.md ├── mlp-concise.md ├── mlp-concise_origin.md ├── mlp-scratch.md ├── mlp-scratch_origin.md ├── mlp.md ├── mlp_origin.md ├── numerical-stability-and-init.md ├── numerical-stability-and-init_origin.md ├── underfit-overfit.md ├── underfit-overfit_origin.md ├── weight-decay.md └── weight-decay_origin.md ├── chapter_natural-language-processing-applications ├── finetuning-bert.md ├── finetuning-bert_origin.md ├── index.md ├── index_origin.md ├── natural-language-inference-and-dataset.md ├── natural-language-inference-and-dataset_origin.md ├── natural-language-inference-attention.md ├── natural-language-inference-attention_origin.md ├── natural-language-inference-bert.md ├── natural-language-inference-bert_origin.md ├── sentiment-analysis-and-dataset.md ├── sentiment-analysis-and-dataset_origin.md ├── sentiment-analysis-cnn.md ├── sentiment-analysis-cnn_origin.md ├── sentiment-analysis-rnn.md └── sentiment-analysis-rnn_origin.md ├── chapter_natural-language-processing-pretraining ├── approx-training.md ├── approx-training_origin.md ├── bert-dataset.md ├── bert-dataset_origin.md ├── bert-pretraining.md ├── bert-pretraining_origin.md ├── bert.md ├── bert_origin.md ├── glove.md ├── glove_origin.md ├── index.md ├── index_origin.md ├── similarity-analogy.md ├── similarity-analogy_origin.md ├── subword-embedding.md ├── subword-embedding_origin.md ├── word-embedding-dataset.md ├── word-embedding-dataset_origin.md ├── word2vec-pretraining.md ├── word2vec-pretraining_origin.md ├── word2vec.md └── word2vec_origin.md ├── chapter_notation ├── index.md └── index_origin.md ├── chapter_optimization ├── adadelta.md ├── adadelta_origin.md ├── adagrad.md ├── adagrad_origin.md ├── adam.md ├── adam_origin.md ├── convexity.md ├── convexity_origin.md ├── gd.md ├── gd_origin.md ├── index.md ├── index_origin.md ├── lr-scheduler.md ├── lr-scheduler_origin.md ├── minibatch-sgd.md ├── minibatch-sgd_origin.md ├── momentum.md ├── momentum_origin.md ├── optimization-intro.md ├── optimization-intro_origin.md ├── rmsprop.md ├── rmsprop_origin.md ├── sgd.md └── sgd_origin.md ├── chapter_preface ├── index.md └── index_origin.md ├── chapter_preliminaries ├── autograd.md ├── autograd_origin.md ├── calculus.md ├── calculus_origin.md ├── index.md ├── index_origin.md ├── linear-algebra.md ├── linear-algebra_origin.md ├── lookup-api.md ├── lookup-api_origin.md ├── ndarray.md ├── ndarray_origin.md ├── pandas.md ├── pandas_origin.md ├── probability.md ├── probability_origin.md └── softmax-regression-scratch.md ├── chapter_recurrent-modern ├── beam-search.md ├── beam-search_origin.md ├── bi-rnn.md ├── bi-rnn_origin.md ├── deep-rnn.md ├── deep-rnn_origin.md ├── encoder-decoder.md ├── encoder-decoder_origin.md ├── gru.md ├── gru_origin.md ├── index.md ├── index_origin.md ├── lstm.md ├── lstm_origin.md ├── machine-translation-and-dataset.md ├── machine-translation-and-dataset_origin.md ├── seq2seq.md └── seq2seq_origin.md ├── chapter_recurrent-neural-networks ├── bptt.md ├── bptt_origin.md ├── index.md 
├── index_origin.md ├── language-models-and-dataset.md ├── language-models-and-dataset_origin.md ├── rnn-concise.md ├── rnn-concise_origin.md ├── rnn-scratch.md ├── rnn-scratch_origin.md ├── rnn.md ├── rnn_origin.md ├── sequence.md ├── sequence_origin.md ├── text-preprocessing.md └── text-preprocessing_origin.md ├── chapter_references └── zreferences.md ├── config.ini ├── contrib ├── chapter_recommender-systems │ ├── autorec.md │ ├── ctr.md │ ├── fm.md │ ├── index.md │ ├── mf.md │ └── movielens.md └── to-rm-mx-contrib-text │ ├── chapter_natural-language-processing │ ├── machine-translation.md │ ├── sentiment-analysis-cnn.md │ ├── sentiment-analysis-rnn.md │ └── similarity-analogy.md │ └── d2lzh │ ├── __init__.py │ ├── text │ ├── __init__.py │ ├── embedding.py │ └── vocab.py │ └── utils.py ├── d2l.bib ├── d2l ├── __init__.py ├── mindspore.py ├── mxnet.py ├── paddle.py ├── tensorflow.py └── torch.py ├── graffle ├── appendix │ ├── 3dFunc.graffle │ ├── ChainNet1.graffle │ ├── ChainNet2.graffle │ ├── GridPoints.graffle │ ├── GridTransform.graffle │ ├── GridTransformFilled.graffle │ ├── GridWithArrow.graffle │ ├── Marginal.graffle │ ├── ParVec.graffle │ ├── ProjVec.graffle │ ├── RectTrans.graffle │ ├── SpaceDivision.graffle │ ├── SpaceDivision3D.graffle │ ├── SubArea.graffle │ ├── SumOrder.graffle │ ├── VecAdd.graffle │ ├── VecAngle.graffle │ ├── comparing_estimators.graffle │ ├── mutual_information.graffle │ ├── negSecDer.graffle │ ├── posSecDer.graffle │ ├── statistical_power.graffle │ ├── statistical_significance.graffle │ └── zeroSecDer.graffle ├── attention │ ├── add_norm.graffle │ ├── attention-output.graffle │ ├── attention.graffle │ ├── cnn-rnn-self-attention.graffle │ ├── encoder-decoder.graffle │ ├── eye-book.graffle │ ├── eye-coffee.graffle │ ├── multi-head-attention.graffle │ ├── positional_encoding.graffle │ ├── qkv.graffle │ ├── self-attention-predict.graffle │ ├── self-attention.graffle │ ├── seq2seq-attention-details.graffle │ ├── seq2seq_attention.graffle │ └── transformer.graffle ├── book-org.graffle ├── cnn-basic │ ├── conv-1x1.graffle │ ├── conv-multi-in.graffle │ ├── conv-pad.graffle │ ├── conv-stride.graffle │ ├── correlation.graffle │ ├── lenet-vert.graffle │ ├── lenet.graffle │ ├── pooling.graffle │ └── waldo-mask.graffle │ │ ├── data.plist │ │ └── image1.jpg ├── cnn-modern │ ├── ResNetManyFlavor.graffle │ ├── alexnet.graffle │ ├── densenet-block.graffle │ ├── densenet.graffle │ ├── functionclasses.graffle │ ├── inception-full.graffle │ ├── inception.graffle │ ├── nin-compare.graffle │ ├── nin.graffle │ ├── residual-block.graffle │ ├── resnet-block.graffle │ ├── resnet18.graffle │ └── vgg.graffle ├── computation │ ├── asyncgraph.graffle │ ├── blocks.graffle │ ├── computegraph.graffle │ ├── copyto.graffle │ ├── frontends.graffle │ ├── threading.graffle │ └── twogpu.graffle ├── contribute.graffle ├── convert.sh ├── gan │ └── gan.graffle ├── intro │ ├── data-collection.graffle │ ├── diveintodl.graffle │ ├── ml-loop.graffle │ ├── rl-environment.graffle │ ├── supervised-learning.graffle │ └── wake-word.graffle ├── linear │ ├── fit_linreg.graffle │ ├── neuron.graffle │ ├── singlelayer.graffle │ ├── singleneuron.graffle │ └── softmaxreg.graffle ├── mlp │ ├── add_norm.graffle │ ├── capacity_vs_error.graffle │ ├── dropout2.graffle │ ├── forward.graffle │ └── mlp.graffle ├── nlp │ ├── bert-input.graffle │ ├── bert-one-seq.graffle │ ├── bert-qa.graffle │ ├── bert-tagging.graffle │ ├── bert-two-seqs.graffle │ ├── cbow.graffle │ ├── conv1d-2d.graffle │ ├── conv1d-channel.graffle │ ├── 
conv1d.graffle │ ├── elmo-gpt-bert.graffle │ ├── hi-softmax.graffle │ ├── nli_attention.graffle │ ├── nlp-map-app.graffle │ ├── nlp-map-nli-attention.graffle │ ├── nlp-map-nli-bert.graffle │ ├── nlp-map-pretrain.graffle │ ├── nlp-map-sa-cnn.graffle │ ├── nlp-map-sa-rnn.graffle │ ├── sentiment-rnn.graffle │ ├── skip-gram.graffle │ └── textcnn.graffle ├── optimization │ └── convex.graffle ├── performance │ ├── a77.graffle │ ├── bw-hierarchy.graffle │ ├── bw-hierarchy.pdf │ ├── data-parallel.graffle │ ├── falseshare.graffle │ ├── mobo.graffle │ │ ├── data.plist │ │ ├── image1.tiff │ │ └── preview.jpeg │ ├── neon128.graffle │ ├── ps-distributed.graffle │ ├── ps-distributed.pdf │ ├── ps-multimachine.graffle │ ├── ps-multimachine.pdf │ ├── ps-multips.graffle │ ├── ps-multips.pdf │ ├── ps.graffle │ ├── ps.pdf │ └── splitting.graffle ├── preliminaries │ └── polygon_circle.graffle ├── recsys │ ├── rec-caser.graffle │ ├── rec-deepfm.graffle │ ├── rec-intro.graffle │ ├── rec-mf.graffle │ ├── rec-neumf.graffle │ ├── rec-ranking.graffle │ └── rec-seq-data.graffle ├── rnn │ ├── beam-search.graffle │ ├── birnn-ORIGINAL.graffle │ ├── birnn.graffle │ ├── deep-rnn-ORIGINAL.graffle │ ├── deep-rnn.graffle │ ├── hmm.graffle │ ├── lang-model-data.graffle │ ├── rnn-bptt.graffle │ ├── rnn-train.graffle │ ├── rnn.graffle │ ├── s2s-prob1.graffle │ ├── s2s-prob2.graffle │ ├── seq2seq-details.graffle │ ├── seq2seq.graffle │ ├── seq2seq_predict.graffle │ ├── sequence-model.graffle │ ├── timemachine-5gram.graffle │ └── truncated-bptt.graffle ├── transformer.graffle └── vision │ ├── anchor-label.graffle │ ├── fast-rcnn.graffle │ ├── faster-rcnn.graffle │ ├── fcn.graffle │ ├── finetune.graffle │ ├── iou.graffle │ ├── mask-rcnn.graffle │ ├── neural-style.graffle │ ├── r-cnn.graffle │ ├── roi.graffle │ ├── segmentation.graffle │ ├── ssd.graffle │ ├── style-transfer.graffle │ ├── trans_conv.graffle │ ├── trans_conv_2.graffle │ ├── trans_conv_pad1_2.graffle │ ├── trans_conv_stride2.graffle │ └── trans_conv_stride2_2.graffle ├── img ├── Marginal.svg ├── Neuron.svg ├── a77.svg ├── add_norm.svg ├── alexnet-original.svg ├── alexnet.svg ├── anchor-label.svg ├── asyncgraph.svg ├── attention-output.svg ├── attention.svg ├── autumn-oak.jpg ├── aws.png ├── banana.jpg ├── beam-search.svg ├── bert-input.svg ├── bert-one-seq.svg ├── bert-qa.svg ├── bert-tagging.svg ├── bert-two-seqs.svg ├── birnn.svg ├── blocks.svg ├── book-org.svg ├── bw-hierarchy.svg ├── capacity-vs-error.svg ├── capacity_vs_error.svg ├── cat-dog-pixels.png ├── cat-dog-test.svg ├── cat-dog-train.svg ├── cat1.jpg ├── cat2.jpg ├── cat3.jpg ├── catdog.jpg ├── cbow.svg ├── chain-net1.svg ├── chain-net2.svg ├── chmod.png ├── cnn-rnn-self-attention.svg ├── colab-2.png ├── colab.png ├── computegraph.svg ├── connect.png ├── contribute.svg ├── conv-1x1.svg ├── conv-multi-in.svg ├── conv-pad.svg ├── conv-stride.svg ├── conv1d-2d.svg ├── conv1d-channel.svg ├── conv1d.svg ├── convex-intersect.svg ├── copyto.svg ├── correlation.svg ├── cuda101.png ├── data-collection.svg ├── data-parallel.svg ├── death-cap.jpg ├── deep-rnn.svg ├── deeplearning-amazon.jpg ├── densenet-block.svg ├── densenet.svg ├── disk.png ├── dog1.jpg ├── dog2.jpg ├── dropout2.svg ├── ec2.png ├── edit-file.png ├── elmo-gpt-bert.svg ├── encoder-decoder.svg ├── eye-book.png ├── eye-book.svg ├── eye-coffee.png ├── eye-coffee.svg ├── falseshare.svg ├── falsesharing.svg ├── fast-rcnn.svg ├── faster-rcnn.svg ├── fcn.svg ├── filters.png ├── finetune.svg ├── fit-linreg.svg ├── flopsvsprice.svg ├── forward.svg ├── 
frontends.png ├── frontends.svg ├── frontends │ ├── Canvas 1.svg │ ├── image10.tiff │ ├── image2.tiff │ ├── image3.tiff │ ├── image4.tiff │ ├── image5.pdf │ └── image8.tiff ├── frontpage │ ├── jd-190715-en.png │ ├── jd-190715-zh.png │ ├── jd-20230208-zh-1(day).png │ ├── jd-20230208-zh-1.png │ ├── jd-20230208-zh-5(day).png │ ├── jd-20230208-zh-5.png │ ├── jd-20230208-zh-6(day).png │ └── jd-20230208-zh-6.png ├── ftse100.png ├── functionclasses.svg ├── gan.svg ├── git-clone.png ├── git-createpr.png ├── git-fork.png ├── git-forked.png ├── git-newpr.png ├── grid-points.svg ├── grid-transform-filled.svg ├── grid-transform.svg ├── gru-1.svg ├── gru-2.svg ├── gru-3.svg ├── hi-softmax.svg ├── hmm.svg ├── house-pricing.png ├── inception-full.svg ├── inception.svg ├── iou.svg ├── jupyter.png ├── jupyter00.png ├── jupyter01.png ├── jupyter02.png ├── jupyter03.png ├── jupyter04.png ├── jupyter05.png ├── jupyter06.png ├── kaggle-cifar10.png ├── kaggle-dog.jpg ├── kaggle-submit2.png ├── kaggle.png ├── keypair.png ├── koebel.jpg ├── lang-model-data.svg ├── latencynumbers.png ├── launching.png ├── lenet-vert.svg ├── lenet.svg ├── limits.png ├── lstm-0.svg ├── lstm-1.svg ├── lstm-2.svg ├── lstm-3.svg ├── marginal.svg ├── mask-rcnn.svg ├── ml-loop.svg ├── mlp.svg ├── mobo-symbol.svg ├── multi-head-attention.svg ├── mutual-information.svg ├── negSecDer.svg ├── neon128.svg ├── neural-style.jpg ├── neural-style.svg ├── neuron.svg ├── nin-compare.svg ├── nin.svg ├── nli-attention.svg ├── nli_attention.svg ├── nlp-map-app.svg ├── nlp-map-nli-attention.svg ├── nlp-map-nli-bert.svg ├── nlp-map-pretrain.svg ├── nlp-map-sa-cnn.svg ├── nlp-map-sa-rnn.svg ├── nonconvex.svg ├── nvlink-twoloop.svg ├── nvlink.svg ├── p2x.png ├── pacman.svg ├── par-vec.svg ├── pikachu.jpg ├── polygon-circle.svg ├── pooling.svg ├── popvssoda.png ├── posSecDer.svg ├── proj-vec.svg ├── projections.svg ├── ps-distributed.svg ├── ps-multimachine.svg ├── ps-multips.svg ├── ps.svg ├── qkv.svg ├── r-cnn.svg ├── rainier.jpg ├── rec-caser.svg ├── rec-deepfm.svg ├── rec-intro.svg ├── rec-mf.svg ├── rec-neumf.svg ├── rec-ranking.svg ├── rec-seq-data.svg ├── rect-trans.svg ├── residual-block.svg ├── resnet-block.svg ├── resnet18.svg ├── ringsync.svg ├── rl-environment.svg ├── rnn-bptt.svg ├── rnn-train.svg ├── rnn.svg ├── roi.svg ├── s2s-prob1.svg ├── s2s-prob2.svg ├── sagemaker-create-2.png ├── sagemaker-create-3-pytorch.png ├── sagemaker-create-3-tensorflow.png ├── sagemaker-create-3.png ├── sagemaker-create.png ├── sagemaker-open.png ├── sagemaker-stop.png ├── sagemaker-terminal.png ├── sagemaker.png ├── segmentation.svg ├── self-attention.svg ├── seq2seq-attention-details.svg ├── seq2seq-attention.svg ├── seq2seq-details.svg ├── seq2seq-predict.svg ├── seq2seq.svg ├── sequence-model.svg ├── singlelayer.svg ├── singleneuron.svg ├── skip-gram.svg ├── skylake.svg ├── softmaxreg.svg ├── space-division-3d.svg ├── space-division.svg ├── speech.png ├── splitting.svg ├── ssd.svg ├── stackedanimals.png ├── statistical-significance.svg ├── style-transfer.svg ├── sub-area.svg ├── sum-order.svg ├── supervised-learning.svg ├── tensorcore.jpg ├── textcnn.svg ├── threading.svg ├── timemachine-5gram.svg ├── trans_conv.svg ├── trans_conv_stride2.svg ├── transformer.svg ├── truncated-bptt.svg ├── turing-processing-block.png ├── turing.png ├── twogpu.svg ├── ubuntu-new.png ├── vec-add.svg ├── vec-angle.svg ├── vgg.svg ├── wake-word.svg ├── waldo-mask.jpg ├── wattvsprice.svg ├── where-wally-walker-books.jpg └── zeroSecDer.svg ├── index.md ├── setup.py └── static ├── 
build.yml ├── build_html.sh ├── cache.sh ├── favicon-blue-background.jpg ├── favicon.png ├── frontpage ├── _images │ ├── alex.jpg │ ├── anirudh.jpg │ ├── aston.jpg │ ├── brent.jpg │ ├── code.jpg │ ├── eq.jpg │ ├── figure.jpg │ ├── forum.gif │ ├── forum.jpg │ ├── forum.mp4 │ ├── front.png │ ├── huliujun.jpg │ ├── laptop_jupyter.png │ ├── logos │ │ ├── colab.png │ │ ├── logoimg1-zh.png │ │ ├── logoimg1.png │ │ ├── logoimg2-zh.png │ │ ├── logoimg2.png │ │ ├── logoimg3-zh.png │ │ ├── logoimg3.png │ │ ├── logoimg4-zh.png │ │ ├── logoimg4.png │ │ ├── logoimg5-zh.png │ │ ├── logoimg5.png │ │ ├── logoimg6-zh.png │ │ ├── logoimg6.png │ │ ├── logoimg7.png │ │ ├── sagemaker-studio-lab.png │ │ └── sagemaker.png │ ├── map.png │ ├── mu.jpg │ ├── notebook.gif │ ├── notebook.jpg │ ├── notebook.mp4 │ ├── rachel.jpeg │ ├── shuai.jpg │ ├── wugaosheng.jpg │ ├── xiaoting.jpg │ ├── xiejiehang.jpg │ ├── yi.jpg │ ├── yuan.jpg │ ├── zack.jpg │ └── zhangge.jpg ├── attachments │ ├── hardcopy.txt │ └── sagemaker.txt └── frontpage.html ├── latex-logo.png ├── logo-with-text.png ├── logo.png └── post_latex └── main.py /.gitignore: -------------------------------------------------------------------------------- 1 | **/.ipynb_checkpoints 2 | **/__pycache__ 3 | data/ 4 | *.json 5 | *.params 6 | *.DS_Store 7 | *.csv 8 | *egg-info* 9 | dist* 10 | _build/ 11 | test*.md 12 | run.sh -------------------------------------------------------------------------------- /.gitmodules: -------------------------------------------------------------------------------- 1 | [submodule "build/mx-theme"] 2 | path = build/mx-theme 3 | url = https://github.com/mli/mx-theme 4 | [submodule "build/utils"] 5 | path = build/utils 6 | url = https://github.com/d2l-ai/utils 7 | -------------------------------------------------------------------------------- /INFO.md: -------------------------------------------------------------------------------- 1 | ## 编译HTML版本 2 | 3 | 所有markdown文件需要在提交前清除output,它们会在服务器上重新执行生成结果。所以需要保证每个notebook执行不要太久,目前限制是20min。 4 | 5 | 在本地可以如下build html(需要GPU支持) 6 | 7 | ``` 8 | conda env update -f build/env.yml 9 | source activate d2l-zh-build 10 | make html 11 | ``` 12 | 13 | 生成的html会在`_build/html`。 14 | 15 | 如果没有改动notebook里面源代码,所以不想执行notebook,可以使用 16 | 17 | ``` 18 | make html EVAL=0 19 | ``` 20 | 21 | 但这样生成的html将不含有输出结果。 22 | 23 | ## 编译PDF版本 24 | 25 | 编译pdf版本需要xelatex、librsvg2-bin(svg图片转pdf)和思源字体。在Ubuntu可以这样安装。 26 | 27 | ``` 28 | sudo apt-get install texlive-full 29 | sudo apt-get install librsvg2-bin 30 | ``` 31 | 32 | ``` 33 | wget https://github.com/adobe-fonts/source-han-sans/releases/download/2.004R/SourceHanSansSC.zip 34 | wget -O SourceHanSerifSC.zip https://github.com/adobe-fonts/source-han-serif/releases/download/2.001R/09_SourceHanSerifSC.zip 35 | 36 | unzip SourceHanSansSC.zip -d SourceHanSansSC 37 | unzip SourceHanSerifSC.zip -d SourceHanSerifSC 38 | 39 | sudo mv SourceHanSansSC SourceHanSerifSC /usr/share/fonts/opentype/ 40 | sudo fc-cache -f -v 41 | ``` 42 | 43 | 44 | 这时候可以通过 `fc-list :lang=zh` 来查看安装的中文字体。 45 | 46 | 同样的去下载和安装英文字体 47 | 48 | ``` 49 | wget -O source-serif-pro.zip https://www.fontsquirrel.com/fonts/download/source-serif-pro 50 | unzip source-serif-pro -d source-serif-pro 51 | sudo mv source-serif-pro /usr/share/fonts/opentype/ 52 | 53 | wget -O source-sans-pro.zip https://www.fontsquirrel.com/fonts/download/source-sans-pro 54 | unzip source-sans-pro -d source-sans-pro 55 | sudo mv source-sans-pro /usr/share/fonts/opentype/ 56 | 57 | wget -O source-code-pro.zip 
https://www.fontsquirrel.com/fonts/download/source-code-pro 58 | unzip source-code-pro -d source-code-pro 59 | sudo mv source-code-pro /usr/share/fonts/opentype/ 60 | 61 | sudo fc-cache -f -v 62 | ``` 63 | 64 | 然后就可以编译了。 65 | 66 | ``` 67 | make pdf 68 | ``` 69 | 70 | ## 其他安装 71 | 72 | ``` 73 | python -m spacy download en # 需已 pip install spacy 74 | ``` 75 | 76 | ## 样式规范 77 | 78 | 贡献请遵照本教程的[样式规范](STYLE_GUIDE.md)。 79 | 80 | ## 中英文术语对照 81 | 82 | 翻译请参照[中英文术语对照](TERMINOLOGY.md)。 83 | -------------------------------------------------------------------------------- /Jenkinsfile: -------------------------------------------------------------------------------- 1 | stage("Build and Publish") { 2 | // such as d2l-en and d2l-zh 3 | def REPO_NAME = env.JOB_NAME.split('/')[0] 4 | // such as en and zh 5 | def LANG = REPO_NAME.split('-')[1] 6 | // The current branch or the branch this PR will merge into 7 | def TARGET_BRANCH = env.CHANGE_TARGET ? env.CHANGE_TARGET : env.BRANCH_NAME 8 | // such as d2l-en-master 9 | def TASK = REPO_NAME + '-' + TARGET_BRANCH 10 | node('d2l-worker') { 11 | ws("workspace/${TASK}") { 12 | checkout scm 13 | // conda environment 14 | def ENV_NAME = "${TASK}-${EXECUTOR_NUMBER}"; 15 | 16 | sh label: "Build Environment", script: """set -ex 17 | conda env update -n ${ENV_NAME} -f static/build.yml 18 | conda activate ${ENV_NAME} 19 | pip uninstall -y d2lbook 20 | pip install git+https://github.com/d2l-ai/d2l-book 21 | pip list 22 | nvidia-smi 23 | """ 24 | 25 | sh label: "Sanity Check", script: """set -ex 26 | conda activate ${ENV_NAME} 27 | d2lbook build outputcheck tabcheck 28 | """ 29 | 30 | sh label: "Execute Notebooks", script: """set -ex 31 | conda activate ${ENV_NAME} 32 | ./static/cache.sh restore _build/eval/data 33 | d2lbook build eval 34 | ./static/cache.sh store _build/eval/data 35 | """ 36 | 37 | sh label: "Execute Notebooks [Pytorch]", script: """set -ex 38 | conda activate ${ENV_NAME} 39 | ./static/cache.sh restore _build/eval_pytorch/data 40 | d2lbook build eval --tab pytorch 41 | d2lbook build slides --tab pytorch 42 | ./static/cache.sh store _build/eval_pytorch/data 43 | """ 44 | 45 | sh label: "Execute Notebooks [Tensorflow]", script: """set -ex 46 | conda activate ${ENV_NAME} 47 | ./static/cache.sh restore _build/eval_tensorflow/data 48 | export TF_CPP_MIN_LOG_LEVEL=3 49 | export TF_FORCE_GPU_ALLOW_GROWTH=true 50 | d2lbook build eval --tab tensorflow 51 | ./static/cache.sh store _build/eval_tensorflow/data 52 | """ 53 | 54 | sh label: "Execute Notebooks [Paddlepaddle]", script: """set -ex 55 | conda activate ${ENV_NAME} 56 | ./static/cache.sh restore _build/eval_paddle/data 57 | d2lbook build eval --tab paddle 58 | ./static/cache.sh store _build/eval_paddle/data 59 | """ 60 | 61 | sh label: "Execute Notebooks [MindSpore]", script: """set -ex 62 | conda activate ${ENV_NAME} 63 | ./static/cache.sh restore _build/eval_mindspore/data 64 | d2lbook build eval --tab mindspore 65 | ./static/cache.sh store _build/eval_mindspore/data 66 | """ 67 | 68 | sh label:"Build HTML", script:"""set -ex 69 | conda activate ${ENV_NAME} 70 | ./static/build_html.sh 71 | """ 72 | 73 | sh label:"Build PDF", script:"""set -ex 74 | conda activate ${ENV_NAME} 75 | d2lbook build pdf 76 | """ 77 | 78 | sh label:"Build Pytorch PDF", script:"""set -ex 79 | conda activate ${ENV_NAME} 80 | d2lbook build pdf --tab pytorch 81 | """ 82 | 83 | if (env.BRANCH_NAME == 'release') { 84 | sh label:"Release", script:"""set -ex 85 | conda activate ${ENV_NAME} 86 | d2lbook build pkg 87 | d2lbook deploy html 
pdf slides pkg colab sagemaker --s3 s3://${LANG}-v2.d2l.ai 88 | """ 89 | 90 | } else { 91 | sh label:"Publish", script:"""set -ex 92 | conda activate ${ENV_NAME} 93 | d2lbook deploy html pdf --s3 s3://preview.d2l.ai/${JOB_NAME}/ 94 | """ 95 | if (env.BRANCH_NAME.startsWith("PR-")) { 96 | pullRequest.comment("Job ${JOB_NAME}/${BUILD_NUMBER} is complete. \nCheck the results at http://preview.d2l.ai/${JOB_NAME}/") 97 | } 98 | } 99 | } 100 | } 101 | } 102 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # 动手学深度学习(Dive into Deep Learning,D2L.ai) 2 | 3 | [![Build Status](http://ci.d2l.ai/job/d2l-zh/job/master/badge/icon)](http://ci.d2l.ai/job/d2l-zh/job/master/) 4 | 5 | [第二版:zh.D2L.ai](https://zh.d2l.ai) | [第一版:zh-v1.D2L.ai](https://zh-v1.d2l.ai/) | 安装和使用书中源代码: [第二版](https://zh.d2l.ai/chapter_installation/index.html) [第一版](https://zh-v1.d2l.ai/chapter_prerequisite/install.html) 6 | 7 |
理解深度学习的最佳方法是学以致用。
15 | 16 | 本开源项目代表了我们的一种尝试:我们将教给读者概念、背景知识和代码;我们将在同一个地方阐述剖析问题所需的批判性思维、解决问题所需的数学知识,以及实现解决方案所需的工程技能。 17 | 18 | 我们的目标是创建一个为实现以下目标的统一资源: 19 | 1. 所有人均可在网上免费获取; 20 | 1. 提供足够的技术深度,从而帮助读者实际成为深度学习应用科学家:既理解数学原理,又能够实现并不断改进方法; 21 | 1. 包含可运行的代码,为读者展示如何在实际中解决问题。这样不仅直接将数学公式对应成实际代码,而且可以修改代码、观察结果并及时获取经验; 22 | 1. 允许我们和整个社区不断快速迭代内容,从而紧跟仍在高速发展的深度学习领域; 23 | 1. 由包含有关技术细节问答的论坛作为补充,使大家可以相互答疑并交换经验。 24 | 25 |
将本书(中英文版)用作教材或参考书的大学
29 | 30 | 如果本书对你有帮助,请Star (★) 本仓库或引用本书的英文版: 31 | 32 | ``` 33 | @article{zhang2021dive, 34 | title={Dive into Deep Learning}, 35 | author={Zhang, Aston and Lipton, Zachary C. and Li, Mu and Smola, Alexander J.}, 36 | journal={arXiv preprint arXiv:2106.11342}, 37 | year={2021} 38 | } 39 | ``` 40 | 41 | ## 本书的英文版 42 | 43 | 虽然纸质书已出版,但深度学习领域依然在迅速发展。为了得到来自更广泛的英文开源社区的帮助,从而提升本书质量,本书的新版将继续用英文编写,并搬回中文版。 44 | 45 | 欢迎关注本书的[英文开源项目](https://github.com/d2l-ai/d2l-en)。 46 | 47 | ## 中英文教学资源 48 | 49 | 加州大学伯克利分校 2019 年春学期 [*Introduction to Deep Learning* 课程](http://courses.d2l.ai/berkeley-stat-157/index.html)教材(同时提供含教学视频地址的[中文版课件](https://github.com/d2l-ai/berkeley-stat-157/tree/master/slides-zh))。 50 | 51 | ## 学术界推荐 52 | 53 | >
> "Dive into this book if you want to dive into deep learning!"
54 | > — 韩家炜,ACM 院士、IEEE 院士,美国伊利诺伊大学香槟分校计算机系 Michael Aiken Chair 教授 55 | 56 | >
> "This is a highly welcome addition to the machine learning literature."
57 | > — Bernhard Schölkopf,ACM 院士、德国国家科学院院士,德国马克斯•普朗克研究所智能系统院院长 58 | 59 | >
> "书中代码可谓‘所学即所用’。"
60 | > — 周志华,ACM 院士、IEEE 院士、AAAS 院士,南京大学计算机科学与技术系主任 61 | 62 | >
> "这本书可以帮助深度学习实践者快速提升自己的能力。"
63 | > — 张潼,ASA 院士、IMS 院士,香港科技大学计算机系和数学系教授 64 | 65 | ## 工业界推荐 66 | 67 | >
> "一本优秀的深度学习教材,值得任何想了解深度学习何以引爆人工智能革命的人关注。"
68 | > — 黄仁勋,NVIDIA创始人 & CEO 69 | 70 | >
> "《动手学深度学习》是最适合工业界研发工程师学习的。我毫无保留地向广大的读者们强烈推荐。"
71 | > — 余凯,地平线公司创始人 & CEO 72 | 73 | >
> "强烈推荐这本书!我特别赞赏这种手脑一体的学习方式。"
74 | > — 漆远,复旦大学“浩清”教授、人工智能创新与产业研究院院长 75 | 76 | >
> "《动手学深度学习》是一本很容易让学习者上瘾的书。"
77 | > — 沈强,将门创投创始合伙人 78 | 79 | ## 贡献 80 | 81 | 感谢[社区贡献者们](https://github.com/d2l-ai/d2l-zh/graphs/contributors)为每一位读者改进这本开源书。 82 | 83 | [如何贡献](https://zh.d2l.ai/chapter_appendix-tools-for-deep-learning/contributing.html) | [致谢](https://zh.d2l.ai/chapter_preface/index.html) | [讨论或报告问题](https://discuss.d2l.ai/c/chinese-version/16) | [其他](INFO.md) 84 | -------------------------------------------------------------------------------- /TERMINOLOGY.md: -------------------------------------------------------------------------------- 1 | ## 英汉术语对照 2 | 3 | 鞍点,saddle point 4 | 5 | 变换,transform 6 | 7 | 编码器,encoder 8 | 9 | 标签,label 10 | 11 | 步幅,stride 12 | 13 | 参数,parameter 14 | 15 | 长短期记忆网络,long short-term memory (LSTM) 16 | 17 | 超参数,hyperparameter 18 | 19 | 层序softmax,hierarchical softmax 20 | 21 | 查准率,precision 22 | 23 | 成本,cost 24 | 25 | 词表,vocabulary 26 | 27 | 词嵌入,word embedding 28 | 29 | 词向量,word vector 30 | 31 | 词元,token 32 | 33 | 词元分析器,tokenizer 34 | 35 | 词元化,tokenize 36 | 37 | 汇聚层,pooling layer 38 | 39 | 稠密,dense 40 | 41 | 大小,size 42 | 43 | 导入,import 44 | 45 | 轮,epoch 46 | 47 | 暂退法,dropout 48 | 49 | 动量法,momentum (method) 50 | 51 | 独立同分布,independent and identically distributed (i.i.d.) 52 | 53 | 端到端,end-to-end 54 | 55 | 多层感知机,multilayer perceptron 56 | 57 | 多头注意力,multi-head attention 58 | 59 | 二元分类,binary classification 60 | 61 | 二元,bigram 62 | 63 | 子采样,subsample 64 | 65 | 发散,diverge 66 | 67 | 泛化,generalization 68 | 69 | 泛化误差,generalization error 70 | 71 | 方差,variance 72 | 73 | 分类,classification 74 | 75 | 分类器,classifier 76 | 77 | 负采样,negative sampling 78 | 79 | 感受野,receptive field 80 | 81 | 格拉姆矩阵,Gram matrix 82 | 83 | 共现,co-occurrence 84 | 85 | 广播,broadcast 86 | 87 | 规范化,normalization 88 | 89 | 过拟合,overfitting 90 | 91 | 核回归,kernel regression 92 | 93 | 恒等映射,identity mapping 94 | 95 | 假设,hypothesis 96 | 97 | 基准,baseline 98 | 99 | 激活函数,activation function 100 | 101 | 解码器,decoder 102 | 103 | 近似法,approximate method 104 | 105 | 经验风险最小化,empirical risk minimization 106 | 107 | 局部最小值,local minimum 108 | 109 | 卷积核,convolutional kernel 110 | 111 | 卷积神经网络,convolutional neural network 112 | 113 | 决策边界,decision boundary 114 | 115 | 均值,mean 116 | 117 | 均方误差,mean squared error 118 | 119 | 均匀采样,uniform sampling 120 | 121 | 块,block 122 | 123 | 困惑度,perplexity 124 | 125 | 拉普拉斯平滑,Laplace smoothing 126 | 127 | 连结,concatenate 128 | 129 | 类,class 130 | 131 | 交叉熵,cross-entropy 132 | 133 | 连续词袋,continous bag-of-words (CBOW) 134 | 135 | 零张量,zero tensor 136 | 137 | 流水线,pipeline 138 | 139 | 滤波器,filter 140 | 141 | 门控循环单元,gated recurrent units (GRU) 142 | 143 | 目标检测,object detection 144 | 145 | 偏置,bias 146 | 147 | 偏导数,partial derivative 148 | 149 | 偏移量,offset 150 | 151 | 批量,batch 152 | 153 | 齐普夫定律,Zipf's law 154 | 155 | 欠拟合,underfitting 156 | 157 | 情感分析,sentiment analysis 158 | 159 | 全连接层,fully-connected layer 160 | 161 | 权重,weight 162 | 163 | 三元,trigram 164 | 165 | 上采样,upsample 166 | 167 | 上下文变量,context variable 168 | 169 | 上下文窗口,context window 170 | 171 | 上下文词,context word 172 | 173 | 上下文向量,context vector 174 | 175 | 实例/示例,instance 176 | 177 | 收敛,converge 178 | 179 | 属性,property 180 | 181 | 数值方法,numerical method 182 | 183 | 数据集,dataset 184 | 185 | 数据示例,data instance 186 | 187 | 数据样例,data example 188 | 189 | 顺序分区,sequential partitioning 190 | 191 | softmax回归,softmax regression 192 | 193 | 随机采样,random sampling 194 | 195 | 损失函数,loss function 196 | 197 | 双向循环神经网络,bidirectional recurrent neural network 198 | 199 | 特征,feature 200 | 201 | 特征图,feature map 202 | 203 | 特征值,eigenvalue 204 | 205 | 梯度,gradient 206 | 207 | 梯度裁剪,gradient 
clipping 208 | 209 | 梯度消失,vanishing gradients 210 | 211 | 填充,padding 212 | 213 | 跳元模型,skip-gram model 214 | 215 | 调参,tune hyperparameter 216 | 217 | 停用词,stop words 218 | 219 | 通道,channel 220 | 221 | 凸优化,convex optimization 222 | 223 | 图像,image 224 | 225 | 未知词元,unknown token 226 | 227 | 无偏估计,unbiased estimate 228 | 229 | 误差,error 230 | 231 | 小批量,minibatch 232 | 233 | 小批量梯度,minibatch gradient 234 | 235 | 线性模型,linear model 236 | 237 | 线性回归,linear regression 238 | 239 | 协同过滤,collaborative filtering 240 | 241 | 学习率,learning rate 242 | 243 | 训练误差,training error 244 | 245 | 循环神经网络,recurrent neural network (RNN) 246 | 247 | 样例,example 248 | 249 | 一维梯度下降,gradient descent in one-dimensional space 250 | 251 | 一元,unigram 252 | 253 | 隐藏变量,hidden variable 254 | 255 | 隐藏层,hidden layer 256 | 257 | 优化器,optimizer 258 | 259 | 语料库,corpus 260 | 261 | 运算符,operator 262 | 263 | 自注意力,self-attention 264 | 265 | 真实值,ground truth 266 | 267 | 指标,metric 268 | 269 | 支持向量机,support vector machine 270 | 271 | 注意力机制,attention mechanism 272 | 273 | 注意力模型,attention model 274 | 275 | 注意力提示,attention cue 276 | 277 | 准确率/精度,accuracy 278 | -------------------------------------------------------------------------------- /chapter_appendix-tools-for-deep-learning/d2l.md: -------------------------------------------------------------------------------- 1 | # `d2l` API 文档 2 | :label:`sec_d2l` 3 | 4 | `d2l`包以下成员的实现及其定义和解释部分可在[源文件](https://github.com/d2l-ai/d2l-en/tree/master/d2l)中找到。 5 | 6 | 7 | :begin_tab:`mxnet` 8 | ```eval_rst 9 | .. currentmodule:: d2l.mxnet 10 | ``` 11 | :end_tab: 12 | 13 | :begin_tab:`pytorch` 14 | ```eval_rst 15 | .. currentmodule:: d2l.torch 16 | ``` 17 | :end_tab: 18 | 19 | :begin_tab:`tensorflow` 20 | ```eval_rst 21 | .. currentmodule:: d2l.torch 22 | ``` 23 | :end_tab: 24 | 25 | :begin_tab:`paddle` 26 | ```eval_rst 27 | .. currentmodule:: d2l.paddle 28 | ``` 29 | :end_tab: 30 | 31 | ## 模型 32 | 33 | ```eval_rst 34 | .. autoclass:: Module 35 | :members: 36 | 37 | .. autoclass:: LinearRegressionScratch 38 | :members: 39 | 40 | .. autoclass:: LinearRegression 41 | :members: 42 | 43 | .. autoclass:: Classification 44 | :members: 45 | ``` 46 | 47 | ## 数据 48 | 49 | ```eval_rst 50 | .. autoclass:: DataModule 51 | :members: 52 | 53 | .. autoclass:: SyntheticRegressionData 54 | :members: 55 | 56 | .. autoclass:: FashionMNIST 57 | :members: 58 | ``` 59 | 60 | ## 训练 61 | 62 | ```eval_rst 63 | .. autoclass:: Trainer 64 | :members: 65 | 66 | .. autoclass:: SGD 67 | :members: 68 | ``` 69 | 70 | ## 公用 71 | 72 | ```eval_rst 73 | .. autofunction:: add_to_class 74 | 75 | .. autofunction:: cpu 76 | 77 | .. autofunction:: gpu 78 | 79 | .. autofunction:: num_gpus 80 | 81 | .. autoclass:: ProgressBoard 82 | :members: 83 | 84 | .. autoclass:: HyperParameters 85 | :members: 86 | ``` 87 | -------------------------------------------------------------------------------- /chapter_appendix-tools-for-deep-learning/d2l_origin.md: -------------------------------------------------------------------------------- 1 | # `d2l` API Document 2 | :label:`sec_d2l` 3 | 4 | The implementations of the following members of the `d2l` package and sections where they are defined and explained can be found in the [source file](https://github.com/d2l-ai/d2l-en/tree/master/d2l). 5 | 6 | 7 | :begin_tab:`mxnet` 8 | 9 | ```eval_rst 10 | 11 | .. currentmodule:: d2l.mxnet 12 | 13 | ``` 14 | 15 | :end_tab: 16 | 17 | :begin_tab:`pytorch` 18 | 19 | ```eval_rst 20 | 21 | .. 
currentmodule:: d2l.torch 22 | 23 | ``` 24 | 25 | :begin_tab:`tensorflow` 26 | 27 | ```eval_rst 28 | 29 | .. currentmodule:: d2l.torch 30 | 31 | ``` 32 | 33 | :end_tab: 34 | 35 | ## Models 36 | 37 | ```eval_rst 38 | 39 | .. autoclass:: Module 40 | :members: 41 | 42 | .. autoclass:: LinearRegressionScratch 43 | :members: 44 | 45 | .. autoclass:: LinearRegression 46 | :members: 47 | 48 | .. autoclass:: Classification 49 | :members: 50 | 51 | ``` 52 | 53 | ## Data 54 | 55 | ```eval_rst 56 | 57 | .. autoclass:: DataModule 58 | :members: 59 | 60 | .. autoclass:: SyntheticRegressionData 61 | :members: 62 | 63 | .. autoclass:: FashionMNIST 64 | :members: 65 | 66 | ``` 67 | 68 | ## Trainer 69 | 70 | ```eval_rst 71 | 72 | .. autoclass:: Trainer 73 | :members: 74 | 75 | .. autoclass:: SGD 76 | :members: 77 | 78 | ``` 79 | 80 | ## Utilities 81 | 82 | ```eval_rst 83 | 84 | .. autofunction:: add_to_class 85 | 86 | .. autofunction:: cpu 87 | 88 | .. autofunction:: gpu 89 | 90 | .. autofunction:: num_gpus 91 | 92 | .. autoclass:: ProgressBoard 93 | :members: 94 | 95 | .. autoclass:: HyperParameters 96 | :members: 97 | 98 | ``` 99 | -------------------------------------------------------------------------------- /chapter_appendix-tools-for-deep-learning/index.md: -------------------------------------------------------------------------------- 1 | # 附录:深度学习工具 2 | :label:`chap_appendix_tools` 3 | 4 | 为了充分利用《动手学深度学习》,本书将在本附录中介绍不同工具, 5 | 例如如何运行这本交互式开源书籍和为本书做贡献。 6 | 7 | ```toc 8 | :maxdepth: 2 9 | 10 | jupyter 11 | sagemaker 12 | aws 13 | selecting-servers-gpus 14 | contributing 15 | d2l 16 | ``` 17 | -------------------------------------------------------------------------------- /chapter_appendix-tools-for-deep-learning/index_origin.md: -------------------------------------------------------------------------------- 1 | # Appendix: Tools for Deep Learning 2 | :label:`chap_appendix_tools` 3 | 4 | 5 | To get the most out of *Dive into Deep Learning*, 6 | we will talk you through different tools 7 | in this appendix, 8 | such as 9 | for running and contributing to this 10 | interactive open-source book. 
11 | 12 | ```toc 13 | :maxdepth: 2 14 | 15 | jupyter 16 | sagemaker 17 | aws 18 | colab 19 | selecting-servers-gpus 20 | contributing 21 | utils 22 | d2l 23 | ``` 24 | 25 | -------------------------------------------------------------------------------- /chapter_appendix-tools-for-deep-learning/jupyter.md: -------------------------------------------------------------------------------- 1 | # 使用Jupyter Notebook 2 | :label:`sec_jupyter` 3 | 4 | 本节介绍如何使用Jupyter Notebook编辑和运行本书各章中的代码。确保你已按照 :ref:`chap_installation`中的说明安装了Jupyter并下载了代码。如果你想了解更多关于Jupyter的信息,请参阅其[文档](https://jupyter.readthedocs.io/en/latest/)中的优秀教程。 5 | 6 | ## 在本地编辑和运行代码 7 | 8 | 假设本书代码的本地路径为`xx/yy/d2l-en/`。使用shell将目录更改为此路径(`cd xx/yy/d2l-en`)并运行命令`jupyter notebook`。如果浏览器未自动打开,请打开http://localhost:8888。此时你将看到Jupyter的界面以及包含本书代码的所有文件夹,如 :numref:`fig_jupyter00`所示 9 | 10 | ![包含本书代码的文件夹](../img/jupyter00.png) 11 | :width:`600px` 12 | :label:`fig_jupyter00` 13 | 14 | 你可以通过单击网页上显示的文件夹来访问notebook文件。它们通常有后缀“.ipynb”。为了简洁起见,我们创建了一个临时的“test.ipynb”文件。单击后显示的内容如 :numref:`fig_jupyter01`所示。此notebook包括一个标记单元格和一个代码单元格。标记单元格中的内容包括“This Is a Title”和“This is text.”。代码单元包含两行Python代码。 15 | 16 | ![“test.ipynb”文件中的markdown和代码块](../img/jupyter01.png) 17 | :width:`600px` 18 | :label:`fig_jupyter01` 19 | 20 | 双击标记单元格以进入编辑模式。在单元格末尾添加一个新的文本字符串“Hello world.”,如 :numref:`fig_jupyter02`所示。 21 | 22 | ![编辑markdown单元格](../img/jupyter02.png) 23 | :width:`600px` 24 | :label:`fig_jupyter02` 25 | 26 | 如 :numref:`fig_jupyter03`所示,单击菜单栏中的“Cell” $\rightarrow$ “Run Cells”以运行编辑后的单元格。 27 | 28 | ![运行单元格](../img/jupyter03.png) 29 | :width:`600px` 30 | :label:`fig_jupyter03` 31 | 32 | 运行后,markdown单元格如 :numref:`fig_jupyter04`所示。 33 | 34 | ![编辑后的markdown单元格](../img/jupyter04.png) 35 | :width:`600px` 36 | :label:`fig_jupyter04` 37 | 38 | 接下来,单击代码单元。将最后一行代码后的元素乘以2,如 :numref:`fig_jupyter05`所示。 39 | 40 | ![编辑代码单元格](../img/jupyter05.png) 41 | :width:`600px` 42 | :label:`fig_jupyter05` 43 | 44 | 你还可以使用快捷键(默认情况下为Ctrl+Enter)运行单元格,并从 :numref:`fig_jupyter06`获取输出结果。 45 | 46 | ![运行代码单元格以获得输出](../img/jupyter06.png) 47 | :width:`600px` 48 | :label:`fig_jupyter06` 49 | 50 | 当一个notebook包含更多单元格时,我们可以单击菜单栏中的“Kernel”$\rightarrow$“Restart & Run All”来运行整个notebook中的所有单元格。通过单击菜单栏中的“Help”$\rightarrow$“Edit Keyboard Shortcuts”,可以根据你的首选项编辑快捷键。 51 | 52 | ## 高级选项 53 | 54 | 除了本地编辑,还有两件事非常重要:以markdown格式编辑notebook和远程运行Jupyter。当我们想要在更快的服务器上运行代码时,后者很重要。前者很重要,因为Jupyter原生的ipynb格式存储了大量辅助数据,这些数据实际上并不特定于notebook中的内容,主要与代码的运行方式和运行位置有关。这让git感到困惑,并且使得合并贡献非常困难。幸运的是,还有另一种选择——在markdown中进行本地编辑。 55 | 56 | ### Jupyter中的Markdown文件 57 | 58 | 如果你希望对本书的内容有所贡献,则需要在GitHub上修改源文件(md文件,而不是ipynb文件)。使用notedown插件,我们可以直接在Jupyter中修改md格式的notebook。 59 | 60 | 首先,安装notedown插件,运行Jupyter Notebook并加载插件: 61 | 62 | ``` 63 | pip install d2l-notedown # 你可能需要卸载原始notedown 64 | jupyter notebook --NotebookApp.contents_manager_class='notedown.NotedownContentsManager' 65 | ``` 66 | 67 | 要在运行Jupyter Notebook时默认打开notedown插件,请执行以下操作:首先,生成一个Jupyter Notebook配置文件(如果已经生成了,可以跳过此步骤)。 68 | 69 | ``` 70 | jupyter notebook --generate-config 71 | ``` 72 | 73 | 然后,在Jupyter Notebook配置文件的末尾添加以下行(对于Linux/macOS,通常位于`~/.jupyter/jupyter_notebook_config.py`): 74 | 75 | ``` 76 | c.NotebookApp.contents_manager_class = 'notedown.NotedownContentsManager' 77 | ``` 78 | 79 | 在这之后,你只需要运行`jupyter notebook`命令就可以默认打开notedown插件。 80 | 81 | ### 在远程服务器上运行Jupyter Notebook 82 | 83 | 有时,你可能希望在远程服务器上运行Jupyter Notebook,并通过本地计算机上的浏览器访问它。如果本地计算机上安装了Linux或MacOS(Windows也可以通过PuTTY等第三方软件支持此功能),则可以使用端口转发: 84 | 85 | ``` 86 | ssh myserver -L 8888:localhost:8888 87 | ``` 88 | 89 | 
以上是远程服务器`myserver`的地址。然后我们可以使用http://localhost:8888 访问运行Jupyter Notebook的远程服务器`myserver`。下一节将详细介绍如何在AWS实例上运行Jupyter Notebook。 90 | 91 | ### 执行时间 92 | 93 | 我们可以使用`ExecuteTime`插件来计算Jupyter Notebook中每个代码单元的执行时间。使用以下命令安装插件: 94 | 95 | ``` 96 | pip install jupyter_contrib_nbextensions 97 | jupyter contrib nbextension install --user 98 | jupyter nbextension enable execute_time/ExecuteTime 99 | ``` 100 | 101 | ## 小结 102 | 103 | * 使用Jupyter Notebook工具,我们可以编辑、运行和为本书做贡献。 104 | * 使用端口转发在远程服务器上运行Jupyter Notebook。 105 | 106 | ## 练习 107 | 108 | 1. 在本地计算机上使用Jupyter Notebook编辑并运行本书中的代码。 109 | 1. 使用Jupyter Notebook通过端口转发来远程编辑和运行本书中的代码。 110 | 1. 对于两个方矩阵,测量$\mathbf{A}^\top \mathbf{B}$与$\mathbf{A} \mathbf{B}$在$\mathbb{R}^{1024 \times 1024}$中的运行时间。哪一个更快? 111 | 112 | [Discussions](https://discuss.d2l.ai/t/5731) 113 | -------------------------------------------------------------------------------- /chapter_appendix-tools-for-deep-learning/sagemaker.md: -------------------------------------------------------------------------------- 1 | # 使用Amazon SageMaker 2 | :label:`sec_sagemaker` 3 | 4 | 深度学习程序可能需要很多计算资源,这很容易超出你的本地计算机所能提供的范围。云计算服务允许你使用功能更强大的计算机更轻松地运行本书的GPU密集型代码。本节将介绍如何使用Amazon SageMaker运行本书的代码。 5 | 6 | ## 注册 7 | 8 | 首先,我们需要在注册一个帐户https://aws.amazon.com/。 为了增加安全性,鼓励使用双因素身份验证。设置详细的计费和支出警报也是一个好主意,以避免任何意外,例如,当忘记停止运行实例时。登录AWS帐户后,转到[console](http://console.aws.amazon.com/)并搜索“Amazon SageMaker”(参见 :numref:`fig_sagemaker`),然后单击它打开SageMaker面板。 9 | 10 | ![搜索并打开SageMaker面板](../img/sagemaker.png) 11 | :width:`300px` 12 | :label:`fig_sagemaker` 13 | 14 | ## 创建SageMaker实例 15 | 16 | 接下来,让我们创建一个notebook实例,如 :numref:`fig_sagemaker-create`所示。 17 | 18 | ![创建一个SageMaker实例](../img/sagemaker-create.png) 19 | :width:`400px` 20 | :label:`fig_sagemaker-create` 21 | 22 | SageMaker提供多个具有不同计算能力和价格的[实例类型](https://aws.amazon.com/sagemaker/pricing/instance-types/)。创建notebook实例时,可以指定其名称和类型。在 :numref:`fig_sagemaker-create-2`中,我们选择`ml.p3.2xlarge`:使用一个Tesla V100 GPU和一个8核CPU,这个实例的性能足够本书的大部分内容使用。 23 | 24 | ![选择实例类型](../img/sagemaker-create-2.png) 25 | :width:`400px` 26 | :label:`fig_sagemaker-create-2` 27 | 28 | :begin_tab:`mxnet` 29 | 用于与SageMaker一起运行的ipynb格式的整本书可从https://github.com/d2l-ai/d2l-en-sagemaker获得。 30 | 我们可以指定此GitHub存储库URL( :numref:`fig_sagemaker-create-3`),以允许SageMaker在创建实例时克隆它。 31 | :end_tab: 32 | 33 | :begin_tab:`pytorch` 34 | 用于与SageMaker一起运行的ipynb格式的整本书可从https://github.com/d2l-ai/d2l-pytorch-sagemaker获得。 35 | 我们可以指定此GitHub存储库URL( :numref:`fig_sagemaker-create-3`),以允许SageMaker在创建实例时克隆它。 36 | :end_tab: 37 | 38 | :begin_tab:`tensorflow` 39 | 用于与SageMaker一起运行的ipynb格式的整本书可从https://github.com/d2l-ai/d2l-tensorflow-sagemaker获得。 40 | 我们可以指定此GitHub存储库URL( :numref:`fig_sagemaker-create-3`),以允许SageMaker在创建实例时克隆它。 41 | :end_tab: 42 | 43 | ![指定GitHub存储库](../img/sagemaker-create-3.png) 44 | :width:`400px` 45 | :label:`fig_sagemaker-create-3` 46 | 47 | ## 运行和停止实例 48 | 49 | 创建实例可能需要几分钟的时间。当实例准备就绪时,单击它旁边的“Open Jupyter”链接( :numref:`fig_sagemaker-open`),以便你可以在此实例上编辑并运行本书的所有Jupyter Notebook(类似于 :numref:`sec_jupyter`中的步骤)。 50 | 51 | ![在创建的SageMaker实例上打开Jupyter](../img/sagemaker-open.png) 52 | :width:`400px` 53 | :label:`fig_sagemaker-open` 54 | 55 | 完成工作后,不要忘记停止实例以避免进一步收费( :numref:`fig_sagemaker-stop`)。 56 | 57 | ![停止SageMaker实例](../img/sagemaker-stop.png) 58 | :width:`300px` 59 | :label:`fig_sagemaker-stop` 60 | 61 | ## 更新Notebook 62 | 63 | :begin_tab:`mxnet` 64 | 这本开源书的notebook将定期在GitHub上的[d2l-ai/d2l-en-sagemaker](https://github.com/d2l-ai/d2l-en-sagemaker)存储库中更新。要更新至最新版本,你可以在SageMaker实例( :numref:`fig_sagemaker-terminal`)上打开终端。 65 | :end_tab: 66 | 
67 | :begin_tab:`pytorch` 68 | 这本开源书的notebook将定期在GitHub上的[d2l-ai/d2l-pytorch-sagemaker](https://github.com/d2l-ai/d2l-pytorch-sagemaker)存储库中更新。要更新至最新版本,你可以在SageMaker实例( :numref:`fig_sagemaker-terminal`)上打开终端。 69 | :end_tab: 70 | 71 | :begin_tab:`tensorflow` 72 | 这本开源书的notebook将定期在GitHub上的[d2l-ai/d2l-tensorflow-sagemaker](https://github.com/d2l-ai/d2l-tensorflow-sagemaker)存储库中更新。要更新至最新版本,你可以在SageMaker实例( :numref:`fig_sagemaker-terminal`)上打开终端。 73 | :end_tab: 74 | 75 | ![在SageMaker实例上打开终端](../img/sagemaker-terminal.png) 76 | :width:`300px` 77 | :label:`fig_sagemaker-terminal` 78 | 79 | 你可能希望在从远程存储库提取更新之前提交本地更改。否则,只需在终端中使用以下命令放弃所有本地更改: 80 | 81 | :begin_tab:`mxnet` 82 | 83 | ```bash 84 | cd SageMaker/d2l-en-sagemaker/ 85 | git reset --hard 86 | git pull 87 | ``` 88 | 89 | 90 | :end_tab: 91 | 92 | :begin_tab:`pytorch` 93 | 94 | ```bash 95 | cd SageMaker/d2l-pytorch-sagemaker/ 96 | git reset --hard 97 | git pull 98 | ``` 99 | 100 | 101 | :end_tab: 102 | 103 | :begin_tab:`tensorflow` 104 | 105 | ```bash 106 | cd SageMaker/d2l-tensorflow-sagemaker/ 107 | git reset --hard 108 | git pull 109 | ``` 110 | 111 | 112 | :end_tab: 113 | 114 | ## 小结 115 | 116 | * 我们可以使用Amazon SageMaker创建一个GPU的notebook实例来运行本书的密集型代码。 117 | * 我们可以通过Amazon SageMaker实例上的终端更新notebooks。 118 | 119 | ## 练习 120 | 121 | 1. 使用Amazon SageMaker编辑并运行任何需要GPU的部分。 122 | 1. 打开终端以访问保存本书所有notebooks的本地目录。 123 | 124 | [Discussions](https://discuss.d2l.ai/t/5732) 125 | -------------------------------------------------------------------------------- /chapter_appendix-tools-for-deep-learning/selecting-servers-gpus.md: -------------------------------------------------------------------------------- 1 | # 选择服务器和GPU 2 | :label:`sec_buy_gpu` 3 | 4 | 深度学习训练通常需要大量的计算。目前,GPU是深度学习最具成本效益的硬件加速器。与CPU相比,GPU更便宜,性能更高,通常超过一个数量级。此外,一台服务器可以支持多个GPU,高端服务器最多支持8个GPU。更典型的数字是工程工作站最多4个GPU,这是因为热量、冷却和电源需求会迅速增加,超出办公楼所能支持的范围。对于更大的部署,云计算(例如亚马逊的[P3](https://aws.amazon.com/ec2/instance-types/p3/)和[G4](https://aws.amazon.com/blogs/aws/in-the-works-ec2-instances-g4-with-nvidia-t4-gpus/)实例)是一个更实用的解决方案。 5 | 6 | ## 选择服务器 7 | 8 | 通常不需要购买具有多个线程的高端CPU,因为大部分计算都发生在GPU上。这就是说,由于Python中的全局解释器锁(GIL),CPU的单线程性能在有4-8个GPU的情况下可能很重要。所有的条件都是一样的,这意味着核数较少但时钟频率较高的CPU可能是更经济的选择。例如,当在6核4GHz和8核3.5GHz CPU之间进行选择时,前者更可取,即使其聚合速度较低。一个重要的考虑因素是,GPU使用大量的电能,从而释放大量的热量。这需要非常好的冷却和足够大的机箱来容纳GPU。如有可能,请遵循以下指南: 9 | 10 | 1. **电源**。GPU使用大量的电源。每个设备预计高达350W(检查显卡的*峰值需求*而不是一般需求,因为高效代码可能会消耗大量能源)。如果电源不能满足需求,系统会变得不稳定。 11 | 1. **机箱尺寸**。GPU很大,辅助电源连接器通常需要额外的空间。此外,大型机箱更容易冷却。 12 | 1. **GPU散热**。如果有大量的GPU,可能需要投资水冷。此外,即使风扇较少,也应以“公版设计”为目标,因为它们足够薄,可以在设备之间进气。当使用多风扇GPU,安装多个GPU时,它可能太厚而无法获得足够的空气。 13 | 1. 
**PCIe插槽**。在GPU之间来回移动数据(以及在GPU之间交换数据)需要大量带宽。建议使用16通道的PCIe 3.0插槽。当安装了多个GPU时,请务必仔细阅读主板说明,以确保在同时使用多个GPU时16$\times$带宽仍然可用,并且使用的是PCIe3.0,而不是用于附加插槽的PCIe2.0。在安装多个GPU的情况下,一些主板的带宽降级到8$\times$甚至4$\times$。这部分是由于CPU提供的PCIe通道数量限制。 14 | 15 | 简而言之,以下是构建深度学习服务器的一些建议。 16 | 17 | * **初学者**。购买低功耗的低端GPU(适合深度学习的廉价游戏GPU,功耗150-200W)。如果幸运的话,大家现在常用的计算机将支持它。 18 | * **1个GPU**。一个4核的低端CPU就足够了,大多数主板也足够了。以至少32 GB的DRAM为目标,投资SSD进行本地数据访问。600W的电源应足够。买一个有很多风扇的GPU。 19 | * **2个GPU**。一个4-6核的低端CPU就足够了。可以考虑64 GB的DRAM并投资于SSD。两个高端GPU将需要1000瓦的功率。对于主板,请确保它们具有*两个*PCIe 3.0 x16插槽。如果可以,请使用PCIe 3.0 x16插槽之间有两个可用空间(60毫米间距)的主板,以提供额外的空气。在这种情况下,购买两个具有大量风扇的GPU。 20 | * **4个GPU**。确保购买的CPU具有相对较快的单线程速度(即较高的时钟频率)。可能需要具有更多PCIe通道的CPU,例如AMD Threadripper。可能需要相对昂贵的主板才能获得4个PCIe 3.0 x16插槽,因为它们可能需要一个PLX来多路复用PCIe通道。购买带有公版设计的GPU,这些GPU很窄,并且让空气进入GPU之间。需要一个1600-2000W的电源,而办公室的插座可能不支持。此服务器可能在运行时*声音很大,很热*。不想把它放在桌子下面。建议使用128 GB的DRAM。获取一个用于本地存储的SSD(1-2 TB NVMe)和RAID配置的硬盘来存储数据。 21 | * **8 GPU**。需要购买带有多个冗余电源的专用多GPU服务器机箱(例如,每个电源为1600W时为2+1)。这将需要双插槽服务器CPU、256 GB ECC DRAM、快速网卡(建议使用10 GBE),并且需要检查服务器是否支持GPU的*物理外形*。用户GPU和服务器GPU之间的气流和布线位置存在显著差异(例如RTX 2080和Tesla V100)。这意味着可能无法在服务器中安装消费级GPU,因为电源线间隙不足或缺少合适的接线(本书一位合著者痛苦地发现了这一点)。 22 | 23 | ## 选择GPU 24 | 25 | 目前,AMD和NVIDIA是专用GPU的两大主要制造商。NVIDIA是第一个进入深度学习领域的公司,通过CUDA为深度学习框架提供更好的支持。因此,大多数买家选择NVIDIA GPU。 26 | 27 | NVIDIA提供两种类型的GPU,针对个人用户(例如,通过GTX和RTX系列)和企业用户(通过其Tesla系列)。这两种类型的GPU提供了相当的计算能力。但是,企业用户GPU通常使用强制(被动)冷却、更多内存和ECC(纠错)内存。这些GPU更适用于数据中心,通常成本是消费者GPU的十倍。 28 | 29 | 如果是一个拥有100个服务器的大公司,则应该考虑英伟达Tesla系列,或者在云中使用GPU服务器。对于实验室或10+服务器的中小型公司,英伟达RTX系列可能是最具成本效益的,可以购买超微或华硕机箱的预配置服务器,这些服务器可以有效地容纳4-8个GPU。 30 | 31 | GPU供应商通常每一到两年发布一代,例如2017年发布的GTX 1000(Pascal)系列和2019年发布的RTX 2000(Turing)系列。每个系列都提供几种不同的型号,提供不同的性能级别。GPU性能主要是以下三个参数的组合: 32 | 33 | 1. **计算能力**。通常大家会追求32位浮点计算能力。16位浮点训练(FP16)也进入主流。如果只对预测感兴趣,还可以使用8位整数。最新一代图灵GPU提供4-bit加速。不幸的是,目前训练低精度网络的算法还没有普及; 34 | 1. **内存大小**。随着模型变大或训练期间使用的批量变大,将需要更多的GPU内存。检查HBM2(高带宽内存)与GDDR6(图形DDR)内存。HBM2速度更快,但成本更高; 35 | 1. **内存带宽**。当有足够的内存带宽时,才能最大限度地利用计算能力。如果使用GDDR6,请追求宽内存总线。 36 | 37 | 对于大多数用户,只需看看计算能力就足够了。请注意,许多GPU提供不同类型的加速。例如,NVIDIA的Tensor Cores将操作符子集的速度提高了5$\times$。确保所使用的库支持这一点。GPU内存应不小于4GB(8GB更好)。尽量避免将GPU也用于显示GUI(改用内置显卡)。如果无法避免,请添加额外的2GB RAM以确保安全。 38 | 39 | :numref:`fig_flopsvsprice`比较了各种GTX 900、GTX 1000和RTX 2000系列的(GFlops)和价格(Price)。价格是维基百科上的建议价格。 40 | 41 | ![浮点计算能力和价格比较](../img/flopsvsprice.svg) 42 | :label:`fig_flopsvsprice` 43 | 44 | 由上图,可以看出很多事情: 45 | 46 | 1. 在每个系列中,价格和性能大致成比例。Titan因拥有大GPU内存而有相当的溢价。然而,通过比较980 Ti和1080 Ti可以看出,较新型号具有更好的成本效益。RTX 2000系列的价格似乎没有多大提高。然而,它们提供了更优秀的低精度性能(FP16、INT8和INT4); 47 | 2. GTX 1000系列的性价比大约是900系列的两倍; 48 | 3. 
对于RTX 2000系列,浮点计算能力是价格的“仿射”函数。 49 | 50 | ![浮点计算能力和能耗](../img/wattvsprice.svg) 51 | :label:`fig_wattvsprice` 52 | 53 | :numref:`fig_wattvsprice`显示了能耗与计算量基本成线性关系。其次,后一代更有效率。这似乎与对应于RTX 2000系列的图表相矛盾。然而,这是TensorCore不成比例的大能耗的结果。 54 | 55 | ## 小结 56 | 57 | * 在构建服务器时,请注意电源、PCIe总线通道、CPU单线程速度和散热。 58 | * 如果可能,应该购买最新一代的GPU。 59 | * 使用云进行大型部署。 60 | * 高密度服务器可能不与所有GPU兼容。在购买之前,请检查一下机械和散热规格。 61 | * 为提高效率,请使用FP16或更低的精度。 62 | -------------------------------------------------------------------------------- /chapter_attention-mechanisms/index.md: -------------------------------------------------------------------------------- 1 | # 注意力机制 2 | :label:`chap_attention` 3 | 4 | 灵长类动物的视觉系统接受了大量的感官输入, 5 | 这些感官输入远远超过了大脑能够完全处理的程度。 6 | 然而,并非所有刺激的影响都是相等的。 7 | 意识的聚集和专注使灵长类动物能够在复杂的视觉环境中将注意力引向感兴趣的物体,例如猎物和天敌。 8 | 只关注一小部分信息的能力对进化更加有意义,使人类得以生存和成功。 9 | 10 | 自19世纪以来,科学家们一直致力于研究认知神经科学领域的注意力。 11 | 本章的很多章节将涉及到一些研究。 12 | 13 | 首先回顾一个经典注意力框架,解释如何在视觉场景中展开注意力。 14 | 受此框架中的*注意力提示*(attention cues)的启发, 15 | 我们将设计能够利用这些注意力提示的模型。 16 | 1964年的Nadaraya-Waston核回归(kernel regression)正是具有 17 | *注意力机制*(attention mechanism)的机器学习的简单演示。 18 | 19 | 然后继续介绍的是注意力函数,它们在深度学习的注意力模型设计中被广泛使用。 20 | 具体来说,我们将展示如何使用这些函数来设计*Bahdanau注意力*。 21 | Bahdanau注意力是深度学习中的具有突破性价值的注意力模型,它双向对齐并且可以微分。 22 | 23 | 最后将描述仅仅基于注意力机制的*Transformer*架构, 24 | 该架构中使用了*多头注意力*(multi-head attention) 25 | 和*自注意力*(self-attention)。 26 | 自2017年横空出世,Transformer一直都普遍存在于现代的深度学习应用中, 27 | 例如语言、视觉、语音和强化学习领域。 28 | 29 | ```toc 30 | :maxdepth: 2 31 | 32 | attention-cues 33 | nadaraya-waston 34 | attention-scoring-functions 35 | bahdanau-attention 36 | multihead-attention 37 | self-attention-and-positional-encoding 38 | transformer 39 | ``` 40 | -------------------------------------------------------------------------------- /chapter_attention-mechanisms/index_origin.md: -------------------------------------------------------------------------------- 1 | # Attention Mechanisms 2 | :label:`chap_attention` 3 | 4 | The optic nerve of a primate's visual system 5 | receives massive sensory input, 6 | far exceeding what the brain can fully process. 7 | Fortunately, 8 | not all stimuli are created equal. 9 | Focalization and concentration of consciousness 10 | have enabled primates to direct attention 11 | to objects of interest, 12 | such as preys and predators, 13 | in the complex visual environment. 14 | The ability of paying attention to 15 | only a small fraction of the information 16 | has evolutionary significance, 17 | allowing human beings 18 | to live and succeed. 19 | 20 | Scientists have been studying attention 21 | in the cognitive neuroscience field 22 | since the 19th century. 23 | In this chapter, 24 | we will begin by reviewing a popular framework 25 | explaining how attention is deployed in a visual scene. 26 | Inspired by the attention cues in this framework, 27 | we will design models 28 | that leverage such attention cues. 29 | Notably, the Nadaraya-Waston kernel regression 30 | in 1964 is a simple demonstration of machine learning with *attention mechanisms*. 31 | 32 | Next, we will go on to introduce attention functions 33 | that have been extensively used in 34 | the design of attention models in deep learning. 35 | Specifically, 36 | we will show how to use these functions 37 | to design the *Bahdanau attention*, 38 | a groundbreaking attention model in deep learning 39 | that can align bidirectionally and is differentiable. 
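
To make these ideas concrete before the chapter begins, here is a minimal sketch — assuming PyTorch, in the spirit of the `d2l.torch` code used elsewhere in this repository — of scaled dot-product attention, one of the scoring functions developed later in the chapter. The helper name is illustrative; the chapter's full implementations add masking of padded positions, dropout, and multi-head projections that are omitted here.

```python
import math
import torch

def scaled_dot_product_attention(queries, keys, values):
    """Attention pooling: average the values, weighted by softmax-normalized query-key scores."""
    # queries: (batch, no. of queries, d); keys/values: (batch, no. of key-value pairs, d)
    d = queries.shape[-1]
    scores = torch.bmm(queries, keys.transpose(1, 2)) / math.sqrt(d)
    attention_weights = torch.softmax(scores, dim=-1)
    return torch.bmm(attention_weights, values)
```
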
40 | 41 | In the end, 42 | equipped with 43 | the more recent 44 | *multi-head attention* 45 | and *self-attention* designs, 46 | we will describe the *Transformer* architecture 47 | based solely on attention mechanisms. 48 | Since their proposal in 2017, 49 | Transformers 50 | have been pervasive in modern 51 | deep learning applications, 52 | such as in areas of 53 | language, 54 | vision, speech, 55 | and reinforcement learning. 56 | 57 | ```toc 58 | :maxdepth: 2 59 | 60 | attention-cues 61 | nadaraya-waston 62 | attention-scoring-functions 63 | bahdanau-attention 64 | multihead-attention 65 | self-attention-and-positional-encoding 66 | transformer 67 | ``` 68 | 69 | -------------------------------------------------------------------------------- /chapter_computational-performance/index.md: -------------------------------------------------------------------------------- 1 | # 计算性能 2 | :label:`chap_performance` 3 | 4 | 在深度学习中,数据集和模型通常都很大,导致计算量也会很大。 5 | 因此,计算的性能非常重要。 6 | 本章将集中讨论影响计算性能的主要因素:命令式编程、符号编程、 7 | 异步计算、自动并行和多GPU计算。 8 | 通过学习本章,对于前几章中实现的那些模型,可以进一步提高它们的计算性能。 9 | 例如,我们可以在不影响准确性的前提下,大大减少训练时间。 10 | 11 | ```toc 12 | :maxdepth: 2 13 | 14 | hybridize 15 | async-computation 16 | auto-parallelism 17 | hardware 18 | multiple-gpus 19 | multiple-gpus-concise 20 | parameterserver 21 | ``` 22 | -------------------------------------------------------------------------------- /chapter_computational-performance/index_origin.md: -------------------------------------------------------------------------------- 1 | # Computational Performance 2 | :label:`chap_performance` 3 | 4 | In deep learning, 5 | datasets and models are usually large, 6 | which involves heavy computation. 7 | Therefore, computational performance matters a lot. 8 | This chapter will focus on the major factors that affect computational performance: 9 | imperative programming, symbolic programming, asynchronous computing, automatic parallellism, and multi-GPU computation. 10 | By studying this chapter, you may further improve computational performance of those models implemented in the previous chapters, 11 | for example, by reducing training time without affecting accuracy. 
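
As a concrete preview of the imperative-versus-symbolic distinction that the hybridize section develops, the following is a minimal sketch assuming PyTorch (the section itself provides per-framework tabs): the same small network is first run eagerly, then traced into a graph object that the runtime can optimize and serialize as a whole.

```python
import torch
from torch import nn

net = nn.Sequential(nn.Linear(512, 256), nn.ReLU(), nn.Linear(256, 2))
x = torch.randn(1, 512)

y_imperative = net(x)              # imperative (eager) execution: ops run one by one
traced = torch.jit.trace(net, x)   # capture the computation as a single graph
y_symbolic = traced(x)             # the traced graph can be optimized and serialized

assert torch.allclose(y_imperative, y_symbolic)
```
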
12 | 13 | ```toc 14 | :maxdepth: 2 15 | 16 | hybridize 17 | async-computation 18 | auto-parallelism 19 | hardware 20 | multiple-gpus 21 | multiple-gpus-concise 22 | parameterserver 23 | ``` 24 | -------------------------------------------------------------------------------- /chapter_computer-vision/index.md: -------------------------------------------------------------------------------- 1 | # 计算机视觉 2 | :label:`chap_cv` 3 | 4 | 近年来,深度学习一直是提高计算机视觉系统性能的变革力量。 5 | 无论是医疗诊断、自动驾驶,还是智能滤波器、摄像头监控,许多计算机视觉领域的应用都与我们当前和未来的生活密切相关。 6 | 可以说,最先进的计算机视觉应用与深度学习几乎是不可分割的。 7 | 有鉴于此,本章将重点介绍计算机视觉领域,并探讨最近在学术界和行业中具有影响力的方法和应用。 8 | 9 | 在 :numref:`chap_cnn`和 :numref:`chap_modern_cnn`中,我们研究了计算机视觉中常用的各种卷积神经网络,并将它们应用到简单的图像分类任务中。 10 | 本章开头,我们将介绍两种可以改进模型泛化的方法,即*图像增广*和*微调*,并将它们应用于图像分类。 11 | 由于深度神经网络可以有效地表示多个层次的图像,因此这种分层表示已成功用于各种计算机视觉任务,例如*目标检测*(object detection)、*语义分割*(semantic segmentation)和*样式迁移*(style transfer)。 12 | 秉承计算机视觉中利用分层表示的关键思想,我们将从物体检测的主要组件和技术开始,继而展示如何使用*完全卷积网络*对图像进行语义分割,然后我们将解释如何使用样式迁移技术来生成像本书封面一样的图像。 13 | 最后在结束本章时,我们将本章和前几章的知识应用于两个流行的计算机视觉基准数据集。 14 | 15 | ```toc 16 | :maxdepth: 2 17 | 18 | image-augmentation 19 | fine-tuning 20 | bounding-box 21 | anchor 22 | multiscale-object-detection 23 | object-detection-dataset 24 | ssd 25 | rcnn 26 | semantic-segmentation-and-dataset 27 | transposed-conv 28 | fcn 29 | neural-style 30 | kaggle-cifar10 31 | kaggle-dog 32 | ``` 33 | -------------------------------------------------------------------------------- /chapter_computer-vision/index_origin.md: -------------------------------------------------------------------------------- 1 | # Computer Vision 2 | :label:`chap_cv` 3 | 4 | Whether it is medical diagnosis, self-driving vehicles, camera monitoring, or smart filters, many applications in the field of computer vision are closely related to our current and future lives. 5 | In recent years, deep learning has been 6 | the transformative power for advancing the performance of computer vision systems. 7 | It can be said that the most advanced computer vision applications are almost inseparable from deep learning. 8 | In view of this, this chapter will focus on the field of computer vision, and investigate methods and applications that have recently been influential in academia and industry. 9 | 10 | 11 | In :numref:`chap_cnn` and :numref:`chap_modern_cnn`, we studied various convolutional neural networks that are 12 | commonly used in computer vision, and applied them 13 | to simple image classification tasks. 14 | At the beginning of this chapter, we will describe 15 | two methods that 16 | may improve model generalization, namely *image augmentation* and *fine-tuning*, 17 | and apply them to image classification. 18 | Since deep neural networks can effectively represent images in multiple levels, 19 | such layerwise representations have been successfully 20 | used in various computer vision tasks such as *object detection*, *semantic segmentation*, and *style transfer*. 21 | Following the key idea of leveraging layerwise representations in computer vision, 22 | we will begin with major components and techniques for object detection. Next, we will show how to use *fully convolutional networks* for semantic segmentation of images. Then we will explain how to use style transfer techniques to generate images like the cover of this book. 23 | In the end, we conclude this chapter 24 | by applying the materials of this chapter and several previous chapters on two popular computer vision benchmark datasets. 
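
To give a concrete flavor of the image-augmentation techniques that open the chapter, here is a minimal sketch assuming `torchvision` is available; the image-augmentation section builds a similar pipeline step by step and explains each transform.

```python
from torchvision import transforms

# Each call to this pipeline yields a differently perturbed view of the same training image,
# which is how augmentation enlarges the effective dataset and improves generalization.
train_augs = transforms.Compose([
    transforms.RandomHorizontalFlip(),  # flip left-right with probability 0.5
    transforms.RandomResizedCrop(200, scale=(0.1, 1.0), ratio=(0.5, 2.0)),
    transforms.ColorJitter(brightness=0.5, contrast=0.5, saturation=0.5, hue=0.5),
    transforms.ToTensor(),
])
# Usage: img_tensor = train_augs(pil_image) for a PIL image loaded from disk.
```
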
25 | 26 | ```toc 27 | :maxdepth: 2 28 | 29 | image-augmentation 30 | fine-tuning 31 | bounding-box 32 | anchor 33 | multiscale-object-detection 34 | object-detection-dataset 35 | ssd 36 | rcnn 37 | semantic-segmentation-and-dataset 38 | transposed-conv 39 | fcn 40 | neural-style 41 | kaggle-cifar10 42 | kaggle-dog 43 | ``` 44 | 45 | -------------------------------------------------------------------------------- /chapter_convolutional-modern/index.md: -------------------------------------------------------------------------------- 1 | # 现代卷积神经网络 2 | :label:`chap_modern_cnn` 3 | 4 | 上一章我们介绍了卷积神经网络的基本原理,本章将介绍现代的卷积神经网络架构,许多现代卷积神经网络的研究都是建立在这一章的基础上的。 5 | 在本章中的每一个模型都曾一度占据主导地位,其中许多模型都是ImageNet竞赛的优胜者。ImageNet竞赛自2010年以来,一直是计算机视觉中监督学习进展的指向标。 6 | 7 | 这些模型包括: 8 | 9 | - AlexNet。它是第一个在大规模视觉竞赛中击败传统计算机视觉模型的大型神经网络; 10 | - 使用重复块的网络(VGG)。它利用许多重复的神经网络块; 11 | - 网络中的网络(NiN)。它重复使用由卷积层和$1\times 1$卷积层(用来代替全连接层)来构建深层网络; 12 | - 含并行连结的网络(GoogLeNet)。它使用并行连结的网络,通过不同窗口大小的卷积层和最大汇聚层来并行抽取信息; 13 | - 残差网络(ResNet)。它通过残差块构建跨层的数据通道,是计算机视觉中最流行的体系架构; 14 | - 稠密连接网络(DenseNet)。它的计算成本很高,但给我们带来了更好的效果。 15 | 16 | 虽然深度神经网络的概念非常简单——将神经网络堆叠在一起。但由于不同的网络架构和超参数选择,这些神经网络的性能会发生很大变化。 17 | 本章介绍的神经网络是将人类直觉和相关数学见解结合后,经过大量研究试错后的结晶。 18 | 我们会按时间顺序介绍这些模型,在追寻历史的脉络的同时,帮助培养对该领域发展的直觉。这将有助于研究开发自己的架构。 19 | 例如,本章介绍的批量规范化(batch normalization)和残差网络(ResNet)为设计和训练深度神经网络提供了重要思想指导。 20 | 21 | ```toc 22 | :maxdepth: 2 23 | 24 | alexnet 25 | vgg 26 | nin 27 | googlenet 28 | batch-norm 29 | resnet 30 | densenet 31 | ``` 32 | -------------------------------------------------------------------------------- /chapter_convolutional-modern/index_origin.md: -------------------------------------------------------------------------------- 1 | # Modern Convolutional Neural Networks 2 | :label:`chap_modern_cnn` 3 | 4 | Now that we understand the basics of wiring together CNNs, 5 | we will take you through a tour of modern CNN architectures. 6 | In this chapter, each section corresponds 7 | to a significant CNN architecture that was 8 | at some point (or currently) the base model 9 | upon which many research projects and deployed systems were built. 10 | Each of these networks was briefly a dominant architecture 11 | and many were winners or runners-up in the ImageNet competition, 12 | which has served as a barometer of progress 13 | on supervised learning in computer vision since 2010. 14 | 15 | These models include AlexNet, the first large-scale network deployed 16 | to beat conventional computer vision methods on a large-scale vision challenge; 17 | the VGG network, which makes use of a number of repeating blocks of elements; the network in network (NiN) which convolves 18 | whole neural networks patch-wise over inputs; 19 | GoogLeNet, which uses networks with parallel concatenations; 20 | residual networks (ResNet), which remain the most popular 21 | off-the-shelf architecture in computer vision; 22 | and densely connected networks (DenseNet), 23 | which are expensive to compute but have set some recent benchmarks. 24 | 25 | While the idea of *deep* neural networks is quite simple 26 | (stack together a bunch of layers), 27 | performance can vary wildly across architectures and hyperparameter choices. 28 | The neural networks described in this chapter 29 | are the product of intuition, a few mathematical insights, 30 | and a whole lot of trial and error. 
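Because residual connections recur throughout this list and in later chapters, a small sketch may help fix the idea before the chronological tour begins. This is a hedged PyTorch illustration rather than the book's `resnet.md` implementation; the fixed channel count and the missing 1x1 shortcut convolution are simplifications.

```python
# A minimal residual block: the skip connection carries x across two conv layers.
import torch
from torch import nn
from torch.nn import functional as F

class Residual(nn.Module):
    """Compute ReLU(f(x) + x), where f is two conv + batch-norm stages."""
    def __init__(self, num_channels):
        super().__init__()
        self.conv1 = nn.Conv2d(num_channels, num_channels, kernel_size=3, padding=1)
        self.conv2 = nn.Conv2d(num_channels, num_channels, kernel_size=3, padding=1)
        self.bn1 = nn.BatchNorm2d(num_channels)
        self.bn2 = nn.BatchNorm2d(num_channels)

    def forward(self, x):
        y = F.relu(self.bn1(self.conv1(x)))
        y = self.bn2(self.conv2(y))
        return F.relu(y + x)   # identity shortcut

x = torch.randn(1, 16, 32, 32)
print(Residual(16)(x).shape)   # torch.Size([1, 16, 32, 32])
```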
31 | We present these models in chronological order, 32 | partly to convey a sense of the history 33 | so that you can form your own intuitions 34 | about where the field is heading 35 | and perhaps develop your own architectures. 36 | For instance, 37 | batch normalization and residual connections described in this chapter have offered two popular ideas for training and designing deep models. 38 | 39 | ```toc 40 | :maxdepth: 2 41 | 42 | alexnet 43 | vgg 44 | nin 45 | googlenet 46 | batch-norm 47 | resnet 48 | densenet 49 | ``` 50 | 51 | -------------------------------------------------------------------------------- /chapter_convolutional-neural-networks/index.md: -------------------------------------------------------------------------------- 1 | # 卷积神经网络 2 | :label:`chap_cnn` 3 | 4 | 在前面的章节中,我们遇到过图像数据。 5 | 这种数据的每个样本都由一个二维像素网格组成, 6 | 每个像素可能是一个或者多个数值,取决于是黑白还是彩色图像。 7 | 到目前为止,我们处理这类结构丰富的数据的方式还不够有效。 8 | 我们仅仅通过将图像数据展平成一维向量而忽略了每个图像的空间结构信息,再将数据送入一个全连接的多层感知机中。 9 | 因为这些网络特征元素的顺序是不变的,因此最优的结果是利用先验知识,即利用相近像素之间的相互关联性,从图像数据中学习得到有效的模型。 10 | 11 | 本章介绍的*卷积神经网络*(convolutional neural network,CNN)是一类强大的、为处理图像数据而设计的神经网络。 12 | 基于卷积神经网络架构的模型在计算机视觉领域中已经占主导地位,当今几乎所有的图像识别、目标检测或语义分割相关的学术竞赛和商业应用都以这种方法为基础。 13 | 14 | 现代卷积神经网络的设计得益于生物学、群论和一系列的补充实验。 15 | 卷积神经网络需要的参数少于全连接架构的网络,而且卷积也很容易用GPU并行计算。 16 | 因此卷积神经网络除了能够高效地采样从而获得精确的模型,还能够高效地计算。 17 | 久而久之,从业人员越来越多地使用卷积神经网络。即使在通常使用循环神经网络的一维序列结构任务上(例如音频、文本和时间序列分析),卷积神经网络也越来越受欢迎。 18 | 通过对卷积神经网络一些巧妙的调整,也使它们在图结构数据和推荐系统中发挥作用。 19 | 20 | 在本章的开始,我们将介绍构成所有卷积网络主干的基本元素。 21 | 这包括卷积层本身、填充(padding)和步幅(stride)的基本细节、用于在相邻区域汇聚信息的汇聚层(pooling)、在每一层中多通道(channel)的使用,以及有关现代卷积网络架构的仔细讨论。 22 | 在本章的最后,我们将介绍一个完整的、可运行的LeNet模型:这是第一个成功应用的卷积神经网络,比现代深度学习兴起时间还要早。 23 | 在下一章中,我们将深入研究一些流行的、相对较新的卷积神经网络架构的完整实现,这些网络架构涵盖了现代从业者通常使用的大多数经典技术。 24 | 25 | ```toc 26 | :maxdepth: 2 27 | 28 | why-conv 29 | conv-layer 30 | padding-and-strides 31 | channels 32 | pooling 33 | lenet 34 | ``` 35 | -------------------------------------------------------------------------------- /chapter_convolutional-neural-networks/index_origin.md: -------------------------------------------------------------------------------- 1 | # Convolutional Neural Networks 2 | :label:`chap_cnn` 3 | 4 | In earlier chapters, we came up against image data, 5 | for which each example consists of a two-dimensional grid of pixels. 6 | Depending on whether we are handling black-and-white or color images, 7 | each pixel location might be associated with either 8 | one or multiple numerical values, respectively. 9 | Until now, our way of dealing with this rich structure 10 | was deeply unsatisfying. 11 | We simply discarded each image's spatial structure 12 | by flattening them into one-dimensional vectors, feeding them 13 | through a fully-connected MLP. 14 | Because these networks are invariant to the order 15 | of the features, 16 | we could get similar results 17 | regardless of whether we preserve an order 18 | corresponding to the spatial structure of the pixels 19 | or if we permute the columns of our design matrix 20 | before fitting the MLP's parameters. 21 | Preferably, we would leverage our prior knowledge 22 | that nearby pixels are typically related to each other, 23 | to build efficient models for learning from image data. 24 | 25 | This chapter introduces *convolutional neural networks* (CNNs), 26 | a powerful family of neural networks 27 | that are designed for precisely this purpose. 
28 | CNN-based architectures are now ubiquitous 29 | in the field of computer vision, 30 | and have become so dominant 31 | that hardly anyone today would develop 32 | a commercial application or enter a competition 33 | related to image recognition, object detection, 34 | or semantic segmentation, 35 | without building off of this approach. 36 | 37 | Modern CNNs, as they are called colloquially 38 | owe their design to inspirations from biology, group theory, 39 | and a healthy dose of experimental tinkering. 40 | In addition to their sample efficiency in achieving accurate models, 41 | CNNs tend to be computationally efficient, 42 | both because they require fewer parameters than fully-connected architectures 43 | and because convolutions are easy to parallelize across GPU cores. 44 | Consequently, practitioners often apply CNNs whenever possible, 45 | and increasingly they have emerged as credible competitors 46 | even on tasks with a one-dimensional sequence structure, 47 | such as audio, text, and time series analysis, 48 | where recurrent neural networks are conventionally used. 49 | Some clever adaptations of CNNs have also brought them to bear 50 | on graph-structured data and in recommender systems. 51 | 52 | First, we will walk through the basic operations 53 | that comprise the backbone of all convolutional networks. 54 | These include the convolutional layers themselves, 55 | nitty-gritty details including padding and stride, 56 | the pooling layers used to aggregate information 57 | across adjacent spatial regions, 58 | the use of multiple channels at each layer, 59 | and a careful discussion of the structure of modern architectures. 60 | We will conclude the chapter with a full working example of LeNet, 61 | the first convolutional network successfully deployed, 62 | long before the rise of modern deep learning. 63 | In the next chapter, we will dive into full implementations 64 | of some popular and comparatively recent CNN architectures 65 | whose designs represent most of the techniques 66 | commonly used by modern practitioners. 
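The sketch below strings those ingredients (convolutional layers, padding, pooling, and multiple channels) into a LeNet-style network and traces how a grayscale image changes shape layer by layer. It is an illustrative PyTorch preview with assumed layer sizes; `lenet.md` builds and trains the real model.

```python
# Trace the tensor shape through a LeNet-style stack of conv, pooling,
# and fully connected layers; a hedged preview of the chapter's contents.
import torch
from torch import nn

net = nn.Sequential(
    nn.Conv2d(1, 6, kernel_size=5, padding=2), nn.Sigmoid(),
    nn.AvgPool2d(kernel_size=2, stride=2),
    nn.Conv2d(6, 16, kernel_size=5), nn.Sigmoid(),
    nn.AvgPool2d(kernel_size=2, stride=2),
    nn.Flatten(),
    nn.Linear(16 * 5 * 5, 120), nn.Sigmoid(),
    nn.Linear(120, 84), nn.Sigmoid(),
    nn.Linear(84, 10))

x = torch.randn(1, 1, 28, 28)          # one 28x28 grayscale image
for layer in net:
    x = layer(x)
    print(f'{layer.__class__.__name__:10s} output shape: {tuple(x.shape)}')
```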
67 | 68 | ```toc 69 | :maxdepth: 2 70 | 71 | why-conv 72 | conv-layer 73 | padding-and-strides 74 | channels 75 | pooling 76 | lenet 77 | ``` 78 | 79 | -------------------------------------------------------------------------------- /chapter_deep-learning-computation/deferred-init.md: -------------------------------------------------------------------------------- 1 | # 延后初始化 2 | :label:`sec_deferred_init` 3 | 4 | 到目前为止,我们忽略了建立网络时需要做的以下这些事情: 5 | 6 | * 我们定义了网络架构,但没有指定输入维度。 7 | * 我们添加层时没有指定前一层的输出维度。 8 | * 我们在初始化参数时,甚至没有足够的信息来确定模型应该包含多少参数。 9 | 10 | 有些读者可能会对我们的代码能运行感到惊讶。 11 | 毕竟,深度学习框架无法判断网络的输入维度是什么。 12 | 这里的诀窍是框架的*延后初始化*(defers initialization), 13 | 即直到数据第一次通过模型传递时,框架才会动态地推断出每个层的大小。 14 | 15 | 在以后,当使用卷积神经网络时, 16 | 由于输入维度(即图像的分辨率)将影响每个后续层的维数, 17 | 有了该技术将更加方便。 18 | 现在我们在编写代码时无须知道维度是什么就可以设置参数, 19 | 这种能力可以大大简化定义和修改模型的任务。 20 | 接下来,我们将更深入地研究初始化机制。 21 | 22 | ## 实例化网络 23 | 24 | 首先,让我们实例化一个多层感知机。 25 | 26 | ```{.python .input} 27 | from mxnet import np, npx 28 | from mxnet.gluon import nn 29 | npx.set_np() 30 | 31 | def get_net(): 32 | net = nn.Sequential() 33 | net.add(nn.Dense(256, activation='relu')) 34 | net.add(nn.Dense(10)) 35 | return net 36 | 37 | net = get_net() 38 | ``` 39 | 40 | ```{.python .input} 41 | #@tab tensorflow 42 | import tensorflow as tf 43 | 44 | net = tf.keras.models.Sequential([ 45 | tf.keras.layers.Dense(256, activation=tf.nn.relu), 46 | tf.keras.layers.Dense(10), 47 | ]) 48 | ``` 49 | 50 | 此时,因为输入维数是未知的,所以网络不可能知道输入层权重的维数。 51 | 因此,框架尚未初始化任何参数,我们通过尝试访问以下参数进行确认。 52 | 53 | ```{.python .input} 54 | print(net.collect_params) 55 | print(net.collect_params()) 56 | ``` 57 | 58 | ```{.python .input} 59 | #@tab tensorflow 60 | [net.layers[i].get_weights() for i in range(len(net.layers))] 61 | ``` 62 | 63 | :begin_tab:`mxnet` 64 | 注意,当参数对象存在时,每个层的输入维度为-1。 65 | MXNet使用特殊值-1表示参数维度仍然未知。 66 | 此时,尝试访问`net[0].weight.data()`将触发运行时错误, 67 | 提示必须先初始化网络,然后才能访问参数。 68 | 现在让我们看看当我们试图通过`initialize`函数初始化参数时会发生什么。 69 | :end_tab: 70 | 71 | :begin_tab:`tensorflow` 72 | 请注意,每个层对象都存在,但权重为空。 73 | 使用`net.get_weights()`将抛出一个错误,因为权重尚未初始化。 74 | :end_tab: 75 | 76 | ```{.python .input} 77 | net.initialize() 78 | net.collect_params() 79 | ``` 80 | 81 | :begin_tab:`mxnet` 82 | 如我们所见,一切都没有改变。 83 | 当输入维度未知时,调用`initialize`不会真正初始化参数。 84 | 而是会在MXNet内部声明希望初始化参数,并且可以选择初始化分布。 85 | :end_tab: 86 | 87 | 接下来让我们将数据通过网络,最终使框架初始化参数。 88 | 89 | ```{.python .input} 90 | X = np.random.uniform(size=(2, 20)) 91 | net(X) 92 | 93 | net.collect_params() 94 | ``` 95 | 96 | ```{.python .input} 97 | #@tab tensorflow 98 | X = tf.random.uniform((2, 20)) 99 | net(X) 100 | [w.shape for w in net.get_weights()] 101 | ``` 102 | 103 | 一旦我们知道输入维数是20,框架可以通过代入值20来识别第一层权重矩阵的形状。 104 | 识别出第一层的形状后,框架处理第二层,依此类推,直到所有形状都已知为止。 105 | 注意,在这种情况下,只有第一层需要延迟初始化,但是框架仍是按顺序初始化的。 106 | 等到知道了所有的参数形状,框架就可以初始化参数。 107 | 108 | ## 小结 109 | 110 | * 延后初始化使框架能够自动推断参数形状,使修改模型架构变得容易,避免了一些常见的错误。 111 | * 我们可以通过模型传递数据,使框架最终初始化参数。 112 | 113 | ## 练习 114 | 115 | 1. 如果指定了第一层的输入尺寸,但没有指定后续层的尺寸,会发生什么?是否立即进行初始化? 116 | 1. 如果指定了不匹配的维度会发生什么? 117 | 1. 
如果输入具有不同的维度,需要做什么?提示:查看参数绑定的相关内容。 118 | 119 | :begin_tab:`mxnet` 120 | [Discussions](https://discuss.d2l.ai/t/5770) 121 | :end_tab: 122 | 123 | :begin_tab:`pytorch` 124 | [Discussions](https://discuss.d2l.ai/t/5770) 125 | :end_tab: 126 | 127 | :begin_tab:`tensorflow` 128 | [Discussions](https://discuss.d2l.ai/t/1833) 129 | :end_tab: 130 | 131 | :begin_tab:`paddle` 132 | [Discussions](https://discuss.d2l.ai/t/11779) 133 | :end_tab: 134 | -------------------------------------------------------------------------------- /chapter_deep-learning-computation/index.md: -------------------------------------------------------------------------------- 1 | # 深度学习计算 2 | :label:`chap_computation` 3 | 4 | 除了庞大的数据集和强大的硬件, 5 | 优秀的软件工具在深度学习的快速发展中发挥了不可或缺的作用。 6 | 从2007年发布的开创性的Theano库开始, 7 | 灵活的开源工具使研究人员能够快速开发模型原型, 8 | 避免了我们使用标准组件时的重复工作, 9 | 同时仍然保持了我们进行底层修改的能力。 10 | 随着时间的推移,深度学习库已经演变成提供越来越粗糙的抽象。 11 | 就像半导体设计师从指定晶体管到逻辑电路再到编写代码一样, 12 | 神经网络研究人员已经从考虑单个人工神经元的行为转变为从层的角度构思网络, 13 | 通常在设计架构时考虑的是更粗糙的块(block)。 14 | 15 | 之前我们已经介绍了一些基本的机器学习概念, 16 | 并慢慢介绍了功能齐全的深度学习模型。 17 | 在上一章中,我们从零开始实现了多层感知机的每个组件, 18 | 然后展示了如何利用高级API轻松地实现相同的模型。 19 | 为了易于学习,我们调用了深度学习库,但是跳过了它们工作的细节。 20 | 在本章中,我们将深入探索深度学习计算的关键组件, 21 | 即模型构建、参数访问与初始化、设计自定义层和块、将模型读写到磁盘, 22 | 以及利用GPU实现显著的加速。 23 | 这些知识将使读者从深度学习“基础用户”变为“高级用户”。 24 | 虽然本章不介绍任何新的模型或数据集, 25 | 但后面的高级模型章节在很大程度上依赖于本章的知识。 26 | 27 | ```toc 28 | :maxdepth: 2 29 | 30 | model-construction 31 | parameters 32 | deferred-init 33 | custom-layer 34 | read-write 35 | use-gpu 36 | ``` 37 | -------------------------------------------------------------------------------- /chapter_deep-learning-computation/index_origin.md: -------------------------------------------------------------------------------- 1 | # Deep Learning Computation 2 | :label:`chap_computation` 3 | 4 | Alongside giant datasets and powerful hardware, 5 | great software tools have played an indispensable role 6 | in the rapid progress of deep learning. 7 | Starting with the pathbreaking Theano library released in 2007, 8 | flexible open-source tools have enabled researchers 9 | to rapidly prototype models, avoiding repetitive work 10 | when recycling standard components 11 | while still maintaining the ability to make low-level modifications. 12 | Over time, deep learning's libraries have evolved 13 | to offer increasingly coarse abstractions. 14 | Just as semiconductor designers went from specifying transistors 15 | to logical circuits to writing code, 16 | neural networks researchers have moved from thinking about 17 | the behavior of individual artificial neurons 18 | to conceiving of networks in terms of whole layers, 19 | and now often design architectures with far coarser *blocks* in mind. 20 | 21 | 22 | So far, we have introduced some basic machine learning concepts, 23 | ramping up to fully-functional deep learning models. 24 | In the last chapter, 25 | we implemented each component of an MLP from scratch 26 | and even showed how to leverage high-level APIs 27 | to roll out the same models effortlessly. 28 | To get you that far that fast, we *called upon* the libraries, 29 | but skipped over more advanced details about *how they work*. 30 | In this chapter, we will peel back the curtain, 31 | digging deeper into the key components of deep learning computation, 32 | namely model construction, parameter access and initialization, 33 | designing custom layers and blocks, reading and writing models to disk, 34 | and leveraging GPUs to achieve dramatic speedups. 
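As a one-screen preview of those components, the hedged PyTorch sketch below touches each of them once: `nn.LazyLinear` defers shape inference to the first forward pass (PyTorch's counterpart of the deferred initialization discussed above), followed by parameter access, writing parameters to disk, and device placement. The file name and layer sizes are illustrative only.

```python
# Deferred initialization, parameter access, save/load, and GPU placement
# in a few lines; a sketch, not the chapter's exact code.
import torch
from torch import nn

# LazyLinear infers its input dimension from the first batch it sees.
net = nn.Sequential(nn.LazyLinear(256), nn.ReLU(), nn.LazyLinear(10))
X = torch.rand(2, 20)
net(X)                                        # shapes are now materialized

print(net[0].weight.shape)                    # parameter access: torch.Size([256, 20])
print([name for name, _ in net.named_parameters()])

torch.save(net.state_dict(), 'mlp.params')    # write parameters to disk
net.load_state_dict(torch.load('mlp.params')) # and read them back

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
net.to(device)                                # move parameters to a GPU if present
print(next(net.parameters()).device)
```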
35 | These insights will move you from *end user* to *power user*, 36 | giving you the tools needed to reap the benefits 37 | of a mature deep learning library while retaining the flexibility 38 | to implement more complex models, including those you invent yourself! 39 | While this chapter does not introduce any new models or datasets, 40 | the advanced modeling chapters that follow rely heavily on these techniques. 41 | 42 | ```toc 43 | :maxdepth: 2 44 | 45 | model-construction 46 | parameters 47 | deferred-init 48 | custom-layer 49 | read-write 50 | use-gpu 51 | ``` 52 | 53 | -------------------------------------------------------------------------------- /chapter_linear-networks/index.md: -------------------------------------------------------------------------------- 1 | # 线性神经网络 2 | :label:`chap_linear` 3 | 4 | 在介绍深度神经网络之前,我们需要了解神经网络训练的基础知识。 5 | 本章我们将介绍神经网络的整个训练过程, 6 | 包括:定义简单的神经网络架构、数据处理、指定损失函数和如何训练模型。 7 | 为了更容易学习,我们将从经典算法————*线性*神经网络开始,介绍神经网络的基础知识。 8 | 经典统计学习技术中的线性回归和softmax回归可以视为线性神经网络, 9 | 这些知识将为本书其他部分中更复杂的技术奠定基础。 10 | 11 | ```toc 12 | :maxdepth: 2 13 | 14 | linear-regression 15 | linear-regression-scratch 16 | linear-regression-concise 17 | softmax-regression 18 | image-classification-dataset 19 | softmax-regression-scratch 20 | softmax-regression-concise 21 | ``` 22 | -------------------------------------------------------------------------------- /chapter_linear-networks/index_origin.md: -------------------------------------------------------------------------------- 1 | # Linear Neural Networks 2 | :label:`chap_linear` 3 | 4 | Before we get into the details of deep neural networks, 5 | we need to cover the basics of neural network training. 6 | In this chapter, we will cover the entire training process, 7 | including defining simple neural network architectures, handling data, specifying a loss function, and training the model. 8 | In order to make things easier to grasp, we begin with the simplest concepts. 9 | Fortunately, classic statistical learning techniques such as linear and softmax regression 10 | can be cast as *linear* neural networks. 11 | Starting from these classic algorithms, we will introduce you to the basics, 12 | providing the basis for more complex techniques in the rest of the book. 
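To preview how a classic algorithm becomes a one-layer network, the hedged sketch below fits linear regression with a single fully connected layer on synthetic data. The data, learning rate, and epoch count are placeholders; the chapter derives and implements this carefully, both from scratch and with high-level APIs.

```python
# Linear regression as a single dense layer trained by gradient descent.
import torch
from torch import nn

# Synthetic data: y = X w + b + noise, with known parameters to recover.
true_w, true_b = torch.tensor([2.0, -3.4]), 4.2
X = torch.randn(1000, 2)
y = X @ true_w + true_b + 0.01 * torch.randn(1000)

net = nn.Linear(2, 1)                        # the entire "network"
loss = nn.MSELoss()
trainer = torch.optim.SGD(net.parameters(), lr=0.1)

for epoch in range(100):                     # full-batch gradient descent
    l = loss(net(X).squeeze(-1), y)
    trainer.zero_grad()
    l.backward()
    trainer.step()

print(f'final loss {l.item():.6f}')
print(net.weight.data, net.bias.data)        # close to true_w and true_b
```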
13 | 14 | ```toc 15 | :maxdepth: 2 16 | 17 | linear-regression 18 | linear-regression-scratch 19 | linear-regression-concise 20 | softmax-regression 21 | image-classification-dataset 22 | softmax-regression-scratch 23 | softmax-regression-concise 24 | ``` 25 | 26 | -------------------------------------------------------------------------------- /chapter_multilayer-perceptrons/index.md: -------------------------------------------------------------------------------- 1 | # 多层感知机 2 | :label:`chap_perceptrons` 3 | 4 | 在本章中,我们将第一次介绍真正的*深度*网络。 5 | 最简单的深度网络称为*多层感知机*。多层感知机由多层神经元组成, 6 | 每一层与它的上一层相连,从中接收输入; 7 | 同时每一层也与它的下一层相连,影响当前层的神经元。 8 | 当我们训练容量较大的模型时,我们面临着*过拟合*的风险。 9 | 因此,本章将从基本的概念介绍开始讲起,包括*过拟合*、*欠拟合*和模型选择。 10 | 为了解决这些问题,本章将介绍*权重衰减*和*暂退法*等正则化技术。 11 | 我们还将讨论数值稳定性和参数初始化相关的问题, 12 | 这些问题是成功训练深度网络的关键。 13 | 在本章的最后,我们将把所介绍的内容应用到一个真实的案例:房价预测。 14 | 关于模型计算性能、可伸缩性和效率相关的问题,我们将放在后面的章节中讨论。 15 | 16 | ```toc 17 | :maxdepth: 2 18 | 19 | mlp 20 | mlp-scratch 21 | mlp-concise 22 | underfit-overfit 23 | weight-decay 24 | dropout 25 | backprop 26 | numerical-stability-and-init 27 | environment 28 | kaggle-house-price 29 | ``` 30 | -------------------------------------------------------------------------------- /chapter_multilayer-perceptrons/index_origin.md: -------------------------------------------------------------------------------- 1 | # Multilayer Perceptrons 2 | :label:`chap_perceptrons` 3 | 4 | In this chapter, we will introduce your first truly *deep* network. 5 | The simplest deep networks are called multilayer perceptrons, 6 | and they consist of multiple layers of neurons 7 | each fully connected to those in the layer below 8 | (from which they receive input) 9 | and those above (which they, in turn, influence). 10 | When we train high-capacity models we run the risk of overfitting. 11 | Thus, we will need to provide your first rigorous introduction 12 | to the notions of overfitting, underfitting, and model selection. 13 | To help you combat these problems, 14 | we will introduce regularization techniques such as weight decay and dropout. 15 | We will also discuss issues relating to numerical stability and parameter initialization 16 | that are key to successfully training deep networks. 17 | Throughout, we aim to give you a firm grasp not just of the concepts 18 | but also of the practice of using deep networks. 19 | At the end of this chapter, 20 | we apply what we have introduced so far to a real case: house price prediction. 21 | We punt matters relating to the computational performance, 22 | scalability, and efficiency of our models to subsequent chapters. 23 | 24 | ```toc 25 | :maxdepth: 2 26 | 27 | mlp 28 | mlp-scratch 29 | mlp-concise 30 | underfit-overfit 31 | weight-decay 32 | dropout 33 | backprop 34 | numerical-stability-and-init 35 | environment 36 | kaggle-house-price 37 | ``` 38 | 39 | -------------------------------------------------------------------------------- /chapter_multilayer-perceptrons/mlp-concise_origin.md: -------------------------------------------------------------------------------- 1 | # Concise Implementation of Multilayer Perceptrons 2 | :label:`sec_mlp_concise` 3 | 4 | As you might expect, by relying on the high-level APIs, 5 | we can implement MLPs even more concisely. 
6 | 7 | ```{.python .input} 8 | from d2l import mxnet as d2l 9 | from mxnet import gluon, init, npx 10 | from mxnet.gluon import nn 11 | npx.set_np() 12 | ``` 13 | 14 | ```{.python .input} 15 | #@tab pytorch 16 | from d2l import torch as d2l 17 | import torch 18 | from torch import nn 19 | ``` 20 | 21 | ```{.python .input} 22 | #@tab tensorflow 23 | from d2l import tensorflow as d2l 24 | import tensorflow as tf 25 | ``` 26 | 27 | ## Model 28 | 29 | As compared with our concise implementation 30 | of softmax regression implementation 31 | (:numref:`sec_softmax_concise`), 32 | the only difference is that we add 33 | *two* fully-connected layers 34 | (previously, we added *one*). 35 | The first is our hidden layer, 36 | which contains 256 hidden units 37 | and applies the ReLU activation function. 38 | The second is our output layer. 39 | 40 | ```{.python .input} 41 | net = nn.Sequential() 42 | net.add(nn.Dense(256, activation='relu'), 43 | nn.Dense(10)) 44 | net.initialize(init.Normal(sigma=0.01)) 45 | ``` 46 | 47 | ```{.python .input} 48 | #@tab pytorch 49 | net = nn.Sequential(nn.Flatten(), 50 | nn.Linear(784, 256), 51 | nn.ReLU(), 52 | nn.Linear(256, 10)) 53 | 54 | def init_weights(m): 55 | if type(m) == nn.Linear: 56 | torch.nn.init.normal_(m.weight, std=0.01) 57 | 58 | net.apply(init_weights) 59 | ``` 60 | 61 | ```{.python .input} 62 | #@tab tensorflow 63 | net = tf.keras.models.Sequential([ 64 | tf.keras.layers.Flatten(), 65 | tf.keras.layers.Dense(256, activation='relu'), 66 | tf.keras.layers.Dense(10)]) 67 | ``` 68 | 69 | The training loop is exactly the same 70 | as when we implemented softmax regression. 71 | This modularity enables us to separate 72 | matters concerning the model architecture 73 | from orthogonal considerations. 74 | 75 | ```{.python .input} 76 | batch_size, lr, num_epochs = 256, 0.1, 10 77 | loss = gluon.loss.SoftmaxCrossEntropyLoss() 78 | trainer = gluon.Trainer(net.collect_params(), 'sgd', {'learning_rate': lr}) 79 | ``` 80 | 81 | ```{.python .input} 82 | #@tab pytorch 83 | batch_size, lr, num_epochs = 256, 0.1, 10 84 | loss = nn.CrossEntropyLoss() 85 | trainer = torch.optim.SGD(net.parameters(), lr=lr) 86 | ``` 87 | 88 | ```{.python .input} 89 | #@tab tensorflow 90 | batch_size, lr, num_epochs = 256, 0.1, 10 91 | loss = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True) 92 | trainer = tf.keras.optimizers.SGD(learning_rate=lr) 93 | ``` 94 | 95 | ```{.python .input} 96 | #@tab all 97 | train_iter, test_iter = d2l.load_data_fashion_mnist(batch_size) 98 | d2l.train_ch3(net, train_iter, test_iter, loss, num_epochs, trainer) 99 | ``` 100 | 101 | ## Summary 102 | 103 | * Using high-level APIs, we can implement MLPs much more concisely. 104 | * For the same classification problem, the implementation of an MLP is the same as that of softmax regression except for additional hidden layers with activation functions. 105 | 106 | ## Exercises 107 | 108 | 1. Try adding different numbers of hidden layers (you may also modify the learning rate). What setting works best? 109 | 1. Try out different activation functions. Which one works best? 110 | 1. Try different schemes for initializing the weights. What method works best? 
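For the last two exercises, a hedged PyTorch starting point: swap the activation or the initializer below and rerun the training loop above. The particular choices shown (GELU and Xavier initialization) are examples, not recommendations.

```python
# Variants to experiment with: a different activation and initialization scheme.
import torch
from torch import nn

activation = nn.GELU()            # try nn.ReLU(), nn.Tanh(), nn.Sigmoid(), ...
net = nn.Sequential(nn.Flatten(),
                    nn.Linear(784, 256), activation,
                    nn.Linear(256, 10))

def init_weights(m):
    if isinstance(m, nn.Linear):
        nn.init.xavier_uniform_(m.weight)   # try kaiming_normal_, normal_, ...
        nn.init.zeros_(m.bias)

net.apply(init_weights)
```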
111 | 112 | :begin_tab:`mxnet` 113 | [Discussions](https://discuss.d2l.ai/t/94) 114 | :end_tab: 115 | 116 | :begin_tab:`pytorch` 117 | [Discussions](https://discuss.d2l.ai/t/95) 118 | :end_tab: 119 | 120 | :begin_tab:`tensorflow` 121 | [Discussions](https://discuss.d2l.ai/t/262) 122 | :end_tab: 123 | -------------------------------------------------------------------------------- /chapter_natural-language-processing-applications/finetuning-bert.md: -------------------------------------------------------------------------------- 1 | # 针对序列级和词元级应用微调BERT 2 | :label:`sec_finetuning-bert` 3 | 4 | 在本章的前几节中,我们为自然语言处理应用设计了不同的模型,例如基于循环神经网络、卷积神经网络、注意力和多层感知机。这些模型在有空间或时间限制的情况下是有帮助的,但是,为每个自然语言处理任务精心设计一个特定的模型实际上是不可行的。在 :numref:`sec_bert`中,我们介绍了一个名为BERT的预训练模型,该模型可以对广泛的自然语言处理任务进行最少的架构更改。一方面,在提出时,BERT改进了各种自然语言处理任务的技术水平。另一方面,正如在 :numref:`sec_bert-pretraining`中指出的那样,原始BERT模型的两个版本分别带有1.1亿和3.4亿个参数。因此,当有足够的计算资源时,我们可以考虑为下游自然语言处理应用微调BERT。 5 | 6 | 下面,我们将自然语言处理应用的子集概括为序列级和词元级。在序列层次上,介绍了在单文本分类任务和文本对分类(或回归)任务中,如何将文本输入的BERT表示转换为输出标签。在词元级别,我们将简要介绍新的应用,如文本标注和问答,并说明BERT如何表示它们的输入并转换为输出标签。在微调期间,不同应用之间的BERT所需的“最小架构更改”是额外的全连接层。在下游应用的监督学习期间,额外层的参数是从零开始学习的,而预训练BERT模型中的所有参数都是微调的。 7 | 8 | ## 单文本分类 9 | 10 | *单文本分类*将单个文本序列作为输入,并输出其分类结果。 11 | 除了我们在这一章中探讨的情感分析之外,语言可接受性语料库(Corpus of Linguistic Acceptability,COLA)也是一个单文本分类的数据集,它的要求判断给定的句子在语法上是否可以接受。 :cite:`Warstadt.Singh.Bowman.2019`。例如,“I should study.”是可以接受的,但是“I should studying.”不是可以接受的。 12 | 13 | ![微调BERT用于单文本分类应用,如情感分析和测试语言可接受性(这里假设输入的单个文本有六个词元)](../img/bert-one-seq.svg) 14 | :label:`fig_bert-one-seq` 15 | 16 | :numref:`sec_bert`描述了BERT的输入表示。BERT输入序列明确地表示单个文本和文本对,其中特殊分类标记“<cls>”用于序列分类,而特殊分类标记“<sep>”标记单个文本的结束或分隔成对文本。如 :numref:`fig_bert-one-seq`所示,在单文本分类应用中,特殊分类标记“<cls>”的BERT表示对整个输入文本序列的信息进行编码。作为输入单个文本的表示,它将被送入到由全连接(稠密)层组成的小多层感知机中,以输出所有离散标签值的分布。 17 | 18 | ## 文本对分类或回归 19 | 20 | 在本章中,我们还研究了自然语言推断。它属于*文本对分类*,这是一种对文本进行分类的应用类型。 21 | 22 | 以一对文本作为输入但输出连续值,*语义文本相似度*是一个流行的“文本对回归”任务。 23 | 这项任务评估句子的语义相似度。例如,在语义文本相似度基准数据集(Semantic Textual Similarity Benchmark)中,句子对的相似度得分是从0(无语义重叠)到5(语义等价)的分数区间 :cite:`Cer.Diab.Agirre.ea.2017`。我们的目标是预测这些分数。来自语义文本相似性基准数据集的样本包括(句子1,句子2,相似性得分): 24 | 25 | * "A plane is taking off."(“一架飞机正在起飞。”),"An air plane is taking off."(“一架飞机正在起飞。”),5.000分; 26 | * "A woman is eating something."(“一个女人在吃东西。”),"A woman is eating meat."(“一个女人在吃肉。”),3.000分; 27 | * "A woman is dancing."(一个女人在跳舞。),"A man is talking."(“一个人在说话。”),0.000分。 28 | 29 | ![文本对分类或回归应用的BERT微调,如自然语言推断和语义文本相似性(假设输入文本对分别有两个词元和三个词元)](../img/bert-two-seqs.svg) 30 | :label:`fig_bert-two-seqs` 31 | 32 | 与 :numref:`fig_bert-one-seq`中的单文本分类相比, :numref:`fig_bert-two-seqs`中的文本对分类的BERT微调在输入表示上有所不同。对于文本对回归任务(如语义文本相似性),可以应用细微的更改,例如输出连续的标签值和使用均方损失:它们在回归中很常见。 33 | 34 | ## 文本标注 35 | 36 | 现在让我们考虑词元级任务,比如*文本标注*(text tagging),其中每个词元都被分配了一个标签。在文本标注任务中,*词性标注*为每个单词分配词性标记(例如,形容词和限定词)。 37 | 根据单词在句子中的作用。如,在Penn树库II标注集中,句子“John Smith‘s car is new”应该被标记为“NNP(名词,专有单数)NNP POS(所有格结尾)NN(名词,单数或质量)VB(动词,基本形式)JJ(形容词)”。 38 | 39 | ![文本标记应用的BERT微调,如词性标记。假设输入的单个文本有六个词元。](../img/bert-tagging.svg) 40 | :label:`fig_bert-tagging` 41 | 42 | :numref:`fig_bert-tagging`中说明了文本标记应用的BERT微调。与 :numref:`fig_bert-one-seq`相比,唯一的区别在于,在文本标注中,输入文本的*每个词元*的BERT表示被送到相同的额外全连接层中,以输出词元的标签,例如词性标签。 43 | 44 | ## 问答 45 | 46 | 作为另一个词元级应用,*问答*反映阅读理解能力。 47 | 例如,斯坦福问答数据集(Stanford Question Answering Dataset,SQuAD v1.1)由阅读段落和问题组成,其中每个问题的答案只是段落中的一段文本(文本片段) :cite:`Rajpurkar.Zhang.Lopyrev.ea.2016`。举个例子,考虑一段话:“Some experts report that a mask's efficacy is inconclusive.However,mask makers insist that their products,such as N95 respirator masks,can guard against the 
virus.”(“一些专家报告说面罩的功效是不确定的。然而,口罩制造商坚持他们的产品,如N95口罩,可以预防病毒。”)还有一个问题“Who say that N95 respirator masks can guard against the virus?”(“谁说N95口罩可以预防病毒?”)。答案应该是文章中的文本片段“mask makers”(“口罩制造商”)。因此,SQuAD v1.1的目标是在给定问题和段落的情况下预测段落中文本片段的开始和结束。 48 | 49 | ![对问答进行BERT微调(假设输入文本对分别有两个和三个词元)](../img/bert-qa.svg) 50 | :label:`fig_bert-qa` 51 | 52 | 为了微调BERT进行问答,在BERT的输入中,将问题和段落分别作为第一个和第二个文本序列。为了预测文本片段开始的位置,相同的额外的全连接层将把来自位置$i$的任何词元的BERT表示转换成标量分数$s_i$。文章中所有词元的分数还通过softmax转换成概率分布,从而为文章中的每个词元位置$i$分配作为文本片段开始的概率$p_i$。预测文本片段的结束与上面相同,只是其额外的全连接层中的参数与用于预测开始位置的参数无关。当预测结束时,位置$i$的词元由相同的全连接层变换成标量分数$e_i$。 :numref:`fig_bert-qa`描述了用于问答的微调BERT。 53 | 54 | 对于问答,监督学习的训练目标就像最大化真实值的开始和结束位置的对数似然一样简单。当预测片段时,我们可以计算从位置$i$到位置$j$的有效片段的分数$s_i + e_j$($i \leq j$),并输出分数最高的跨度。 55 | 56 | ## 小结 57 | 58 | * 对于序列级和词元级自然语言处理应用,BERT只需要最小的架构改变(额外的全连接层),如单个文本分类(例如,情感分析和测试语言可接受性)、文本对分类或回归(例如,自然语言推断和语义文本相似性)、文本标记(例如,词性标记)和问答。 59 | * 在下游应用的监督学习期间,额外层的参数是从零开始学习的,而预训练BERT模型中的所有参数都是微调的。 60 | 61 | ## 练习 62 | 63 | 1. 让我们为新闻文章设计一个搜索引擎算法。当系统接收到查询(例如,“冠状病毒爆发期间的石油行业”)时,它应该返回与该查询最相关的新闻文章的排序列表。假设我们有一个巨大的新闻文章池和大量的查询。为了简化问题,假设为每个查询标记了最相关的文章。如何在算法设计中应用负采样(见 :numref:`subsec_negative-sampling`)和BERT? 64 | 1. 我们如何利用BERT来训练语言模型? 65 | 1. 我们能在机器翻译中利用BERT吗? 66 | 67 | [Discussions](https://discuss.d2l.ai/t/5729) 68 | -------------------------------------------------------------------------------- /chapter_natural-language-processing-applications/index.md: -------------------------------------------------------------------------------- 1 | # 自然语言处理:应用 2 | :label:`chap_nlp_app` 3 | 4 | 前面我们学习了如何在文本序列中表示词元, 5 | 并在 :numref:`chap_nlp_pretrain`中训练了词元的表示。 6 | 这样的预训练文本表示可以通过不同模型架构,放入不同的下游自然语言处理任务。 7 | 8 | 前一章我们提及到一些自然语言处理应用,这些应用没有预训练,只是为了解释深度学习架构。 9 | 例如,在 :numref:`chap_rnn`中, 10 | 我们依赖循环神经网络设计语言模型来生成类似中篇小说的文本。 11 | 在 :numref:`chap_modern_rnn`和 :numref:`chap_attention`中, 12 | 我们还设计了基于循环神经网络和注意力机制的机器翻译模型。 13 | 14 | 然而,本书并不打算全面涵盖所有此类应用。 15 | 相反,我们的重点是*如何应用深度语言表征学习来解决自然语言处理问题*。 16 | 在给定预训练的文本表示的情况下, 17 | 本章将探讨两种流行且具有代表性的下游自然语言处理任务: 18 | 情感分析和自然语言推断,它们分别分析单个文本和文本对之间的关系。 19 | 20 | ![预训练文本表示可以通过不同模型架构,放入不同的下游自然语言处理应用(本章重点介绍如何为不同的下游应用设计模型)](../img/nlp-map-app.svg) 21 | :label:`fig_nlp-map-app` 22 | 23 | 如 :numref:`fig_nlp-map-app`所述, 24 | 本章将重点描述然后使用不同类型的深度学习架构 25 | (如多层感知机、卷积神经网络、循环神经网络和注意力) 26 | 设计自然语言处理模型。 27 | 尽管在 :numref:`fig_nlp-map-app`中, 28 | 可以将任何预训练的文本表示与任何应用的架构相结合, 29 | 但我们选择了一些具有代表性的组合。 30 | 具体来说,我们将探索基于循环神经网络和卷积神经网络的流行架构进行情感分析。 31 | 对于自然语言推断,我们选择注意力和多层感知机来演示如何分析文本对。 32 | 最后,我们介绍了如何为广泛的自然语言处理应用, 33 | 如在序列级(单文本分类和文本对分类)和词元级(文本标注和问答)上 34 | 对预训练BERT模型进行微调。 35 | 作为一个具体的经验案例,我们将针对自然语言推断对BERT进行微调。 36 | 37 | 正如我们在 :numref:`sec_bert`中介绍的那样, 38 | 对于广泛的自然语言处理应用,BERT只需要最少的架构更改。 39 | 然而,这一好处是以微调下游应用的大量BERT参数为代价的。 40 | 当空间或时间有限时,基于多层感知机、卷积神经网络、循环神经网络 41 | 和注意力的精心构建的模型更具可行性。 42 | 下面,我们从情感分析应用开始,分别解读基于循环神经网络和卷积神经网络的模型设计。 43 | 44 | ```toc 45 | :maxdepth: 2 46 | 47 | sentiment-analysis-and-dataset 48 | sentiment-analysis-rnn 49 | sentiment-analysis-cnn 50 | natural-language-inference-and-dataset 51 | natural-language-inference-attention 52 | finetuning-bert 53 | natural-language-inference-bert 54 | ``` 55 | -------------------------------------------------------------------------------- /chapter_natural-language-processing-applications/index_origin.md: -------------------------------------------------------------------------------- 1 | # Natural Language Processing: Applications 2 | :label:`chap_nlp_app` 3 | 4 | We have seen how to represent tokens in text sequences and train their representations in :numref:`chap_nlp_pretrain`. 
5 | Such pretrained text representations can be fed to various models for different downstream natural language processing tasks. 6 | 7 | In fact, 8 | earlier chapters have already discussed some natural language processing applications 9 | *without pretraining*, 10 | just for explaining deep learning architectures. 11 | For instance, in :numref:`chap_rnn`, 12 | we have relied on RNNs to design language models to generate novella-like text. 13 | In :numref:`chap_modern_rnn` and :numref:`chap_attention`, 14 | we have also designed models based on RNNs and attention mechanisms for machine translation. 15 | 16 | However, this book does not intend to cover all such applications in a comprehensive manner. 17 | Instead, 18 | our focus is on *how to apply (deep) representation learning of languages to addressing natural language processing problems*. 19 | Given pretrained text representations, 20 | this chapter will explore two 21 | popular and representative 22 | downstream natural language processing tasks: 23 | sentiment analysis and natural language inference, 24 | which analyze single text and relationships of text pairs, respectively. 25 | 26 | ![Pretrained text representations can be fed to various deep learning architectures for different downstream natural language processing applications. This chapter focuses on how to design models for different downstream natural language processing applications.](../img/nlp-map-app.svg) 27 | :label:`fig_nlp-map-app` 28 | 29 | As depicted in :numref:`fig_nlp-map-app`, 30 | this chapter focuses on describing the basic ideas of designing natural language processing models using different types of deep learning architectures, such as MLPs, CNNs, RNNs, and attention. 31 | Though it is possible to combine any pretrained text representations with any architecture for either application in :numref:`fig_nlp-map-app`, 32 | we select a few representative combinations. 33 | Specifically, we will explore popular architectures based on RNNs and CNNs for sentiment analysis. 34 | For natural language inference, we choose attention and MLPs to demonstrate how to analyze text pairs. 35 | In the end, we introduce how to fine-tune a pretrained BERT model 36 | for a wide range of natural language processing applications, 37 | such as on a sequence level (single text classification and text pair classification) 38 | and a token level (text tagging and question answering). 39 | As a concrete empirical case, 40 | we will fine-tune BERT for natural language inference. 41 | 42 | As we have introduced in :numref:`sec_bert`, 43 | BERT requires minimal architecture changes 44 | for a wide range of natural language processing applications. 45 | However, this benefit comes at the cost of fine-tuning 46 | a huge number of BERT parameters for the downstream applications. 47 | When space or time is limited, 48 | those crafted models based on MLPs, CNNs, RNNs, and attention 49 | are more feasible. 50 | In the following, we start by the sentiment analysis application 51 | and illustrate the model design based on RNNs and CNNs, respectively. 
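As a preview of the RNN-based design for sentiment analysis, the sketch below embeds a token sequence, encodes it with a bidirectional LSTM, and classifies from the encodings of the first and last time steps. The vocabulary size and hyperparameters are placeholders and no training loop is included; `sentiment-analysis-rnn.md` provides the full model and data pipeline.

```python
# A skeleton sentiment classifier: embedding -> BiLSTM -> linear head.
import torch
from torch import nn

class BiRNNClassifier(nn.Module):
    def __init__(self, vocab_size=10000, embed_size=100, num_hiddens=100):
        super().__init__()
        self.embedding = nn.Embedding(vocab_size, embed_size)
        self.encoder = nn.LSTM(embed_size, num_hiddens, num_layers=2,
                               bidirectional=True, batch_first=True)
        self.decoder = nn.Linear(4 * num_hiddens, 2)     # positive vs. negative

    def forward(self, tokens):                            # tokens: (batch, steps)
        emb = self.embedding(tokens)
        outputs, _ = self.encoder(emb)                    # (batch, steps, 2*hiddens)
        # Concatenate the encodings of the first and the last time step.
        encoding = torch.cat((outputs[:, 0], outputs[:, -1]), dim=1)
        return self.decoder(encoding)

logits = BiRNNClassifier()(torch.randint(0, 10000, (4, 50)))
print(logits.shape)   # torch.Size([4, 2])
```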
52 | 53 | ```toc 54 | :maxdepth: 2 55 | 56 | sentiment-analysis-and-dataset 57 | sentiment-analysis-rnn 58 | sentiment-analysis-cnn 59 | natural-language-inference-and-dataset 60 | natural-language-inference-attention 61 | finetuning-bert 62 | natural-language-inference-bert 63 | ``` 64 | 65 | -------------------------------------------------------------------------------- /chapter_natural-language-processing-pretraining/approx-training.md: -------------------------------------------------------------------------------- 1 | # 近似训练 2 | :label:`sec_approx_train` 3 | 4 | 回想一下我们在 :numref:`sec_word2vec`中的讨论。跳元模型的主要思想是使用softmax运算来计算基于给定的中心词$w_c$生成上下文字$w_o$的条件概率(如 :eqref:`eq_skip-gram-softmax`),对应的对数损失在 :eqref:`eq_skip-gram-log`给出。 5 | 6 | 由于softmax操作的性质,上下文词可以是词表$\mathcal{V}$中的任意项, :eqref:`eq_skip-gram-log`包含与整个词表大小一样多的项的求和。因此, :eqref:`eq_skip-gram-grad`中跳元模型的梯度计算和 :eqref:`eq_cbow-gradient`中的连续词袋模型的梯度计算都包含求和。不幸的是,在一个词典上(通常有几十万或数百万个单词)求和的梯度的计算成本是巨大的! 7 | 8 | 为了降低上述计算复杂度,本节将介绍两种近似训练方法:*负采样*和*分层softmax*。 9 | 由于跳元模型和连续词袋模型的相似性,我们将以跳元模型为例来描述这两种近似训练方法。 10 | 11 | ## 负采样 12 | :label:`subsec_negative-sampling` 13 | 14 | 负采样修改了原目标函数。给定中心词$w_c$的上下文窗口,任意上下文词$w_o$来自该上下文窗口的被认为是由下式建模概率的事件: 15 | 16 | $$P(D=1\mid w_c, w_o) = \sigma(\mathbf{u}_o^\top \mathbf{v}_c),$$ 17 | 18 | 其中$\sigma$使用了sigmoid激活函数的定义: 19 | 20 | $$\sigma(x) = \frac{1}{1+\exp(-x)}.$$ 21 | :eqlabel:`eq_sigma-f` 22 | 23 | 让我们从最大化文本序列中所有这些事件的联合概率开始训练词嵌入。具体而言,给定长度为$T$的文本序列,以$w^{(t)}$表示时间步$t$的词,并使上下文窗口为$m$,考虑最大化联合概率: 24 | 25 | $$ \prod_{t=1}^{T} \prod_{-m \leq j \leq m,\ j \neq 0} P(D=1\mid w^{(t)}, w^{(t+j)}).$$ 26 | :eqlabel:`eq-negative-sample-pos` 27 | 28 | 然而, :eqref:`eq-negative-sample-pos`只考虑那些正样本的事件。仅当所有词向量都等于无穷大时, :eqref:`eq-negative-sample-pos`中的联合概率才最大化为1。当然,这样的结果毫无意义。为了使目标函数更有意义,*负采样*添加从预定义分布中采样的负样本。 29 | 30 | 用$S$表示上下文词$w_o$来自中心词$w_c$的上下文窗口的事件。对于这个涉及$w_o$的事件,从预定义分布$P(w)$中采样$K$个不是来自这个上下文窗口*噪声词*。用$N_k$表示噪声词$w_k$($k=1, \ldots, K$)不是来自$w_c$的上下文窗口的事件。假设正例和负例$S, N_1, \ldots, N_K$的这些事件是相互独立的。负采样将 :eqref:`eq-negative-sample-pos`中的联合概率(仅涉及正例)重写为 31 | 32 | $$ \prod_{t=1}^{T} \prod_{-m \leq j \leq m,\ j \neq 0} P(w^{(t+j)} \mid w^{(t)}),$$ 33 | 34 | 通过事件$S, N_1, \ldots, N_K$近似条件概率: 35 | 36 | $$ P(w^{(t+j)} \mid w^{(t)}) =P(D=1\mid w^{(t)}, w^{(t+j)})\prod_{k=1,\ w_k \sim P(w)}^K P(D=0\mid w^{(t)}, w_k).$$ 37 | :eqlabel:`eq-negative-sample-conditional-prob` 38 | 39 | 分别用$i_t$和$h_k$表示词$w^{(t)}$和噪声词$w_k$在文本序列的时间步$t$处的索引。 :eqref:`eq-negative-sample-conditional-prob`中关于条件概率的对数损失为: 40 | 41 | $$ 42 | \begin{aligned} 43 | -\log P(w^{(t+j)} \mid w^{(t)}) 44 | =& -\log P(D=1\mid w^{(t)}, w^{(t+j)}) - \sum_{k=1,\ w_k \sim P(w)}^K \log P(D=0\mid w^{(t)}, w_k)\\ 45 | =&- \log\, \sigma\left(\mathbf{u}_{i_{t+j}}^\top \mathbf{v}_{i_t}\right) - \sum_{k=1,\ w_k \sim P(w)}^K \log\left(1-\sigma\left(\mathbf{u}_{h_k}^\top \mathbf{v}_{i_t}\right)\right)\\ 46 | =&- \log\, \sigma\left(\mathbf{u}_{i_{t+j}}^\top \mathbf{v}_{i_t}\right) - \sum_{k=1,\ w_k \sim P(w)}^K \log\sigma\left(-\mathbf{u}_{h_k}^\top \mathbf{v}_{i_t}\right). 
47 | \end{aligned} 48 | $$ 49 | 50 | 我们可以看到,现在每个训练步的梯度计算成本与词表大小无关,而是线性依赖于$K$。当将超参数$K$设置为较小的值时,在负采样的每个训练步处的梯度的计算成本较小。 51 | 52 | ## 层序Softmax 53 | 54 | 作为另一种近似训练方法,*层序Softmax*(hierarchical softmax)使用二叉树( :numref:`fig_hi_softmax`中说明的数据结构),其中树的每个叶节点表示词表$\mathcal{V}$中的一个词。 55 | 56 | ![用于近似训练的分层softmax,其中树的每个叶节点表示词表中的一个词](../img/hi-softmax.svg) 57 | :label:`fig_hi_softmax` 58 | 59 | 用$L(w)$表示二叉树中表示字$w$的从根节点到叶节点的路径上的节点数(包括两端)。设$n(w,j)$为该路径上的$j^\mathrm{th}$节点,其上下文字向量为$\mathbf{u}_{n(w, j)}$。例如, :numref:`fig_hi_softmax`中的$L(w_3) = 4$。分层softmax将 :eqref:`eq_skip-gram-softmax`中的条件概率近似为 60 | 61 | $$P(w_o \mid w_c) = \prod_{j=1}^{L(w_o)-1} \sigma\left( [\![ n(w_o, j+1) = \text{leftChild}(n(w_o, j)) ]\!] \cdot \mathbf{u}_{n(w_o, j)}^\top \mathbf{v}_c\right),$$ 62 | 63 | 其中函数$\sigma$在 :eqref:`eq_sigma-f`中定义,$\text{leftChild}(n)$是节点$n$的左子节点:如果$x$为真,$[\![x]\!] = 1$;否则$[\![x]\!] = -1$。 64 | 65 | 为了说明,让我们计算 :numref:`fig_hi_softmax`中给定词$w_c$生成词$w_3$的条件概率。这需要$w_c$的词向量$\mathbf{v}_c$和从根到$w_3$的路径( :numref:`fig_hi_softmax`中加粗的路径)上的非叶节点向量之间的点积,该路径依次向左、向右和向左遍历: 66 | 67 | $$P(w_3 \mid w_c) = \sigma(\mathbf{u}_{n(w_3, 1)}^\top \mathbf{v}_c) \cdot \sigma(-\mathbf{u}_{n(w_3, 2)}^\top \mathbf{v}_c) \cdot \sigma(\mathbf{u}_{n(w_3, 3)}^\top \mathbf{v}_c).$$ 68 | 69 | 由$\sigma(x)+\sigma(-x) = 1$,它认为基于任意词$w_c$生成词表$\mathcal{V}$中所有词的条件概率总和为1: 70 | 71 | $$\sum_{w \in \mathcal{V}} P(w \mid w_c) = 1.$$ 72 | :eqlabel:`eq_hi-softmax-sum-one` 73 | 74 | 幸运的是,由于二叉树结构,$L(w_o)-1$大约与$\mathcal{O}(\text{log}_2|\mathcal{V}|)$是一个数量级。当词表大小$\mathcal{V}$很大时,与没有近似训练的相比,使用分层softmax的每个训练步的计算代价显著降低。 75 | 76 | ## 小结 77 | 78 | * 负采样通过考虑相互独立的事件来构造损失函数,这些事件同时涉及正例和负例。训练的计算量与每一步的噪声词数成线性关系。 79 | * 分层softmax使用二叉树中从根节点到叶节点的路径构造损失函数。训练的计算成本取决于词表大小的对数。 80 | 81 | ## 练习 82 | 83 | 1. 如何在负采样中对噪声词进行采样? 84 | 1. 验证 :eqref:`eq_hi-softmax-sum-one`是否有效。 85 | 1. 如何分别使用负采样和分层softmax训练连续词袋模型? 86 | 87 | [Discussions](https://discuss.d2l.ai/t/5741) 88 | -------------------------------------------------------------------------------- /chapter_natural-language-processing-pretraining/index.md: -------------------------------------------------------------------------------- 1 | # 自然语言处理:预训练 2 | :label:`chap_nlp_pretrain` 3 | 4 | 人与人之间需要交流。 5 | 出于人类这种基本需要,每天都有大量的书面文本产生。 6 | 比如,社交媒体、聊天应用、电子邮件、产品评论、新闻文章、 7 | 研究论文和书籍中的丰富文本, 8 | 使计算机能够理解它们以提供帮助或基于人类语言做出决策变得至关重要。 9 | 10 | *自然语言处理*是指研究使用自然语言的计算机和人类之间的交互。 11 | 在实践中,使用自然语言处理技术来处理和分析文本数据是非常常见的, 12 | 例如 :numref:`sec_language_model`的语言模型 13 | 和 :numref:`sec_machine_translation`的机器翻译模型。 14 | 15 | 要理解文本,我们可以从学习它的表示开始。 16 | 利用来自大型语料库的现有文本序列, 17 | *自监督学习*(self-supervised learning) 18 | 已被广泛用于预训练文本表示, 19 | 例如通过使用周围文本的其它部分来预测文本的隐藏部分。 20 | 通过这种方式,模型可以通过有监督地从*海量*文本数据中学习,而不需要*昂贵*的标签标注! 
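Before moving on, the negative-sampling loss derived in `approx-training.md` above can be spelled out numerically in a few lines. The sketch below uses random toy vectors and assumes PyTorch; in real pretraining code the center word, context word, and noise words come from the corpus and their vectors from embedding layers.

```python
# Negative-sampling loss for one (center, context) pair with K noise words:
# -log sigma(u_o . v_c) - sum_k log sigma(-u_k . v_c)
import torch
from torch.nn import functional as F

embed_size, K = 8, 5
v_c = torch.randn(embed_size)            # center-word vector
u_o = torch.randn(embed_size)            # true context-word vector
u_noise = torch.randn(K, embed_size)     # K noise-word vectors sampled from P(w)

loss = -F.logsigmoid(u_o @ v_c) - F.logsigmoid(-(u_noise @ v_c)).sum()
print(loss.item())
```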
21 | 22 | 本章我们将看到:当将每个单词或子词视为单个词元时, 23 | 可以在大型语料库上使用word2vec、GloVe或子词嵌入模型预先训练每个词元的表示。 24 | 经过预训练后,每个词元的表示可以是一个向量。 25 | 但是,无论上下文是什么,它都保持不变。 26 | 例如,“bank”(可以译作银行或者河岸)的向量表示在 27 | “go to the bank to deposit some money”(去银行存点钱) 28 | 和“go to the bank to sit down”(去河岸坐下来)中是相同的。 29 | 因此,许多较新的预训练模型使相同词元的表示适应于不同的上下文, 30 | 其中包括基于Transformer编码器的更深的自监督模型BERT。 31 | 在本章中,我们将重点讨论如何预训练文本的这种表示, 32 | 如 :numref:`fig_nlp-map-pretrain`中所强调的那样。 33 | 34 | ![预训练好的文本表示可以放入各种深度学习架构,应用于不同自然语言处理任务(本章主要研究上游文本的预训练)](../img/nlp-map-pretrain.svg) 35 | :label:`fig_nlp-map-pretrain` 36 | 37 | :numref:`fig_nlp-map-pretrain`显示了 38 | 预训练好的文本表示可以放入各种深度学习架构,应用于不同自然语言处理任务。 39 | 我们将在 :numref:`chap_nlp_app`中介绍它们。 40 | 41 | 42 | ```toc 43 | :maxdepth: 2 44 | 45 | word2vec 46 | approx-training 47 | word-embedding-dataset 48 | word2vec-pretraining 49 | glove 50 | subword-embedding 51 | similarity-analogy 52 | bert 53 | bert-dataset 54 | bert-pretraining 55 | ``` 56 | -------------------------------------------------------------------------------- /chapter_natural-language-processing-pretraining/index_origin.md: -------------------------------------------------------------------------------- 1 | # Natural Language Processing: Pretraining 2 | :label:`chap_nlp_pretrain` 3 | 4 | 5 | Humans need to communicate. 6 | Out of this basic need of the human condition, a vast amount of written text has been generated on an everyday basis. 7 | Given rich text in social media, chat apps, emails, product reviews, news articles, research papers, and books, it becomes vital to enable computers to understand them to offer assistance or make decisions based on human languages. 8 | 9 | *Natural language processing* studies interactions between computers and humans using natural languages. 10 | In practice, it is very common to use natural language processing techniques to process and analyze text (human natural language) data, such as language models in :numref:`sec_language_model` and machine translation models in :numref:`sec_machine_translation`. 11 | 12 | To understand text, we can begin by learning 13 | its representations. 14 | Leveraging the existing text sequences 15 | from large corpora, 16 | *self-supervised learning* 17 | has been extensively 18 | used to pretrain text representations, 19 | such as by predicting some hidden part of the text 20 | using some other part of their surrounding text. 21 | In this way, 22 | models learn through supervision 23 | from *massive* text data 24 | without *expensive* labeling efforts! 25 | 26 | 27 | As we will see in this chapter, 28 | when treating each word or subword as an individual token, 29 | the representation of each token can be pretrained 30 | using word2vec, GloVe, or subword embedding models 31 | on large corpora. 32 | After pretraining, representation of each token can be a vector; 33 | however, it remains the same no matter what the context is. 34 | For instance, the vector representation of "bank" is the same 35 | in both 36 | "go to the bank to deposit some money" 37 | and 38 | "go to the bank to sit down". 39 | Thus, many more recent pretraining models adapt representation of the same token 40 | to different contexts. 41 | Among them is BERT, a much deeper self-supervised model based on the transformer encoder. 42 | In this chapter, we will focus on how to pretrain such representations for text, 43 | as highlighted in :numref:`fig_nlp-map-pretrain`.
44 | 45 | ![Pretrained text representations can be fed to various deep learning architectures for different downstream natural language processing applications. This chapter focuses on the upstream text representation pretraining.](../img/nlp-map-pretrain.svg) 46 | :label:`fig_nlp-map-pretrain` 47 | 48 | 49 | For sight of the big picture, 50 | :numref:`fig_nlp-map-pretrain` shows that 51 | the pretrained text representations can be fed to 52 | a variety of deep learning architectures for different downstream natural language processing applications. 53 | We will cover them in :numref:`chap_nlp_app`. 54 | 55 | ```toc 56 | :maxdepth: 2 57 | 58 | word2vec 59 | approx-training 60 | word-embedding-dataset 61 | word2vec-pretraining 62 | glove 63 | subword-embedding 64 | similarity-analogy 65 | bert 66 | bert-dataset 67 | bert-pretraining 68 | 69 | ``` 70 | 71 | -------------------------------------------------------------------------------- /chapter_notation/index.md: -------------------------------------------------------------------------------- 1 | # 符号 2 | :label:`chap_notation` 3 | 4 | 本书中使用的符号概述如下。 5 | 6 | ## 数字 7 | 8 | * $x$:标量 9 | * $\mathbf{x}$:向量 10 | * $\mathbf{X}$:矩阵 11 | * $\mathsf{X}$:张量 12 | * $\mathbf{I}$:单位矩阵 13 | * $x_i$, $[\mathbf{x}]_i$:向量$\mathbf{x}$第$i$个元素 14 | * $x_{ij}$, $[\mathbf{X}]_{ij}$:矩阵$\mathbf{X}$第$i$行第$j$列的元素 15 | 16 | ## 集合论 17 | 18 | * $\mathcal{X}$: 集合 19 | * $\mathbb{Z}$: 整数集合 20 | * $\mathbb{R}$: 实数集合 21 | * $\mathbb{R}^n$: $n$维实数向量集合 22 | * $\mathbb{R}^{a\times b}$: 包含$a$行和$b$列的实数矩阵集合 23 | * $\mathcal{A}\cup\mathcal{B}$: 集合$\mathcal{A}$和$\mathcal{B}$的并集 24 | * $\mathcal{A}\cap\mathcal{B}$:集合$\mathcal{A}$和$\mathcal{B}$的交集 25 | * $\mathcal{A}\setminus\mathcal{B}$:集合$\mathcal{A}$与集合$\mathcal{B}$相减,$\mathcal{B}$关于$\mathcal{A}$的相对补集 26 | 27 | ## 函数和运算符 28 | 29 | * $f(\cdot)$:函数 30 | * $\log(\cdot)$:自然对数 31 | * $\exp(\cdot)$: 指数函数 32 | * $\mathbf{1}_\mathcal{X}$: 指示函数 33 | * $\mathbf{(\cdot)}^\top$: 向量或矩阵的转置 34 | * $\mathbf{X}^{-1}$: 矩阵的逆 35 | * $\odot$: 按元素相乘 36 | * $[\cdot, \cdot]$:连结 37 | * $\lvert \mathcal{X} \rvert$:集合的基数 38 | * $\|\cdot\|_p$: :$L_p$ 正则 39 | * $\|\cdot\|$: $L_2$ 正则 40 | * $\langle \mathbf{x}, \mathbf{y} \rangle$:向量$\mathbf{x}$和$\mathbf{y}$的点积 41 | * $\sum$: 连加 42 | * $\prod$: 连乘 43 | * $\stackrel{\mathrm{def}}{=}$:定义 44 | 45 | ## 微积分 46 | 47 | * $\frac{dy}{dx}$:$y$关于$x$的导数 48 | * $\frac{\partial y}{\partial x}$:$y$关于$x$的偏导数 49 | * $\nabla_{\mathbf{x}} y$:$y$关于$\mathbf{x}$的梯度 50 | * $\int_a^b f(x) \;dx$: $f$在$a$到$b$区间上关于$x$的定积分 51 | * $\int f(x) \;dx$: $f$关于$x$的不定积分 52 | 53 | ## 概率与信息论 54 | 55 | * $P(\cdot)$:概率分布 56 | * $z \sim P$: 随机变量$z$具有概率分布$P$ 57 | * $P(X \mid Y)$:$X\mid Y$的条件概率 58 | * $p(x)$: 概率密度函数 59 | * ${E}_{x} [f(x)]$: 函数$f$对$x$的数学期望 60 | * $X \perp Y$: 随机变量$X$和$Y$是独立的 61 | * $X \perp Y \mid Z$: 随机变量$X$和$Y$在给定随机变量$Z$的条件下是独立的 62 | * $\mathrm{Var}(X)$: 随机变量$X$的方差 63 | * $\sigma_X$: 随机变量$X$的标准差 64 | * $\mathrm{Cov}(X, Y)$: 随机变量$X$和$Y$的协方差 65 | * $\rho(X, Y)$: 随机变量$X$和$Y$的相关性 66 | * $H(X)$: 随机变量$X$的熵 67 | * $D_{\mathrm{KL}}(P\|Q)$: $P$和$Q$的KL-散度 68 | 69 | ## 复杂度 70 | 71 | * $\mathcal{O}$:大O标记 72 | 73 | [Discussions](https://discuss.d2l.ai/t/2089) 74 | -------------------------------------------------------------------------------- /chapter_notation/index_origin.md: -------------------------------------------------------------------------------- 1 | # Notation 2 | :label:`chap_notation` 3 | 4 | The notation used throughout this book is summarized below. 
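For readers who parse code more easily than symbols, here are a few of the operators listed above written as tensor operations; PyTorch is an arbitrary choice here, and the notation itself is framework-independent.

```python
# A handful of the notations above, expressed as tensor operations.
import torch

X = torch.arange(6.0).reshape(2, 3)
Y = torch.ones(2, 3)
x = torch.tensor([1.0, 2.0, 3.0])
y = torch.tensor([4.0, 5.0, 6.0])

print(X.T)                 # transpose X^T
print(X * Y)               # Hadamard (elementwise) product
print(torch.dot(x, y))     # dot product <x, y>
print(torch.norm(x))       # L2 norm ||x||
print(torch.norm(x, p=1))  # L1 norm ||x||_1
print(torch.cat([x, y]))   # concatenation [x, y]
```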
5 | 6 | 7 | ## Numbers 8 | 9 | * $x$: A scalar 10 | * $\mathbf{x}$: A vector 11 | * $\mathbf{X}$: A matrix 12 | * $\mathsf{X}$: A tensor 13 | * $\mathbf{I}$: An identity matrix 14 | * $x_i$, $[\mathbf{x}]_i$: The $i^\mathrm{th}$ element of vector $\mathbf{x}$ 15 | * $x_{ij}$, $[\mathbf{X}]_{ij}$: The element of matrix $\mathbf{X}$ at row $i$ and column $j$ 16 | 17 | 18 | 19 | 20 | ## Set Theory 21 | 22 | 23 | * $\mathcal{X}$: A set 24 | * $\mathbb{Z}$: The set of integers 25 | * $\mathbb{R}$: The set of real numbers 26 | * $\mathbb{R}^n$: The set of $n$-dimensional vectors of real numbers 27 | * $\mathbb{R}^{a\times b}$: The set of matrices of real numbers with $a$ rows and $b$ columns 28 | * $\mathcal{A}\cup\mathcal{B}$: Union of sets $\mathcal{A}$ and $\mathcal{B}$ 29 | * $\mathcal{A}\cap\mathcal{B}$: Intersection of sets $\mathcal{A}$ and $\mathcal{B}$ 30 | * $\mathcal{A}\setminus\mathcal{B}$: Subtraction of set $\mathcal{B}$ from set $\mathcal{A}$ 31 | 32 | 33 | ## Functions and Operators 34 | 35 | 36 | * $f(\cdot)$: A function 37 | * $\log(\cdot)$: The natural logarithm 38 | * $\exp(\cdot)$: The exponential function 39 | * $\mathbf{1}_\mathcal{X}$: The indicator function 40 | * $\mathbf{(\cdot)}^\top$: Transpose of a vector or a matrix 41 | * $\mathbf{X}^{-1}$: Inverse of matrix $\mathbf{X}$ 42 | * $\odot$: Hadamard (elementwise) product 43 | * $[\cdot, \cdot]$: Concatenation 44 | * $\lvert \mathcal{X} \rvert$: Cardinality of set $\mathcal{X}$ 45 | * $\|\cdot\|_p$: $L_p$ norm 46 | * $\|\cdot\|$: $L_2$ norm 47 | * $\langle \mathbf{x}, \mathbf{y} \rangle$: Dot product of vectors $\mathbf{x}$ and $\mathbf{y}$ 48 | * $\sum$: Series addition 49 | * $\prod$: Series multiplication 50 | * $\stackrel{\mathrm{def}}{=}$: Definition 51 | 52 | 53 | ## Calculus 54 | 55 | * $\frac{dy}{dx}$: Derivative of $y$ with respect to $x$ 56 | * $\frac{\partial y}{\partial x}$: Partial derivative of $y$ with respect to $x$ 57 | * $\nabla_{\mathbf{x}} y$: Gradient of $y$ with respect to $\mathbf{x}$ 58 | * $\int_a^b f(x) \;dx$: Definite integral of $f$ from $a$ to $b$ with respect to $x$ 59 | * $\int f(x) \;dx$: Indefinite integral of $f$ with respect to $x$ 60 | 61 | ## Probability and Information Theory 62 | 63 | * $P(\cdot)$: Probability distribution 64 | * $z \sim P$: Random variable $z$ has probability distribution $P$ 65 | * $P(X \mid Y)$: Conditional probability of $X \mid Y$ 66 | * $p(x)$: Probability density function 67 | * ${E}_{x} [f(x)]$: Expectation of $f$ with respect to $x$ 68 | * $X \perp Y$: Random variables $X$ and $Y$ are independent 69 | * $X \perp Y \mid Z$: Random variables $X$ and $Y$ are conditionally independent given random variable $Z$ 70 | * $\mathrm{Var}(X)$: Variance of random variable $X$ 71 | * $\sigma_X$: Standard deviation of random variable $X$ 72 | * $\mathrm{Cov}(X, Y)$: Covariance of random variables $X$ and $Y$ 73 | * $\rho(X, Y)$: Correlation of random variables $X$ and $Y$ 74 | * $H(X)$: Entropy of random variable $X$ 75 | * $D_{\mathrm{KL}}(P\|Q)$: KL-divergence of distributions $P$ and $Q$ 76 | 77 | 78 | 79 | ## Complexity 80 | 81 | * $\mathcal{O}$: Big O notation 82 | 83 | 84 | [Discussions](https://discuss.d2l.ai/t/25) 85 | -------------------------------------------------------------------------------- /chapter_optimization/index.md: -------------------------------------------------------------------------------- 1 | # 优化算法 2 | :label:`chap_optimization` 3 | 4 | 
截止到目前,本书已经使用了许多优化算法来训练深度学习模型。优化算法使我们能够继续更新模型参数,并使损失函数的值最小化。这就像在训练集上评估一样。事实上,任何满足于将优化视为黑盒装置,以在简单的设置中最小化目标函数的人,都可能会知道存在着一系列此类“咒语”(名称如“SGD”和“Adam”)。 5 | 6 | 但是,为了做得更好,还需要更深入的知识。优化算法对于深度学习非常重要。一方面,训练复杂的深度学习模型可能需要数小时、几天甚至数周。优化算法的性能直接影响模型的训练效率。另一方面,了解不同优化算法的原则及其超参数的作用将使我们能够以有针对性的方式调整超参数,以提高深度学习模型的性能。 7 | 8 | 在本章中,我们深入探讨常见的深度学习优化算法。深度学习中出现的几乎所有优化问题都是*非凸*的。尽管如此,在*凸问题*背景下设计和分析算法是非常有启发性的。正是出于这个原因,本章包括了凸优化的入门,以及凸目标函数上非常简单的随机梯度下降算法的证明。 9 | 10 | ```toc 11 | :maxdepth: 2 12 | 13 | optimization-intro 14 | convexity 15 | gd 16 | sgd 17 | minibatch-sgd 18 | momentum 19 | adagrad 20 | rmsprop 21 | adadelta 22 | adam 23 | lr-scheduler 24 | ``` 25 | -------------------------------------------------------------------------------- /chapter_optimization/index_origin.md: -------------------------------------------------------------------------------- 1 | # Optimization Algorithms 2 | :label:`chap_optimization` 3 | 4 | If you read the book in sequence up to this point you already used a number of optimization algorithms to train deep learning models. 5 | They were the tools that allowed us to continue updating model parameters and to minimize the value of the loss function, as evaluated on the training set. Indeed, anyone content with treating optimization as a black box device to minimize objective functions in a simple setting might well content oneself with the knowledge that there exists an array of incantations of such a procedure (with names such as "SGD" and "Adam"). 6 | 7 | To do well, however, some deeper knowledge is required. 8 | Optimization algorithms are important for deep learning. 9 | On one hand, training a complex deep learning model can take hours, days, or even weeks. 10 | The performance of the optimization algorithm directly affects the model's training efficiency. 11 | On the other hand, understanding the principles of different optimization algorithms and the role of their hyperparameters 12 | will enable us to tune the hyperparameters in a targeted manner to improve the performance of deep learning models. 13 | 14 | In this chapter, we explore common deep learning optimization algorithms in depth. 15 | Almost all optimization problems arising in deep learning are *nonconvex*. 16 | Nonetheless, the design and analysis of algorithms in the context of *convex* problems have proven to be very instructive. 17 | It is for that reason that this chapter includes a primer on convex optimization and the proof for a very simple stochastic gradient descent algorithm on a convex objective function. 18 | 19 | ```toc 20 | :maxdepth: 2 21 | 22 | optimization-intro 23 | convexity 24 | gd 25 | sgd 26 | minibatch-sgd 27 | momentum 28 | adagrad 29 | rmsprop 30 | adadelta 31 | adam 32 | lr-scheduler 33 | ``` 34 | 35 | -------------------------------------------------------------------------------- /chapter_preliminaries/index.md: -------------------------------------------------------------------------------- 1 | # 预备知识 2 | :label:`chap_preliminaries` 3 | 4 | 要学习深度学习,首先需要先掌握一些基本技能。 5 | 所有机器学习方法都涉及从数据中提取信息。 6 | 因此,我们先学习一些关于数据的实用技能,包括存储、操作和预处理数据。 7 | 8 | 机器学习通常需要处理大型数据集。 9 | 我们可以将某些数据集视为一个表,其中表的行对应样本,列对应属性。 10 | 线性代数为人们提供了一些用来处理表格数据的方法。 11 | 我们不会太深究细节,而是将重点放在矩阵运算的基本原理及其实现上。 12 | 13 | 深度学习是关于优化的学习。 14 | 对于一个带有参数的模型,我们想要找到其中能拟合数据的最好模型。 15 | 在算法的每个步骤中,决定以何种方式调整参数需要一点微积分知识。 16 | 本章将简要介绍这些知识。 17 | 幸运的是,`autograd`包会自动计算微分,本章也将介绍它。 18 | 19 | 机器学习还涉及如何做出预测:给定观察到的信息,某些未知属性可能的值是多少? 
20 | 要在不确定的情况下进行严格的推断,我们需要借用概率语言。 21 | 22 | 最后,官方文档提供了本书之外的大量描述和示例。 23 | 在本章的结尾,我们将展示如何在官方文档中查找所需信息。 24 | 25 | 本书对读者数学基础无过分要求,只要可以正确理解深度学习所需的数学知识即可。 26 | 但这并不意味着本书中不涉及数学方面的内容,本章会快速介绍一些基本且常用的数学知识, 27 | 以便读者能够理解书中的大部分数学内容。 28 | 如果读者想要深入理解全部数学内容,可以进一步学习本书数学附录中给出的数学基础知识。 29 | 30 | ```toc 31 | :maxdepth: 2 32 | 33 | ndarray 34 | pandas 35 | linear-algebra 36 | calculus 37 | autograd 38 | probability 39 | lookup-api 40 | ``` 41 | -------------------------------------------------------------------------------- /chapter_preliminaries/index_origin.md: -------------------------------------------------------------------------------- 1 | # Preliminaries 2 | :label:`chap_preliminaries` 3 | 4 | To get started with deep learning, 5 | we will need to develop a few basic skills. 6 | All machine learning is concerned 7 | with extracting information from data. 8 | So we will begin by learning the practical skills 9 | for storing, manipulating, and preprocessing data. 10 | 11 | Moreover, machine learning typically requires 12 | working with large datasets, which we can think of as tables, 13 | where the rows correspond to examples 14 | and the columns correspond to attributes. 15 | Linear algebra gives us a powerful set of techniques 16 | for working with tabular data. 17 | We will not go too far into the weeds but rather focus on the basic 18 | of matrix operations and their implementation. 19 | 20 | Additionally, deep learning is all about optimization. 21 | We have a model with some parameters and 22 | we want to find those that fit our data *the best*. 23 | Determining which way to move each parameter at each step of an algorithm 24 | requires a little bit of calculus, which will be briefly introduced. 25 | Fortunately, the `autograd` package automatically computes differentiation for us, 26 | and we will cover it next. 27 | 28 | Next, machine learning is concerned with making predictions: 29 | what is the likely value of some unknown attribute, 30 | given the information that we observe? 31 | To reason rigorously under uncertainty 32 | we will need to invoke the language of probability. 33 | 34 | In the end, the official documentation provides 35 | plenty of descriptions and examples that are beyond this book. 36 | To conclude the chapter, we will show you how to look up documentation for 37 | the needed information. 38 | 39 | This book has kept the mathematical content to the minimum necessary 40 | to get a proper understanding of deep learning. 41 | However, it does not mean that 42 | this book is mathematics free. 43 | Thus, this chapter provides a rapid introduction to 44 | basic and frequently-used mathematics to allow anyone to understand 45 | at least *most* of the mathematical content of the book. 46 | If you wish to understand *all* of the mathematical content, 47 | further reviewing the [online appendix on mathematics](https://d2l.ai/chapter_appendix-mathematics-for-deep-learning/index.html) should be sufficient. 
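As a small taste of the automatic differentiation mentioned above, the sketch below asks the framework for the gradient of $y = 2\mathbf{x}^\top\mathbf{x}$, whose analytic gradient is $4\mathbf{x}$. It assumes PyTorch; `autograd.md` develops this for each framework.

```python
# Automatic differentiation in four lines.
import torch

x = torch.arange(4.0, requires_grad=True)   # track operations on x
y = 2 * torch.dot(x, x)                     # a scalar function of x
y.backward()                                # populate x.grad with dy/dx
print(x.grad)                               # tensor([ 0.,  4.,  8., 12.])
print(x.grad == 4 * x)                      # matches the analytic gradient 4x
```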
48 | 49 | ```toc 50 | :maxdepth: 2 51 | 52 | ndarray 53 | pandas 54 | linear-algebra 55 | calculus 56 | autograd 57 | probability 58 | lookup-api 59 | ``` 60 | 61 | -------------------------------------------------------------------------------- /chapter_preliminaries/lookup-api.md: -------------------------------------------------------------------------------- 1 | # 查阅文档 2 | 3 | :begin_tab:`mxnet` 4 | 由于篇幅限制,本书不可能介绍每一个MXNet函数和类。 5 | API文档、其他教程和示例提供了本书之外的大量文档。 6 | 本节提供了一些查看MXNet API的指导。 7 | :end_tab: 8 | 9 | :begin_tab:`pytorch` 10 | 由于篇幅限制,本书不可能介绍每一个PyTorch函数和类。 11 | API文档、其他教程和示例提供了本书之外的大量文档。 12 | 本节提供了一些查看PyTorch API的指导。 13 | :end_tab: 14 | 15 | :begin_tab:`tensorflow` 16 | 由于篇幅限制,本书不可能介绍每一个TensorFlow函数和类。 17 | API文档、其他教程和示例提供了本书之外的大量文档。 18 | 本节提供了一些查TensorFlow API的指导。 19 | :end_tab: 20 | 21 | ## 查找模块中的所有函数和类 22 | 23 | 为了知道模块中可以调用哪些函数和类,可以调用`dir`函数。 24 | 例如,我们可以(**查询随机数生成模块中的所有属性:**) 25 | 26 | ```{.python .input} 27 | from mxnet import np 28 | print(dir(np.random)) 29 | ``` 30 | 31 | ```{.python .input} 32 | #@tab pytorch 33 | import torch 34 | print(dir(torch.distributions)) 35 | ``` 36 | 37 | ```{.python .input} 38 | #@tab tensorflow 39 | import tensorflow as tf 40 | print(dir(tf.random)) 41 | ``` 42 | 43 | ```{.python .input} 44 | #@tab paddle 45 | import warnings 46 | warnings.filterwarnings(action='ignore') 47 | import paddle 48 | print(dir(paddle.distribution)) 49 | ``` 50 | 51 | ```{.python .input} 52 | #@tab mindspore 53 | import mindspore 54 | 55 | print(dir(mindspore)) 56 | ``` 57 | 58 | 通常可以忽略以“`__`”(双下划线)开始和结束的函数,它们是Python中的特殊对象, 59 | 或以单个“`_`”(单下划线)开始的函数,它们通常是内部函数。 60 | 根据剩余的函数名或属性名,我们可能会猜测这个模块提供了各种生成随机数的方法, 61 | 包括从均匀分布(`uniform`)、正态分布(`normal`)和多项分布(`multinomial`)中采样。 62 | 63 | ## 查找特定函数和类的用法 64 | 65 | 有关如何使用给定函数或类的更具体说明,可以调用`help`函数。 66 | 例如,我们来[**查看张量`ones`函数的用法。**] 67 | 68 | ```{.python .input} 69 | help(np.ones) 70 | ``` 71 | 72 | ```{.python .input} 73 | #@tab pytorch 74 | help(torch.ones) 75 | ``` 76 | 77 | ```{.python .input} 78 | #@tab tensorflow 79 | help(tf.ones) 80 | ``` 81 | 82 | ```{.python .input} 83 | #@tab paddle 84 | help(paddle.ones) 85 | ``` 86 | 87 | ```{.python .input} 88 | #@tab mindspore 89 | import mindspore.ops as ops 90 | help(ops.ones) 91 | ``` 92 | 93 | 从文档中,我们可以看到`ones`函数创建一个具有指定形状的新张量,并将所有元素值设置为1。 94 | 下面来[**运行一个快速测试**]来确认这一解释: 95 | 96 | ```{.python .input} 97 | np.ones(4) 98 | ``` 99 | 100 | ```{.python .input} 101 | #@tab pytorch 102 | torch.ones(4) 103 | ``` 104 | 105 | ```{.python .input} 106 | #@tab tensorflow 107 | tf.ones(4) 108 | ``` 109 | 110 | ```{.python .input} 111 | #@tab paddle 112 | paddle.ones([4], dtype='float32') 113 | ``` 114 | 115 | ```{.python .input} 116 | #@tab mindspore 117 | ops.ones((2, 2)) 118 | ``` 119 | 120 | 在Jupyter记事本中,我们可以使用`?`指令在另一个浏览器窗口中显示文档。 121 | 例如,`list?`指令将创建与`help(list)`指令几乎相同的内容,并在新的浏览器窗口中显示它。 122 | 此外,如果我们使用两个问号,如`list??`,将显示实现该函数的Python代码。 123 | 124 | ## 小结 125 | 126 | * 官方文档提供了本书之外的大量描述和示例。 127 | * 可以通过调用`dir`和`help`函数或在Jupyter记事本中使用`?`和`??`查看API的用法文档。 128 | 129 | ## 练习 130 | 131 | 1. 
在深度学习框架中查找任何函数或类的文档。请尝试在这个框架的官方网站上找到文档。 132 | 133 | :begin_tab:`mxnet` 134 | [Discussions](https://discuss.d2l.ai/t/1764) 135 | :end_tab: 136 | 137 | :begin_tab:`pytorch` 138 | [Discussions](https://discuss.d2l.ai/t/1765) 139 | :end_tab: 140 | 141 | :begin_tab:`tensorflow` 142 | [Discussions](https://discuss.d2l.ai/t/1763) 143 | :end_tab: 144 | 145 | :begin_tab:`paddle` 146 | [Discussions](https://discuss.d2l.ai/t/11686) 147 | :end_tab: 148 | -------------------------------------------------------------------------------- /chapter_preliminaries/lookup-api_origin.md: -------------------------------------------------------------------------------- 1 | # Documentation 2 | :begin_tab:`mxnet` 3 | Due to constraints on the length of this book, we cannot possibly introduce every single MXNet function and class (and you probably would not want us to). The API documentation and additional tutorials and examples provide plenty of documentation beyond the book. In this section we provide you with some guidance to exploring the MXNet API. 4 | :end_tab: 5 | 6 | :begin_tab:`pytorch` 7 | Due to constraints on the length of this book, we cannot possibly introduce every single PyTorch function and class (and you probably would not want us to). The API documentation and additional tutorials and examples provide plenty of documentation beyond the book. In this section we provide you with some guidance to exploring the PyTorch API. 8 | :end_tab: 9 | 10 | :begin_tab:`tensorflow` 11 | Due to constraints on the length of this book, we cannot possibly introduce every single TensorFlow function and class (and you probably would not want us to). The API documentation and additional tutorials and examples provide plenty of documentation beyond the book. In this section we provide you with some guidance to exploring the TensorFlow API. 12 | :end_tab: 13 | 14 | 15 | ## Finding All the Functions and Classes in a Module 16 | 17 | In order to know which functions and classes can be called in a module, we 18 | invoke the `dir` function. For instance, we can (**query all properties in the 19 | module for generating random numbers**): 20 | 21 | ```{.python .input n=1} 22 | from mxnet import np 23 | print(dir(np.random)) 24 | ``` 25 | 26 | ```{.python .input n=1} 27 | #@tab pytorch 28 | import torch 29 | print(dir(torch.distributions)) 30 | ``` 31 | 32 | ```{.python .input n=1} 33 | #@tab tensorflow 34 | import tensorflow as tf 35 | print(dir(tf.random)) 36 | ``` 37 | 38 | Generally, we can ignore functions that start and end with `__` (special objects in Python) or functions that start with a single `_`(usually internal functions). Based on the remaining function or attribute names, we might hazard a guess that this module offers various methods for generating random numbers, including sampling from the uniform distribution (`uniform`), normal distribution (`normal`), and multinomial distribution (`multinomial`). 39 | 40 | ## Finding the Usage of Specific Functions and Classes 41 | 42 | For more specific instructions on how to use a given function or class, we can invoke the `help` function. As an example, let us [**explore the usage instructions for tensors' `ones` function**]. 
43 | 44 | ```{.python .input} 45 | help(np.ones) 46 | ``` 47 | 48 | ```{.python .input} 49 | #@tab pytorch 50 | help(torch.ones) 51 | ``` 52 | 53 | ```{.python .input} 54 | #@tab tensorflow 55 | help(tf.ones) 56 | ``` 57 | 58 | From the documentation, we can see that the `ones` function creates a new tensor with the specified shape and sets all the elements to the value of 1. Whenever possible, you should (**run a quick test**) to confirm your interpretation: 59 | 60 | ```{.python .input} 61 | np.ones(4) 62 | ``` 63 | 64 | ```{.python .input} 65 | #@tab pytorch 66 | torch.ones(4) 67 | ``` 68 | 69 | ```{.python .input} 70 | #@tab tensorflow 71 | tf.ones(4) 72 | ``` 73 | 74 | In the Jupyter notebook, we can use `?` to display the document in another 75 | window. For example, `list?` will create content that is almost 76 | identical to `help(list)`, displaying it in a new browser 77 | window. In addition, if we use two question marks, such as 78 | `list??`, the Python code implementing the function will also be 79 | displayed. 80 | 81 | 82 | ## Summary 83 | 84 | * The official documentation provides plenty of descriptions and examples that are beyond this book. 85 | * We can look up documentation for the usage of an API by calling the `dir` and `help` functions, or `?` and `??` in Jupyter notebooks. 86 | 87 | 88 | ## Exercises 89 | 90 | 1. Look up the documentation for any function or class in the deep learning framework. Can you also find the documentation on the official website of the framework? 91 | 92 | 93 | :begin_tab:`mxnet` 94 | [Discussions](https://discuss.d2l.ai/t/38) 95 | :end_tab: 96 | 97 | :begin_tab:`pytorch` 98 | [Discussions](https://discuss.d2l.ai/t/39) 99 | :end_tab: 100 | 101 | :begin_tab:`tensorflow` 102 | [Discussions](https://discuss.d2l.ai/t/199) 103 | :end_tab: 104 | -------------------------------------------------------------------------------- /chapter_preliminaries/pandas.md: -------------------------------------------------------------------------------- 1 | # 数据预处理 2 | :label:`sec_pandas` 3 | 4 | 为了能用深度学习来解决现实世界的问题,我们经常从预处理原始数据开始, 5 | 而不是从那些准备好的张量格式数据开始。 6 | 在Python中常用的数据分析工具中,我们通常使用`pandas`软件包。 7 | 像庞大的Python生态系统中的许多其他扩展包一样,`pandas`可以与张量兼容。 8 | 本节我们将简要介绍使用`pandas`预处理原始数据,并将原始数据转换为张量格式的步骤。 9 | 后面的章节将介绍更多的数据预处理技术。 10 | 11 | ## 读取数据集 12 | 13 | 举一个例子,我们首先(**创建一个人工数据集,并存储在CSV(逗号分隔值)文件**) 14 | `../data/house_tiny.csv`中。 15 | 以其他格式存储的数据也可以通过类似的方式进行处理。 16 | 下面我们将数据集按行写入CSV文件中。 17 | 18 | ```{.python .input} 19 | #@tab all 20 | import os 21 | 22 | os.makedirs(os.path.join('..', 'data'), exist_ok=True) 23 | data_file = os.path.join('..', 'data', 'house_tiny.csv') 24 | with open(data_file, 'w') as f: 25 | f.write('NumRooms,Alley,Price\n') # 列名 26 | f.write('NA,Pave,127500\n') # 每行表示一个数据样本 27 | f.write('2,NA,106000\n') 28 | f.write('4,NA,178100\n') 29 | f.write('NA,NA,140000\n') 30 | ``` 31 | 32 | 要[**从创建的CSV文件中加载原始数据集**],我们导入`pandas`包并调用`read_csv`函数。该数据集有四行三列。其中每行描述了房间数量(“NumRooms”)、巷子类型(“Alley”)和房屋价格(“Price”)。 33 | 34 | ```{.python .input} 35 | #@tab all 36 | # 如果没有安装pandas,只需取消对以下行的注释来安装pandas 37 | # !pip install pandas 38 | import pandas as pd 39 | 40 | data = pd.read_csv(data_file) 41 | print(data) 42 | ``` 43 | 44 | ## 处理缺失值 45 | 46 | 注意,“NaN”项代表缺失值。 47 | [**为了处理缺失的数据,典型的方法包括*插值法*和*删除法*,**] 48 | 其中插值法用一个替代值弥补缺失值,而删除法则直接忽略缺失值。 49 | 在(**这里,我们将考虑插值法**)。 50 | 51 | 通过位置索引`iloc`,我们将`data`分成`inputs`和`outputs`, 52 | 其中前者为`data`的前两列,而后者为`data`的最后一列。 53 | 对于`inputs`中缺少的数值,我们用同一列的均值替换“NaN”项。 54 | 55 | ```{.python .input} 56 | #@tab all 57 | inputs, outputs = data.iloc[:, 0:2], 
data.iloc[:, 2] 58 | inputs = inputs.fillna(inputs.mean()) 59 | print(inputs) 60 | ``` 61 | 62 | [**对于`inputs`中的类别值或离散值,我们将“NaN”视为一个类别。**] 63 | 由于“巷子类型”(“Alley”)列只接受两种类型的类别值“Pave”和“NaN”, 64 | `pandas`可以自动将此列转换为两列“Alley_Pave”和“Alley_nan”。 65 | 巷子类型为“Pave”的行会将“Alley_Pave”的值设置为1,“Alley_nan”的值设置为0。 66 | 缺少巷子类型的行会将“Alley_Pave”和“Alley_nan”分别设置为0和1。 67 | 68 | ```{.python .input} 69 | #@tab all 70 | inputs = pd.get_dummies(inputs, dummy_na=True) 71 | print(inputs) 72 | ``` 73 | 74 | ## 转换为张量格式 75 | 76 | [**现在`inputs`和`outputs`中的所有条目都是数值类型,它们可以转换为张量格式。**] 77 | 当数据采用张量格式后,可以通过在 :numref:`sec_ndarray`中引入的那些张量函数来进一步操作。 78 | 79 | ```{.python .input} 80 | from mxnet import np 81 | 82 | X, y = np.array(inputs.values), np.array(outputs.values) 83 | X, y 84 | ``` 85 | 86 | ```{.python .input} 87 | #@tab pytorch 88 | import torch 89 | 90 | X, y = torch.tensor(inputs.values), torch.tensor(outputs.values) 91 | X, y 92 | ``` 93 | 94 | ```{.python .input} 95 | #@tab tensorflow 96 | import tensorflow as tf 97 | 98 | X, y = tf.constant(inputs.values), tf.constant(outputs.values) 99 | X, y 100 | ``` 101 | 102 | ```{.python .input} 103 | #@tab paddle 104 | import warnings 105 | warnings.filterwarnings(action='ignore') 106 | import paddle 107 | 108 | X, y = paddle.to_tensor(inputs.values), paddle.to_tensor(outputs.values) 109 | X, y 110 | ``` 111 | 112 | ```{.python .input} 113 | #@tab mindspore 114 | import mindspore 115 | 116 | X, y = mindspore.Tensor(inputs.values), mindspore.Tensor(outputs.values) 117 | X, y 118 | ``` 119 | 120 | ## 小结 121 | 122 | * `pandas`软件包是Python中常用的数据分析工具中,`pandas`可以与张量兼容。 123 | * 用`pandas`处理缺失的数据时,我们可根据情况选择用插值法和删除法。 124 | 125 | ## 练习 126 | 127 | 创建包含更多行和列的原始数据集。 128 | 129 | 1. 删除缺失值最多的列。 130 | 2. 将预处理后的数据集转换为张量格式。 131 | 132 | :begin_tab:`mxnet` 133 | [Discussions](https://discuss.d2l.ai/t/1749) 134 | :end_tab: 135 | 136 | :begin_tab:`pytorch` 137 | [Discussions](https://discuss.d2l.ai/t/1750) 138 | :end_tab: 139 | 140 | :begin_tab:`tensorflow` 141 | [Discussions](https://discuss.d2l.ai/t/1748) 142 | :end_tab: 143 | 144 | :begin_tab:`paddle` 145 | [Discussions](https://discuss.d2l.ai/t/11681) 146 | :end_tab: 147 | -------------------------------------------------------------------------------- /chapter_recurrent-modern/index.md: -------------------------------------------------------------------------------- 1 | # 现代循环神经网络 2 | :label:`chap_modern_rnn` 3 | 4 | 前一章中我们介绍了循环神经网络的基础知识, 5 | 这种网络可以更好地处理序列数据。 6 | 我们在文本数据上实现了基于循环神经网络的语言模型, 7 | 但是对于当今各种各样的序列学习问题,这些技术可能并不够用。 8 | 9 | 例如,循环神经网络在实践中一个常见问题是数值不稳定性。 10 | 尽管我们已经应用了梯度裁剪等技巧来缓解这个问题, 11 | 但是仍需要通过设计更复杂的序列模型来进一步处理它。 12 | 具体来说,我们将引入两个广泛使用的网络, 13 | 即*门控循环单元*(gated recurrent units,GRU)和 14 | *长短期记忆网络*(long short-term memory,LSTM)。 15 | 然后,我们将基于一个单向隐藏层来扩展循环神经网络架构。 16 | 我们将描述具有多个隐藏层的深层架构, 17 | 并讨论基于前向和后向循环计算的双向设计。 18 | 现代循环网络经常采用这种扩展。 19 | 在解释这些循环神经网络的变体时, 20 | 我们将继续考虑 :numref:`chap_rnn`中的语言建模问题。 21 | 22 | 事实上,语言建模只揭示了序列学习能力的冰山一角。 23 | 在各种序列学习问题中,如自动语音识别、文本到语音转换和机器翻译, 24 | 输入和输出都是任意长度的序列。 25 | 为了阐述如何拟合这种类型的数据, 26 | 我们将以机器翻译为例介绍基于循环神经网络的 27 | “编码器-解码器”架构和束搜索,并用它们来生成序列。 28 | 29 | ```toc 30 | :maxdepth: 2 31 | 32 | gru 33 | lstm 34 | deep-rnn 35 | bi-rnn 36 | machine-translation-and-dataset 37 | encoder-decoder 38 | seq2seq 39 | beam-search 40 | ``` 41 | -------------------------------------------------------------------------------- /chapter_recurrent-modern/index_origin.md: -------------------------------------------------------------------------------- 1 | # Modern Recurrent Neural Networks 2 | :label:`chap_modern_rnn` 3 | 4 | We have introduced the 
basics of RNNs, 5 | which can better handle sequence data. 6 | For demonstration, 7 | we implemented RNN-based 8 | language models on text data. 9 | However, 10 | such techniques may not be sufficient 11 | for practitioners when they face 12 | a wide range of sequence learning problems nowadays. 13 | 14 | For instance, 15 | a notable issue in practice 16 | is the numerical instability of RNNs. 17 | Although we have applied implementation tricks 18 | such as gradient clipping, 19 | this issue can be alleviated further 20 | with more sophisticated designs of sequence models. 21 | Specifically, 22 | gated RNNs are much more common in practice. 23 | We will begin by introducing two such widely used networks, 24 | namely *gated recurrent units* (GRUs) and *long short-term memory* (LSTM). 25 | Furthermore, we will expand the RNN architecture 26 | with a single unidirectional hidden layer 27 | that has been discussed so far. 28 | We will describe deep architectures with 29 | multiple hidden layers, 30 | and discuss the bidirectional design 31 | with both forward and backward recurrent computations. 32 | Such expansions are frequently adopted 33 | in modern recurrent networks. 34 | When explaining these RNN variants, 35 | we continue to consider 36 | the same language modeling problem introduced in :numref:`chap_rnn`. 37 | 38 | In fact, language modeling 39 | reveals only a small fraction of what 40 | sequence learning is capable of. 41 | In a variety of sequence learning problems, 42 | such as automatic speech recognition, text to speech, and machine translation, 43 | both inputs and outputs are sequences of arbitrary length. 44 | To explain how to fit this type of data, 45 | we will take machine translation as an example, 46 | and introduce the encoder-decoder architecture based on 47 | RNNs and beam search for sequence generation.
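As a rough sketch of the encoder-decoder interface that the later sections flesh out in full (PyTorch-style; the class and method names below are illustrative assumptions, not necessarily the book's final API):

```python
import torch
from torch import nn

class Encoder(nn.Module):
    """Illustrative base class: maps an input sequence to an encoded state."""
    def forward(self, X, *args):
        raise NotImplementedError

class Decoder(nn.Module):
    """Illustrative base class: turns the encoded state and target inputs into outputs."""
    def init_state(self, enc_outputs, *args):
        raise NotImplementedError
    def forward(self, X, state):
        raise NotImplementedError

class EncoderDecoder(nn.Module):
    """Chain an encoder and a decoder for sequence-to-sequence learning."""
    def __init__(self, encoder, decoder):
        super().__init__()
        self.encoder = encoder
        self.decoder = decoder
    def forward(self, enc_X, dec_X, *args):
        enc_outputs = self.encoder(enc_X, *args)
        dec_state = self.decoder.init_state(enc_outputs, *args)
        return self.decoder(dec_X, dec_state)
```

Beam search, covered at the end of the chapter, then improves how output tokens are chosen from the decoder's step-by-step predictions.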
48 | 49 | ```toc 50 | :maxdepth: 2 51 | 52 | gru 53 | lstm 54 | deep-rnn 55 | bi-rnn 56 | machine-translation-and-dataset 57 | encoder-decoder 58 | seq2seq 59 | beam-search 60 | ``` 61 | 62 | -------------------------------------------------------------------------------- /chapter_recurrent-neural-networks/index.md: -------------------------------------------------------------------------------- 1 | # 循环神经网络 2 | :label:`chap_rnn` 3 | 4 | 到目前为止,我们遇到过两种类型的数据:表格数据和图像数据。 5 | 对于图像数据,我们设计了专门的卷积神经网络架构来为这类特殊的数据结构建模。 6 | 换句话说,如果我们拥有一张图像,我们需要有效地利用其像素位置, 7 | 假若我们对图像中的像素位置进行重排,就会对图像中内容的推断造成极大的困难。 8 | 9 | 最重要的是,到目前为止我们默认数据都来自于某种分布, 10 | 并且所有样本都是独立同分布的 11 | (independently and identically distributed,i.i.d.)。 12 | 然而,大多数的数据并非如此。 13 | 例如,文章中的单词是按顺序写的,如果顺序被随机地重排,就很难理解文章原始的意思。 14 | 同样,视频中的图像帧、对话中的音频信号以及网站上的浏览行为都是有顺序的。 15 | 因此,针对此类数据而设计特定模型,可能效果会更好。 16 | 17 | 另一个问题来自这样一个事实: 18 | 我们不仅仅可以接收一个序列作为输入,而是还可能期望继续猜测这个序列的后续。 19 | 例如,一个任务可以是继续预测$2, 4, 6, 8, 10, \ldots$。 20 | 这在时间序列分析中是相当常见的,可以用来预测股市的波动、 21 | 患者的体温曲线或者赛车所需的加速度。 22 | 同理,我们需要能够处理这些数据的特定模型。 23 | 24 | 简言之,如果说卷积神经网络可以有效地处理空间信息, 25 | 那么本章的*循环神经网络*(recurrent neural network,RNN)则可以更好地处理序列信息。 26 | 循环神经网络通过引入状态变量存储过去的信息和当前的输入,从而可以确定当前的输出。 27 | 28 | 许多使用循环网络的例子都是基于文本数据的,因此我们将在本章中重点介绍语言模型。 29 | 在对序列数据进行更详细的回顾之后,我们将介绍文本预处理的实用技术。 30 | 然后,我们将讨论语言模型的基本概念,并将此讨论作为循环神经网络设计的灵感。 31 | 最后,我们描述了循环神经网络的梯度计算方法,以探讨训练此类网络时可能遇到的问题。 32 | 33 | ```toc 34 | :maxdepth: 2 35 | 36 | sequence 37 | text-preprocessing 38 | language-models-and-dataset 39 | rnn 40 | rnn-scratch 41 | rnn-concise 42 | bptt 43 | ``` 44 | -------------------------------------------------------------------------------- /chapter_recurrent-neural-networks/index_origin.md: -------------------------------------------------------------------------------- 1 | # Recurrent Neural Networks 2 | :label:`chap_rnn` 3 | 4 | So far we encountered two types of data: tabular data and image data. 5 | For the latter we designed specialized layers to take advantage of the regularity in them. 6 | In other words, if we were to permute the pixels in an image, it would be much more difficult to reason about its content of something that would look much like the background of a test pattern in the times of analog TV. 7 | 8 | Most importantly, so far we tacitly assumed that our data are all drawn from some distribution, 9 | and all the examples are independently and identically distributed (i.i.d.). 10 | Unfortunately, this is not true for most data. For instance, the words in this paragraph are written in sequence, and it would be quite difficult to decipher its meaning if they were permuted randomly. 11 | Likewise, image frames in a video, the audio signal in a conversation, and the browsing behavior on a website, all follow sequential order. 12 | It is thus reasonable to assume that specialized models for such data will do better at describing them. 13 | 14 | Another issue arises from the fact that we might not only receive a sequence as an input but rather might be expected to continue the sequence. 15 | For instance, the task could be to continue the series $2, 4, 6, 8, 10, \ldots$ This is quite common in time series analysis, to predict the stock market, the fever curve of a patient, or the acceleration needed for a race car. Again we want to have models that can handle such data. 16 | 17 | In short, while CNNs can efficiently process spatial information, *recurrent neural networks* (RNNs) are designed to better handle sequential information. 
18 | RNNs introduce state variables to store past information, together with the current inputs, to determine the current outputs. 19 | 20 | Many of the examples for using recurrent networks are based on text data. Hence, we will emphasize language models in this chapter. After a more formal review of sequence data we introduce practical techniques for preprocessing text data. 21 | Next, we discuss basic concepts of a language model and use this discussion as the inspiration for the design of RNNs. 22 | In the end, we describe the gradient calculation method for RNNs to explore problems that may be encountered when training such networks. 23 | 24 | ```toc 25 | :maxdepth: 2 26 | 27 | sequence 28 | text-preprocessing 29 | language-models-and-dataset 30 | rnn 31 | rnn-scratch 32 | rnn-concise 33 | bptt 34 | ``` 35 | 36 | -------------------------------------------------------------------------------- /chapter_references/zreferences.md: -------------------------------------------------------------------------------- 1 | ```eval_rst 2 | 3 | .. only:: html 4 | 5 | 参考文献 6 | ========== 7 | 8 | ``` 9 | 10 | :bibliography:`../d2l.bib` 11 | 12 | -------------------------------------------------------------------------------- /contrib/chapter_recommender-systems/index.md: -------------------------------------------------------------------------------- 1 | # 推荐系统(Recommender Systems) 2 | :label:`chap_recsys` 3 | 4 | **Shuai Zhang** (*亚马逊*), **Aston Zhang** (*亚马逊*), and **Yi Tay** (*谷歌*) 5 | 6 | 推荐系统在工业界有着广泛的应用,它在我们的日常生活中也随处可见。推荐系统应用在了很多领域,例如在线购物网站(例如亚马逊)、音乐电影服务网站(例如网飞和Spotify)、移动应用商店(例如IOS商店和谷歌play)和在线广告等。 7 | 8 | 推荐系统的主要作用是帮助用户发现相关物品,从而创造愉悦的用户体验。这里的物品包括要观看的电影、阅读的文本或者可购买的商品等。此外,推荐系统还是一种可以帮助零售商增加收入的强大的机器学习系统。作为搜索引擎的替代品,推荐系统可以减少用户主动搜索的工作量,向用户推荐从未搜索过的物品并为其带来惊喜。在推荐系统的有效帮助下,许多公司超越了其竞争对手。正因如此,推荐系统不仅在我们的日常生活中至关重要,在工业界中它也是必不可少的。 9 | 10 | 在本章中,我们将介绍推荐系统的基础知识及其改进,并基于不同的数据源探讨构建推荐系统的一些常见基本技术及其实现。具体来说,你将学习到如何预测用户对潜在物品的评分,如何生成推荐物品列表以及如何基于大量特征预测点击率。这些任务在现实世界的应用中都很常见。学习完本章的内容后,你将获得解决现实世界中推荐系统问题的第一手经验,这其中不仅包括经典方法,还包括了基于深度学习的模型方法。 11 | 12 | ```toc 13 | :maxdepth: 2 14 | 15 | recsys-intro 16 | movielens 17 | mf 18 | autorec 19 | ranking 20 | neumf 21 | seqrec 22 | ctr 23 | fm 24 | deepfm 25 | ``` 26 | 27 | -------------------------------------------------------------------------------- /contrib/to-rm-mx-contrib-text/d2lzh/__init__.py: -------------------------------------------------------------------------------- 1 | from . import text 2 | from .utils import * 3 | 4 | __version__ = '1.0.0' 5 | -------------------------------------------------------------------------------- /contrib/to-rm-mx-contrib-text/d2lzh/text/__init__.py: -------------------------------------------------------------------------------- 1 | from . import vocab 2 | from . 
import embedding -------------------------------------------------------------------------------- /contrib/to-rm-mx-contrib-text/d2lzh/text/embedding.py: -------------------------------------------------------------------------------- 1 | import os 2 | from mxnet import nd, gluon 3 | import tarfile 4 | import zipfile 5 | 6 | DATA_URL = 'http://d2l-data.s3-accelerate.amazonaws.com/' 7 | PRETRAINED_FILE = { 8 | 'glove':{}, 9 | 'fasttext':{} 10 | } 11 | PRETRAINED_FILE['glove']['glove.6b.50d.txt'] = (DATA_URL + 'glove.6B.50d.zip', 12 | '0b8703943ccdb6eb788e6f091b8946e82231bc4d') 13 | PRETRAINED_FILE['glove']['glove.6b.100d.txt'] = (DATA_URL + 'glove.6B.100d.zip', 14 | 'cd43bfb07e44e6f27cbcc7bc9ae3d80284fdaf5a') 15 | PRETRAINED_FILE['glove']['glove.42b.300d.txt'] = (DATA_URL + 'glove.42B.300d.zip', 16 | 'b5116e234e9eb9076672cfeabf5469f3eec904fa') 17 | PRETRAINED_FILE['fasttext']['wiki.en'] = (DATA_URL + 'wiki.en.zip', 18 | 'c1816da3821ae9f43899be655002f6c723e91b88') 19 | 20 | def mkdir_if_not_exist(path): 21 | if not isinstance(path, str): 22 | path = os.path.join(*path) 23 | if not os.path.exists(path): 24 | os.makedirs(path) 25 | 26 | def download(embedding_name, pretrained_file_name, cache_dir=os.path.join('..', 'data')): 27 | url, sha1 = PRETRAINED_FILE[embedding_name][pretrained_file_name] 28 | mkdir_if_not_exist(cache_dir) 29 | return gluon.utils.download(url, cache_dir, sha1_hash=sha1) 30 | 31 | def download_extract(embedding_name, pretrained_file_name, folder=None): 32 | """Download and extract a zip/tar file.""" 33 | fname = download(embedding_name, pretrained_file_name) 34 | base_dir = os.path.dirname(fname) 35 | data_dir, ext = os.path.splitext(fname) 36 | if ext == '.zip': 37 | fp = zipfile.ZipFile(fname, 'r') 38 | elif ext in ('.tar', '.gz'): 39 | fp = tarfile.open(fname, 'r') 40 | else: 41 | assert False, 'Only zip/tar files can be extracted' 42 | fp.extractall(base_dir) 43 | if folder: 44 | return os.path.join(base_dir, folder) 45 | else: 46 | return data_dir 47 | 48 | def get_pretrained_file_names(embedding_name=None): 49 | if embedding_name is not None: 50 | return PRETRAINED_FILE[embedding_name].keys() 51 | else: 52 | return PRETRAINED_FILE 53 | 54 | def create(embedding_name, pretrained_file_name, vocabulary=None): 55 | return TokenEmbedding(embedding_name, pretrained_file_name.lower(), vocabulary) 56 | 57 | class TokenEmbedding: 58 | """Token Embedding.""" 59 | def __init__(self, embedding_name, pretrained_file_name, vocabulary=None): 60 | self.idx_to_token, self.idx_to_vec = self._load_embedding( 61 | embedding_name, pretrained_file_name) 62 | self.unknown_idx = 0 63 | self.token_to_idx = {token: idx for idx, token in 64 | enumerate(self.idx_to_token)} 65 | if vocabulary is not None: 66 | indices = [self.token_to_idx.get(token, self.unknown_idx) 67 | for token in vocabulary.idx_to_token] 68 | self.idx_to_vec = self.idx_to_vec[nd.array(indices)] 69 | self.token_to_idx = vocabulary.token_to_idx 70 | self.idx_to_token = vocabulary.idx_to_token 71 | 72 | def _load_embedding(self, embedding_name, pretrained_file_name): 73 | idx_to_token, idx_to_vec = [''], [] 74 | data_dir = download_extract(embedding_name, pretrained_file_name) 75 | # GloVe website: https://nlp.stanford.edu/projects/glove/ 76 | # fastText website: https://fasttext.cc/ 77 | with open(os.path.join(data_dir, 'vec.txt'), 'r') as f: 78 | for line in f: 79 | elems = line.rstrip().split(' ') 80 | token, elems = elems[0], [float(elem) for elem in elems[1:]] 81 | # Skip header information, such as the top row in 
fastText 82 | if len(elems) > 1: 83 | idx_to_token.append(token) 84 | idx_to_vec.append(elems) 85 | idx_to_vec = [[0] * len(idx_to_vec[0])] + idx_to_vec 86 | return idx_to_token, nd.array(idx_to_vec) 87 | 88 | def get_vecs_by_tokens(self, tokens): 89 | indices = [self.token_to_idx.get(token, self.unknown_idx) 90 | for token in tokens] 91 | vecs = self.idx_to_vec[nd.array(indices)] 92 | return vecs 93 | 94 | def __len__(self): 95 | return len(self.idx_to_token) -------------------------------------------------------------------------------- /contrib/to-rm-mx-contrib-text/d2lzh/text/vocab.py: -------------------------------------------------------------------------------- 1 | class Vocabulary: 2 | def __init__(self, counter, min_freq=0, reserved_tokens=None): 3 | if reserved_tokens is None: 4 | reserved_tokens = [] 5 | # Sort according to frequencies 6 | self.token_freqs = sorted(counter.items(), key=lambda x: x[0]) 7 | self.token_freqs.sort(key=lambda x: x[1], reverse=True) 8 | self.unk, uniq_tokens = 0, ['<unk>'] + reserved_tokens 9 | uniq_tokens += [token for token, freq in self.token_freqs 10 | if freq >= min_freq and token not in uniq_tokens] 11 | self.idx_to_token, self.token_to_idx = [], dict() 12 | for token in uniq_tokens: 13 | self.idx_to_token.append(token) 14 | self.token_to_idx[token] = len(self.idx_to_token) - 1 15 | 16 | def __len__(self): 17 | return len(self.idx_to_token) 18 | 19 | def to_indices(self, tokens): 20 | if not isinstance(tokens, (list, tuple)): 21 | return self.token_to_idx.get(tokens, self.unk) 22 | return [self.to_indices(token) for token in tokens] -------------------------------------------------------------------------------- /d2l/__init__.py: -------------------------------------------------------------------------------- 1 | """Saved source code for "Dive into Deep Learning" (https://d2l.ai). 
2 | 3 | Please import d2l by one of the following ways: 4 | 5 | from d2l import mxnet as d2l # Use MXNet as the backend 6 | from d2l import torch as d2l # Use PyTorch as the backend 7 | from d2l import tensorflow as d2l # Use TensorFlow as the backend 8 | from d2l import paddle as d2l # Use Paddle as the backend 9 | 10 | """ 11 | 12 | __version__ = "2.0.0" 13 | -------------------------------------------------------------------------------- /graffle/appendix/3dFunc.graffle: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mindspore-courses/d2l-zh/3abddaebbf3188d66d7ffef154c02c9b27d4c5f8/graffle/appendix/3dFunc.graffle -------------------------------------------------------------------------------- /graffle/appendix/ChainNet1.graffle: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mindspore-courses/d2l-zh/3abddaebbf3188d66d7ffef154c02c9b27d4c5f8/graffle/appendix/ChainNet1.graffle -------------------------------------------------------------------------------- /graffle/appendix/ChainNet2.graffle: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mindspore-courses/d2l-zh/3abddaebbf3188d66d7ffef154c02c9b27d4c5f8/graffle/appendix/ChainNet2.graffle -------------------------------------------------------------------------------- /graffle/appendix/GridPoints.graffle: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mindspore-courses/d2l-zh/3abddaebbf3188d66d7ffef154c02c9b27d4c5f8/graffle/appendix/GridPoints.graffle -------------------------------------------------------------------------------- /graffle/appendix/GridTransform.graffle: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mindspore-courses/d2l-zh/3abddaebbf3188d66d7ffef154c02c9b27d4c5f8/graffle/appendix/GridTransform.graffle -------------------------------------------------------------------------------- /graffle/appendix/GridTransformFilled.graffle: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mindspore-courses/d2l-zh/3abddaebbf3188d66d7ffef154c02c9b27d4c5f8/graffle/appendix/GridTransformFilled.graffle -------------------------------------------------------------------------------- /graffle/appendix/GridWithArrow.graffle: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mindspore-courses/d2l-zh/3abddaebbf3188d66d7ffef154c02c9b27d4c5f8/graffle/appendix/GridWithArrow.graffle -------------------------------------------------------------------------------- /graffle/appendix/Marginal.graffle: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mindspore-courses/d2l-zh/3abddaebbf3188d66d7ffef154c02c9b27d4c5f8/graffle/appendix/Marginal.graffle -------------------------------------------------------------------------------- /graffle/appendix/ParVec.graffle: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mindspore-courses/d2l-zh/3abddaebbf3188d66d7ffef154c02c9b27d4c5f8/graffle/appendix/ParVec.graffle -------------------------------------------------------------------------------- /graffle/appendix/ProjVec.graffle: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/mindspore-courses/d2l-zh/3abddaebbf3188d66d7ffef154c02c9b27d4c5f8/graffle/appendix/ProjVec.graffle -------------------------------------------------------------------------------- /graffle/appendix/RectTrans.graffle: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mindspore-courses/d2l-zh/3abddaebbf3188d66d7ffef154c02c9b27d4c5f8/graffle/appendix/RectTrans.graffle -------------------------------------------------------------------------------- /graffle/appendix/SpaceDivision.graffle: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mindspore-courses/d2l-zh/3abddaebbf3188d66d7ffef154c02c9b27d4c5f8/graffle/appendix/SpaceDivision.graffle -------------------------------------------------------------------------------- /graffle/appendix/SpaceDivision3D.graffle: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mindspore-courses/d2l-zh/3abddaebbf3188d66d7ffef154c02c9b27d4c5f8/graffle/appendix/SpaceDivision3D.graffle -------------------------------------------------------------------------------- /graffle/appendix/SubArea.graffle: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mindspore-courses/d2l-zh/3abddaebbf3188d66d7ffef154c02c9b27d4c5f8/graffle/appendix/SubArea.graffle -------------------------------------------------------------------------------- /graffle/appendix/SumOrder.graffle: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mindspore-courses/d2l-zh/3abddaebbf3188d66d7ffef154c02c9b27d4c5f8/graffle/appendix/SumOrder.graffle -------------------------------------------------------------------------------- /graffle/appendix/VecAdd.graffle: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mindspore-courses/d2l-zh/3abddaebbf3188d66d7ffef154c02c9b27d4c5f8/graffle/appendix/VecAdd.graffle -------------------------------------------------------------------------------- /graffle/appendix/VecAngle.graffle: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mindspore-courses/d2l-zh/3abddaebbf3188d66d7ffef154c02c9b27d4c5f8/graffle/appendix/VecAngle.graffle -------------------------------------------------------------------------------- /graffle/appendix/comparing_estimators.graffle: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mindspore-courses/d2l-zh/3abddaebbf3188d66d7ffef154c02c9b27d4c5f8/graffle/appendix/comparing_estimators.graffle -------------------------------------------------------------------------------- /graffle/appendix/mutual_information.graffle: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mindspore-courses/d2l-zh/3abddaebbf3188d66d7ffef154c02c9b27d4c5f8/graffle/appendix/mutual_information.graffle -------------------------------------------------------------------------------- /graffle/appendix/negSecDer.graffle: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/mindspore-courses/d2l-zh/3abddaebbf3188d66d7ffef154c02c9b27d4c5f8/graffle/appendix/negSecDer.graffle -------------------------------------------------------------------------------- /graffle/appendix/posSecDer.graffle: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mindspore-courses/d2l-zh/3abddaebbf3188d66d7ffef154c02c9b27d4c5f8/graffle/appendix/posSecDer.graffle -------------------------------------------------------------------------------- /graffle/appendix/statistical_power.graffle: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mindspore-courses/d2l-zh/3abddaebbf3188d66d7ffef154c02c9b27d4c5f8/graffle/appendix/statistical_power.graffle -------------------------------------------------------------------------------- /graffle/appendix/statistical_significance.graffle: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mindspore-courses/d2l-zh/3abddaebbf3188d66d7ffef154c02c9b27d4c5f8/graffle/appendix/statistical_significance.graffle -------------------------------------------------------------------------------- /graffle/appendix/zeroSecDer.graffle: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mindspore-courses/d2l-zh/3abddaebbf3188d66d7ffef154c02c9b27d4c5f8/graffle/appendix/zeroSecDer.graffle -------------------------------------------------------------------------------- /graffle/attention/add_norm.graffle: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mindspore-courses/d2l-zh/3abddaebbf3188d66d7ffef154c02c9b27d4c5f8/graffle/attention/add_norm.graffle -------------------------------------------------------------------------------- /graffle/attention/attention-output.graffle: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mindspore-courses/d2l-zh/3abddaebbf3188d66d7ffef154c02c9b27d4c5f8/graffle/attention/attention-output.graffle -------------------------------------------------------------------------------- /graffle/attention/attention.graffle: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mindspore-courses/d2l-zh/3abddaebbf3188d66d7ffef154c02c9b27d4c5f8/graffle/attention/attention.graffle -------------------------------------------------------------------------------- /graffle/attention/cnn-rnn-self-attention.graffle: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mindspore-courses/d2l-zh/3abddaebbf3188d66d7ffef154c02c9b27d4c5f8/graffle/attention/cnn-rnn-self-attention.graffle -------------------------------------------------------------------------------- /graffle/attention/encoder-decoder.graffle: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mindspore-courses/d2l-zh/3abddaebbf3188d66d7ffef154c02c9b27d4c5f8/graffle/attention/encoder-decoder.graffle -------------------------------------------------------------------------------- /graffle/attention/eye-book.graffle: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/mindspore-courses/d2l-zh/3abddaebbf3188d66d7ffef154c02c9b27d4c5f8/graffle/attention/eye-book.graffle -------------------------------------------------------------------------------- /graffle/attention/eye-coffee.graffle: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mindspore-courses/d2l-zh/3abddaebbf3188d66d7ffef154c02c9b27d4c5f8/graffle/attention/eye-coffee.graffle -------------------------------------------------------------------------------- /graffle/attention/multi-head-attention.graffle: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mindspore-courses/d2l-zh/3abddaebbf3188d66d7ffef154c02c9b27d4c5f8/graffle/attention/multi-head-attention.graffle -------------------------------------------------------------------------------- /graffle/attention/positional_encoding.graffle: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mindspore-courses/d2l-zh/3abddaebbf3188d66d7ffef154c02c9b27d4c5f8/graffle/attention/positional_encoding.graffle -------------------------------------------------------------------------------- /graffle/attention/qkv.graffle: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mindspore-courses/d2l-zh/3abddaebbf3188d66d7ffef154c02c9b27d4c5f8/graffle/attention/qkv.graffle -------------------------------------------------------------------------------- /graffle/attention/self-attention-predict.graffle: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mindspore-courses/d2l-zh/3abddaebbf3188d66d7ffef154c02c9b27d4c5f8/graffle/attention/self-attention-predict.graffle -------------------------------------------------------------------------------- /graffle/attention/self-attention.graffle: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mindspore-courses/d2l-zh/3abddaebbf3188d66d7ffef154c02c9b27d4c5f8/graffle/attention/self-attention.graffle -------------------------------------------------------------------------------- /graffle/attention/seq2seq-attention-details.graffle: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mindspore-courses/d2l-zh/3abddaebbf3188d66d7ffef154c02c9b27d4c5f8/graffle/attention/seq2seq-attention-details.graffle -------------------------------------------------------------------------------- /graffle/attention/seq2seq_attention.graffle: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mindspore-courses/d2l-zh/3abddaebbf3188d66d7ffef154c02c9b27d4c5f8/graffle/attention/seq2seq_attention.graffle -------------------------------------------------------------------------------- /graffle/attention/transformer.graffle: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mindspore-courses/d2l-zh/3abddaebbf3188d66d7ffef154c02c9b27d4c5f8/graffle/attention/transformer.graffle -------------------------------------------------------------------------------- /graffle/book-org.graffle: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/mindspore-courses/d2l-zh/3abddaebbf3188d66d7ffef154c02c9b27d4c5f8/graffle/book-org.graffle -------------------------------------------------------------------------------- /graffle/cnn-basic/conv-1x1.graffle: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mindspore-courses/d2l-zh/3abddaebbf3188d66d7ffef154c02c9b27d4c5f8/graffle/cnn-basic/conv-1x1.graffle -------------------------------------------------------------------------------- /graffle/cnn-basic/conv-multi-in.graffle: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mindspore-courses/d2l-zh/3abddaebbf3188d66d7ffef154c02c9b27d4c5f8/graffle/cnn-basic/conv-multi-in.graffle -------------------------------------------------------------------------------- /graffle/cnn-basic/conv-pad.graffle: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mindspore-courses/d2l-zh/3abddaebbf3188d66d7ffef154c02c9b27d4c5f8/graffle/cnn-basic/conv-pad.graffle -------------------------------------------------------------------------------- /graffle/cnn-basic/conv-stride.graffle: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mindspore-courses/d2l-zh/3abddaebbf3188d66d7ffef154c02c9b27d4c5f8/graffle/cnn-basic/conv-stride.graffle -------------------------------------------------------------------------------- /graffle/cnn-basic/correlation.graffle: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mindspore-courses/d2l-zh/3abddaebbf3188d66d7ffef154c02c9b27d4c5f8/graffle/cnn-basic/correlation.graffle -------------------------------------------------------------------------------- /graffle/cnn-basic/lenet-vert.graffle: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mindspore-courses/d2l-zh/3abddaebbf3188d66d7ffef154c02c9b27d4c5f8/graffle/cnn-basic/lenet-vert.graffle -------------------------------------------------------------------------------- /graffle/cnn-basic/lenet.graffle: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mindspore-courses/d2l-zh/3abddaebbf3188d66d7ffef154c02c9b27d4c5f8/graffle/cnn-basic/lenet.graffle -------------------------------------------------------------------------------- /graffle/cnn-basic/pooling.graffle: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mindspore-courses/d2l-zh/3abddaebbf3188d66d7ffef154c02c9b27d4c5f8/graffle/cnn-basic/pooling.graffle -------------------------------------------------------------------------------- /graffle/cnn-basic/waldo-mask.graffle/data.plist: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mindspore-courses/d2l-zh/3abddaebbf3188d66d7ffef154c02c9b27d4c5f8/graffle/cnn-basic/waldo-mask.graffle/data.plist -------------------------------------------------------------------------------- /graffle/cnn-basic/waldo-mask.graffle/image1.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mindspore-courses/d2l-zh/3abddaebbf3188d66d7ffef154c02c9b27d4c5f8/graffle/cnn-basic/waldo-mask.graffle/image1.jpg 
-------------------------------------------------------------------------------- /graffle/cnn-modern/ResNetManyFlavor.graffle: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mindspore-courses/d2l-zh/3abddaebbf3188d66d7ffef154c02c9b27d4c5f8/graffle/cnn-modern/ResNetManyFlavor.graffle -------------------------------------------------------------------------------- /graffle/cnn-modern/alexnet.graffle: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mindspore-courses/d2l-zh/3abddaebbf3188d66d7ffef154c02c9b27d4c5f8/graffle/cnn-modern/alexnet.graffle -------------------------------------------------------------------------------- /graffle/cnn-modern/densenet-block.graffle: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mindspore-courses/d2l-zh/3abddaebbf3188d66d7ffef154c02c9b27d4c5f8/graffle/cnn-modern/densenet-block.graffle -------------------------------------------------------------------------------- /graffle/cnn-modern/densenet.graffle: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mindspore-courses/d2l-zh/3abddaebbf3188d66d7ffef154c02c9b27d4c5f8/graffle/cnn-modern/densenet.graffle -------------------------------------------------------------------------------- /graffle/cnn-modern/functionclasses.graffle: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mindspore-courses/d2l-zh/3abddaebbf3188d66d7ffef154c02c9b27d4c5f8/graffle/cnn-modern/functionclasses.graffle -------------------------------------------------------------------------------- /graffle/cnn-modern/inception-full.graffle: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mindspore-courses/d2l-zh/3abddaebbf3188d66d7ffef154c02c9b27d4c5f8/graffle/cnn-modern/inception-full.graffle -------------------------------------------------------------------------------- /graffle/cnn-modern/inception.graffle: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mindspore-courses/d2l-zh/3abddaebbf3188d66d7ffef154c02c9b27d4c5f8/graffle/cnn-modern/inception.graffle -------------------------------------------------------------------------------- /graffle/cnn-modern/nin-compare.graffle: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mindspore-courses/d2l-zh/3abddaebbf3188d66d7ffef154c02c9b27d4c5f8/graffle/cnn-modern/nin-compare.graffle -------------------------------------------------------------------------------- /graffle/cnn-modern/nin.graffle: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mindspore-courses/d2l-zh/3abddaebbf3188d66d7ffef154c02c9b27d4c5f8/graffle/cnn-modern/nin.graffle -------------------------------------------------------------------------------- /graffle/cnn-modern/residual-block.graffle: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mindspore-courses/d2l-zh/3abddaebbf3188d66d7ffef154c02c9b27d4c5f8/graffle/cnn-modern/residual-block.graffle -------------------------------------------------------------------------------- /graffle/cnn-modern/resnet-block.graffle: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/mindspore-courses/d2l-zh/3abddaebbf3188d66d7ffef154c02c9b27d4c5f8/graffle/cnn-modern/resnet-block.graffle -------------------------------------------------------------------------------- /graffle/cnn-modern/resnet18.graffle: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mindspore-courses/d2l-zh/3abddaebbf3188d66d7ffef154c02c9b27d4c5f8/graffle/cnn-modern/resnet18.graffle -------------------------------------------------------------------------------- /graffle/cnn-modern/vgg.graffle: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mindspore-courses/d2l-zh/3abddaebbf3188d66d7ffef154c02c9b27d4c5f8/graffle/cnn-modern/vgg.graffle -------------------------------------------------------------------------------- /graffle/computation/asyncgraph.graffle: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mindspore-courses/d2l-zh/3abddaebbf3188d66d7ffef154c02c9b27d4c5f8/graffle/computation/asyncgraph.graffle -------------------------------------------------------------------------------- /graffle/computation/blocks.graffle: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mindspore-courses/d2l-zh/3abddaebbf3188d66d7ffef154c02c9b27d4c5f8/graffle/computation/blocks.graffle -------------------------------------------------------------------------------- /graffle/computation/computegraph.graffle: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mindspore-courses/d2l-zh/3abddaebbf3188d66d7ffef154c02c9b27d4c5f8/graffle/computation/computegraph.graffle -------------------------------------------------------------------------------- /graffle/computation/copyto.graffle: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mindspore-courses/d2l-zh/3abddaebbf3188d66d7ffef154c02c9b27d4c5f8/graffle/computation/copyto.graffle -------------------------------------------------------------------------------- /graffle/computation/frontends.graffle: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mindspore-courses/d2l-zh/3abddaebbf3188d66d7ffef154c02c9b27d4c5f8/graffle/computation/frontends.graffle -------------------------------------------------------------------------------- /graffle/computation/threading.graffle: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mindspore-courses/d2l-zh/3abddaebbf3188d66d7ffef154c02c9b27d4c5f8/graffle/computation/threading.graffle -------------------------------------------------------------------------------- /graffle/computation/twogpu.graffle: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mindspore-courses/d2l-zh/3abddaebbf3188d66d7ffef154c02c9b27d4c5f8/graffle/computation/twogpu.graffle -------------------------------------------------------------------------------- /graffle/contribute.graffle: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/mindspore-courses/d2l-zh/3abddaebbf3188d66d7ffef154c02c9b27d4c5f8/graffle/contribute.graffle -------------------------------------------------------------------------------- /graffle/convert.sh: -------------------------------------------------------------------------------- 1 | find . -iname '*.pdf' | while read f; do pdf2svg $f ${f%.pdf}.svg; done 2 | -------------------------------------------------------------------------------- /graffle/gan/gan.graffle: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mindspore-courses/d2l-zh/3abddaebbf3188d66d7ffef154c02c9b27d4c5f8/graffle/gan/gan.graffle -------------------------------------------------------------------------------- /graffle/intro/data-collection.graffle: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mindspore-courses/d2l-zh/3abddaebbf3188d66d7ffef154c02c9b27d4c5f8/graffle/intro/data-collection.graffle -------------------------------------------------------------------------------- /graffle/intro/diveintodl.graffle: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mindspore-courses/d2l-zh/3abddaebbf3188d66d7ffef154c02c9b27d4c5f8/graffle/intro/diveintodl.graffle -------------------------------------------------------------------------------- /graffle/intro/ml-loop.graffle: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mindspore-courses/d2l-zh/3abddaebbf3188d66d7ffef154c02c9b27d4c5f8/graffle/intro/ml-loop.graffle -------------------------------------------------------------------------------- /graffle/intro/rl-environment.graffle: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mindspore-courses/d2l-zh/3abddaebbf3188d66d7ffef154c02c9b27d4c5f8/graffle/intro/rl-environment.graffle -------------------------------------------------------------------------------- /graffle/intro/supervised-learning.graffle: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mindspore-courses/d2l-zh/3abddaebbf3188d66d7ffef154c02c9b27d4c5f8/graffle/intro/supervised-learning.graffle -------------------------------------------------------------------------------- /graffle/intro/wake-word.graffle: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mindspore-courses/d2l-zh/3abddaebbf3188d66d7ffef154c02c9b27d4c5f8/graffle/intro/wake-word.graffle -------------------------------------------------------------------------------- /graffle/linear/fit_linreg.graffle: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mindspore-courses/d2l-zh/3abddaebbf3188d66d7ffef154c02c9b27d4c5f8/graffle/linear/fit_linreg.graffle -------------------------------------------------------------------------------- /graffle/linear/neuron.graffle: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mindspore-courses/d2l-zh/3abddaebbf3188d66d7ffef154c02c9b27d4c5f8/graffle/linear/neuron.graffle -------------------------------------------------------------------------------- /graffle/linear/singlelayer.graffle: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/mindspore-courses/d2l-zh/3abddaebbf3188d66d7ffef154c02c9b27d4c5f8/graffle/linear/singlelayer.graffle -------------------------------------------------------------------------------- /graffle/linear/singleneuron.graffle: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mindspore-courses/d2l-zh/3abddaebbf3188d66d7ffef154c02c9b27d4c5f8/graffle/linear/singleneuron.graffle -------------------------------------------------------------------------------- /graffle/linear/softmaxreg.graffle: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mindspore-courses/d2l-zh/3abddaebbf3188d66d7ffef154c02c9b27d4c5f8/graffle/linear/softmaxreg.graffle -------------------------------------------------------------------------------- /graffle/mlp/add_norm.graffle: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mindspore-courses/d2l-zh/3abddaebbf3188d66d7ffef154c02c9b27d4c5f8/graffle/mlp/add_norm.graffle -------------------------------------------------------------------------------- /graffle/mlp/capacity_vs_error.graffle: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mindspore-courses/d2l-zh/3abddaebbf3188d66d7ffef154c02c9b27d4c5f8/graffle/mlp/capacity_vs_error.graffle -------------------------------------------------------------------------------- /graffle/mlp/dropout2.graffle: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mindspore-courses/d2l-zh/3abddaebbf3188d66d7ffef154c02c9b27d4c5f8/graffle/mlp/dropout2.graffle -------------------------------------------------------------------------------- /graffle/mlp/forward.graffle: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mindspore-courses/d2l-zh/3abddaebbf3188d66d7ffef154c02c9b27d4c5f8/graffle/mlp/forward.graffle -------------------------------------------------------------------------------- /graffle/mlp/mlp.graffle: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mindspore-courses/d2l-zh/3abddaebbf3188d66d7ffef154c02c9b27d4c5f8/graffle/mlp/mlp.graffle -------------------------------------------------------------------------------- /graffle/nlp/bert-input.graffle: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mindspore-courses/d2l-zh/3abddaebbf3188d66d7ffef154c02c9b27d4c5f8/graffle/nlp/bert-input.graffle -------------------------------------------------------------------------------- /graffle/nlp/bert-one-seq.graffle: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mindspore-courses/d2l-zh/3abddaebbf3188d66d7ffef154c02c9b27d4c5f8/graffle/nlp/bert-one-seq.graffle -------------------------------------------------------------------------------- /graffle/nlp/bert-qa.graffle: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mindspore-courses/d2l-zh/3abddaebbf3188d66d7ffef154c02c9b27d4c5f8/graffle/nlp/bert-qa.graffle 
-------------------------------------------------------------------------------- /graffle/nlp/bert-tagging.graffle: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mindspore-courses/d2l-zh/3abddaebbf3188d66d7ffef154c02c9b27d4c5f8/graffle/nlp/bert-tagging.graffle -------------------------------------------------------------------------------- /graffle/nlp/bert-two-seqs.graffle: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mindspore-courses/d2l-zh/3abddaebbf3188d66d7ffef154c02c9b27d4c5f8/graffle/nlp/bert-two-seqs.graffle -------------------------------------------------------------------------------- /graffle/nlp/cbow.graffle: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mindspore-courses/d2l-zh/3abddaebbf3188d66d7ffef154c02c9b27d4c5f8/graffle/nlp/cbow.graffle -------------------------------------------------------------------------------- /graffle/nlp/conv1d-2d.graffle: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mindspore-courses/d2l-zh/3abddaebbf3188d66d7ffef154c02c9b27d4c5f8/graffle/nlp/conv1d-2d.graffle -------------------------------------------------------------------------------- /graffle/nlp/conv1d-channel.graffle: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mindspore-courses/d2l-zh/3abddaebbf3188d66d7ffef154c02c9b27d4c5f8/graffle/nlp/conv1d-channel.graffle -------------------------------------------------------------------------------- /graffle/nlp/conv1d.graffle: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mindspore-courses/d2l-zh/3abddaebbf3188d66d7ffef154c02c9b27d4c5f8/graffle/nlp/conv1d.graffle -------------------------------------------------------------------------------- /graffle/nlp/elmo-gpt-bert.graffle: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mindspore-courses/d2l-zh/3abddaebbf3188d66d7ffef154c02c9b27d4c5f8/graffle/nlp/elmo-gpt-bert.graffle -------------------------------------------------------------------------------- /graffle/nlp/hi-softmax.graffle: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mindspore-courses/d2l-zh/3abddaebbf3188d66d7ffef154c02c9b27d4c5f8/graffle/nlp/hi-softmax.graffle -------------------------------------------------------------------------------- /graffle/nlp/nli_attention.graffle: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mindspore-courses/d2l-zh/3abddaebbf3188d66d7ffef154c02c9b27d4c5f8/graffle/nlp/nli_attention.graffle -------------------------------------------------------------------------------- /graffle/nlp/nlp-map-app.graffle: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mindspore-courses/d2l-zh/3abddaebbf3188d66d7ffef154c02c9b27d4c5f8/graffle/nlp/nlp-map-app.graffle -------------------------------------------------------------------------------- /graffle/nlp/nlp-map-nli-attention.graffle: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/mindspore-courses/d2l-zh/3abddaebbf3188d66d7ffef154c02c9b27d4c5f8/graffle/nlp/nlp-map-nli-attention.graffle -------------------------------------------------------------------------------- /graffle/nlp/nlp-map-nli-bert.graffle: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mindspore-courses/d2l-zh/3abddaebbf3188d66d7ffef154c02c9b27d4c5f8/graffle/nlp/nlp-map-nli-bert.graffle -------------------------------------------------------------------------------- /graffle/nlp/nlp-map-pretrain.graffle: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mindspore-courses/d2l-zh/3abddaebbf3188d66d7ffef154c02c9b27d4c5f8/graffle/nlp/nlp-map-pretrain.graffle -------------------------------------------------------------------------------- /graffle/nlp/nlp-map-sa-cnn.graffle: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mindspore-courses/d2l-zh/3abddaebbf3188d66d7ffef154c02c9b27d4c5f8/graffle/nlp/nlp-map-sa-cnn.graffle -------------------------------------------------------------------------------- /graffle/nlp/nlp-map-sa-rnn.graffle: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mindspore-courses/d2l-zh/3abddaebbf3188d66d7ffef154c02c9b27d4c5f8/graffle/nlp/nlp-map-sa-rnn.graffle -------------------------------------------------------------------------------- /graffle/nlp/sentiment-rnn.graffle: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mindspore-courses/d2l-zh/3abddaebbf3188d66d7ffef154c02c9b27d4c5f8/graffle/nlp/sentiment-rnn.graffle -------------------------------------------------------------------------------- /graffle/nlp/skip-gram.graffle: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mindspore-courses/d2l-zh/3abddaebbf3188d66d7ffef154c02c9b27d4c5f8/graffle/nlp/skip-gram.graffle -------------------------------------------------------------------------------- /graffle/nlp/textcnn.graffle: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mindspore-courses/d2l-zh/3abddaebbf3188d66d7ffef154c02c9b27d4c5f8/graffle/nlp/textcnn.graffle -------------------------------------------------------------------------------- /graffle/optimization/convex.graffle: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mindspore-courses/d2l-zh/3abddaebbf3188d66d7ffef154c02c9b27d4c5f8/graffle/optimization/convex.graffle -------------------------------------------------------------------------------- /graffle/performance/a77.graffle: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mindspore-courses/d2l-zh/3abddaebbf3188d66d7ffef154c02c9b27d4c5f8/graffle/performance/a77.graffle -------------------------------------------------------------------------------- /graffle/performance/bw-hierarchy.graffle: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mindspore-courses/d2l-zh/3abddaebbf3188d66d7ffef154c02c9b27d4c5f8/graffle/performance/bw-hierarchy.graffle -------------------------------------------------------------------------------- 
/graffle/performance/bw-hierarchy.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mindspore-courses/d2l-zh/3abddaebbf3188d66d7ffef154c02c9b27d4c5f8/graffle/performance/bw-hierarchy.pdf -------------------------------------------------------------------------------- /graffle/performance/data-parallel.graffle: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mindspore-courses/d2l-zh/3abddaebbf3188d66d7ffef154c02c9b27d4c5f8/graffle/performance/data-parallel.graffle -------------------------------------------------------------------------------- /graffle/performance/falseshare.graffle: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mindspore-courses/d2l-zh/3abddaebbf3188d66d7ffef154c02c9b27d4c5f8/graffle/performance/falseshare.graffle -------------------------------------------------------------------------------- /graffle/performance/mobo.graffle/data.plist: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mindspore-courses/d2l-zh/3abddaebbf3188d66d7ffef154c02c9b27d4c5f8/graffle/performance/mobo.graffle/data.plist -------------------------------------------------------------------------------- /graffle/performance/mobo.graffle/image1.tiff: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mindspore-courses/d2l-zh/3abddaebbf3188d66d7ffef154c02c9b27d4c5f8/graffle/performance/mobo.graffle/image1.tiff -------------------------------------------------------------------------------- /graffle/performance/mobo.graffle/preview.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mindspore-courses/d2l-zh/3abddaebbf3188d66d7ffef154c02c9b27d4c5f8/graffle/performance/mobo.graffle/preview.jpeg -------------------------------------------------------------------------------- /graffle/performance/neon128.graffle: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mindspore-courses/d2l-zh/3abddaebbf3188d66d7ffef154c02c9b27d4c5f8/graffle/performance/neon128.graffle -------------------------------------------------------------------------------- /graffle/performance/ps-distributed.graffle: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mindspore-courses/d2l-zh/3abddaebbf3188d66d7ffef154c02c9b27d4c5f8/graffle/performance/ps-distributed.graffle -------------------------------------------------------------------------------- /graffle/performance/ps-distributed.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mindspore-courses/d2l-zh/3abddaebbf3188d66d7ffef154c02c9b27d4c5f8/graffle/performance/ps-distributed.pdf -------------------------------------------------------------------------------- /graffle/performance/ps-multimachine.graffle: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mindspore-courses/d2l-zh/3abddaebbf3188d66d7ffef154c02c9b27d4c5f8/graffle/performance/ps-multimachine.graffle -------------------------------------------------------------------------------- /graffle/performance/ps-multimachine.pdf: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/mindspore-courses/d2l-zh/3abddaebbf3188d66d7ffef154c02c9b27d4c5f8/graffle/performance/ps-multimachine.pdf -------------------------------------------------------------------------------- /graffle/performance/ps-multips.graffle: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mindspore-courses/d2l-zh/3abddaebbf3188d66d7ffef154c02c9b27d4c5f8/graffle/performance/ps-multips.graffle -------------------------------------------------------------------------------- /graffle/performance/ps-multips.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mindspore-courses/d2l-zh/3abddaebbf3188d66d7ffef154c02c9b27d4c5f8/graffle/performance/ps-multips.pdf -------------------------------------------------------------------------------- /graffle/performance/ps.graffle: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mindspore-courses/d2l-zh/3abddaebbf3188d66d7ffef154c02c9b27d4c5f8/graffle/performance/ps.graffle -------------------------------------------------------------------------------- /graffle/performance/ps.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mindspore-courses/d2l-zh/3abddaebbf3188d66d7ffef154c02c9b27d4c5f8/graffle/performance/ps.pdf -------------------------------------------------------------------------------- /graffle/performance/splitting.graffle: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mindspore-courses/d2l-zh/3abddaebbf3188d66d7ffef154c02c9b27d4c5f8/graffle/performance/splitting.graffle -------------------------------------------------------------------------------- /graffle/preliminaries/polygon_circle.graffle: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mindspore-courses/d2l-zh/3abddaebbf3188d66d7ffef154c02c9b27d4c5f8/graffle/preliminaries/polygon_circle.graffle -------------------------------------------------------------------------------- /graffle/recsys/rec-caser.graffle: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mindspore-courses/d2l-zh/3abddaebbf3188d66d7ffef154c02c9b27d4c5f8/graffle/recsys/rec-caser.graffle -------------------------------------------------------------------------------- /graffle/recsys/rec-deepfm.graffle: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mindspore-courses/d2l-zh/3abddaebbf3188d66d7ffef154c02c9b27d4c5f8/graffle/recsys/rec-deepfm.graffle -------------------------------------------------------------------------------- /graffle/recsys/rec-intro.graffle: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mindspore-courses/d2l-zh/3abddaebbf3188d66d7ffef154c02c9b27d4c5f8/graffle/recsys/rec-intro.graffle -------------------------------------------------------------------------------- /graffle/recsys/rec-mf.graffle: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mindspore-courses/d2l-zh/3abddaebbf3188d66d7ffef154c02c9b27d4c5f8/graffle/recsys/rec-mf.graffle 
-------------------------------------------------------------------------------- /graffle/recsys/rec-neumf.graffle: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mindspore-courses/d2l-zh/3abddaebbf3188d66d7ffef154c02c9b27d4c5f8/graffle/recsys/rec-neumf.graffle -------------------------------------------------------------------------------- /graffle/recsys/rec-ranking.graffle: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mindspore-courses/d2l-zh/3abddaebbf3188d66d7ffef154c02c9b27d4c5f8/graffle/recsys/rec-ranking.graffle -------------------------------------------------------------------------------- /graffle/recsys/rec-seq-data.graffle: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mindspore-courses/d2l-zh/3abddaebbf3188d66d7ffef154c02c9b27d4c5f8/graffle/recsys/rec-seq-data.graffle -------------------------------------------------------------------------------- /graffle/rnn/beam-search.graffle: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mindspore-courses/d2l-zh/3abddaebbf3188d66d7ffef154c02c9b27d4c5f8/graffle/rnn/beam-search.graffle -------------------------------------------------------------------------------- /graffle/rnn/birnn-ORIGINAL.graffle: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mindspore-courses/d2l-zh/3abddaebbf3188d66d7ffef154c02c9b27d4c5f8/graffle/rnn/birnn-ORIGINAL.graffle -------------------------------------------------------------------------------- /graffle/rnn/birnn.graffle: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mindspore-courses/d2l-zh/3abddaebbf3188d66d7ffef154c02c9b27d4c5f8/graffle/rnn/birnn.graffle -------------------------------------------------------------------------------- /graffle/rnn/deep-rnn-ORIGINAL.graffle: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mindspore-courses/d2l-zh/3abddaebbf3188d66d7ffef154c02c9b27d4c5f8/graffle/rnn/deep-rnn-ORIGINAL.graffle -------------------------------------------------------------------------------- /graffle/rnn/deep-rnn.graffle: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mindspore-courses/d2l-zh/3abddaebbf3188d66d7ffef154c02c9b27d4c5f8/graffle/rnn/deep-rnn.graffle -------------------------------------------------------------------------------- /graffle/rnn/hmm.graffle: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mindspore-courses/d2l-zh/3abddaebbf3188d66d7ffef154c02c9b27d4c5f8/graffle/rnn/hmm.graffle -------------------------------------------------------------------------------- /graffle/rnn/lang-model-data.graffle: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mindspore-courses/d2l-zh/3abddaebbf3188d66d7ffef154c02c9b27d4c5f8/graffle/rnn/lang-model-data.graffle -------------------------------------------------------------------------------- /graffle/rnn/rnn-bptt.graffle: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/mindspore-courses/d2l-zh/3abddaebbf3188d66d7ffef154c02c9b27d4c5f8/graffle/rnn/rnn-bptt.graffle -------------------------------------------------------------------------------- /graffle/rnn/rnn-train.graffle: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mindspore-courses/d2l-zh/3abddaebbf3188d66d7ffef154c02c9b27d4c5f8/graffle/rnn/rnn-train.graffle -------------------------------------------------------------------------------- /graffle/rnn/rnn.graffle: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mindspore-courses/d2l-zh/3abddaebbf3188d66d7ffef154c02c9b27d4c5f8/graffle/rnn/rnn.graffle -------------------------------------------------------------------------------- /graffle/rnn/s2s-prob1.graffle: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mindspore-courses/d2l-zh/3abddaebbf3188d66d7ffef154c02c9b27d4c5f8/graffle/rnn/s2s-prob1.graffle -------------------------------------------------------------------------------- /graffle/rnn/s2s-prob2.graffle: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mindspore-courses/d2l-zh/3abddaebbf3188d66d7ffef154c02c9b27d4c5f8/graffle/rnn/s2s-prob2.graffle -------------------------------------------------------------------------------- /graffle/rnn/seq2seq-details.graffle: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mindspore-courses/d2l-zh/3abddaebbf3188d66d7ffef154c02c9b27d4c5f8/graffle/rnn/seq2seq-details.graffle -------------------------------------------------------------------------------- /graffle/rnn/seq2seq.graffle: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mindspore-courses/d2l-zh/3abddaebbf3188d66d7ffef154c02c9b27d4c5f8/graffle/rnn/seq2seq.graffle -------------------------------------------------------------------------------- /graffle/rnn/seq2seq_predict.graffle: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mindspore-courses/d2l-zh/3abddaebbf3188d66d7ffef154c02c9b27d4c5f8/graffle/rnn/seq2seq_predict.graffle -------------------------------------------------------------------------------- /graffle/rnn/sequence-model.graffle: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mindspore-courses/d2l-zh/3abddaebbf3188d66d7ffef154c02c9b27d4c5f8/graffle/rnn/sequence-model.graffle -------------------------------------------------------------------------------- /graffle/rnn/timemachine-5gram.graffle: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mindspore-courses/d2l-zh/3abddaebbf3188d66d7ffef154c02c9b27d4c5f8/graffle/rnn/timemachine-5gram.graffle -------------------------------------------------------------------------------- /graffle/rnn/truncated-bptt.graffle: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mindspore-courses/d2l-zh/3abddaebbf3188d66d7ffef154c02c9b27d4c5f8/graffle/rnn/truncated-bptt.graffle -------------------------------------------------------------------------------- /graffle/transformer.graffle: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/mindspore-courses/d2l-zh/3abddaebbf3188d66d7ffef154c02c9b27d4c5f8/graffle/transformer.graffle -------------------------------------------------------------------------------- /graffle/vision/anchor-label.graffle: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mindspore-courses/d2l-zh/3abddaebbf3188d66d7ffef154c02c9b27d4c5f8/graffle/vision/anchor-label.graffle -------------------------------------------------------------------------------- /graffle/vision/fast-rcnn.graffle: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mindspore-courses/d2l-zh/3abddaebbf3188d66d7ffef154c02c9b27d4c5f8/graffle/vision/fast-rcnn.graffle -------------------------------------------------------------------------------- /graffle/vision/faster-rcnn.graffle: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mindspore-courses/d2l-zh/3abddaebbf3188d66d7ffef154c02c9b27d4c5f8/graffle/vision/faster-rcnn.graffle -------------------------------------------------------------------------------- /graffle/vision/fcn.graffle: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mindspore-courses/d2l-zh/3abddaebbf3188d66d7ffef154c02c9b27d4c5f8/graffle/vision/fcn.graffle -------------------------------------------------------------------------------- /graffle/vision/finetune.graffle: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mindspore-courses/d2l-zh/3abddaebbf3188d66d7ffef154c02c9b27d4c5f8/graffle/vision/finetune.graffle -------------------------------------------------------------------------------- /graffle/vision/iou.graffle: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mindspore-courses/d2l-zh/3abddaebbf3188d66d7ffef154c02c9b27d4c5f8/graffle/vision/iou.graffle -------------------------------------------------------------------------------- /graffle/vision/mask-rcnn.graffle: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mindspore-courses/d2l-zh/3abddaebbf3188d66d7ffef154c02c9b27d4c5f8/graffle/vision/mask-rcnn.graffle -------------------------------------------------------------------------------- /graffle/vision/neural-style.graffle: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mindspore-courses/d2l-zh/3abddaebbf3188d66d7ffef154c02c9b27d4c5f8/graffle/vision/neural-style.graffle -------------------------------------------------------------------------------- /graffle/vision/r-cnn.graffle: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mindspore-courses/d2l-zh/3abddaebbf3188d66d7ffef154c02c9b27d4c5f8/graffle/vision/r-cnn.graffle -------------------------------------------------------------------------------- /graffle/vision/roi.graffle: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mindspore-courses/d2l-zh/3abddaebbf3188d66d7ffef154c02c9b27d4c5f8/graffle/vision/roi.graffle 
-------------------------------------------------------------------------------- /graffle/vision/segmentation.graffle: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mindspore-courses/d2l-zh/3abddaebbf3188d66d7ffef154c02c9b27d4c5f8/graffle/vision/segmentation.graffle -------------------------------------------------------------------------------- /graffle/vision/ssd.graffle: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mindspore-courses/d2l-zh/3abddaebbf3188d66d7ffef154c02c9b27d4c5f8/graffle/vision/ssd.graffle -------------------------------------------------------------------------------- /graffle/vision/style-transfer.graffle: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mindspore-courses/d2l-zh/3abddaebbf3188d66d7ffef154c02c9b27d4c5f8/graffle/vision/style-transfer.graffle -------------------------------------------------------------------------------- /graffle/vision/trans_conv.graffle: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mindspore-courses/d2l-zh/3abddaebbf3188d66d7ffef154c02c9b27d4c5f8/graffle/vision/trans_conv.graffle -------------------------------------------------------------------------------- /graffle/vision/trans_conv_2.graffle: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mindspore-courses/d2l-zh/3abddaebbf3188d66d7ffef154c02c9b27d4c5f8/graffle/vision/trans_conv_2.graffle -------------------------------------------------------------------------------- /graffle/vision/trans_conv_pad1_2.graffle: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mindspore-courses/d2l-zh/3abddaebbf3188d66d7ffef154c02c9b27d4c5f8/graffle/vision/trans_conv_pad1_2.graffle -------------------------------------------------------------------------------- /graffle/vision/trans_conv_stride2.graffle: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mindspore-courses/d2l-zh/3abddaebbf3188d66d7ffef154c02c9b27d4c5f8/graffle/vision/trans_conv_stride2.graffle -------------------------------------------------------------------------------- /graffle/vision/trans_conv_stride2_2.graffle: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mindspore-courses/d2l-zh/3abddaebbf3188d66d7ffef154c02c9b27d4c5f8/graffle/vision/trans_conv_stride2_2.graffle -------------------------------------------------------------------------------- /img/autumn-oak.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mindspore-courses/d2l-zh/3abddaebbf3188d66d7ffef154c02c9b27d4c5f8/img/autumn-oak.jpg -------------------------------------------------------------------------------- /img/aws.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mindspore-courses/d2l-zh/3abddaebbf3188d66d7ffef154c02c9b27d4c5f8/img/aws.png -------------------------------------------------------------------------------- /img/banana.jpg: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/mindspore-courses/d2l-zh/3abddaebbf3188d66d7ffef154c02c9b27d4c5f8/img/banana.jpg -------------------------------------------------------------------------------- /img/cat-dog-pixels.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mindspore-courses/d2l-zh/3abddaebbf3188d66d7ffef154c02c9b27d4c5f8/img/cat-dog-pixels.png -------------------------------------------------------------------------------- /img/cat1.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mindspore-courses/d2l-zh/3abddaebbf3188d66d7ffef154c02c9b27d4c5f8/img/cat1.jpg -------------------------------------------------------------------------------- /img/cat2.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mindspore-courses/d2l-zh/3abddaebbf3188d66d7ffef154c02c9b27d4c5f8/img/cat2.jpg -------------------------------------------------------------------------------- /img/cat3.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mindspore-courses/d2l-zh/3abddaebbf3188d66d7ffef154c02c9b27d4c5f8/img/cat3.jpg -------------------------------------------------------------------------------- /img/catdog.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mindspore-courses/d2l-zh/3abddaebbf3188d66d7ffef154c02c9b27d4c5f8/img/catdog.jpg -------------------------------------------------------------------------------- /img/chmod.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mindspore-courses/d2l-zh/3abddaebbf3188d66d7ffef154c02c9b27d4c5f8/img/chmod.png -------------------------------------------------------------------------------- /img/colab-2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mindspore-courses/d2l-zh/3abddaebbf3188d66d7ffef154c02c9b27d4c5f8/img/colab-2.png -------------------------------------------------------------------------------- /img/colab.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mindspore-courses/d2l-zh/3abddaebbf3188d66d7ffef154c02c9b27d4c5f8/img/colab.png -------------------------------------------------------------------------------- /img/connect.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mindspore-courses/d2l-zh/3abddaebbf3188d66d7ffef154c02c9b27d4c5f8/img/connect.png -------------------------------------------------------------------------------- /img/cuda101.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mindspore-courses/d2l-zh/3abddaebbf3188d66d7ffef154c02c9b27d4c5f8/img/cuda101.png -------------------------------------------------------------------------------- /img/death-cap.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mindspore-courses/d2l-zh/3abddaebbf3188d66d7ffef154c02c9b27d4c5f8/img/death-cap.jpg -------------------------------------------------------------------------------- /img/deeplearning-amazon.jpg: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/mindspore-courses/d2l-zh/3abddaebbf3188d66d7ffef154c02c9b27d4c5f8/img/deeplearning-amazon.jpg -------------------------------------------------------------------------------- /img/disk.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mindspore-courses/d2l-zh/3abddaebbf3188d66d7ffef154c02c9b27d4c5f8/img/disk.png -------------------------------------------------------------------------------- /img/dog1.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mindspore-courses/d2l-zh/3abddaebbf3188d66d7ffef154c02c9b27d4c5f8/img/dog1.jpg -------------------------------------------------------------------------------- /img/dog2.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mindspore-courses/d2l-zh/3abddaebbf3188d66d7ffef154c02c9b27d4c5f8/img/dog2.jpg -------------------------------------------------------------------------------- /img/ec2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mindspore-courses/d2l-zh/3abddaebbf3188d66d7ffef154c02c9b27d4c5f8/img/ec2.png -------------------------------------------------------------------------------- /img/edit-file.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mindspore-courses/d2l-zh/3abddaebbf3188d66d7ffef154c02c9b27d4c5f8/img/edit-file.png -------------------------------------------------------------------------------- /img/eye-book.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mindspore-courses/d2l-zh/3abddaebbf3188d66d7ffef154c02c9b27d4c5f8/img/eye-book.png -------------------------------------------------------------------------------- /img/eye-coffee.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mindspore-courses/d2l-zh/3abddaebbf3188d66d7ffef154c02c9b27d4c5f8/img/eye-coffee.png -------------------------------------------------------------------------------- /img/filters.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mindspore-courses/d2l-zh/3abddaebbf3188d66d7ffef154c02c9b27d4c5f8/img/filters.png -------------------------------------------------------------------------------- /img/frontends.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mindspore-courses/d2l-zh/3abddaebbf3188d66d7ffef154c02c9b27d4c5f8/img/frontends.png -------------------------------------------------------------------------------- /img/frontends/image10.tiff: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mindspore-courses/d2l-zh/3abddaebbf3188d66d7ffef154c02c9b27d4c5f8/img/frontends/image10.tiff -------------------------------------------------------------------------------- /img/frontends/image2.tiff: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mindspore-courses/d2l-zh/3abddaebbf3188d66d7ffef154c02c9b27d4c5f8/img/frontends/image2.tiff -------------------------------------------------------------------------------- /img/frontends/image3.tiff: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/mindspore-courses/d2l-zh/3abddaebbf3188d66d7ffef154c02c9b27d4c5f8/img/frontends/image3.tiff -------------------------------------------------------------------------------- /img/frontends/image4.tiff: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mindspore-courses/d2l-zh/3abddaebbf3188d66d7ffef154c02c9b27d4c5f8/img/frontends/image4.tiff -------------------------------------------------------------------------------- /img/frontends/image5.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mindspore-courses/d2l-zh/3abddaebbf3188d66d7ffef154c02c9b27d4c5f8/img/frontends/image5.pdf -------------------------------------------------------------------------------- /img/frontends/image8.tiff: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mindspore-courses/d2l-zh/3abddaebbf3188d66d7ffef154c02c9b27d4c5f8/img/frontends/image8.tiff -------------------------------------------------------------------------------- /img/frontpage/jd-190715-en.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mindspore-courses/d2l-zh/3abddaebbf3188d66d7ffef154c02c9b27d4c5f8/img/frontpage/jd-190715-en.png -------------------------------------------------------------------------------- /img/frontpage/jd-190715-zh.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mindspore-courses/d2l-zh/3abddaebbf3188d66d7ffef154c02c9b27d4c5f8/img/frontpage/jd-190715-zh.png -------------------------------------------------------------------------------- /img/frontpage/jd-20230208-zh-1(day).png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mindspore-courses/d2l-zh/3abddaebbf3188d66d7ffef154c02c9b27d4c5f8/img/frontpage/jd-20230208-zh-1(day).png -------------------------------------------------------------------------------- /img/frontpage/jd-20230208-zh-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mindspore-courses/d2l-zh/3abddaebbf3188d66d7ffef154c02c9b27d4c5f8/img/frontpage/jd-20230208-zh-1.png -------------------------------------------------------------------------------- /img/frontpage/jd-20230208-zh-5(day).png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mindspore-courses/d2l-zh/3abddaebbf3188d66d7ffef154c02c9b27d4c5f8/img/frontpage/jd-20230208-zh-5(day).png -------------------------------------------------------------------------------- /img/frontpage/jd-20230208-zh-5.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mindspore-courses/d2l-zh/3abddaebbf3188d66d7ffef154c02c9b27d4c5f8/img/frontpage/jd-20230208-zh-5.png -------------------------------------------------------------------------------- /img/frontpage/jd-20230208-zh-6(day).png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mindspore-courses/d2l-zh/3abddaebbf3188d66d7ffef154c02c9b27d4c5f8/img/frontpage/jd-20230208-zh-6(day).png 
-------------------------------------------------------------------------------- /img/frontpage/jd-20230208-zh-6.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mindspore-courses/d2l-zh/3abddaebbf3188d66d7ffef154c02c9b27d4c5f8/img/frontpage/jd-20230208-zh-6.png -------------------------------------------------------------------------------- /img/ftse100.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mindspore-courses/d2l-zh/3abddaebbf3188d66d7ffef154c02c9b27d4c5f8/img/ftse100.png -------------------------------------------------------------------------------- /img/git-clone.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mindspore-courses/d2l-zh/3abddaebbf3188d66d7ffef154c02c9b27d4c5f8/img/git-clone.png -------------------------------------------------------------------------------- /img/git-createpr.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mindspore-courses/d2l-zh/3abddaebbf3188d66d7ffef154c02c9b27d4c5f8/img/git-createpr.png -------------------------------------------------------------------------------- /img/git-fork.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mindspore-courses/d2l-zh/3abddaebbf3188d66d7ffef154c02c9b27d4c5f8/img/git-fork.png -------------------------------------------------------------------------------- /img/git-forked.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mindspore-courses/d2l-zh/3abddaebbf3188d66d7ffef154c02c9b27d4c5f8/img/git-forked.png -------------------------------------------------------------------------------- /img/git-newpr.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mindspore-courses/d2l-zh/3abddaebbf3188d66d7ffef154c02c9b27d4c5f8/img/git-newpr.png -------------------------------------------------------------------------------- /img/house-pricing.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mindspore-courses/d2l-zh/3abddaebbf3188d66d7ffef154c02c9b27d4c5f8/img/house-pricing.png -------------------------------------------------------------------------------- /img/jupyter.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mindspore-courses/d2l-zh/3abddaebbf3188d66d7ffef154c02c9b27d4c5f8/img/jupyter.png -------------------------------------------------------------------------------- /img/jupyter00.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mindspore-courses/d2l-zh/3abddaebbf3188d66d7ffef154c02c9b27d4c5f8/img/jupyter00.png -------------------------------------------------------------------------------- /img/jupyter01.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mindspore-courses/d2l-zh/3abddaebbf3188d66d7ffef154c02c9b27d4c5f8/img/jupyter01.png -------------------------------------------------------------------------------- /img/jupyter02.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/mindspore-courses/d2l-zh/3abddaebbf3188d66d7ffef154c02c9b27d4c5f8/img/jupyter02.png -------------------------------------------------------------------------------- /img/jupyter03.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mindspore-courses/d2l-zh/3abddaebbf3188d66d7ffef154c02c9b27d4c5f8/img/jupyter03.png -------------------------------------------------------------------------------- /img/jupyter04.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mindspore-courses/d2l-zh/3abddaebbf3188d66d7ffef154c02c9b27d4c5f8/img/jupyter04.png -------------------------------------------------------------------------------- /img/jupyter05.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mindspore-courses/d2l-zh/3abddaebbf3188d66d7ffef154c02c9b27d4c5f8/img/jupyter05.png -------------------------------------------------------------------------------- /img/jupyter06.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mindspore-courses/d2l-zh/3abddaebbf3188d66d7ffef154c02c9b27d4c5f8/img/jupyter06.png -------------------------------------------------------------------------------- /img/kaggle-cifar10.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mindspore-courses/d2l-zh/3abddaebbf3188d66d7ffef154c02c9b27d4c5f8/img/kaggle-cifar10.png -------------------------------------------------------------------------------- /img/kaggle-dog.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mindspore-courses/d2l-zh/3abddaebbf3188d66d7ffef154c02c9b27d4c5f8/img/kaggle-dog.jpg -------------------------------------------------------------------------------- /img/kaggle-submit2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mindspore-courses/d2l-zh/3abddaebbf3188d66d7ffef154c02c9b27d4c5f8/img/kaggle-submit2.png -------------------------------------------------------------------------------- /img/kaggle.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mindspore-courses/d2l-zh/3abddaebbf3188d66d7ffef154c02c9b27d4c5f8/img/kaggle.png -------------------------------------------------------------------------------- /img/keypair.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mindspore-courses/d2l-zh/3abddaebbf3188d66d7ffef154c02c9b27d4c5f8/img/keypair.png -------------------------------------------------------------------------------- /img/koebel.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mindspore-courses/d2l-zh/3abddaebbf3188d66d7ffef154c02c9b27d4c5f8/img/koebel.jpg -------------------------------------------------------------------------------- /img/latencynumbers.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mindspore-courses/d2l-zh/3abddaebbf3188d66d7ffef154c02c9b27d4c5f8/img/latencynumbers.png -------------------------------------------------------------------------------- /img/launching.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/mindspore-courses/d2l-zh/3abddaebbf3188d66d7ffef154c02c9b27d4c5f8/img/launching.png -------------------------------------------------------------------------------- /img/limits.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mindspore-courses/d2l-zh/3abddaebbf3188d66d7ffef154c02c9b27d4c5f8/img/limits.png -------------------------------------------------------------------------------- /img/neural-style.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mindspore-courses/d2l-zh/3abddaebbf3188d66d7ffef154c02c9b27d4c5f8/img/neural-style.jpg -------------------------------------------------------------------------------- /img/nonconvex.svg: -------------------------------------------------------------------------------- (SVG source not captured in this listing) -------------------------------------------------------------------------------- /img/p2x.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mindspore-courses/d2l-zh/3abddaebbf3188d66d7ffef154c02c9b27d4c5f8/img/p2x.png -------------------------------------------------------------------------------- /img/pikachu.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mindspore-courses/d2l-zh/3abddaebbf3188d66d7ffef154c02c9b27d4c5f8/img/pikachu.jpg -------------------------------------------------------------------------------- /img/polygon-circle.svg: -------------------------------------------------------------------------------- (SVG source not captured in this listing) -------------------------------------------------------------------------------- /img/popvssoda.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mindspore-courses/d2l-zh/3abddaebbf3188d66d7ffef154c02c9b27d4c5f8/img/popvssoda.png -------------------------------------------------------------------------------- /img/rainier.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mindspore-courses/d2l-zh/3abddaebbf3188d66d7ffef154c02c9b27d4c5f8/img/rainier.jpg -------------------------------------------------------------------------------- /img/sagemaker-create-2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mindspore-courses/d2l-zh/3abddaebbf3188d66d7ffef154c02c9b27d4c5f8/img/sagemaker-create-2.png -------------------------------------------------------------------------------- /img/sagemaker-create-3-pytorch.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mindspore-courses/d2l-zh/3abddaebbf3188d66d7ffef154c02c9b27d4c5f8/img/sagemaker-create-3-pytorch.png -------------------------------------------------------------------------------- /img/sagemaker-create-3-tensorflow.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mindspore-courses/d2l-zh/3abddaebbf3188d66d7ffef154c02c9b27d4c5f8/img/sagemaker-create-3-tensorflow.png --------------------------------------------------------------------------------
/img/sagemaker-create-3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mindspore-courses/d2l-zh/3abddaebbf3188d66d7ffef154c02c9b27d4c5f8/img/sagemaker-create-3.png -------------------------------------------------------------------------------- /img/sagemaker-create.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mindspore-courses/d2l-zh/3abddaebbf3188d66d7ffef154c02c9b27d4c5f8/img/sagemaker-create.png -------------------------------------------------------------------------------- /img/sagemaker-open.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mindspore-courses/d2l-zh/3abddaebbf3188d66d7ffef154c02c9b27d4c5f8/img/sagemaker-open.png -------------------------------------------------------------------------------- /img/sagemaker-stop.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mindspore-courses/d2l-zh/3abddaebbf3188d66d7ffef154c02c9b27d4c5f8/img/sagemaker-stop.png -------------------------------------------------------------------------------- /img/sagemaker-terminal.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mindspore-courses/d2l-zh/3abddaebbf3188d66d7ffef154c02c9b27d4c5f8/img/sagemaker-terminal.png -------------------------------------------------------------------------------- /img/sagemaker.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mindspore-courses/d2l-zh/3abddaebbf3188d66d7ffef154c02c9b27d4c5f8/img/sagemaker.png -------------------------------------------------------------------------------- /img/speech.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mindspore-courses/d2l-zh/3abddaebbf3188d66d7ffef154c02c9b27d4c5f8/img/speech.png -------------------------------------------------------------------------------- /img/stackedanimals.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mindspore-courses/d2l-zh/3abddaebbf3188d66d7ffef154c02c9b27d4c5f8/img/stackedanimals.png -------------------------------------------------------------------------------- /img/tensorcore.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mindspore-courses/d2l-zh/3abddaebbf3188d66d7ffef154c02c9b27d4c5f8/img/tensorcore.jpg -------------------------------------------------------------------------------- /img/turing-processing-block.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mindspore-courses/d2l-zh/3abddaebbf3188d66d7ffef154c02c9b27d4c5f8/img/turing-processing-block.png -------------------------------------------------------------------------------- /img/turing.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mindspore-courses/d2l-zh/3abddaebbf3188d66d7ffef154c02c9b27d4c5f8/img/turing.png -------------------------------------------------------------------------------- /img/ubuntu-new.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/mindspore-courses/d2l-zh/3abddaebbf3188d66d7ffef154c02c9b27d4c5f8/img/ubuntu-new.png -------------------------------------------------------------------------------- /img/waldo-mask.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mindspore-courses/d2l-zh/3abddaebbf3188d66d7ffef154c02c9b27d4c5f8/img/waldo-mask.jpg -------------------------------------------------------------------------------- /img/where-wally-walker-books.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mindspore-courses/d2l-zh/3abddaebbf3188d66d7ffef154c02c9b27d4c5f8/img/where-wally-walker-books.jpg -------------------------------------------------------------------------------- /index.md: --------------------------------------------------------------------------------
《动手学深度学习》
========================

```eval_rst
.. raw:: html
   :file: frontpage.html
```


```toc
:maxdepth: 1

chapter_preface/index
chapter_installation/index
chapter_notation/index
```


```toc
:maxdepth: 2
:numbered:

chapter_introduction/index
chapter_preliminaries/index
chapter_linear-networks/index
chapter_multilayer-perceptrons/index
chapter_deep-learning-computation/index
chapter_convolutional-neural-networks/index
chapter_convolutional-modern/index
chapter_recurrent-neural-networks/index
chapter_recurrent-modern/index
chapter_attention-mechanisms/index
chapter_optimization/index
chapter_computational-performance/index
chapter_computer-vision/index
chapter_natural-language-processing-pretraining/index
chapter_natural-language-processing-applications/index
chapter_appendix-tools-for-deep-learning/index
```


```toc
:maxdepth: 1

chapter_references/zreferences
```
-------------------------------------------------------------------------------- /setup.py: --------------------------------------------------------------------------------
from setuptools import setup, find_packages
import d2l

requirements = [
    'jupyter==1.0.0',
    'numpy==1.21.5',
    'matplotlib==3.5.1',
    'requests==2.25.1',
    'pandas==1.2.4'
]

setup(
    name='d2l',
    version=d2l.__version__,
    python_requires='>=3.5',
    author='D2L Developers',
    author_email='d2l.devs@gmail.com',
    url='https://d2l.ai',
    description='Dive into Deep Learning',
    license='MIT-0',
    packages=find_packages(),
    zip_safe=True,
    install_requires=requirements,
)
-------------------------------------------------------------------------------- /static/build.yml: --------------------------------------------------------------------------------
dependencies:
- python=3.9
- pip
- pip:
  - d2l==0.17.6
  - git+https://github.com/d2l-ai/d2l-book
  - mxnet-cu102==1.7.0
  - torch==1.12.0+cu102
  - -f https://download.pytorch.org/whl/torch_stable.html
  - torchvision==0.13.0+cu102
  - -f https://download.pytorch.org/whl/torch_stable.html
  - tensorflow==2.9.1
  - tensorflow-probability==0.17.0
  - paddlepaddle-gpu==2.3.2.post112
  - -f https://www.paddlepaddle.org.cn/whl/linux/mkl/avx/stable.html
  - opencv-python==4.6.0.66
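Taken together, setup.py and static/build.yml pin the d2l package metadata and the framework versions used to build the book. A minimal sketch of how these files might be consumed locally, assuming a standard setuptools workflow (none of the commands below appear in the repository itself):

    # Install the d2l helper package defined in setup.py in editable mode;
    # this pulls in the pinned requirements (jupyter, numpy, matplotlib, ...).
    pip install -e .
    # Sanity check: setup.py reads the version from the d2l package itself.
    python -c "import d2l; print(d2l.__version__)"

The pins in static/build.yml are laid out like the dependencies block of a conda environment file; with name: and channels: sections added it could plausibly be passed to conda env create -f static/build.yml, but that usage is an assumption rather than something documented in this listing.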
-------------------------------------------------------------------------------- /static/build_html.sh: --------------------------------------------------------------------------------
#!/bin/bash

set -e

rm -rf _build/rst _build/html
d2lbook build rst --tab all
cp static/frontpage/frontpage.html _build/rst_all/
d2lbook build html --tab all
cp -r static/frontpage/_images/* _build/html/_images/

for fn in `find _build/html/_images/ -iname '*.svg' `; do
    if [[ $fn == *'qr_'* ]] ; then # || [[ $fn == *'output_'* ]]
        continue
    fi
    # rsvg-convert installed on ubuntu changes unit from px to pt, so even no
    # change of the size makes the svg larger...
    rsvg-convert -z 1 -f svg -o tmp.svg $fn
    mv tmp.svg $fn
done

# Add SageMaker Studio Lab buttons
for f in _build/html/chapter*/*.html; do
    sed -i s/Open\ the\ notebook\ in\ Colab\<\\\/div\>\<\\\/div\>\<\\\/div\>\<\\\/h1\>/Open\ the\ notebook\ in\ Colab\<\\\/div\>\<\\\/div\>\<\\\/div\>\\ \\ \\<\\\/i\>\ SageMaker\ Studio\ Lab\ \<\\\/button\>\<\\\/a\>\\ Open\ the\ notebook\ in\ SageMaker\ Studio\ Lab\<\\\/div\>\<\\\/h1\>/g $f
done
-------------------------------------------------------------------------------- /static/cache.sh: --------------------------------------------------------------------------------
#!/bin/bash

if [ $# -ne 2 ]; then
    echo "usage:"
    echo " $0 store dir"
    echo " $0 restore dir"
    exit -1
fi

cmd=$1
dir=$2
saved_dir="${dir///data/_data}"

if [ $cmd == "store" ]; then
    if [ -e $dir ]; then
        rm -rf $saved_dir
        mv $dir $saved_dir
    fi
    echo "Saved $dir to $saved_dir"
elif [ $cmd == "restore" ]; then
    if [ -e $saved_dir ]; then
        rm -rf $dir
        mv $saved_dir $dir
    fi
    echo "Restored $dir from $saved_dir"
else
    echo "unknown command $1, should be either store or restore"
    exit -1
fi
-------------------------------------------------------------------------------- /static/favicon-blue-background.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mindspore-courses/d2l-zh/3abddaebbf3188d66d7ffef154c02c9b27d4c5f8/static/favicon-blue-background.jpg -------------------------------------------------------------------------------- /static/favicon.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mindspore-courses/d2l-zh/3abddaebbf3188d66d7ffef154c02c9b27d4c5f8/static/favicon.png -------------------------------------------------------------------------------- /static/frontpage/_images/alex.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mindspore-courses/d2l-zh/3abddaebbf3188d66d7ffef154c02c9b27d4c5f8/static/frontpage/_images/alex.jpg -------------------------------------------------------------------------------- /static/frontpage/_images/anirudh.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mindspore-courses/d2l-zh/3abddaebbf3188d66d7ffef154c02c9b27d4c5f8/static/frontpage/_images/anirudh.jpg -------------------------------------------------------------------------------- /static/frontpage/_images/aston.jpg: --------------------------------------------------------------------------------
https://raw.githubusercontent.com/mindspore-courses/d2l-zh/3abddaebbf3188d66d7ffef154c02c9b27d4c5f8/static/frontpage/_images/aston.jpg -------------------------------------------------------------------------------- /static/frontpage/_images/brent.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mindspore-courses/d2l-zh/3abddaebbf3188d66d7ffef154c02c9b27d4c5f8/static/frontpage/_images/brent.jpg -------------------------------------------------------------------------------- /static/frontpage/_images/code.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mindspore-courses/d2l-zh/3abddaebbf3188d66d7ffef154c02c9b27d4c5f8/static/frontpage/_images/code.jpg -------------------------------------------------------------------------------- /static/frontpage/_images/eq.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mindspore-courses/d2l-zh/3abddaebbf3188d66d7ffef154c02c9b27d4c5f8/static/frontpage/_images/eq.jpg -------------------------------------------------------------------------------- /static/frontpage/_images/figure.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mindspore-courses/d2l-zh/3abddaebbf3188d66d7ffef154c02c9b27d4c5f8/static/frontpage/_images/figure.jpg -------------------------------------------------------------------------------- /static/frontpage/_images/forum.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mindspore-courses/d2l-zh/3abddaebbf3188d66d7ffef154c02c9b27d4c5f8/static/frontpage/_images/forum.gif -------------------------------------------------------------------------------- /static/frontpage/_images/forum.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mindspore-courses/d2l-zh/3abddaebbf3188d66d7ffef154c02c9b27d4c5f8/static/frontpage/_images/forum.jpg -------------------------------------------------------------------------------- /static/frontpage/_images/forum.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mindspore-courses/d2l-zh/3abddaebbf3188d66d7ffef154c02c9b27d4c5f8/static/frontpage/_images/forum.mp4 -------------------------------------------------------------------------------- /static/frontpage/_images/front.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mindspore-courses/d2l-zh/3abddaebbf3188d66d7ffef154c02c9b27d4c5f8/static/frontpage/_images/front.png -------------------------------------------------------------------------------- /static/frontpage/_images/huliujun.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mindspore-courses/d2l-zh/3abddaebbf3188d66d7ffef154c02c9b27d4c5f8/static/frontpage/_images/huliujun.jpg -------------------------------------------------------------------------------- /static/frontpage/_images/laptop_jupyter.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mindspore-courses/d2l-zh/3abddaebbf3188d66d7ffef154c02c9b27d4c5f8/static/frontpage/_images/laptop_jupyter.png 
-------------------------------------------------------------------------------- /static/frontpage/_images/logos/colab.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mindspore-courses/d2l-zh/3abddaebbf3188d66d7ffef154c02c9b27d4c5f8/static/frontpage/_images/logos/colab.png -------------------------------------------------------------------------------- /static/frontpage/_images/logos/logoimg1-zh.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mindspore-courses/d2l-zh/3abddaebbf3188d66d7ffef154c02c9b27d4c5f8/static/frontpage/_images/logos/logoimg1-zh.png -------------------------------------------------------------------------------- /static/frontpage/_images/logos/logoimg1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mindspore-courses/d2l-zh/3abddaebbf3188d66d7ffef154c02c9b27d4c5f8/static/frontpage/_images/logos/logoimg1.png -------------------------------------------------------------------------------- /static/frontpage/_images/logos/logoimg2-zh.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mindspore-courses/d2l-zh/3abddaebbf3188d66d7ffef154c02c9b27d4c5f8/static/frontpage/_images/logos/logoimg2-zh.png -------------------------------------------------------------------------------- /static/frontpage/_images/logos/logoimg2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mindspore-courses/d2l-zh/3abddaebbf3188d66d7ffef154c02c9b27d4c5f8/static/frontpage/_images/logos/logoimg2.png -------------------------------------------------------------------------------- /static/frontpage/_images/logos/logoimg3-zh.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mindspore-courses/d2l-zh/3abddaebbf3188d66d7ffef154c02c9b27d4c5f8/static/frontpage/_images/logos/logoimg3-zh.png -------------------------------------------------------------------------------- /static/frontpage/_images/logos/logoimg3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mindspore-courses/d2l-zh/3abddaebbf3188d66d7ffef154c02c9b27d4c5f8/static/frontpage/_images/logos/logoimg3.png -------------------------------------------------------------------------------- /static/frontpage/_images/logos/logoimg4-zh.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mindspore-courses/d2l-zh/3abddaebbf3188d66d7ffef154c02c9b27d4c5f8/static/frontpage/_images/logos/logoimg4-zh.png -------------------------------------------------------------------------------- /static/frontpage/_images/logos/logoimg4.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mindspore-courses/d2l-zh/3abddaebbf3188d66d7ffef154c02c9b27d4c5f8/static/frontpage/_images/logos/logoimg4.png -------------------------------------------------------------------------------- /static/frontpage/_images/logos/logoimg5-zh.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mindspore-courses/d2l-zh/3abddaebbf3188d66d7ffef154c02c9b27d4c5f8/static/frontpage/_images/logos/logoimg5-zh.png 
-------------------------------------------------------------------------------- /static/frontpage/_images/logos/logoimg5.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mindspore-courses/d2l-zh/3abddaebbf3188d66d7ffef154c02c9b27d4c5f8/static/frontpage/_images/logos/logoimg5.png -------------------------------------------------------------------------------- /static/frontpage/_images/logos/logoimg6-zh.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mindspore-courses/d2l-zh/3abddaebbf3188d66d7ffef154c02c9b27d4c5f8/static/frontpage/_images/logos/logoimg6-zh.png -------------------------------------------------------------------------------- /static/frontpage/_images/logos/logoimg6.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mindspore-courses/d2l-zh/3abddaebbf3188d66d7ffef154c02c9b27d4c5f8/static/frontpage/_images/logos/logoimg6.png -------------------------------------------------------------------------------- /static/frontpage/_images/logos/logoimg7.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mindspore-courses/d2l-zh/3abddaebbf3188d66d7ffef154c02c9b27d4c5f8/static/frontpage/_images/logos/logoimg7.png -------------------------------------------------------------------------------- /static/frontpage/_images/logos/sagemaker-studio-lab.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mindspore-courses/d2l-zh/3abddaebbf3188d66d7ffef154c02c9b27d4c5f8/static/frontpage/_images/logos/sagemaker-studio-lab.png -------------------------------------------------------------------------------- /static/frontpage/_images/logos/sagemaker.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mindspore-courses/d2l-zh/3abddaebbf3188d66d7ffef154c02c9b27d4c5f8/static/frontpage/_images/logos/sagemaker.png -------------------------------------------------------------------------------- /static/frontpage/_images/map.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mindspore-courses/d2l-zh/3abddaebbf3188d66d7ffef154c02c9b27d4c5f8/static/frontpage/_images/map.png -------------------------------------------------------------------------------- /static/frontpage/_images/mu.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mindspore-courses/d2l-zh/3abddaebbf3188d66d7ffef154c02c9b27d4c5f8/static/frontpage/_images/mu.jpg -------------------------------------------------------------------------------- /static/frontpage/_images/notebook.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mindspore-courses/d2l-zh/3abddaebbf3188d66d7ffef154c02c9b27d4c5f8/static/frontpage/_images/notebook.gif -------------------------------------------------------------------------------- /static/frontpage/_images/notebook.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mindspore-courses/d2l-zh/3abddaebbf3188d66d7ffef154c02c9b27d4c5f8/static/frontpage/_images/notebook.jpg 
-------------------------------------------------------------------------------- /static/frontpage/_images/notebook.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mindspore-courses/d2l-zh/3abddaebbf3188d66d7ffef154c02c9b27d4c5f8/static/frontpage/_images/notebook.mp4 -------------------------------------------------------------------------------- /static/frontpage/_images/rachel.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mindspore-courses/d2l-zh/3abddaebbf3188d66d7ffef154c02c9b27d4c5f8/static/frontpage/_images/rachel.jpeg -------------------------------------------------------------------------------- /static/frontpage/_images/shuai.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mindspore-courses/d2l-zh/3abddaebbf3188d66d7ffef154c02c9b27d4c5f8/static/frontpage/_images/shuai.jpg -------------------------------------------------------------------------------- /static/frontpage/_images/wugaosheng.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mindspore-courses/d2l-zh/3abddaebbf3188d66d7ffef154c02c9b27d4c5f8/static/frontpage/_images/wugaosheng.jpg -------------------------------------------------------------------------------- /static/frontpage/_images/xiaoting.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mindspore-courses/d2l-zh/3abddaebbf3188d66d7ffef154c02c9b27d4c5f8/static/frontpage/_images/xiaoting.jpg -------------------------------------------------------------------------------- /static/frontpage/_images/xiejiehang.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mindspore-courses/d2l-zh/3abddaebbf3188d66d7ffef154c02c9b27d4c5f8/static/frontpage/_images/xiejiehang.jpg -------------------------------------------------------------------------------- /static/frontpage/_images/yi.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mindspore-courses/d2l-zh/3abddaebbf3188d66d7ffef154c02c9b27d4c5f8/static/frontpage/_images/yi.jpg -------------------------------------------------------------------------------- /static/frontpage/_images/yuan.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mindspore-courses/d2l-zh/3abddaebbf3188d66d7ffef154c02c9b27d4c5f8/static/frontpage/_images/yuan.jpg -------------------------------------------------------------------------------- /static/frontpage/_images/zack.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mindspore-courses/d2l-zh/3abddaebbf3188d66d7ffef154c02c9b27d4c5f8/static/frontpage/_images/zack.jpg -------------------------------------------------------------------------------- /static/frontpage/_images/zhangge.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mindspore-courses/d2l-zh/3abddaebbf3188d66d7ffef154c02c9b27d4c5f8/static/frontpage/_images/zhangge.jpg -------------------------------------------------------------------------------- /static/frontpage/attachments/hardcopy.txt: 
--------------------------------------------------------------------------------
If you use D2L to teach (or plan to) and would like to receive a free hardcopy, please use your work email to contact us at d2lbook.en@gmail.com with

i) your school name, and the course and semester in which you use (or will use) D2L to teach;
ii) your mailing address for receiving the hardcopy;
iii) (optional) how you use (or will use) D2L in your teaching, and your course website (if any).
--------------------------------------------------------------------------------
/static/frontpage/attachments/sagemaker.txt:
--------------------------------------------------------------------------------
If you plan to use D2L to teach your class in the 2021 Spring semester, you may apply for free computing resources on AWS for your class, such as Amazon SageMaker and other AWS ML/AI services.

Please email sagemaker-edu@amazon.com by 11/22/2020 with

i) your name, email, title, department, and school;
ii) your course name/level, class start/end date, and the number of students;
iii) course content and how D2L and Amazon SageMaker will be used in teaching.
-------------------------------------------------------------------------------- /static/latex-logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mindspore-courses/d2l-zh/3abddaebbf3188d66d7ffef154c02c9b27d4c5f8/static/latex-logo.png -------------------------------------------------------------------------------- /static/logo-with-text.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mindspore-courses/d2l-zh/3abddaebbf3188d66d7ffef154c02c9b27d4c5f8/static/logo-with-text.png -------------------------------------------------------------------------------- /static/logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mindspore-courses/d2l-zh/3abddaebbf3188d66d7ffef154c02c9b27d4c5f8/static/logo.png
--------------------------------------------------------------------------------
/static/post_latex/main.py:
--------------------------------------------------------------------------------
import os
import re
import regex
import sys

def _unnumber_chaps_and_secs(lines):
    def _startswith_unnumbered(l):
        UNNUMBERED = {'\\section{小结',
                      '\\section{练习',
                      '\\subsection{小结',
                      '\\subsection{练习'}
        for unnum in UNNUMBERED:
            if l.startswith(unnum):
                return True
        return False

    # Preface, Installation, and Notation are unnumbered chapters
    NUM_UNNUMBERED_CHAPS = 3
    # Preliminaries is the 5th \chapter{} (the 3 unnumbered chapters and Introduction come first)
    TOC2_START_CHAP_NO = 5

    num_chaps = 0
    for i, l in enumerate(lines):
        if l.startswith('\\chapter{'):
            num_chaps += 1
            # Unnumber unnumbered chapters
            if num_chaps <= NUM_UNNUMBERED_CHAPS:
                chap_name = re.split('{|}', l)[1]
                lines[i] = ('\\chapter*{' + chap_name
                            + '}\\addcontentsline{toc}{chapter}{'
                            + chap_name + '}\n')
            # Set tocdepth to 2 after Chapter 1
            elif num_chaps == TOC2_START_CHAP_NO:
                lines[i] = ('\\addtocontents{toc}{\\protect\\setcounter{tocdepth}{2}}\n'
                            + lines[i])
        # Unnumber all sections in unnumbered chapters
        elif 1 <= num_chaps <= NUM_UNNUMBERED_CHAPS:
            if (l.startswith('\\section') or l.startswith('\\subsection')
                    or l.startswith('\\subsubsection')):
                lines[i] = l.replace('section{', 'section*{')
        # Unnumber summary, references, exercises, qr code in numbered chapters
        elif _startswith_unnumbered(l):
            lines[i] = l.replace('section{', 'section*{')
    # Since we inserted '\n' in some lines[i], re-build the list.  Note that this
    # only rebinds the local name; the embedded newlines are still written out
    # correctly when main() joins the lines.
    lines = '\n'.join(lines).split('\n')


# If a label belongs to a chap*/index.md title, its numref is Chapter X instead of Section X
def _sec_to_chap(lines):
    for i, l in enumerate(lines):
        # e.g., {Section \ref{\detokenize{chapter_dlc/index:chap-dlc}}} matches,
        # {Section \ref{\detokenize{chapter_prelim/nd:sec-nd}}} does not match.
        # Note that there can be multiple {Section } groups in one line.
        longest_balanced_braces = regex.findall(r'\{(?>[^{}]|(?R))*\}', l)
        for src in longest_balanced_braces:
            if src.startswith('{Section \\ref') and 'index:' in src:
                tgt = src.replace('Section \\ref', 'Chapter \\ref')
                lines[i] = lines[i].replace(src, tgt)


# Remove the date from the title page
def _edit_titlepage(pdf_dir):
    smanual = os.path.join(pdf_dir, 'sphinxmanual.cls')
    with open(smanual, 'r') as f:
        lines = f.read().split('\n')

    for i, l in enumerate(lines):
        lines[i] = lines[i].replace('\\@date', '')

    with open(smanual, 'w') as f:
        f.write('\n'.join(lines))


def delete_lines(lines, deletes):
    return [line for i, line in enumerate(lines) if i not in deletes]


def _delete_discussions_title(lines):
    deletes = []
    to_delete = False
    for i, l in enumerate(lines):
        if 'section*{Discussion' in l or 'section{Discussion' in l:
            to_delete = True
        elif to_delete and '\\sphinxincludegraphics' in l:
            to_delete = False
        if to_delete:
            deletes.append(i)
    return delete_lines(lines, deletes)


def main():
    tex_file = sys.argv[1]
    with open(tex_file, 'r') as f:
        lines = f.read().split('\n')

    _unnumber_chaps_and_secs(lines)
    _sec_to_chap(lines)
    #lines = _delete_discussions_title(lines)

    with open(tex_file, 'w') as f:
        f.write('\n'.join(lines))

    pdf_dir = os.path.dirname(tex_file)
    #_edit_titlepage(pdf_dir)

if __name__ == "__main__":
    main()
--------------------------------------------------------------------------------
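To make the LaTeX post-processing in static/post_latex/main.py concrete, here is a minimal, self-contained Python sketch (an editorial illustration, not a file in the repository; the sample chapter name and label are made up) that applies the same two rewrites to example lines. Judging from main(), the real script is invoked with the path to the generated .tex file as its single argument.

    # Illustration only: apply the two post_latex rewrites to made-up sample lines.
    # Requires the third-party 'regex' package, whose recursive pattern (?R) is
    # used to match balanced brace groups.
    import regex

    samples = [
        '\\chapter{Preface}',
        'see {Section \\ref{\\detokenize{chapter_preface/index:chap-preface}}} for details',
    ]

    # Rewrite 1: turn a chapter into an unnumbered chapter that still appears in
    # the table of contents (what _unnumber_chaps_and_secs does).
    name = samples[0][len('\\chapter{'):-1]
    samples[0] = ('\\chapter*{' + name + '}\\addcontentsline{toc}{chapter}{' + name + '}')

    # Rewrite 2: a \ref whose label lives in an index.md file points at a whole
    # chapter, so "Section" becomes "Chapter" (what _sec_to_chap does).
    for group in regex.findall(r'\{(?>[^{}]|(?R))*\}', samples[1]):
        if group.startswith('{Section \\ref') and 'index:' in group:
            samples[1] = samples[1].replace(group, group.replace('Section \\ref', 'Chapter \\ref'))

    print('\n'.join(samples))
    # -> \chapter*{Preface}\addcontentsline{toc}{chapter}{Preface}
    # -> see {Chapter \ref{\detokenize{chapter_preface/index:chap-preface}}} for details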
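Similarly, the store/restore behaviour of static/cache.sh can be read off from this rough Python equivalent (again an illustration only; the build itself uses the bash script, and the example path is hypothetical):

    # Rough Python equivalent of static/cache.sh, for illustration only.
    import shutil
    import sys
    from pathlib import Path

    def cache(cmd, directory):
        d = Path(directory)
        # Same rename rule as "${dir///data/_data}" in the bash script:
        # every "/data" in the path becomes "_data" (e.g. foo/data -> foo_data).
        saved = Path(d.as_posix().replace('/data', '_data'))
        if cmd == 'store':
            if d.exists():
                shutil.rmtree(saved, ignore_errors=True)
                d.rename(saved)
            print(f'Saved {d} to {saved}')
        elif cmd == 'restore':
            if saved.exists():
                shutil.rmtree(d, ignore_errors=True)
                saved.rename(d)
            print(f'Restored {d} from {saved}')
        else:
            sys.exit(f'unknown command {cmd}, should be either store or restore')

    if __name__ == '__main__':
        cache(sys.argv[1], sys.argv[2])  # e.g. "store foo/data" or "restore foo/data"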