├── .gitignore ├── CONTRIBUTING.md ├── LICENSE ├── README.md ├── bcc-en ├── 1.bcc ├── 10.bcc ├── 11.bcc ├── 12.bcc ├── 13.bcc ├── 14.bcc ├── 15.bcc ├── 16.bcc ├── 17.bcc ├── 18.bcc ├── 19.bcc ├── 2.bcc ├── 20.bcc ├── 3.bcc ├── 4.bcc ├── 5.bcc ├── 6.bcc ├── 7.bcc ├── 8.bcc └── 9.bcc ├── docs ├── Lecture 11 │ ├── Lecture11.md │ └── cs224n-2019-notes08-CNN.pdf ├── Lecture1 │ ├── Lecture1.md │ ├── cs224n-2019-lecture01-wordvecs1.pdf │ └── media │ │ ├── 0f678a32a897d8a7cbdd958b5e73047a.png │ │ ├── 1ed8e2c6f59f03f227a0a96783bee07c.png │ │ ├── 26cef4d801bc54c1939a7e0cc8f734aa.png │ │ ├── 2f999b6467c4c018c0306069f60b3625.png │ │ ├── 40677ebb4b091966d2f8e550b4ca6d30.png │ │ ├── 4275e279e6d646246334a84434ebf9dd.png │ │ ├── 756020be8c51e4f89a721f20dca825bd.png │ │ ├── 86f7fdf808b1a7958f0fa3c15c413ab1.png │ │ ├── 8b5e9d716ea43a7b82695a73ad497674.png │ │ ├── 90138e84ea3b961732b0d5e70e032f4a.png │ │ ├── 939133082e8b9ec6e39ac058a84e8cdd.png │ │ ├── a1d0273bc4309db805739d88a64fbc92.png │ │ ├── d2b95dae5dedcbc90fad260cbc1da976.png │ │ ├── d3737e074d0d735dae202cbddb31623f.png │ │ └── f6ee3acc5c552202abf5a7bd033cbbe2.png ├── Lecture17 │ ├── Lecture17.md │ ├── cs224n-2019-lecture17-multitask.pdf │ └── media │ │ ├── 1.png │ │ ├── 10.png │ │ ├── 4.png │ │ ├── 5.png │ │ ├── 7.png │ │ ├── 8.png │ │ └── 9.png ├── Lecture3 │ ├── Lecture3.md │ ├── cs224n-2019-lecture03-neuralnets.pdf │ └── media │ │ ├── 00926e51622c73d0563cc3cc060dd73e.png │ │ ├── 0281e3d94efe17bfc2de3fa5105d2361.png │ │ ├── 035f4ab4e1518d2cae60fd4323acab2b.png │ │ ├── 054c5b4d2dd02c55bc1f2d47cdd70e42.png │ │ ├── 0702867eeabce64498a5fa2e63b1d477.png │ │ ├── 0f505c06650ee3043870614c3105a838.png │ │ ├── 0f73073a689e8fab9468665a1a6e246c.png │ │ ├── 105645bb8bfaa51181b72ae1b079eb50.png │ │ ├── 110bc0fa8979ea058cf287f81eef7d5e.png │ │ ├── 13540962cd8157f7281f048a7ad46455.png │ │ ├── 160ae1c3f9d59caacfced0221cdee4d9.png │ │ ├── 1923af5f08d15eeb5de0fbc3dd5b2bd5.png │ │ ├── 1e538630d605e2f089c9d524d5acee3b.png │ │ ├── 1ec77f45e7be8c6d9f2ba23f8eb1fa23.png │ │ ├── 22df0c2f3a3d9a274aca272e9cf0408f.png │ │ ├── 2efef90c3b837a85886132f237d76bc7.png │ │ ├── 30aabfb3b1eea5c2c04e77eb1b7b4758.png │ │ ├── 319ea145bf96365b0515dd9be7c1c334.png │ │ ├── 38ac5c9ea7ad5634312929dbd417434a.png │ │ ├── 4202f10e49f3cc0db6498e1b5b89d0cd.png │ │ ├── 44e5ecedcf7a99b323f502acaf360c7e.png │ │ ├── 4d31a5484902aead763880a4abe72a1f.png │ │ ├── 4f37bd2361d0dee284462c8e0d6e4fdf.png │ │ ├── 54a21854bef6f355d126bc656cd3dd22.png │ │ ├── 5939769736ba68cd97dcdc7f0d1391c0.png │ │ ├── 5f6c3b7cf264b3c4c586f975cf4ceab4.png │ │ ├── 6343eafbf305f43451e50461459a36fb.png │ │ ├── 67d49eeec459f53e6c9d5fb11bcf1010.png │ │ ├── 6878311d9fdbde542222229b4b4ac7c5.png │ │ ├── 715eb662e8e68f5a583ba39839eedc92.png │ │ ├── 750c7b5f8979dc080c7ff2298fcd8d12.png │ │ ├── 7b7ff896cd8e7d2d064d3a97e9b02270.png │ │ ├── 7e254490edee480daef55bc5303a7d57.png │ │ ├── 84ed44ccba0edbe63db9a571b4226915.png │ │ ├── 856c6c04b2d3649150d76bb37d22d4a9.png │ │ ├── 866df69980cdccd0ce8bc940d3bd2054.png │ │ ├── 87772a5c28cfd59c44dc0ef8d34961db.png │ │ ├── 8a0f8c21c2507b4ae62dafd6a4c71b67.png │ │ ├── 8cc04c52651e08dcc01451e47d42de0e.png │ │ ├── 96a122e9da19cfc6bc97efeda03ece9c.png │ │ ├── 976134334aab52a94ea54d0c4cc767af.png │ │ ├── 98312cf7f6090bff1993a77d45e6ccb0.png │ │ ├── 99b519d629888f23581e060eedd63f46.png │ │ ├── 9c3f64b3061eb3db00d7b7562ca13fee.png │ │ ├── 9ecab7c5a5ba7aabcc70f7a43d69a381.png │ │ ├── a284ec7c020044950833d2e8a9d9d367.png │ │ ├── a3c41b35f66c5d446e684795c3270c8d.png │ │ ├── b67da1c942280ddc8081ed9dc2751504.png │ 
│ ├── b78effc9001b7923749a983618b495cc.png │ │ ├── bdc31f98a8404cd2e8fe291faa3c38f9.png │ │ ├── c067e3fbb58837d28b58111da022f4de.png │ │ ├── c08bad74522c73d821815c841a9e1194.png │ │ ├── c1af263b21867990ab8ae486e0617f3c.png │ │ ├── c47e0f819d26ba99a8b5640c9dd2d769.png │ │ ├── c4f930a4361aeb4a372c653153079ae8.png │ │ ├── c5b21293bcd8cccae6a7ed922e0240e8.png │ │ ├── c6b3a588fcfd667ea0cae9eae3adc841.png │ │ ├── c83e92b3bf2cbe6098224880124ec1f4.png │ │ ├── cc97acf5ea5e08357a191bcc967acd06.png │ │ ├── d1ec37f9eb213c0879dcbd0ec2775fe1.png │ │ ├── d612c5fc03f9f90f6d9c1f2f65561f4e.png │ │ ├── d75aa946ba69c5e99840a82583970f2e.png │ │ ├── da33062ab60813d3c73930b9c96ca721.png │ │ ├── eaea194ebe26bf2a8e69d68cd01e1a78.png │ │ ├── eed5783c39bf1a3512f1d22f0032a5f3.png │ │ ├── f11058bd5fbe80a556f19100c22c7635.png │ │ ├── f2111a572741c3c6c6c96e352991e4ce.png │ │ └── f6801d8f4f478bc7a7d87c3248cf603c.png ├── Lecture4 │ ├── Lecture 4.md │ ├── cs224n-2019-lecture04-backprop.pdf │ └── media │ │ ├── 1.png │ │ ├── 10.png │ │ ├── 11.png │ │ ├── 12.png │ │ ├── 13.png │ │ ├── 14.png │ │ ├── 15.png │ │ ├── 16.png │ │ ├── 17.png │ │ ├── 18.png │ │ ├── 19.png │ │ ├── 2.png │ │ ├── 20.png │ │ ├── 21.png │ │ ├── 22.png │ │ ├── 23.png │ │ ├── 24.png │ │ ├── 25.png │ │ ├── 26.png │ │ ├── 27.png │ │ ├── 28.png │ │ ├── 29.png │ │ ├── 3.png │ │ ├── 30.png │ │ ├── 31.png │ │ ├── 32.png │ │ ├── 33.png │ │ ├── 4.png │ │ ├── 5.png │ │ ├── 6.png │ │ ├── 7.png │ │ ├── 8.png │ │ └── 9.png ├── Lecture5 │ ├── Lecture 5.md │ ├── cs224n-2019-lecture05-dep-parsing.pdf │ └── media │ │ ├── 1.png │ │ ├── 2.png │ │ ├── 3.png │ │ ├── 4.png │ │ ├── 5.png │ │ └── 6.png ├── Lecture6 │ ├── Lecture 6.md │ ├── cs224n-2019-lecture06-rnnlm.pdf │ └── media │ │ ├── 1.png │ │ ├── 10.png │ │ ├── 11.png │ │ ├── 12.png │ │ ├── 13.png │ │ ├── 14.png │ │ ├── 2.png │ │ ├── 3.png │ │ ├── 4.png │ │ ├── 5.png │ │ ├── 6.png │ │ ├── 7.png │ │ ├── 8.png │ │ └── 9.png └── lecture 20 │ ├── Lecture20_part1.md │ ├── Lecture20_part2.md │ ├── Word_Translation_without_Parallel_Data.pdf │ ├── lecture20_part3.md │ └── media │ ├── 00594abd6ebca941e5e33b1d1ecf7242.png │ ├── 0c13b354bd2f25497ff75e7433a8db0b.png │ ├── 16d1c888725fe434f06485e4b8a0a47e.png │ ├── 1e5d555592b54279f9571143f2a1bc1a.png │ ├── 1fc8b424ade235725a60af4f056558b7.png │ ├── 22fd011d4ca155b84b651df09e7aa675.png │ ├── 24daaa12a59b6f4adf3446cc556c055c.png │ ├── 29d3c9fa71562a0d68ed4568cfa3b8fc.png │ ├── 3017ff5e27e32edc63d73a563f988a25.png │ ├── 36429d6dd9868d07d9d92cc712d26690.png │ ├── 4698fa83c1a4f0c5fdde96eccea1648b.png │ ├── 49490589f03b3454efff6aceed677d59.png │ ├── 49a12531f44200f697663ba7da938af0.png │ ├── 52313442e024b1d78d8f7fe85bb1c83c.png │ ├── 5b6b327867947bbaacf320e8183363b8.png │ ├── 5ccf67442a3c6f56f661544b9718d5be.png │ ├── 5e21808db8af3416351121f745477256.png │ ├── 707177c98672fbd53c9be53ac13cd78c.png │ ├── 70bd9c1c9f1b8aeb64495385072364cf.png │ ├── 7429800d68f22424891570eaecd970b3.png │ ├── 7d41ff65016bd11b9fbeb32918f13b0b.png │ ├── 863d03b91f0a5659e4f8e2a054936e45.png │ ├── 87ba75b9ceeb74f88f5d92abbec5c1fb.png │ ├── 883a1f598728ca6deb4101b097107b21.png │ ├── 8b074b58ce68a16643e0e3ba231ced83.png │ ├── 94d73800652ad75fbe429812a903ebab.png │ ├── 9a8bb5e3c768fa5db9036117cc7f75bb.png │ ├── a45da67f31b5f0894dd05ee257c7a457.png │ ├── a963ae4807b619f2b301b9566069f4bc.png │ ├── aa7868f8bd9dd5d3f8ebdd08e413d78c.png │ ├── b28cc037a458e62e70e28df3da4db868.png │ ├── b3d54f5f91d45e47215ce3a4a8219dbd.png │ ├── bfa990c95fe70d27430257291a8e5793.png │ ├── cc8b2e483b89e4e7f585053d9ae16901.png │ ├── 
d8b5c15ef6c09b4e5e6221c08a77a117.png │ ├── d976819e250653f03cf7a4a9439cac96.png │ ├── ed979ece44c470f0e90d399c89168997.png │ └── f8a9cc51dd30a1550b288b3304e510a4.png ├── styles └── ebook.css └── update.sh /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | env/ 12 | build/ 13 | develop-eggs/ 14 | dist/ 15 | downloads/ 16 | eggs/ 17 | .eggs/ 18 | lib/ 19 | lib64/ 20 | parts/ 21 | sdist/ 22 | var/ 23 | wheels/ 24 | *.egg-info/ 25 | .installed.cfg 26 | *.egg 27 | 28 | # PyInstaller 29 | # Usually these files are written by a python script from a template 30 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 31 | *.manifest 32 | *.spec 33 | 34 | # Installer logs 35 | pip-log.txt 36 | pip-delete-this-directory.txt 37 | 38 | # Unit test / coverage reports 39 | htmlcov/ 40 | .tox/ 41 | .coverage 42 | .coverage.* 43 | .cache 44 | nosetests.xml 45 | coverage.xml 46 | *.cover 47 | .hypothesis/ 48 | 49 | # Translations 50 | *.mo 51 | *.pot 52 | 53 | # Django stuff: 54 | *.log 55 | local_settings.py 56 | 57 | # Flask stuff: 58 | instance/ 59 | .webassets-cache 60 | 61 | # Scrapy stuff: 62 | .scrapy 63 | 64 | # Sphinx documentation 65 | docs/_build/ 66 | 67 | # PyBuilder 68 | target/ 69 | 70 | # Jupyter Notebook 71 | .ipynb_checkpoints 72 | 73 | # pyenv 74 | .python-version 75 | 76 | # celery beat schedule file 77 | celerybeat-schedule 78 | 79 | # SageMath parsed files 80 | *.sage.py 81 | 82 | # dotenv 83 | .env 84 | 85 | # virtualenv 86 | .venv 87 | venv/ 88 | ENV/ 89 | 90 | # Spyder project settings 91 | .spyderproject 92 | .spyproject 93 | 94 | # Rope project settings 95 | .ropeproject 96 | 97 | # mkdocs documentation 98 | /site 99 | 100 | # mypy 101 | .mypy_cache/ 102 | .DS_Store 103 | 104 | # gitbook 105 | _book 106 | 107 | # node.js 108 | node_modules 109 | 110 | # windows 111 | Thumbs.db 112 | 113 | # word 114 | ~$*.docx 115 | ~$*.doc 116 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # 贡献指南 2 | 3 | > 请您勇敢地去翻译和改进翻译。虽然我们追求卓越,但我们并不要求您做到十全十美,因此请不要担心因为翻译上犯错——在大部分情况下,我们的服务器已经记录所有的翻译,因此您不必担心会因为您的失误遭到无法挽回的破坏。(改编自维基百科) 4 | 5 | 课程视频: 6 | 7 | + [斯坦福 CS224n 深度学习自然语言处理课程 2019](https://www.bilibili.com/video/av46216519) 8 | + [字幕(BCC 格式)](https://github.com/apachecn/stanford-cs224n-notes-zh/tree/master/bcc-en) 9 | 10 | 负责人: 11 | 12 | + [AllenZYJ](https://github.com/AllenZYJ) 13 | 14 | ## 章节列表 15 | 16 | + [Lecture 1](https://www.bilibili.com/video/av46216519/?p=1) 17 | + [Lecture 2](https://www.bilibili.com/video/av46216519/?p=2) 18 | + [Lecture 3](https://www.bilibili.com/video/av46216519/?p=3) 19 | + [Lecture 4](https://www.bilibili.com/video/av46216519/?p=4) 20 | + [Lecture 5](https://www.bilibili.com/video/av46216519/?p=5) 21 | + [Lecture 6](https://www.bilibili.com/video/av46216519/?p=6) 22 | + [Lecture 7](https://www.bilibili.com/video/av46216519/?p=7) 23 | + [Lecture 8](https://www.bilibili.com/video/av46216519/?p=8) 24 | + [Lecture 9](https://www.bilibili.com/video/av46216519/?p=9) 25 | + [Lecture 10](https://www.bilibili.com/video/av46216519/?p=10) 26 | + [Lecture 11](https://www.bilibili.com/video/av46216519/?p=11) 27 | + [Lecture 12](https://www.bilibili.com/video/av46216519/?p=12) 28 | + [Lecture 
13](https://www.bilibili.com/video/av46216519/?p=13) 29 | + [Lecture 14](https://www.bilibili.com/video/av46216519/?p=14) 30 | + [Lecture 15](https://www.bilibili.com/video/av46216519/?p=15) 31 | + [Lecture 16](https://www.bilibili.com/video/av46216519/?p=16) 32 | + [Lecture 17](https://www.bilibili.com/video/av46216519/?p=17) 33 | + [Lecture 18](https://www.bilibili.com/video/av46216519/?p=18) 34 | + [Lecture 19](https://www.bilibili.com/video/av46216519/?p=19) 35 | + [Lecture 20](https://www.bilibili.com/video/av46216519/?p=20) 36 | 37 | ## 流程 38 | 39 | ### 一、认领 40 | 41 | 首先查看[整体进度](https://github.com/apachecn/stanford-cs224n-notes-zh/issues/1),确认没有人认领了你想认领的章节。 42 | 43 | 然后回复 ISSUE,注明“章节 + QQ 号”。 44 | 45 | ### 二、整理笔记 46 | 47 | + [下载英文字幕](https://github.com/apachecn/stanford-cs224n-notes-zh/tree/master/bcc-en) 48 | + 翻译(可以利用[谷歌翻译](https://translate.google.cn),但一定要把它变得可读) 49 | + 排版成段落,并添加视频截图 50 | 51 | ### 三、提交 52 | 53 | + `fork` Github 项目 54 | + 将文档(**Markdown 格式**)放在`docs`中。 55 | + `push` 56 | + `pull request` 57 | 58 | 请见 [Github 入门指南](https://github.com/apachecn/kaggle/blob/master/docs/GitHub)。 59 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Creative Commons Attribution-NonCommercial-ShareAlike 4.0 International Public License (CC BY-NC-SA 4.0) 2 | 3 | Copyright © 2020 ApacheCN(apachecn@163.com) 4 | 5 | By exercising the Licensed Rights (defined below), You accept and agree to be bound by the terms and conditions of this Creative Commons Attribution-NonCommercial-ShareAlike 4.0 International Public License ("Public License"). To the extent this Public License may be interpreted as a contract, You are granted the Licensed Rights in consideration of Your acceptance of these terms and conditions, and the Licensor grants You such rights in consideration of benefits the Licensor receives from making the Licensed Material available under these terms and conditions. 6 | 7 | Section 1 – Definitions. 8 | 9 | a. Adapted Material means material subject to Copyright and Similar Rights that is derived from or based upon the Licensed Material and in which the Licensed Material is translated, altered, arranged, transformed, or otherwise modified in a manner requiring permission under the Copyright and Similar Rights held by the Licensor. For purposes of this Public License, where the Licensed Material is a musical work, performance, or sound recording, Adapted Material is always produced where the Licensed Material is synched in timed relation with a moving image. 10 | b. Adapter's License means the license You apply to Your Copyright and Similar Rights in Your contributions to Adapted Material in accordance with the terms and conditions of this Public License. 11 | c. BY-NC-SA Compatible License means a license listed at creativecommons.org/compatiblelicenses, approved by Creative Commons as essentially the equivalent of this Public License. 12 | d. Copyright and Similar Rights means copyright and/or similar rights closely related to copyright including, without limitation, performance, broadcast, sound recording, and Sui Generis Database Rights, without regard to how the rights are labeled or categorized. For purposes of this Public License, the rights specified in Section 2(b)(1)-(2) are not Copyright and Similar Rights. 13 | e. 
Effective Technological Measures means those measures that, in the absence of proper authority, may not be circumvented under laws fulfilling obligations under Article 11 of the WIPO Copyright Treaty adopted on December 20, 1996, and/or similar international agreements. 14 | f. Exceptions and Limitations means fair use, fair dealing, and/or any other exception or limitation to Copyright and Similar Rights that applies to Your use of the Licensed Material. 15 | g. License Elements means the license attributes listed in the name of a Creative Commons Public License. The License Elements of this Public License are Attribution, NonCommercial, and ShareAlike. 16 | h. Licensed Material means the artistic or literary work, database, or other material to which the Licensor applied this Public License. 17 | i. Licensed Rights means the rights granted to You subject to the terms and conditions of this Public License, which are limited to all Copyright and Similar Rights that apply to Your use of the Licensed Material and that the Licensor has authority to license. 18 | j. Licensor means the individual(s) or entity(ies) granting rights under this Public License. 19 | k. NonCommercial means not primarily intended for or directed towards commercial advantage or monetary compensation. For purposes of this Public License, the exchange of the Licensed Material for other material subject to Copyright and Similar Rights by digital file-sharing or similar means is NonCommercial provided there is no payment of monetary compensation in connection with the exchange. 20 | l. Share means to provide material to the public by any means or process that requires permission under the Licensed Rights, such as reproduction, public display, public performance, distribution, dissemination, communication, or importation, and to make material available to the public including in ways that members of the public may access the material from a place and at a time individually chosen by them. 21 | m. Sui Generis Database Rights means rights other than copyright resulting from Directive 96/9/EC of the European Parliament and of the Council of 11 March 1996 on the legal protection of databases, as amended and/or succeeded, as well as other essentially equivalent rights anywhere in the world. 22 | n. You means the individual or entity exercising the Licensed Rights under this Public License. Your has a corresponding meaning. 23 | 24 | Section 2 – Scope. 25 | 26 | a. License grant. 27 | 1. Subject to the terms and conditions of this Public License, the Licensor hereby grants You a worldwide, royalty-free, non-sublicensable, non-exclusive, irrevocable license to exercise the Licensed Rights in the Licensed Material to: 28 | A. reproduce and Share the Licensed Material, in whole or in part, for NonCommercial purposes only; and 29 | B. produce, reproduce, and Share Adapted Material for NonCommercial purposes only. 30 | 2. Exceptions and Limitations. For the avoidance of doubt, where Exceptions and Limitations apply to Your use, this Public License does not apply, and You do not need to comply with its terms and conditions. 31 | 3. Term. The term of this Public License is specified in Section 6(a). 32 | 4. Media and formats; technical modifications allowed. The Licensor authorizes You to exercise the Licensed Rights in all media and formats whether now known or hereafter created, and to make technical modifications necessary to do so. 
The Licensor waives and/or agrees not to assert any right or authority to forbid You from making technical modifications necessary to exercise the Licensed Rights, including technical modifications necessary to circumvent Effective Technological Measures. For purposes of this Public License, simply making modifications authorized by this Section 2(a)(4) never produces Adapted Material. 33 | 5. Downstream recipients. 34 | A. Offer from the Licensor – Licensed Material. Every recipient of the Licensed Material automatically receives an offer from the Licensor to exercise the Licensed Rights under the terms and conditions of this Public License. 35 | B. Additional offer from the Licensor – Adapted Material. Every recipient of Adapted Material from You automatically receives an offer from the Licensor to exercise the Licensed Rights in the Adapted Material under the conditions of the Adapter’s License You apply. 36 | C. No downstream restrictions. You may not offer or impose any additional or different terms or conditions on, or apply any Effective Technological Measures to, the Licensed Material if doing so restricts exercise of the Licensed Rights by any recipient of the Licensed Material. 37 | 6. No endorsement. Nothing in this Public License constitutes or may be construed as permission to assert or imply that You are, or that Your use of the Licensed Material is, connected with, or sponsored, endorsed, or granted official status by, the Licensor or others designated to receive attribution as provided in Section 3(a)(1)(A)(i). 38 | b. Other rights. 39 | 1. Moral rights, such as the right of integrity, are not licensed under this Public License, nor are publicity, privacy, and/or other similar personality rights; however, to the extent possible, the Licensor waives and/or agrees not to assert any such rights held by the Licensor to the limited extent necessary to allow You to exercise the Licensed Rights, but not otherwise. 40 | 2. Patent and trademark rights are not licensed under this Public License. 41 | 3. To the extent possible, the Licensor waives any right to collect royalties from You for the exercise of the Licensed Rights, whether directly or through a collecting society under any voluntary or waivable statutory or compulsory licensing scheme. In all other cases the Licensor expressly reserves any right to collect such royalties, including when the Licensed Material is used other than for NonCommercial purposes. 42 | 43 | Section 3 – License Conditions. 44 | 45 | Your exercise of the Licensed Rights is expressly made subject to the following conditions. 46 | 47 | a. Attribution. 48 | 1. If You Share the Licensed Material (including in modified form), You must: 49 | A. retain the following if it is supplied by the Licensor with the Licensed Material: 50 | i. identification of the creator(s) of the Licensed Material and any others designated to receive attribution, in any reasonable manner requested by the Licensor (including by pseudonym if designated); 51 | ii. a copyright notice; 52 | iii. a notice that refers to this Public License; 53 | iv. a notice that refers to the disclaimer of warranties; 54 | v. a URI or hyperlink to the Licensed Material to the extent reasonably practicable; 55 | B. indicate if You modified the Licensed Material and retain an indication of any previous modifications; and 56 | C. indicate the Licensed Material is licensed under this Public License, and include the text of, or the URI or hyperlink to, this Public License. 57 | 2. 
You may satisfy the conditions in Section 3(a)(1) in any reasonable manner based on the medium, means, and context in which You Share the Licensed Material. For example, it may be reasonable to satisfy the conditions by providing a URI or hyperlink to a resource that includes the required information. 58 | 3. If requested by the Licensor, You must remove any of the information required by Section 3(a)(1)(A) to the extent reasonably practicable. 59 | b. ShareAlike. 60 | In addition to the conditions in Section 3(a), if You Share Adapted Material You produce, the following conditions also apply. 61 | 1. The Adapter’s License You apply must be a Creative Commons license with the same License Elements, this version or later, or a BY-NC-SA Compatible License. 62 | 2. You must include the text of, or the URI or hyperlink to, the Adapter's License You apply. You may satisfy this condition in any reasonable manner based on the medium, means, and context in which You Share Adapted Material. 63 | 3. You may not offer or impose any additional or different terms or conditions on, or apply any Effective Technological Measures to, Adapted Material that restrict exercise of the rights granted under the Adapter's License You apply. 64 | 65 | Section 4 – Sui Generis Database Rights. 66 | 67 | Where the Licensed Rights include Sui Generis Database Rights that apply to Your use of the Licensed Material: 68 | 69 | a. for the avoidance of doubt, Section 2(a)(1) grants You the right to extract, reuse, reproduce, and Share all or a substantial portion of the contents of the database for NonCommercial purposes only; 70 | b. if You include all or a substantial portion of the database contents in a database in which You have Sui Generis Database Rights, then the database in which You have Sui Generis Database Rights (but not its individual contents) is Adapted Material, including for purposes of Section 3(b); and 71 | c. You must comply with the conditions in Section 3(a) if You Share all or a substantial portion of the contents of the database. 72 | 73 | For the avoidance of doubt, this Section 4 supplements and does not replace Your obligations under this Public License where the Licensed Rights include other Copyright and Similar Rights. 74 | 75 | Section 5 – Disclaimer of Warranties and Limitation of Liability. 76 | 77 | a. Unless otherwise separately undertaken by the Licensor, to the extent possible, the Licensor offers the Licensed Material as-is and as-available, and makes no representations or warranties of any kind concerning the Licensed Material, whether express, implied, statutory, or other. This includes, without limitation, warranties of title, merchantability, fitness for a particular purpose, non-infringement, absence of latent or other defects, accuracy, or the presence or absence of errors, whether or not known or discoverable. Where disclaimers of warranties are not allowed in full or in part, this disclaimer may not apply to You. 78 | b. To the extent possible, in no event will the Licensor be liable to You on any legal theory (including, without limitation, negligence) or otherwise for any direct, special, indirect, incidental, consequential, punitive, exemplary, or other losses, costs, expenses, or damages arising out of this Public License or use of the Licensed Material, even if the Licensor has been advised of the possibility of such losses, costs, expenses, or damages. Where a limitation of liability is not allowed in full or in part, this limitation may not apply to You. 79 | c. 
The disclaimer of warranties and limitation of liability provided above shall be interpreted in a manner that, to the extent possible, most closely approximates an absolute disclaimer and waiver of all liability. 80 | 81 | Section 6 – Term and Termination. 82 | 83 | a. This Public License applies for the term of the Copyright and Similar Rights licensed here. However, if You fail to comply with this Public License, then Your rights under this Public License terminate automatically. 84 | b. Where Your right to use the Licensed Material has terminated under Section 6(a), it reinstates: 85 | 1. automatically as of the date the violation is cured, provided it is cured within 30 days of Your discovery of the violation; or 86 | 2. upon express reinstatement by the Licensor. 87 | For the avoidance of doubt, this Section 6(b) does not affect any right the Licensor may have to seek remedies for Your violations of this Public License. 88 | c. For the avoidance of doubt, the Licensor may also offer the Licensed Material under separate terms or conditions or stop distributing the Licensed Material at any time; however, doing so will not terminate this Public License. 89 | d. Sections 1, 5, 6, 7, and 8 survive termination of this Public License. 90 | 91 | Section 7 – Other Terms and Conditions. 92 | 93 | a. The Licensor shall not be bound by any additional or different terms or conditions communicated by You unless expressly agreed. 94 | b. Any arrangements, understandings, or agreements regarding the Licensed Material not stated herein are separate from and independent of the terms and conditions of this Public License. 95 | 96 | Section 8 – Interpretation. 97 | 98 | a. For the avoidance of doubt, this Public License does not, and shall not be interpreted to, reduce, limit, restrict, or impose conditions on any use of the Licensed Material that could lawfully be made without permission under this Public License. 99 | b. To the extent possible, if any provision of this Public License is deemed unenforceable, it shall be automatically reformed to the minimum extent necessary to make it enforceable. If the provision cannot be reformed, it shall be severed from this Public License without affecting the enforceability of the remaining terms and conditions. 100 | c. No term or condition of this Public License will be waived and no failure to comply consented to unless expressly agreed to by the Licensor. 101 | d. Nothing in this Public License constitutes or may be interpreted as a limitation upon, or waiver of, any privileges and immunities that apply to the Licensor or You, including from the legal processes of any jurisdiction or authority. 
-------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # 斯坦福 CS224n 自然语言处理中文笔记 2 | 3 | > 课程:[斯坦福 CS224n 深度学习自然语言处理课程 2019](https://www.bilibili.com/video/av46216519) 4 | > 5 | > 协议:[CC BY-NC-SA 4.0](http://creativecommons.org/licenses/by-nc-sa/4.0/) 6 | > 7 | > 欢迎任何人参与和完善:一个人可以走的很快,但是一群人却可以走的更远。 8 | 9 | + [斯坦福 CS229 机器学习中文笔记](http://ai-start.com/ml2014/) 10 | + [DeepLearning.ai 深度学习中文笔记](http://ai-start.com/dl2017/) 11 | + [斯坦福 CS234 强化学习中文讲义](https://github.com/apachecn/stanford-cs234-notes-zh) 12 | + [ApacheCN 机器学习交流群 629470233](http://shang.qq.com/wpa/qunwpa?idkey=30e5f1123a79867570f665aa3a483ca404b1c3f77737bc01ec520ed5f078ddef) 13 | + [ApacheCN 学习资源](http://www.apachecn.org/) 14 | 15 | ## 贡献指南 16 | 17 | 笔记整理活动正在进行,请查看[贡献指南](CONTRIBUTING.md),并在[整体进度](https://github.com/apachecn/stanford-cs224n-notes-zh/issues/1)中领取任务。 18 | 19 | > 请您勇敢地去翻译和改进翻译。虽然我们追求卓越,但我们并不要求您做到十全十美,因此请不要担心因为翻译上犯错——在大部分情况下,我们的服务器已经记录所有的翻译,因此您不必担心会因为您的失误遭到无法挽回的破坏。(改编自维基百科) 20 | 21 | ## 联系方式 22 | 23 | ### 负责人 24 | 25 | + [AllenZYJ](https://github.com/AllenZYJ) 26 | 27 | ### 其他 28 | 29 | * 认领翻译和项目进度-地址: 30 | * 在我们的 [apachecn/stanford-cs224n-notes-zh](https://github.com/apachecn/stanford-cs224n-notes-zh) github 上提 issue. 31 | * 发邮件到 Email: `apachecn@163.com`. 32 | * 在我们的 [组织学习交流群](http://www.apachecn.org/organization/348.html) 中联系群主/管理员即可. 33 | 34 | ## 赞助我们 35 | 36 | ![](http://data.apachecn.org/img/about/donate.jpg) -------------------------------------------------------------------------------- /bcc-en/12.bcc: -------------------------------------------------------------------------------- 1 | {"font_size":0.4,"font_color":"#FFFFFF","background_alpha":0.5,"background_color":"#9C27B0","Stroke":"none","body":[]} -------------------------------------------------------------------------------- /docs/Lecture 11/Lecture11.md: -------------------------------------------------------------------------------- 1 | # Lecture 11: NLP中的卷积神经网络 2 | ## 从循环神经网络到卷积神经网络 3 | 循环神经网络不能捕获没有前文的短语 4 | 5 | 通常在最后的向量中捕获太多最后的单词 6 | - E.g., softmax通常在最后一步使用 7 | 8 | 卷积层的**主要思想**: 9 | 如果我们对于每个特定长度的所有可能的单词子序列都计算向量呢? 
10 | - 举个例子: 对"tentative deal reached to keep government open" 这句话会计算的短语向量如下: 11 | - - tentative deal reached, deal reached to, reached to keep, to keep government, keep government open 12 | 13 | 不在乎短语是否合乎语法 14 | 15 | 在语言学或认知学上都不太可信 16 | 17 | > 有关卷积在其他领域的含义以及它在图像领域的研究在此不展开,有兴趣的自行搜索 18 | 19 | ## 卷积神经网络在NLP中的应用 20 | 下面将以上面提到的英文句子为例,直观地看一些基本的、常见的用于文本的卷积操作 21 | #### a) 用于文本处理的一维卷积 22 | ![pic4](https://github.com/Originval/Learning/blob/master/pics/pic4.png?raw=true) 23 | 24 | #### b) 带有填充(padding)的用于文本处理的一维卷积 25 | ![pic5](https://github.com/Originval/Learning/blob/master/pics/pic5.png?raw=true) 26 | 27 | #### c) 通道数为3,padding为1的一维卷积 28 | ![pic6](https://github.com/Originval/Learning/blob/master/pics/pic6.png?raw=true) 29 | 30 | #### d) 一维卷积,带填充和基于时间的最大池化 31 | ![pic7](https://github.com/Originval/Learning/blob/master/pics/pic7.png?raw=true) 32 | 33 | #### e) 一维卷积,带填充和基于时间的平均池化 34 | ![pic8](https://github.com/Originval/Learning/blob/master/pics/pic8.png?raw=true) 35 | 36 | ### 使用PyTorch的相关参数 37 | 38 | ``` 39 | batch_size = 16 40 | word_embed_size = 4 41 | seq_len = 7 42 | input = torch.randn(batch_size, word_embed_size, seq_len) 43 | conv1 = Conv1d(in_channels=word_embed_size, out_channels=3, kernel_size=3) # can add: padding=1 44 | hidden1 = conv1(input) 45 | hidden2 = torch.max(hidden1, dim=2) # max pool 46 | ``` 47 | 48 | ## 用于句子分类的单层卷积神经网络 49 | 参考论文: 50 | 1. [Convolutional Neural Networks for Sentence Classification](https://arxiv.org/pdf/1408.5882.pdf) Yoon Kim (2014) EMNLP. 51 | 2. A variant of convolutional NNs of Collobert, Weston et al. (2011) 52 | 53 | 目标:句子分类 54 | - 主要对句子是积极的还是消极的进行情感分类 55 | - 其他的任务有: 56 | - - 主语和客观语句的分类 57 | - - 对有关任务,地点,数字等问题的分类 58 | 59 | > 其他的一些符号或说明可查看相关的论文 60 | 61 | ### 单层卷积神经网络 62 | 过滤器w会被用到所有可能的窗口上 (连接的向量) 63 | 64 | 卷积层中计算特征(单通道)的公式:![image](https://github.com/Originval/Learning/blob/master/pics/1.png?raw=true) 65 | 66 | 句子的表示形式:![image](https://github.com/Originval/Learning/blob/master/pics/2.png?raw=true) 67 | 68 | 所有可能的窗口长度为h:![image](https://github.com/Originval/Learning/blob/master/pics/3.png?raw=true) 69 | 70 | 返回结果是一个特征映射:![image](https://github.com/Originval/Learning/blob/master/pics/4.png?raw=true) 71 | 72 | ### 池化和通道 73 | 池化采用的是基于时间的最大池化 74 | 75 | 思路:捕获最重要的激活值 76 | 77 | 通过特征映射 ![image](https://github.com/Originval/Learning/blob/master/pics/4.png?raw=true) 78 | 79 | 经池化得到的单个数字:![image](https://github.com/Originval/Learning/blob/master/pics/6.png?raw=true) 80 | 81 | 使用的多个过滤器的权重为w,采用不同的窗口尺寸h会很有用,因为最大池化![image](https://github.com/Originval/Learning/blob/master/pics/6.png?raw=true),c的无关的长度![image](https://github.com/Originval/Learning/blob/master/pics/4.png?raw=true),所以我们可以用一些过滤器来观察1元语法,2元语法,3元语法,等等。 82 | 83 | ### 多通道输入的思路 84 | - 用预训练词向量初始化(word2vec或Glove) 85 | - 从两份副本开始 86 | - 只通过其中一个集合反向传播,其他保持静态 87 | - 在最大池化前,把两个通道集都加到ci 88 | 89 | ### 在一层卷积层之后分类 90 | ![(p18)](https://github.com/Originval/Learning/blob/master/pics/pic18.png?raw=true) 91 | - 首先是一个卷积层,然后是最大池化层 92 | - 获取最终的特征向量(使用100个特征映射,每个尺寸是3,4,5) 93 | - 最后是简单的softmax层 94 | 95 | 参考论文:[Zhang and Wallace(2015) A Sensitivity Analysis of (and Practitioners’ Guide to) Convolutional Neural Networks for Sentence Classification](https://arxiv.org/pdf/1510.03820.pdf) 96 | 97 | ### 正则化 98 | 使用Dropout:为概率p(超参数)为1的伯努利随机变量创建mask向量r;在训练过程中删除一些特征:![image](https://github.com/Originval/Learning/blob/master/pics/7.png?raw=true)。这样做是为了防止共适应(对于特定特征群的过度拟合)。但是在进行测试时,不需要dropout,按照概率p缩放最终向量![image](https://github.com/Originval/Learning/blob/master/pics/8.png?raw=true) 99 | 100 | 
也可以对每个类的权重向量(softmax权重中的每行)使用l2正则化约束为固定值s(也是超参数)。如果![image](https://github.com/Originval/Learning/blob/master/pics/9.png?raw=true),就重新缩放![image](https://github.com/Originval/Learning/blob/master/pics/10.png?raw=true) 101 | 102 | ### 模型比较 103 | - Bag of Vectors: 对简单的分类问题而言是很好的基准模型。尤其在其后加上ReLU层 104 | - Window Model:对不需要广泛上下文的单个词语的分类问题很不错。例如:词性标注,命名实体识别 105 | - CNNs: 分类的效果很好,对一些较短的短语需要0填充,较难解释,但比较容易用GPU并行训练。高效且功能丰富强大 106 | - Recurrent Neural Networks:在认知上似是而非,不适合分类(如果只使用最后的状态),比卷积神经网络慢很多,适合序列标记和分类,对语言模型很好,与注意力机制结合会很出色 107 | 108 | ### 门控单元的垂直使用 109 | ![(p45)](https://github.com/Originval/Learning/blob/master/pics/45.png?raw=true) 110 | - 我们在LSTMs和GRUs里看到的门控/跳过是一个总体概念,现在在很多地方使用 111 | - 关键思想——用捷径连接对候选更新进行求和,是让很深的网络工作所必须的 112 | 113 | ### 批归一化(BatchNorm) 114 | - 通过使用卷积神经网络 115 | - 通过缩放激活值使其均值和单位方差为0来转换批次的卷积输出 116 | - - 这是统计学中常见的z变换 117 | - - 但是每批次更新一次,所以波动不会影响太多 118 | - 使用批归一化使模型对参数初始化不那么敏感,因为输出使自动重新标度的 119 | - - 这也使对学习率的微调更简单 120 | - 在PyTorch中使用nn.BatchNorm1d 121 | 122 | ### 1x1卷积 123 | 1x1卷积,即网络中的网络(Network-in-Network)连接,具有kernel_size=1的卷积内核,它给你跨越通道的全连接线性层,也可用于从多个通道映射到更少的通道。 124 | 125 | 此外,1x1卷积只增加了很少的额外参数,就增加了额外的神经网络层,不像全连接(FC)那样增加了很多参数。 126 | 127 | ### CNN的应用:翻译 128 | ![(p44)](https://github.com/Originval/Learning/blob/master/pics/44.png?raw=true) 129 | 130 | - 最早成功的神经机器翻译之一 131 | - 使用CNN进行编码,RNN进行解码 132 | 133 | #### 论文 Learning Character-level Representations for Part-of-Speech Tagging 134 | - 对字符的卷积生成词嵌入 135 | - 使用PoS标记的固定词嵌入窗口长度 136 | 137 | #### 论文 Character-Aware Neural Language Models 138 | - 基于字符的词嵌入 139 | - 利用卷积,Highway网络和LSTM 140 | 141 | ## Quasi-Recurrent 神经网络 142 | ![(p42)](https://github.com/Originval/Learning/blob/master/pics/42.png?raw=true) 143 | - 尝试将LSTM和CNN这两个模型的优点结合在一起 144 | - 跨越时间的并行卷积 145 | - 跨通道并行的元素门控伪递归是在池化层中完成的 146 | 147 | ### Q-RNN实验:语言模型 148 | 介绍了论文(ICLR 2017) Quasi Recurrent Neural Networks 149 | ![(p43)](https://github.com/Originval/Learning/blob/master/pics/43.png?raw=true) 150 | 151 | 152 | ### Q-RNNs在情感分析中的应用 153 | 通常比LSTM更好,更快 154 | 155 | 可解释性较好 156 | 157 | ### QRNN的局限性 158 | 1. 在字符级的语言模型中不能表现得像LSTM一样好 159 | - 在更长的依赖中有困难 160 | 161 | 2. 
通常需要更深的网络才能取得和LSTM一样的性能 162 | - 当更深的时候它们仍然很快 163 | - 它们用深度代替真正的循环很有效 164 | 165 | ### TransformersRNNs的缺点&Transformer的积极性 166 | ![(p41)](https://github.com/Originval/Learning/blob/master/pics/41.png?raw=true) 167 | - 我们想要并行化,但是循环神经网络本质是顺序的 168 | - 尽管有GRUs和LSTMs,循环神经网络仍然从处理长距离依赖的注意力机制中获益——状态之间的路径长度随顺序增长 169 | 170 | -------------------------------------------------------------------------------- /docs/Lecture 11/cs224n-2019-notes08-CNN.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/apachecn/stanford-cs224n-notes-zh/8725301180de9719f10a4241e29e00df9ee55b1d/docs/Lecture 11/cs224n-2019-notes08-CNN.pdf -------------------------------------------------------------------------------- /docs/Lecture1/Lecture1.md: -------------------------------------------------------------------------------- 1 | **引言:** 2 | 3 | **CS224N-2019的开放,ApacheCN团队对此课程进行了笔记的整理,希望可以帮助到更多的小伙伴。(本次对于CS224N笔记的整理只是希望可以帮助到更多的人,希望我们的开源精神可以得到更多人的支持与加入;如果在笔记中存在问题,欢迎随时提出问题供我们改正,我们希望可以有更多的人参与进来一起完善笔记,在NLP邻域贡献出自己的力量。)** 4 | 5 | **一,前言:** 6 | 7 | **本章节主要对讲解了自然语言处理(NLP)发展与深度学习的简介。** 8 | 9 | **二,正文:** 10 | 11 | **WordNet(分类词典)的缺点:** 12 | 13 | 不能准确识别同义词之间的细微差别。 14 | 15 | 对词语的理解不够精确(单词的词义不能及时更新,WordNet资源也不完整)。 16 | 17 | 主观性太强(只有固定的离散同义词集)。 18 | 19 | 耗费大量的人力资源进行标注,来构建 。 20 | 21 | 难以准确计算字词之间的相似性。 22 | 23 | One-hot vectors: 24 | 25 | ![](media/d3737e074d0d735dae202cbddb31623f.png) 26 | 27 | 列如: 28 | 29 | [猫,狗,鸡,鸭,汽车旅馆,鸟,骆驼,酒店],对这个数组进行编码之后; 30 | 31 | Motel(汽车旅馆)对应的编码应是: 32 | 33 | Motel=[0,0,0,0,1,0,0,0]; 34 | 35 | 同理,Hotel(酒店)对应的编码应是: 36 | 37 | Hotel=[0,0,0,0,0,0,0,1]; 38 | 39 | 在上面汽车旅馆和酒店的意思是相似,但是对Motel=[0,0,0,0,1,0,0,0]与Hotel=[0,0,0,0,0,0,0,1]内积为0,也就是完全不相关;所以这种方式并不能表示出这种关系。 40 | 41 | ![](media/26cef4d801bc54c1939a7e0cc8f734aa.png) 42 | 43 | 由于ONE-Hot满足不了相关性,(J.R.Firth)提出了Distributional 44 | semantics(分布式语义):单词的意思,应该结合它的上下文来确定的;现代统计NLP成功的理念之一。 45 | 46 | e: 47 | 48 | 用‘banking’许多的上下文来构建它的意思: 49 | 50 | ![](media/d2b95dae5dedcbc90fad260cbc1da976.png) 51 | 52 | **词向量:** 53 | 54 | 将选择的每个单词构建了密集的向量,为了更好的预测上下文的意思: 55 | 56 | ![](media/40677ebb4b091966d2f8e550b4ca6d30.png) 57 | 58 | ‘banking’对应的维度是8;每一个单词都有一个向量维度,所有单词在一起将会有一个向量空间。 59 | 60 | ![](media/90138e84ea3b961732b0d5e70e032f4a.png) 61 | 62 | 向量中的每个单词具有不同的基础; 63 | 64 | Word2vec:是一个学习单词向量的框架。 65 | 66 | ![](media/756020be8c51e4f89a721f20dca825bd.png) 67 | 68 | 想法; 69 | 70 | 我们有大量的文本。 71 | 72 | 固定词汇表中的每一个单词都由一个向量表示 73 | 74 | 遍历文本中每一个位置,其中有一个中心词C与外部词O。 75 | 76 | 用单词的向量的相似性来计算O与C的相同(或想反)的概率。 77 | 78 | 不断的调整单词向量,使概率值最大化。 79 | 80 | 用迭代来实现这个算法,要确定一个单词的意思,要结合上下文的意思来确定,我们要遍历文本中的每个位置,通过移动单词向量来实现;重复10亿次之后,得到了好的结果; 81 | 82 | 计算过程: 83 | 84 | ![](media/4275e279e6d646246334a84434ebf9dd.png) 85 | 86 | 目标函数(损失函数或成本函数): 87 | 88 | ![](media/939133082e8b9ec6e39ac058a84e8cdd.png) 89 | 90 | 对于给定的数据集T,固定了窗口的大小(每个面几个字),给定中心单词Wj; 91 | 92 | 模型的概率 93 | 94 | ![](media/1ed8e2c6f59f03f227a0a96783bee07c.png) 95 | 96 | 最小化目标函数==最大化概率值; 97 | 98 | ![](media/86f7fdf808b1a7958f0fa3c15c413ab1.png) 99 | 100 | 这个模型中,唯一的一个参数就是我们给定的一个变量; 101 | 102 | ![](media/f6ee3acc5c552202abf5a7bd033cbbe2.png) 103 | 104 | 最小化损失函数; 105 | 106 | 将每个单词使用俩个向量:当W是中心词是,当W是上下文词时; 107 | 108 | 然后是中心词C与上下文词O; 109 | 110 | e: 111 | 112 | 定义一个某个单词的上下文模型: 113 | 114 | > P(context\|Wt)=..... 
115 | 116 | 定义损失函数: 117 | 118 | > J=1-P(Wt+j\|Wt)(Wt+j与Wt为上下文) 119 | 120 | 如果预测结果正确,目标函数为0; 121 | 122 | 在词料库中的不同位置来训练,调整词向量,最小化目标函数: 123 | 124 | 比如:我喜欢打篮球,也喜欢打羽毛球。 125 | 126 | 当出现“打篮球”时,预测出现“打羽毛球”的可能性;概率模型为: 127 | 128 | > P(打羽毛球\|打篮球)(也就是在“打篮球”的条件下“打羽毛球”的概率) 129 | 130 | 目标函数: 131 | 132 | > J=1-p(W\|打篮球) 133 | 134 | 如果w=“打羽毛球”,概率为1,目标函数为0。 135 | 136 | ![](media/2f999b6467c4c018c0306069f60b3625.png) 137 | 138 | 分子是:通过点积来计算O与C的相似性;分母是对整个词汇表进行标准化后给出的概率分布; 139 | 140 | 这是一个SOFEMAX函数R\^N——R\^n的例子: 141 | 142 | ![](media/a1d0273bc4309db805739d88a64fbc92.png) 143 | 144 | 在SOFTMAX函数中: 145 | 146 | “max”是将一些大的概率表示为最大的概率Xi; 147 | 148 | “soft”是将一些小的概率表示为更小的概率Xj; 149 | 150 | 经常用于深度学习; 151 | 152 | **通过优化训练参数训练模型:** 153 | 154 | 为了更好的训练模型,调整参数使目标函数最小化; 155 | 156 | 也就是用梯度下降来找到最优点; 157 | 158 | ![](media/8b5e9d716ea43a7b82695a73ad497674.png) 159 | 160 | **计算所有向量维度:** 161 | 162 | 在D维向量与V维向量中,有许多单词: 163 | 164 | ![](media/0f678a32a897d8a7cbdd958b5e73047a.png) 165 | 166 | 每个单词都有俩个向量;沿着这个梯度进行优化; 167 | -------------------------------------------------------------------------------- /docs/Lecture1/cs224n-2019-lecture01-wordvecs1.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/apachecn/stanford-cs224n-notes-zh/8725301180de9719f10a4241e29e00df9ee55b1d/docs/Lecture1/cs224n-2019-lecture01-wordvecs1.pdf -------------------------------------------------------------------------------- /docs/Lecture1/media/0f678a32a897d8a7cbdd958b5e73047a.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/apachecn/stanford-cs224n-notes-zh/8725301180de9719f10a4241e29e00df9ee55b1d/docs/Lecture1/media/0f678a32a897d8a7cbdd958b5e73047a.png -------------------------------------------------------------------------------- /docs/Lecture1/media/1ed8e2c6f59f03f227a0a96783bee07c.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/apachecn/stanford-cs224n-notes-zh/8725301180de9719f10a4241e29e00df9ee55b1d/docs/Lecture1/media/1ed8e2c6f59f03f227a0a96783bee07c.png -------------------------------------------------------------------------------- /docs/Lecture1/media/26cef4d801bc54c1939a7e0cc8f734aa.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/apachecn/stanford-cs224n-notes-zh/8725301180de9719f10a4241e29e00df9ee55b1d/docs/Lecture1/media/26cef4d801bc54c1939a7e0cc8f734aa.png -------------------------------------------------------------------------------- /docs/Lecture1/media/2f999b6467c4c018c0306069f60b3625.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/apachecn/stanford-cs224n-notes-zh/8725301180de9719f10a4241e29e00df9ee55b1d/docs/Lecture1/media/2f999b6467c4c018c0306069f60b3625.png -------------------------------------------------------------------------------- /docs/Lecture1/media/40677ebb4b091966d2f8e550b4ca6d30.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/apachecn/stanford-cs224n-notes-zh/8725301180de9719f10a4241e29e00df9ee55b1d/docs/Lecture1/media/40677ebb4b091966d2f8e550b4ca6d30.png -------------------------------------------------------------------------------- /docs/Lecture1/media/4275e279e6d646246334a84434ebf9dd.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/apachecn/stanford-cs224n-notes-zh/8725301180de9719f10a4241e29e00df9ee55b1d/docs/Lecture1/media/4275e279e6d646246334a84434ebf9dd.png -------------------------------------------------------------------------------- /docs/Lecture1/media/756020be8c51e4f89a721f20dca825bd.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/apachecn/stanford-cs224n-notes-zh/8725301180de9719f10a4241e29e00df9ee55b1d/docs/Lecture1/media/756020be8c51e4f89a721f20dca825bd.png -------------------------------------------------------------------------------- /docs/Lecture1/media/86f7fdf808b1a7958f0fa3c15c413ab1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/apachecn/stanford-cs224n-notes-zh/8725301180de9719f10a4241e29e00df9ee55b1d/docs/Lecture1/media/86f7fdf808b1a7958f0fa3c15c413ab1.png -------------------------------------------------------------------------------- /docs/Lecture1/media/8b5e9d716ea43a7b82695a73ad497674.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/apachecn/stanford-cs224n-notes-zh/8725301180de9719f10a4241e29e00df9ee55b1d/docs/Lecture1/media/8b5e9d716ea43a7b82695a73ad497674.png -------------------------------------------------------------------------------- /docs/Lecture1/media/90138e84ea3b961732b0d5e70e032f4a.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/apachecn/stanford-cs224n-notes-zh/8725301180de9719f10a4241e29e00df9ee55b1d/docs/Lecture1/media/90138e84ea3b961732b0d5e70e032f4a.png -------------------------------------------------------------------------------- /docs/Lecture1/media/939133082e8b9ec6e39ac058a84e8cdd.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/apachecn/stanford-cs224n-notes-zh/8725301180de9719f10a4241e29e00df9ee55b1d/docs/Lecture1/media/939133082e8b9ec6e39ac058a84e8cdd.png -------------------------------------------------------------------------------- /docs/Lecture1/media/a1d0273bc4309db805739d88a64fbc92.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/apachecn/stanford-cs224n-notes-zh/8725301180de9719f10a4241e29e00df9ee55b1d/docs/Lecture1/media/a1d0273bc4309db805739d88a64fbc92.png -------------------------------------------------------------------------------- /docs/Lecture1/media/d2b95dae5dedcbc90fad260cbc1da976.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/apachecn/stanford-cs224n-notes-zh/8725301180de9719f10a4241e29e00df9ee55b1d/docs/Lecture1/media/d2b95dae5dedcbc90fad260cbc1da976.png -------------------------------------------------------------------------------- /docs/Lecture1/media/d3737e074d0d735dae202cbddb31623f.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/apachecn/stanford-cs224n-notes-zh/8725301180de9719f10a4241e29e00df9ee55b1d/docs/Lecture1/media/d3737e074d0d735dae202cbddb31623f.png -------------------------------------------------------------------------------- /docs/Lecture1/media/f6ee3acc5c552202abf5a7bd033cbbe2.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/apachecn/stanford-cs224n-notes-zh/8725301180de9719f10a4241e29e00df9ee55b1d/docs/Lecture1/media/f6ee3acc5c552202abf5a7bd033cbbe2.png -------------------------------------------------------------------------------- /docs/Lecture17/Lecture17.md: -------------------------------------------------------------------------------- 1 | # 一 引言 2 | 3 | ![](media\1.png) 4 | 5 | 近几年在数据集,任务,模型,度量等方面上有很大提高。只要数据集够大,模型就可以达到局部最优,我们从单个模型中学习,模型通常由随机或部分预训练开始。一般如果你拥有的输出类数量的1,000倍,效果可能达到准确率高达80%到90%,如ImageNet中,有1000个不同的计算机视觉课程, 6 | 1,000个不同的类,每个类有1,000个图像。因此,如果有大约一百万张图像,那么效果显著。但在机器翻译中,理想情况下,有几十万字,每个单词的数百万个例子,却是不适用的。 7 | 8 | 单个nlp模型集中于单个模型的度量和任务完成,不能形成典范。为了统一的模型,我们引入了NLP通用模型自然语言十项全能(decaNLP) 9 | 10 | # 二 自然语言通用模型decaNLP 11 | 12 | ###### 任务范围 13 | 14 | ![](media/5.png) 15 | 16 | decaNLP十个任务:问答 (question answering) 、机器翻译(machine translation)、语义角色标注(semantic role labeling)、关系抽取(zero-shot relation extraction )、摘要(summarization)、任务驱动多轮对话(goal-oriented dialogue)、自然语言推理(natural language inference)、情感分析(sentiment analysis)、语义解析(semantic parsing)和代词消解(Pronoun Resolution)。 17 | 18 | ###### 任务概述 19 | 20 | ![](media/4.png)**问答** 21 | 22 | 问答(QA)模型接收一个问题和一个包含输出所需答案所需信息的上下文。我们使用斯坦福问答数据集(SQuAD)来完成这项任务。上下文是取自英文维基百科的段落,而答案是来自上下文的单词序列。SQuAD使用一个normalizedF1 (nF1)指标,去掉了文章和标点符号。 23 | 24 | **机器翻译** 25 | 26 | 机器翻译模型接收源语言的输入文本,该文本必须被翻译成目标语言。我们使用为国际口语翻译研讨会(IWSLT)准备的2016年英语到德语培训数据。例子来自转录的演讲,涵盖广泛的话题与会话语言。我们分别以2013年和2014年测试集的语料库级BLEU评分作为验证和测试集进行评估. 27 | 28 | **摘要** 29 | 30 | 摘要模型将纳入文档并输出该文档的摘要。对于摘要的最新进展,最重要的是将CNN / DailyMail(CNN / DM)语料库转换为摘要数据集。 31 | 32 | **自然语言推理** 33 | 34 | 自然语言推理(NLI)模型接受两种输入句子:前提和假设。然后,模型必须将两者之间的推论关系归类为隐含关系、中立关系或矛盾关系。我们使用多体裁自然语言推理语料库(MNLI),该语料库提供了来自多个领域(转录的演讲、通俗小说、政府报告)的训练示例,以及来各个领域文献的测试对。MNLI使用精确匹配(EM)分数。 35 | 36 | **情感分析** 37 | 38 | 训练了情绪分析模型,对输入文本所表达的情绪进行分类。斯坦福情绪树库(Stanford Sentiment Treebank, SST)由具有相应情绪(积极、中立、消极)的影评moviereviews组成。我们使用未解析的二进制版本。SST也使用EM评分. 
39 | 40 | **语义角色标注** 41 | 42 | 语义角色标记(SRL)模型被赋予一个句子和谓语(通常是一个动词),并且必须决定谁对谁做了什么,什么时候做,以及在哪里做。我们使用一个SRL数据集,将任务视为问题回答,QA-SRL 。该数据集涵盖了news和Wikipedia域,但我们只使用后者,以确保所有用于decaNLP的数据都可以免费下载。我们用nF1metric来评估QA-SRL。 43 | 44 | **关系抽取** 45 | 46 | 关系提取系统接收一段非结构化的文本和从该文本中提取的关系。在这种情况下,重要的是模型可以识别关系是否存在,即语义关系,并且进一步判断。与SRL一样,我们使用将关系映射到一组问题的数据集,以便关系提取可以被视为问题回答:QA-ZRE 。数据集的评估是为了测量在新的关系类型上的零样本性能而设计的。 47 | 48 | **任务驱动多轮对话** 49 | 50 | 对话状态跟踪是任务驱动多轮对话系统的关键组成部分。基于用户的话语、已经采取的行动和对话历史,对话状态跟踪器跟踪用户对对话系统预定义目标,以及用户在系统和用户交互时发出的请求类型。我们使用的是英文版的WOZ餐厅预订服务,它提供了事先设定的关于食物、日期、时间、地址和其他信息的本体,可以帮助代理商为客户进行预订。 51 | 52 | **语义解析** 53 | 54 | SQL查询生成与语义解析相关。基于WikiSQL数据集的模型将自然语言问题转换为结构化SQL查询,以便用户可以使用自然语言与数据库交互。 55 | 56 | **代词消解** 57 | 58 | 我们的最终任务是基于Winograd模式,该模式需要代名词解析:“琼(Joan)一定要感谢苏珊(Sugi)给予的帮助。谁给了苏珊(Susan)或琼(Joan)?”我们从Winograd SchemaChallenge 的示例开始,并对其进行了修改,以确保答案是上下文中的单个单词。 59 | 60 | 这种经过改进的Winograd Schema Challenge(MWSC)可确保分数既不会因短语中的措辞或上下文,问题和答案之间的不一致而增加或减少,我们会通过EM分数进行评估 61 | 62 | **十项全能得分(decaScore)** 63 | 64 | 在decaNLP上竞争的模型使用每个特定任务度量的附加组合进行评估。所有的度量标准都在0和100之间,因此decaS-core对于10个任务来说自然在0和1000之间。使用加法组合避免了因权衡不同指标而产生的问题。所有的度量都是大小写不敏感的。 65 | 66 | ###### 多任务问答网络(MQAN) 67 | 68 | ![](media/7.png) 69 | 70 | 由于每个任务都被构造成问答的形式,并且是联合训练的,因此我们将我们的模型称为多任务问答网络(MQAN)。一个多任务问题回答网络,它没有任何针对特定任务的参数和模块。 71 | 72 | MQAN用BiLSTM(双向LSTM)编码,使用额外的共同关注层对两个序列的条件进行表示,用另两个BiLSTM压缩所有这些信息,使其能够更高层进行计算,我们使用self-attention关注层,这样解码器就能感知到之前的输出(或者在没有之前输出的情况下是一个特殊的初始化token)和对上下文的关注,从而为下一个输出做好准备。,用自我关注的方式来收集这种长距离依赖关系,然后使用两个BiLSTM对问题和背景环境的进行最终的表示。多指针生成器解码器着重于问题、上下文以及先前输出象征来决定是否从问题中复制,还是从上下文复制,或者从有限的词汇表中生成。 73 | 74 | 在训练期间,MQAN接受三个序列作为输入:一个contextcwithltoken、一个questionqwithmtoken和一个answerawithntoken。每一个都由一个矩阵表示,其中矩阵的表示对应于序列中第i个标记的全维嵌入(例如字或字符向量)。 75 | 76 | ###### 评分与分析 77 | 78 | ![](media/9.png) 79 | 80 | 在我们的框架中,训练示例是(问题、上下文、答案)三元组。我们的第一个基线是apointer-generator sequence-to-sequence (S2S)模型。S2S模型只接受单个输入序列,因此我们将该模型的上下文和问题连接起来。在表中,验证指标显示S2S模型在小队中表现不佳。在WikiSQL上,它获得了比之前的序列-序列基线更高的分数,但与MQAN (+QPtr)和其他基线相比,它是低的。 81 | 82 | Vaswani等人用自注意(带有SAtt)编码器和解码器层增强S2S模型,提高了模型集成来自上下文和问题的信息的能力。这样可以提高SQuAD,QA-SRL和WikiSQL的性能。 83 | 84 | 对于WikiSQL,此模型几乎无需使用结构化方法即可匹配72.4%的现有最新验证结果。 85 | 86 | 接下来,我们将探索如何将上下文和问题分割成两个输入序列,并使用一个coattention机制(+CAtt)来扩展s2s模型。小队和QA-SRL的表现增加了超过5nf1。但这并没有改善其他的任务,而且它显著地影响了MNLI和MWSC的性能。 87 | 88 | 为了解决这个问题,我们在前面的基线上添加了一个问题指针(+QPtr),它还提高大部分性能,利用直接跨度监督。这使得它成为在不显式地将问题建模为跨度提取的最高性能的问题回答模型。 89 | 90 | 在多任务设置中,我们看到了类似的结果,但是我们也注意到了一些其他的显著特征。与最高的单任务模型相比,QA-ZRE的性能提高了11点,这支持了多任务学习可以更好地得到零样本情况下的假设。 91 | 92 | 在将问题指针添加到模型之前,需要大量使用外部词汇表的任务的性能会比S2S基线下降50%以上。除了coattendedcontext之外,这个问题指针还有一个共同参与的问题,它允许问题中的信息直接输入解码器。我们假设对问题的更直接的访问使模型更容易决定何时生成输出标记比复制更合适。 93 | 94 | ###### 预训练MQAN优于随机初始化 95 | 96 | ![](media/10.png) 97 | 98 | SQuAD,QA-SRL和WikiSQL,该模型主要从上下文复制。直观的认为,这是因为从上下文中包含了正确回答这些数据集所必需的所有令牌token。 99 | 100 | 对于SST、MNLI和MWSC,模型更喜欢问题指针,因为问题包含可接受类的标记。因为模型是这样学习使用问题指针的。对于IWSLT和WOZ,模型更喜欢从词汇表中生成,因为德语单词和对话状态字段很少出现在上下文中。该模型还避免了对QA-ZRE的复制;这些例子中有一半需要从外部词汇中产生无法回答的答案。 101 | 102 | 因为MNLI包含在decaNLP中,所以可以采用相关的Stanford Natural Language Inference Corpus (SNLI) 。对在decaNLP上预先训练的MQAN进行微调,可以获得87%的精确匹配分数,比随机初始化训练多出2个百分点,比最先进状态多出2个百分点]。更值得注意的是,即使没有对SNLI进行任何微调,一个经过预先训练的MQAN对decanlp仍然可以获得62%的精确匹配分数。 103 | 104 | 因为decaNLP包含SST,所以它也可以很好地执行其他二元情绪分类任务。在亚马逊和Yelp的评论中,在没有任何微调的情况下,在decaNLP上预训练的MQAN分别获得了82.1%和80.8%的精确匹配分数。 105 | 106 | 这些结果表明,在decaNLP上训练的模型有可能同时针对多个任务同时推广到域外上下文和问题,甚至适应于不可见的类进行文本分类。输入和输出空间中的这种零样本自适应表明,decaNLP中任务的广度泛化超出了通过训练单个任务可以实现的范围。 107 | 108 | 109 | 110 | 如果感兴趣的,可以看原论文:https://arxiv.org/abs/1806.08730.pdf 111 | 112 | 涉及论文对应github:https://github.com/salesforce/decaNLP 
-------------------------------------------------------------------------------- /docs/Lecture17/cs224n-2019-lecture17-multitask.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/apachecn/stanford-cs224n-notes-zh/8725301180de9719f10a4241e29e00df9ee55b1d/docs/Lecture17/cs224n-2019-lecture17-multitask.pdf -------------------------------------------------------------------------------- /docs/Lecture17/media/1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/apachecn/stanford-cs224n-notes-zh/8725301180de9719f10a4241e29e00df9ee55b1d/docs/Lecture17/media/1.png -------------------------------------------------------------------------------- /docs/Lecture17/media/10.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/apachecn/stanford-cs224n-notes-zh/8725301180de9719f10a4241e29e00df9ee55b1d/docs/Lecture17/media/10.png -------------------------------------------------------------------------------- /docs/Lecture17/media/4.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/apachecn/stanford-cs224n-notes-zh/8725301180de9719f10a4241e29e00df9ee55b1d/docs/Lecture17/media/4.png -------------------------------------------------------------------------------- /docs/Lecture17/media/5.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/apachecn/stanford-cs224n-notes-zh/8725301180de9719f10a4241e29e00df9ee55b1d/docs/Lecture17/media/5.png -------------------------------------------------------------------------------- /docs/Lecture17/media/7.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/apachecn/stanford-cs224n-notes-zh/8725301180de9719f10a4241e29e00df9ee55b1d/docs/Lecture17/media/7.png -------------------------------------------------------------------------------- /docs/Lecture17/media/8.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/apachecn/stanford-cs224n-notes-zh/8725301180de9719f10a4241e29e00df9ee55b1d/docs/Lecture17/media/8.png -------------------------------------------------------------------------------- /docs/Lecture17/media/9.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/apachecn/stanford-cs224n-notes-zh/8725301180de9719f10a4241e29e00df9ee55b1d/docs/Lecture17/media/9.png -------------------------------------------------------------------------------- /docs/Lecture3/Lecture3.md: -------------------------------------------------------------------------------- 1 | **CS224N(第三节):** 2 | 3 | > **一,前言:** 4 | 5 | **本章节主要讲解了神经网络的基础知识,并解决NLP任务中的经典问题。** 6 | 7 | 二,**正文:** 8 | 9 | **Word Window Classification, Neural Nets and Calculus** 10 | 11 | **Classification setup and notation:** 12 | 13 | ![](media/9c3f64b3061eb3db00d7b7562ca13fee.png) 14 | 15 | 通常我们有一个由样本组成的训练数据集: 16 | 17 | ![](media/105645bb8bfaa51181b72ae1b079eb50.png) 18 | 19 | xi 是输入,例如单词(索引或是向量),句子,文档等等,维度为 d 20 | 21 | yi是我们将要尝试预测的标签(C个类别中的一个),例如: 22 | 23 | 类别:情绪,命名实体,买入/卖出的决策 24 | 25 | 其他单词 26 | 27 | 之后:多词序列的 28 | 29 | **Classification intuition:** 30 | 31 | ![](media/6343eafbf305f43451e50461459a36fb.png) 32 | 33 | 训练数据: 34 | 35 | ![](media/84ed44ccba0edbe63db9a571b4226915.png) 36 | 37 | 
简单的说明案例: 38 | 39 | 固定的二维单词向量分类 40 | 41 | 使用softmax/logistic回归 42 | 43 | 线性决策边界 44 | 45 | (单词向量是二维的,使用逻辑回归来进行分类,最后找到线性的决策边界) 46 | 47 | 传统的ML/统计学方法:假设xi是固定的,训练somtmax/logistic回归的权重W来确定边界。 48 | 49 | 方法:对于每个X,预测: 50 | 51 | ![](media/4d31a5484902aead763880a4abe72a1f.png) 52 | 53 | Softmax分类器的详细信息: 54 | 55 | ![](media/eaea194ebe26bf2a8e69d68cd01e1a78.png) 56 | 57 | 将预测函数分为两个步骤: 58 | 59 | 1.将W的y行与x中对应行相乘(权重矩阵的某一行乘输入向量的点积): 60 | 61 | ![](media/c5b21293bcd8cccae6a7ed922e0240e8.png) 62 | 63 | 计算所有的fc, c = 1,………,c 64 | 65 | 2.使用softmax函数得到归一化概率: 66 | 67 | ![](media/035f4ab4e1518d2cae60fd4323acab2b.png) 68 | 69 | Training with soft and cross-entropy loss(使用softmax与交叉熵损失进行训练) 70 | 71 | 对于每个训练样本(x,y),我们的目标是最大化正确类y的概率,或是最小化这个类的对数概率: 72 | 73 | ![](media/160ae1c3f9d59caacfced0221cdee4d9.png) 74 | 75 | Corss entropy loss/error(交叉熵的损失/误差) 76 | 77 | 交叉熵”的概念来源于信息论,衡量两个分布之间的差异 78 | 79 | 令真实概率分布为 p 80 | 81 | 令我们计算的模型概率为 q 82 | 83 | 交叉熵为: 84 | 85 | ![](media/67d49eeec459f53e6c9d5fb11bcf1010.png) 86 | 87 | 假设 groud truth (or true or gold or 88 | target)的概率分布在正确的类上为1,在其他任何地方为0:p=[0,…,0,1,0,…0] 89 | 90 | 因为 p 是one-hot向量,所以唯一剩下的项是真实类的负对数概率。 91 | 92 | **Classification over a full dataset:** 93 | 94 | 在整个数据集上的交叉熵损失函数,是所有样本的交叉熵的均值: 95 | 96 | ![](media/a284ec7c020044950833d2e8a9d9d367.png) 97 | 98 | 我们不使用 99 | 100 | ![](media/38ac5c9ea7ad5634312929dbd417434a.png) 101 | 102 | 我们使用矩阵来表示f 103 | 104 | ![](media/30aabfb3b1eea5c2c04e77eb1b7b4758.png) 105 | 106 | **Traditional ML optimization:** 107 | 108 | 一般机器学习的参数 θ 通常只由W的列组成 109 | 110 | ![](media/d612c5fc03f9f90f6d9c1f2f65561f4e.png) 111 | 112 | 用以下方法来更新决策边界: 113 | 114 | ![](media/c83e92b3bf2cbe6098224880124ec1f4.png) 115 | 116 | Neural Network Classifiers 117 | 118 | ![](media/750c7b5f8979dc080c7ff2298fcd8d12.png) 119 | 120 | 单独使用Softmax(≈logistic回归)并不十分强大 121 | 122 | Softmax只给出线性决策边界 123 | 124 | Neural Nets for the Win! 
125 | 126 | ![](media/715eb662e8e68f5a583ba39839eedc92.png) 127 | 128 | 右图为神经网络分类的结果(神经网络可以学习更复杂的函数和非线性决策边界) 129 | 130 | **更高级的分类需求** 131 | 132 | 1.词向量 133 | 134 | 2.更深层次的深度神经网络 135 | 136 | **Classification difference with word vectors:** 137 | 138 | 一般在NLP深度学习中 139 | 140 | 1.我们学习了矩阵 W 和词向量 x 141 | 142 | 2.我们学习传统参数和表示 143 | 144 | 3.词向量是对独热向量的重新表示——在中间层向量空间中移动它们—以便使用(线性)softmax分类器通过 145 | x = Le 146 | 层进行分类,即将词向量理解为一层神经网络,输入单词的独热向量并获得单词的词向量表示,并且我们需要对其进行更新。其中,Vd (非常多的参数) 147 | 148 | ![](media/cc97acf5ea5e08357a191bcc967acd06.png) 149 | 150 | **Neural computation** 151 | 152 | ![](media/13540962cd8157f7281f048a7ad46455.png) 153 | 154 | **An artificial neuron:** 155 | 156 | 神经网络有自己的术语包 157 | 158 | 但如果你了解 softmax 模型是如何工作的,那么你就可以很容易地理解神经元的操作 159 | 160 | ![](media/87772a5c28cfd59c44dc0ef8d34961db.png) 161 | 162 | X为输入值,wi为来自各个神经元的权重值,经过激活函数的处理之后,输出传向下一级神经元。 163 | 164 | **A neuron can be a binary logistic regression unit:** 165 | 166 | ![](media/0f73073a689e8fab9468665a1a6e246c.png) 167 | 168 | W为权重值,b为偏置,h为隐藏层,x为输入 169 | 170 | ![](media/6878311d9fdbde542222229b4b4ac7c5.png) 171 | 172 | b为:我们可以有一个“总是打开”的特性,它给出一个先验类,或者将它作为一个偏向项分离出来。 173 | 174 | ![](media/eed5783c39bf1a3512f1d22f0032a5f3.png) 175 | 176 | 上图为softmax图像 177 | 178 | ![](media/9ecab7c5a5ba7aabcc70f7a43d69a381.png) 179 | 180 | W,b是这个神经元的参数 181 | 182 | **A neural network** **= running several logistic regressions at the same 183 | time:** 184 | 185 | ![](media/1e538630d605e2f089c9d524d5acee3b.png) 186 | 187 | 输入一个向量,经过一系列逻辑回归函数,会得到一个输出变量。但是我们不需要提前决定这些函数在预测的变量是什么。 188 | 189 | ![](media/c067e3fbb58837d28b58111da022f4de.png) 190 | 191 | 我们只需要把预测结果传递给下一层网络中,由损失函数自动决定它们预测什么,从而传向下一层。 192 | 193 | ![](media/c4f930a4361aeb4a372c653153079ae8.png) 194 | 195 | **Matrix notation for a layer:** 196 | 197 | ![](media/8cc04c52651e08dcc01451e47d42de0e.png) 198 | 199 | ![](media/866df69980cdccd0ce8bc940d3bd2054.png) 200 | 201 | ![](media/b78effc9001b7923749a983618b495cc.png) 202 | 203 | ![](media/1923af5f08d15eeb5de0fbc3dd5b2bd5.png) 204 | 205 | **Non-linearities (aka “f ”): Why they’re needed:** 206 | 207 | 非线性函数可以更好的捕捉很复杂的数据: 208 | 209 | ![](media/c1af263b21867990ab8ae486e0617f3c.png) 210 | 211 | ![](media/bdc31f98a8404cd2e8fe291faa3c38f9.png) 212 | 213 | ![](media/98312cf7f6090bff1993a77d45e6ccb0.png) 214 | 215 | 例如:函数近似,如回归或分类 216 | 217 | 没有非线性,深度神经网络只能做线性变换 218 | 219 | 多个线性变换可以组成一个的线性变换 W1W2x=Wx 220 | 221 | 因为线性变换是以某种方式旋转和拉伸空间,多次的旋转和拉伸可以融合为一次线性变换 222 | 223 | 对于非线性函数而言,使用更多的层,他们可以近似更复杂的函数**Named Entity 224 | Recognition (NER):** 225 | 226 | 任务:例如,查找和分类文本中的名称 227 | 228 | ![](media/0702867eeabce64498a5fa2e63b1d477.png) 229 | 230 | 可能的用途 231 | 232 | 跟踪文档中提到的特定实体(组织、个人、地点、歌曲名、电影名等) 233 | 234 | 对于问题回答,答案通常是命名实体 235 | 236 | 许多需要的信息实际上是命名实体之间的关联 237 | 238 | 同样的技术可以扩展到其他 slot-filling 槽填充 分类 239 | 240 | 通常后面是命名实体链接/规范化到知识库 241 | 242 | **Named Entity Recognition on word sequences** 243 | 244 | 通过结合上下文中对单词进行分类,然后提取实体单词的子序列来预测。 245 | 246 | ![](media/319ea145bf96365b0515dd9be7c1c334.png) 247 | 248 | **Window classification** 249 | 250 | 思想:在\*\*相邻词的上下文窗口\*\*中对一个词进行分类 251 | 252 | 例如,上下文中一个单词的命名实体分类 253 | 254 | 人、地点、组织、没有 255 | 256 | 在上下文中对单词进行分类的一个简单方法可能是对窗口中的单词向量进行\*\*平均\*\*,并对平均向量进行分类 257 | 258 | 问题:**这会丢失位置信息** 259 | 260 | **Window classification: Softmax** 261 | 262 | 训练softmax分类器对中心词进行分类,方法是在一个窗口内\*\*将中心词周围的词向量串联起来\*\* 263 | 264 | 例子:在这句话的上下文中对“Paris”进行分类,窗口长度为2 265 | 266 | ![](media/a3c41b35f66c5d446e684795c3270c8d.png) 267 | 268 | **Simplest window classifier: Softmax** 269 | 270 | ![](media/7e254490edee480daef55bc5303a7d57.png) 271 | 272 | 使用softmax分类器 273 | 
**Binary classification with unnormalized scores**

![](media/8a0f8c21c2507b4ae62dafd6a4c71b67.png)

Suppose we want to classify whether the center word of a window is a location.

As with word2vec, we walk over every position in the corpus, but this time the training is supervised: only some positions should receive a high score.

Namely, the windows that have an actual NER location at their center are the "true" positions, and those should score high.

**Neural Network Feed-forward Computation:**

Use the neural activation a simply to give an unnormalized score:

![](media/c08bad74522c73d821815c841a9e1194.png)

A three-layer neural network computes the window's score:

![](media/4f37bd2361d0dee284462c8e0d6e4fdf.png)

![](media/4202f10e49f3cc0db6498e1b5b89d0cd.png)

![](media/00926e51622c73d0563cc3cc060dd73e.png)

where a applies the activation function:

![](media/c6b3a588fcfd667ea0cae9eae3adc841.png)

**The max-margin loss:**

Minimize a loss that drives the score of true windows up and the score of corrupt windows down:

![](media/54a21854bef6f355d126bc656cd3dd22.png)

Minimize:

![](media/c47e0f819d26ba99a8b5640c9dd2d769.png)

and update the parameters with SGD:

![](media/110bc0fa8979ea058cf287f81eef7d5e.png)

Every window with an NER location at its center should score at least 1 point higher than any window without one:

![](media/96a122e9da19cfc6bc97efeda03ece9c.png)

**Jacobian Matrix: Generalization of the Gradient:**

Given a function with m outputs and n inputs:

![](media/054c5b4d2dd02c55bc1f2d47cdd70e42.png)

its Jacobian is the m × n matrix of partial derivatives:

![](media/7b7ff896cd8e7d2d064d3a97e9b02270.png)

**Chain Rule:**

For a function of one variable: multiply the derivatives:

![](media/b67da1c942280ddc8081ed9dc2751504.png)

For functions of several variables at once: multiply the Jacobians:

![](media/d75aa946ba69c5e99840a82583970f2e.png)

**Example Jacobian: Elementwise activation Function:**

For an elementwise activation, a function with n inputs and n outputs, the Jacobian is an n × n (diagonal) matrix:

![](media/f11058bd5fbe80a556f19100c22c7635.png)

**Other Jacobians:**

![](media/0281e3d94efe17bfc2de3fa5105d2361.png)

The figure above gives the correct Jacobian formulas; differentiating the score s = uᵀh with respect to u gives hᵀ.

**Back to our Neural Net!**

![](media/44e5ecedcf7a99b323f502acaf360c7e.png)

![](media/2efef90c3b837a85886132f237d76bc7.png)

Let us compute the gradient of the score.

**Break up equations into simple pieces:**

![](media/976134334aab52a94ea54d0c4cc767af.png)

**Apply the chain rule:**

![](media/f6801d8f4f478bc7a7d87c3248cf603c.png)

![](media/22df0c2f3a3d9a274aca272e9cf0408f.png)

![](media/5939769736ba68cd97dcdc7f0d1391c0.png)

The computation:

![](media/5f6c3b7cf264b3c4c586f975cf4ceab4.png)

δ denotes the local error signal.
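To see the δ bookkeeping in action, here is a small NumPy sketch of the score s = uᵀ f(Wx + b) and its gradients, finished with a numerical gradient check. The choice f = tanh and all sizes are assumptions for illustration.

```python
import numpy as np

np.random.seed(1)
n, m = 3, 5                      # hidden size n, window/input size m (toy numbers)
x = np.random.randn(m)           # window vector
W = np.random.randn(n, m); b = np.random.randn(n); u = np.random.randn(n)

f  = np.tanh                     # assume f = tanh as the elementwise activation
df = lambda z: 1 - np.tanh(z)**2 # its elementwise derivative

# Forward pass: z = Wx + b, h = f(z), s = u.T h
z = W @ x + b
h = f(z)
s = u @ h

# Backward pass, exactly as in the slides:
delta = u * df(z)                # local error signal arriving at z
grad_b = delta                   # ds/db = delta
grad_W = np.outer(delta, x)      # ds/dW = delta^T x (shape convention: n x m, like W)
grad_u = h                       # ds/du = h

# Numerical gradient check on one entry of W
eps = 1e-6
W2 = W.copy(); W2[0, 0] += eps
s2 = u @ f(W2 @ x + b)
print(grad_W[0, 0], (s2 - s) / eps)   # the two numbers should match closely
```

Note that grad_W is the outer product of the local error signal δ and the local input x, which is exactly the "why the transposes?" pattern discussed next.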
**Derivative with respect to Matrix: Output shape:**

What shape should ∂s/∂W have?

![](media/f2111a572741c3c6c6c96e352991e4ce.png)

s is 1 output and W holds n·m inputs, so strictly a 1 × (n·m) Jacobian?

That shape is inconvenient for updating the parameters.

Instead we follow the convention that the derivative has the same shape as the parameter (the "shape convention"):

the shape is n × m.

![](media/856c6c04b2d3649150d76bb37d22d4a9.png)

**Derivative with respect to Matrix:**

![](media/1ec77f45e7be8c6d9f2ba23f8eb1fa23.png)

![](media/0f505c06650ee3043870614c3105a838.png)

which gives:

![](media/d1ec37f9eb213c0879dcbd0ec2775fe1.png)

where δ is the local error signal and x is the local input signal.

**Why the Transposes?**

![](media/99b519d629888f23581e060eedd63f46.png)

The rough answer: the transposes make the dimensions work out, which is also a useful trick for checking your work. The lecture notes contain the full explanation: pairing each input with each output gives an outer product.

![](media/da33062ab60813d3c73930b9c96ca721.png)

--------------------------------------------------------------------------------
/docs/Lecture3/cs224n-2019-lecture03-neuralnets.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/apachecn/stanford-cs224n-notes-zh/8725301180de9719f10a4241e29e00df9ee55b1d/docs/Lecture3/cs224n-2019-lecture03-neuralnets.pdf
--------------------------------------------------------------------------------
/docs/Lecture3/media/00926e51622c73d0563cc3cc060dd73e.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/apachecn/stanford-cs224n-notes-zh/8725301180de9719f10a4241e29e00df9ee55b1d/docs/Lecture3/media/00926e51622c73d0563cc3cc060dd73e.png
--------------------------------------------------------------------------------
/docs/Lecture3/media/0281e3d94efe17bfc2de3fa5105d2361.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/apachecn/stanford-cs224n-notes-zh/8725301180de9719f10a4241e29e00df9ee55b1d/docs/Lecture3/media/0281e3d94efe17bfc2de3fa5105d2361.png
--------------------------------------------------------------------------------
/docs/Lecture3/media/035f4ab4e1518d2cae60fd4323acab2b.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/apachecn/stanford-cs224n-notes-zh/8725301180de9719f10a4241e29e00df9ee55b1d/docs/Lecture3/media/035f4ab4e1518d2cae60fd4323acab2b.png
--------------------------------------------------------------------------------
/docs/Lecture3/media/054c5b4d2dd02c55bc1f2d47cdd70e42.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/apachecn/stanford-cs224n-notes-zh/8725301180de9719f10a4241e29e00df9ee55b1d/docs/Lecture3/media/054c5b4d2dd02c55bc1f2d47cdd70e42.png
--------------------------------------------------------------------------------
/docs/Lecture3/media/0702867eeabce64498a5fa2e63b1d477.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/apachecn/stanford-cs224n-notes-zh/8725301180de9719f10a4241e29e00df9ee55b1d/docs/Lecture3/media/0702867eeabce64498a5fa2e63b1d477.png
--------------------------------------------------------------------------------
/docs/Lecture3/media/0f505c06650ee3043870614c3105a838.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/apachecn/stanford-cs224n-notes-zh/8725301180de9719f10a4241e29e00df9ee55b1d/docs/Lecture3/media/0f505c06650ee3043870614c3105a838.png
--------------------------------------------------------------------------------
/docs/Lecture3/media/0f73073a689e8fab9468665a1a6e246c.png:
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/apachecn/stanford-cs224n-notes-zh/8725301180de9719f10a4241e29e00df9ee55b1d/docs/Lecture3/media/0f73073a689e8fab9468665a1a6e246c.png -------------------------------------------------------------------------------- /docs/Lecture3/media/105645bb8bfaa51181b72ae1b079eb50.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/apachecn/stanford-cs224n-notes-zh/8725301180de9719f10a4241e29e00df9ee55b1d/docs/Lecture3/media/105645bb8bfaa51181b72ae1b079eb50.png -------------------------------------------------------------------------------- /docs/Lecture3/media/110bc0fa8979ea058cf287f81eef7d5e.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/apachecn/stanford-cs224n-notes-zh/8725301180de9719f10a4241e29e00df9ee55b1d/docs/Lecture3/media/110bc0fa8979ea058cf287f81eef7d5e.png -------------------------------------------------------------------------------- /docs/Lecture3/media/13540962cd8157f7281f048a7ad46455.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/apachecn/stanford-cs224n-notes-zh/8725301180de9719f10a4241e29e00df9ee55b1d/docs/Lecture3/media/13540962cd8157f7281f048a7ad46455.png -------------------------------------------------------------------------------- /docs/Lecture3/media/160ae1c3f9d59caacfced0221cdee4d9.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/apachecn/stanford-cs224n-notes-zh/8725301180de9719f10a4241e29e00df9ee55b1d/docs/Lecture3/media/160ae1c3f9d59caacfced0221cdee4d9.png -------------------------------------------------------------------------------- /docs/Lecture3/media/1923af5f08d15eeb5de0fbc3dd5b2bd5.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/apachecn/stanford-cs224n-notes-zh/8725301180de9719f10a4241e29e00df9ee55b1d/docs/Lecture3/media/1923af5f08d15eeb5de0fbc3dd5b2bd5.png -------------------------------------------------------------------------------- /docs/Lecture3/media/1e538630d605e2f089c9d524d5acee3b.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/apachecn/stanford-cs224n-notes-zh/8725301180de9719f10a4241e29e00df9ee55b1d/docs/Lecture3/media/1e538630d605e2f089c9d524d5acee3b.png -------------------------------------------------------------------------------- /docs/Lecture3/media/1ec77f45e7be8c6d9f2ba23f8eb1fa23.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/apachecn/stanford-cs224n-notes-zh/8725301180de9719f10a4241e29e00df9ee55b1d/docs/Lecture3/media/1ec77f45e7be8c6d9f2ba23f8eb1fa23.png -------------------------------------------------------------------------------- /docs/Lecture3/media/22df0c2f3a3d9a274aca272e9cf0408f.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/apachecn/stanford-cs224n-notes-zh/8725301180de9719f10a4241e29e00df9ee55b1d/docs/Lecture3/media/22df0c2f3a3d9a274aca272e9cf0408f.png -------------------------------------------------------------------------------- /docs/Lecture3/media/2efef90c3b837a85886132f237d76bc7.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/apachecn/stanford-cs224n-notes-zh/8725301180de9719f10a4241e29e00df9ee55b1d/docs/Lecture3/media/2efef90c3b837a85886132f237d76bc7.png -------------------------------------------------------------------------------- /docs/Lecture3/media/30aabfb3b1eea5c2c04e77eb1b7b4758.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/apachecn/stanford-cs224n-notes-zh/8725301180de9719f10a4241e29e00df9ee55b1d/docs/Lecture3/media/30aabfb3b1eea5c2c04e77eb1b7b4758.png -------------------------------------------------------------------------------- /docs/Lecture3/media/319ea145bf96365b0515dd9be7c1c334.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/apachecn/stanford-cs224n-notes-zh/8725301180de9719f10a4241e29e00df9ee55b1d/docs/Lecture3/media/319ea145bf96365b0515dd9be7c1c334.png -------------------------------------------------------------------------------- /docs/Lecture3/media/38ac5c9ea7ad5634312929dbd417434a.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/apachecn/stanford-cs224n-notes-zh/8725301180de9719f10a4241e29e00df9ee55b1d/docs/Lecture3/media/38ac5c9ea7ad5634312929dbd417434a.png -------------------------------------------------------------------------------- /docs/Lecture3/media/4202f10e49f3cc0db6498e1b5b89d0cd.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/apachecn/stanford-cs224n-notes-zh/8725301180de9719f10a4241e29e00df9ee55b1d/docs/Lecture3/media/4202f10e49f3cc0db6498e1b5b89d0cd.png -------------------------------------------------------------------------------- /docs/Lecture3/media/44e5ecedcf7a99b323f502acaf360c7e.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/apachecn/stanford-cs224n-notes-zh/8725301180de9719f10a4241e29e00df9ee55b1d/docs/Lecture3/media/44e5ecedcf7a99b323f502acaf360c7e.png -------------------------------------------------------------------------------- /docs/Lecture3/media/4d31a5484902aead763880a4abe72a1f.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/apachecn/stanford-cs224n-notes-zh/8725301180de9719f10a4241e29e00df9ee55b1d/docs/Lecture3/media/4d31a5484902aead763880a4abe72a1f.png -------------------------------------------------------------------------------- /docs/Lecture3/media/4f37bd2361d0dee284462c8e0d6e4fdf.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/apachecn/stanford-cs224n-notes-zh/8725301180de9719f10a4241e29e00df9ee55b1d/docs/Lecture3/media/4f37bd2361d0dee284462c8e0d6e4fdf.png -------------------------------------------------------------------------------- /docs/Lecture3/media/54a21854bef6f355d126bc656cd3dd22.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/apachecn/stanford-cs224n-notes-zh/8725301180de9719f10a4241e29e00df9ee55b1d/docs/Lecture3/media/54a21854bef6f355d126bc656cd3dd22.png -------------------------------------------------------------------------------- /docs/Lecture3/media/5939769736ba68cd97dcdc7f0d1391c0.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/apachecn/stanford-cs224n-notes-zh/8725301180de9719f10a4241e29e00df9ee55b1d/docs/Lecture3/media/5939769736ba68cd97dcdc7f0d1391c0.png -------------------------------------------------------------------------------- /docs/Lecture3/media/5f6c3b7cf264b3c4c586f975cf4ceab4.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/apachecn/stanford-cs224n-notes-zh/8725301180de9719f10a4241e29e00df9ee55b1d/docs/Lecture3/media/5f6c3b7cf264b3c4c586f975cf4ceab4.png -------------------------------------------------------------------------------- /docs/Lecture3/media/6343eafbf305f43451e50461459a36fb.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/apachecn/stanford-cs224n-notes-zh/8725301180de9719f10a4241e29e00df9ee55b1d/docs/Lecture3/media/6343eafbf305f43451e50461459a36fb.png -------------------------------------------------------------------------------- /docs/Lecture3/media/67d49eeec459f53e6c9d5fb11bcf1010.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/apachecn/stanford-cs224n-notes-zh/8725301180de9719f10a4241e29e00df9ee55b1d/docs/Lecture3/media/67d49eeec459f53e6c9d5fb11bcf1010.png -------------------------------------------------------------------------------- /docs/Lecture3/media/6878311d9fdbde542222229b4b4ac7c5.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/apachecn/stanford-cs224n-notes-zh/8725301180de9719f10a4241e29e00df9ee55b1d/docs/Lecture3/media/6878311d9fdbde542222229b4b4ac7c5.png -------------------------------------------------------------------------------- /docs/Lecture3/media/715eb662e8e68f5a583ba39839eedc92.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/apachecn/stanford-cs224n-notes-zh/8725301180de9719f10a4241e29e00df9ee55b1d/docs/Lecture3/media/715eb662e8e68f5a583ba39839eedc92.png -------------------------------------------------------------------------------- /docs/Lecture3/media/750c7b5f8979dc080c7ff2298fcd8d12.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/apachecn/stanford-cs224n-notes-zh/8725301180de9719f10a4241e29e00df9ee55b1d/docs/Lecture3/media/750c7b5f8979dc080c7ff2298fcd8d12.png -------------------------------------------------------------------------------- /docs/Lecture3/media/7b7ff896cd8e7d2d064d3a97e9b02270.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/apachecn/stanford-cs224n-notes-zh/8725301180de9719f10a4241e29e00df9ee55b1d/docs/Lecture3/media/7b7ff896cd8e7d2d064d3a97e9b02270.png -------------------------------------------------------------------------------- /docs/Lecture3/media/7e254490edee480daef55bc5303a7d57.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/apachecn/stanford-cs224n-notes-zh/8725301180de9719f10a4241e29e00df9ee55b1d/docs/Lecture3/media/7e254490edee480daef55bc5303a7d57.png -------------------------------------------------------------------------------- /docs/Lecture3/media/84ed44ccba0edbe63db9a571b4226915.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/apachecn/stanford-cs224n-notes-zh/8725301180de9719f10a4241e29e00df9ee55b1d/docs/Lecture3/media/84ed44ccba0edbe63db9a571b4226915.png -------------------------------------------------------------------------------- /docs/Lecture3/media/856c6c04b2d3649150d76bb37d22d4a9.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/apachecn/stanford-cs224n-notes-zh/8725301180de9719f10a4241e29e00df9ee55b1d/docs/Lecture3/media/856c6c04b2d3649150d76bb37d22d4a9.png -------------------------------------------------------------------------------- /docs/Lecture3/media/866df69980cdccd0ce8bc940d3bd2054.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/apachecn/stanford-cs224n-notes-zh/8725301180de9719f10a4241e29e00df9ee55b1d/docs/Lecture3/media/866df69980cdccd0ce8bc940d3bd2054.png -------------------------------------------------------------------------------- /docs/Lecture3/media/87772a5c28cfd59c44dc0ef8d34961db.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/apachecn/stanford-cs224n-notes-zh/8725301180de9719f10a4241e29e00df9ee55b1d/docs/Lecture3/media/87772a5c28cfd59c44dc0ef8d34961db.png -------------------------------------------------------------------------------- /docs/Lecture3/media/8a0f8c21c2507b4ae62dafd6a4c71b67.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/apachecn/stanford-cs224n-notes-zh/8725301180de9719f10a4241e29e00df9ee55b1d/docs/Lecture3/media/8a0f8c21c2507b4ae62dafd6a4c71b67.png -------------------------------------------------------------------------------- /docs/Lecture3/media/8cc04c52651e08dcc01451e47d42de0e.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/apachecn/stanford-cs224n-notes-zh/8725301180de9719f10a4241e29e00df9ee55b1d/docs/Lecture3/media/8cc04c52651e08dcc01451e47d42de0e.png -------------------------------------------------------------------------------- /docs/Lecture3/media/96a122e9da19cfc6bc97efeda03ece9c.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/apachecn/stanford-cs224n-notes-zh/8725301180de9719f10a4241e29e00df9ee55b1d/docs/Lecture3/media/96a122e9da19cfc6bc97efeda03ece9c.png -------------------------------------------------------------------------------- /docs/Lecture3/media/976134334aab52a94ea54d0c4cc767af.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/apachecn/stanford-cs224n-notes-zh/8725301180de9719f10a4241e29e00df9ee55b1d/docs/Lecture3/media/976134334aab52a94ea54d0c4cc767af.png -------------------------------------------------------------------------------- /docs/Lecture3/media/98312cf7f6090bff1993a77d45e6ccb0.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/apachecn/stanford-cs224n-notes-zh/8725301180de9719f10a4241e29e00df9ee55b1d/docs/Lecture3/media/98312cf7f6090bff1993a77d45e6ccb0.png -------------------------------------------------------------------------------- /docs/Lecture3/media/99b519d629888f23581e060eedd63f46.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/apachecn/stanford-cs224n-notes-zh/8725301180de9719f10a4241e29e00df9ee55b1d/docs/Lecture3/media/99b519d629888f23581e060eedd63f46.png -------------------------------------------------------------------------------- /docs/Lecture3/media/9c3f64b3061eb3db00d7b7562ca13fee.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/apachecn/stanford-cs224n-notes-zh/8725301180de9719f10a4241e29e00df9ee55b1d/docs/Lecture3/media/9c3f64b3061eb3db00d7b7562ca13fee.png -------------------------------------------------------------------------------- /docs/Lecture3/media/9ecab7c5a5ba7aabcc70f7a43d69a381.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/apachecn/stanford-cs224n-notes-zh/8725301180de9719f10a4241e29e00df9ee55b1d/docs/Lecture3/media/9ecab7c5a5ba7aabcc70f7a43d69a381.png -------------------------------------------------------------------------------- /docs/Lecture3/media/a284ec7c020044950833d2e8a9d9d367.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/apachecn/stanford-cs224n-notes-zh/8725301180de9719f10a4241e29e00df9ee55b1d/docs/Lecture3/media/a284ec7c020044950833d2e8a9d9d367.png -------------------------------------------------------------------------------- /docs/Lecture3/media/a3c41b35f66c5d446e684795c3270c8d.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/apachecn/stanford-cs224n-notes-zh/8725301180de9719f10a4241e29e00df9ee55b1d/docs/Lecture3/media/a3c41b35f66c5d446e684795c3270c8d.png -------------------------------------------------------------------------------- /docs/Lecture3/media/b67da1c942280ddc8081ed9dc2751504.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/apachecn/stanford-cs224n-notes-zh/8725301180de9719f10a4241e29e00df9ee55b1d/docs/Lecture3/media/b67da1c942280ddc8081ed9dc2751504.png -------------------------------------------------------------------------------- /docs/Lecture3/media/b78effc9001b7923749a983618b495cc.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/apachecn/stanford-cs224n-notes-zh/8725301180de9719f10a4241e29e00df9ee55b1d/docs/Lecture3/media/b78effc9001b7923749a983618b495cc.png -------------------------------------------------------------------------------- /docs/Lecture3/media/bdc31f98a8404cd2e8fe291faa3c38f9.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/apachecn/stanford-cs224n-notes-zh/8725301180de9719f10a4241e29e00df9ee55b1d/docs/Lecture3/media/bdc31f98a8404cd2e8fe291faa3c38f9.png -------------------------------------------------------------------------------- /docs/Lecture3/media/c067e3fbb58837d28b58111da022f4de.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/apachecn/stanford-cs224n-notes-zh/8725301180de9719f10a4241e29e00df9ee55b1d/docs/Lecture3/media/c067e3fbb58837d28b58111da022f4de.png -------------------------------------------------------------------------------- /docs/Lecture3/media/c08bad74522c73d821815c841a9e1194.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/apachecn/stanford-cs224n-notes-zh/8725301180de9719f10a4241e29e00df9ee55b1d/docs/Lecture3/media/c08bad74522c73d821815c841a9e1194.png -------------------------------------------------------------------------------- /docs/Lecture3/media/c1af263b21867990ab8ae486e0617f3c.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/apachecn/stanford-cs224n-notes-zh/8725301180de9719f10a4241e29e00df9ee55b1d/docs/Lecture3/media/c1af263b21867990ab8ae486e0617f3c.png -------------------------------------------------------------------------------- /docs/Lecture3/media/c47e0f819d26ba99a8b5640c9dd2d769.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/apachecn/stanford-cs224n-notes-zh/8725301180de9719f10a4241e29e00df9ee55b1d/docs/Lecture3/media/c47e0f819d26ba99a8b5640c9dd2d769.png -------------------------------------------------------------------------------- /docs/Lecture3/media/c4f930a4361aeb4a372c653153079ae8.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/apachecn/stanford-cs224n-notes-zh/8725301180de9719f10a4241e29e00df9ee55b1d/docs/Lecture3/media/c4f930a4361aeb4a372c653153079ae8.png -------------------------------------------------------------------------------- /docs/Lecture3/media/c5b21293bcd8cccae6a7ed922e0240e8.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/apachecn/stanford-cs224n-notes-zh/8725301180de9719f10a4241e29e00df9ee55b1d/docs/Lecture3/media/c5b21293bcd8cccae6a7ed922e0240e8.png -------------------------------------------------------------------------------- /docs/Lecture3/media/c6b3a588fcfd667ea0cae9eae3adc841.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/apachecn/stanford-cs224n-notes-zh/8725301180de9719f10a4241e29e00df9ee55b1d/docs/Lecture3/media/c6b3a588fcfd667ea0cae9eae3adc841.png -------------------------------------------------------------------------------- /docs/Lecture3/media/c83e92b3bf2cbe6098224880124ec1f4.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/apachecn/stanford-cs224n-notes-zh/8725301180de9719f10a4241e29e00df9ee55b1d/docs/Lecture3/media/c83e92b3bf2cbe6098224880124ec1f4.png -------------------------------------------------------------------------------- /docs/Lecture3/media/cc97acf5ea5e08357a191bcc967acd06.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/apachecn/stanford-cs224n-notes-zh/8725301180de9719f10a4241e29e00df9ee55b1d/docs/Lecture3/media/cc97acf5ea5e08357a191bcc967acd06.png -------------------------------------------------------------------------------- /docs/Lecture3/media/d1ec37f9eb213c0879dcbd0ec2775fe1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/apachecn/stanford-cs224n-notes-zh/8725301180de9719f10a4241e29e00df9ee55b1d/docs/Lecture3/media/d1ec37f9eb213c0879dcbd0ec2775fe1.png -------------------------------------------------------------------------------- /docs/Lecture3/media/d612c5fc03f9f90f6d9c1f2f65561f4e.png: 
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/apachecn/stanford-cs224n-notes-zh/8725301180de9719f10a4241e29e00df9ee55b1d/docs/Lecture3/media/d612c5fc03f9f90f6d9c1f2f65561f4e.png
--------------------------------------------------------------------------------
/docs/Lecture3/media/d75aa946ba69c5e99840a82583970f2e.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/apachecn/stanford-cs224n-notes-zh/8725301180de9719f10a4241e29e00df9ee55b1d/docs/Lecture3/media/d75aa946ba69c5e99840a82583970f2e.png
--------------------------------------------------------------------------------
/docs/Lecture3/media/da33062ab60813d3c73930b9c96ca721.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/apachecn/stanford-cs224n-notes-zh/8725301180de9719f10a4241e29e00df9ee55b1d/docs/Lecture3/media/da33062ab60813d3c73930b9c96ca721.png
--------------------------------------------------------------------------------
/docs/Lecture3/media/eaea194ebe26bf2a8e69d68cd01e1a78.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/apachecn/stanford-cs224n-notes-zh/8725301180de9719f10a4241e29e00df9ee55b1d/docs/Lecture3/media/eaea194ebe26bf2a8e69d68cd01e1a78.png
--------------------------------------------------------------------------------
/docs/Lecture3/media/eed5783c39bf1a3512f1d22f0032a5f3.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/apachecn/stanford-cs224n-notes-zh/8725301180de9719f10a4241e29e00df9ee55b1d/docs/Lecture3/media/eed5783c39bf1a3512f1d22f0032a5f3.png
--------------------------------------------------------------------------------
/docs/Lecture3/media/f11058bd5fbe80a556f19100c22c7635.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/apachecn/stanford-cs224n-notes-zh/8725301180de9719f10a4241e29e00df9ee55b1d/docs/Lecture3/media/f11058bd5fbe80a556f19100c22c7635.png
--------------------------------------------------------------------------------
/docs/Lecture3/media/f2111a572741c3c6c6c96e352991e4ce.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/apachecn/stanford-cs224n-notes-zh/8725301180de9719f10a4241e29e00df9ee55b1d/docs/Lecture3/media/f2111a572741c3c6c6c96e352991e4ce.png
--------------------------------------------------------------------------------
/docs/Lecture3/media/f6801d8f4f478bc7a7d87c3248cf603c.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/apachecn/stanford-cs224n-notes-zh/8725301180de9719f10a4241e29e00df9ee55b1d/docs/Lecture3/media/f6801d8f4f478bc7a7d87c3248cf603c.png
--------------------------------------------------------------------------------
/docs/Lecture4/Lecture 4.md:
--------------------------------------------------------------------------------
# I. Introduction

This chapter builds on the named-entity recognition (NER) model of Lecture 3 and explains backpropagation through that model. Knowledge from the previous lecture is not restated here; it is used directly.
# II. Main content

Backpropagation is a method that uses the chain rule of differentiation to compute the gradient of the loss with respect to any parameter in the model. To make backpropagation easier to understand, first look at the network in the figure below.

![](media/1.png)

Notation: x_i are the inputs to the network and s is its output; every neuron receives an input and produces an output. The j-th neuron of layer k receives the scalar input ![](media/2.png) and produces the scalar activation ![](media/3.png). We write the backpropagated error computed at ![](media/2.png) as ![](media/4.png). Layer 1 is taken to be the input layer rather than the first hidden layer; for the input layer, ![](media/5.png).

Suppose the loss ![](media/14.png) is positive and we want to update the parameter ![](media/15.png). Observe that ![](media/15.png) participates only in computing ![](media/16.png) and ![](media/17.png). This point is essential for understanding backpropagation: the gradient propagated back to a parameter depends only on the values that parameter touched during the forward computation. In the subsequent forward step, ![](media/17.png) is multiplied by ![](media/18.png) to compute the score. From the max-margin loss we can see: ![](media/19.png)

### Bias updates

Bias terms are mathematically equivalent to the other weights, except that the value they multiply when computing the next layer's neuron input ![](media/16.png) is the constant 1. Hence the gradient for the bias of the i-th neuron in layer k is ![](media/20.png). For example, if in the example above we update ![](media/21.png) instead of ![](media/15.png), the gradient is ![](media/22.png).

We have the error ![](media/20.png) propagating backwards from ![](media/23.png), as shown here: ![](media/24.png)

We propagate this error back to ![](media/27.png) by multiplying ![](media/20.png) with the weight ![](media/26.png) on the path, so the error received at ![](media/27.png) is ![](media/28.png). However, in the forward computation ![](media/27.png) may have fed several neurons of the next layer, as in the figure below; in that case the error of the m-th neuron of layer k must also be propagated back to ![](media/27.png) in the same way.

![](media/25.png)

The error received at ![](media/27.png) is therefore ![](media/29.png), which simplifies to ![](media/30.png). Now that ![](media/27.png) has received its correct error, we multiply it by the local gradient ![](media/31.png) and pass the error information back to the j-th neuron of layer k-1, so the error arriving at ![](media/33.png) is ![](media/32.png).

## Dropout

Dropout is a powerful regularization technique. During training, a random subset of the units is "dropped" with some probability; during testing, the whole network is used to predict. Networks trained this way usually learn more meaningful patterns from the data, are less likely to overfit, and typically achieve higher overall performance. One intuition for why the technique is so effective: dropout essentially trains exponentially many smaller networks at the same time and averages their predictions. To apply dropout, take each layer's output h and keep each neuron with probability p, otherwise setting it to 0; during backpropagation, pass gradients only through the neurons that were kept alive in the forward pass; during testing, use all of the network's neurons in the forward pass. For dropout to be consistent, the expected output of a neuron during testing should be roughly the same as during training, so each neuron's output usually has to be divided by some value at test time (a code sketch appears at the end of this chapter).

## Neural units

The networks so far contained sigmoid units to introduce nonlinearity. In many applications, networks can be designed with other activation functions.

### Sigmoid

![](media/6.png)

![](media/7.png)

### Tanh

![](media/8.png)

![](media/9.png)

### Hard tanh

![](media/10.png)

![](media/11.png)

### ReLU

![](media/12.png)

![](media/13.png)
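The test-time scaling just described is usually implemented the other way around, as "inverted dropout", which scales by 1/p at training time so that the test-time forward pass needs no change at all. A minimal NumPy sketch; the keep probability and sizes are illustrative assumptions:

```python
import numpy as np

rng = np.random.default_rng(0)

def dropout_forward(h, p, train=True):
    """Inverted dropout: keep each unit with probability p.

    Scaling by 1/p at *training* time keeps E[output] equal to h,
    so at test time the full network is used unchanged, instead of
    dividing activations by a constant during testing.
    """
    if not train:
        return h, None
    mask = (rng.random(h.shape) < p) / p   # 0 for dropped units, 1/p for kept ones
    return h * mask, mask

def dropout_backward(grad_out, mask):
    # Gradients flow only through the units kept alive in the forward pass.
    return grad_out * mask

h = rng.standard_normal(5)
out, mask = dropout_forward(h, p=0.8)
print(h)
print(out)
print(dropout_backward(np.ones_like(out), mask))
```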
--------------------------------------------------------------------------------
/docs/Lecture4/cs224n-2019-lecture04-backprop.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/apachecn/stanford-cs224n-notes-zh/8725301180de9719f10a4241e29e00df9ee55b1d/docs/Lecture4/cs224n-2019-lecture04-backprop.pdf
--------------------------------------------------------------------------------
/docs/Lecture4/media/1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/apachecn/stanford-cs224n-notes-zh/8725301180de9719f10a4241e29e00df9ee55b1d/docs/Lecture4/media/1.png
--------------------------------------------------------------------------------
/docs/Lecture4/media/10.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/apachecn/stanford-cs224n-notes-zh/8725301180de9719f10a4241e29e00df9ee55b1d/docs/Lecture4/media/10.png
--------------------------------------------------------------------------------
/docs/Lecture4/media/11.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/apachecn/stanford-cs224n-notes-zh/8725301180de9719f10a4241e29e00df9ee55b1d/docs/Lecture4/media/11.png
--------------------------------------------------------------------------------
/docs/Lecture4/media/12.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/apachecn/stanford-cs224n-notes-zh/8725301180de9719f10a4241e29e00df9ee55b1d/docs/Lecture4/media/12.png
--------------------------------------------------------------------------------
/docs/Lecture4/media/13.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/apachecn/stanford-cs224n-notes-zh/8725301180de9719f10a4241e29e00df9ee55b1d/docs/Lecture4/media/13.png
--------------------------------------------------------------------------------
/docs/Lecture4/media/14.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/apachecn/stanford-cs224n-notes-zh/8725301180de9719f10a4241e29e00df9ee55b1d/docs/Lecture4/media/14.png
--------------------------------------------------------------------------------
/docs/Lecture4/media/15.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/apachecn/stanford-cs224n-notes-zh/8725301180de9719f10a4241e29e00df9ee55b1d/docs/Lecture4/media/15.png
--------------------------------------------------------------------------------
/docs/Lecture4/media/16.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/apachecn/stanford-cs224n-notes-zh/8725301180de9719f10a4241e29e00df9ee55b1d/docs/Lecture4/media/16.png
--------------------------------------------------------------------------------
/docs/Lecture4/media/17.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/apachecn/stanford-cs224n-notes-zh/8725301180de9719f10a4241e29e00df9ee55b1d/docs/Lecture4/media/17.png
--------------------------------------------------------------------------------
/docs/Lecture4/media/18.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/apachecn/stanford-cs224n-notes-zh/8725301180de9719f10a4241e29e00df9ee55b1d/docs/Lecture4/media/18.png
--------------------------------------------------------------------------------
/docs/Lecture4/media/19.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/apachecn/stanford-cs224n-notes-zh/8725301180de9719f10a4241e29e00df9ee55b1d/docs/Lecture4/media/19.png
--------------------------------------------------------------------------------
/docs/Lecture4/media/2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/apachecn/stanford-cs224n-notes-zh/8725301180de9719f10a4241e29e00df9ee55b1d/docs/Lecture4/media/2.png
--------------------------------------------------------------------------------
/docs/Lecture4/media/20.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/apachecn/stanford-cs224n-notes-zh/8725301180de9719f10a4241e29e00df9ee55b1d/docs/Lecture4/media/20.png
--------------------------------------------------------------------------------
/docs/Lecture4/media/21.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/apachecn/stanford-cs224n-notes-zh/8725301180de9719f10a4241e29e00df9ee55b1d/docs/Lecture4/media/21.png
-------------------------------------------------------------------------------- /docs/Lecture4/media/22.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/apachecn/stanford-cs224n-notes-zh/8725301180de9719f10a4241e29e00df9ee55b1d/docs/Lecture4/media/22.png -------------------------------------------------------------------------------- /docs/Lecture4/media/23.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/apachecn/stanford-cs224n-notes-zh/8725301180de9719f10a4241e29e00df9ee55b1d/docs/Lecture4/media/23.png -------------------------------------------------------------------------------- /docs/Lecture4/media/24.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/apachecn/stanford-cs224n-notes-zh/8725301180de9719f10a4241e29e00df9ee55b1d/docs/Lecture4/media/24.png -------------------------------------------------------------------------------- /docs/Lecture4/media/25.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/apachecn/stanford-cs224n-notes-zh/8725301180de9719f10a4241e29e00df9ee55b1d/docs/Lecture4/media/25.png -------------------------------------------------------------------------------- /docs/Lecture4/media/26.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/apachecn/stanford-cs224n-notes-zh/8725301180de9719f10a4241e29e00df9ee55b1d/docs/Lecture4/media/26.png -------------------------------------------------------------------------------- /docs/Lecture4/media/27.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/apachecn/stanford-cs224n-notes-zh/8725301180de9719f10a4241e29e00df9ee55b1d/docs/Lecture4/media/27.png -------------------------------------------------------------------------------- /docs/Lecture4/media/28.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/apachecn/stanford-cs224n-notes-zh/8725301180de9719f10a4241e29e00df9ee55b1d/docs/Lecture4/media/28.png -------------------------------------------------------------------------------- /docs/Lecture4/media/29.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/apachecn/stanford-cs224n-notes-zh/8725301180de9719f10a4241e29e00df9ee55b1d/docs/Lecture4/media/29.png -------------------------------------------------------------------------------- /docs/Lecture4/media/3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/apachecn/stanford-cs224n-notes-zh/8725301180de9719f10a4241e29e00df9ee55b1d/docs/Lecture4/media/3.png -------------------------------------------------------------------------------- /docs/Lecture4/media/30.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/apachecn/stanford-cs224n-notes-zh/8725301180de9719f10a4241e29e00df9ee55b1d/docs/Lecture4/media/30.png -------------------------------------------------------------------------------- /docs/Lecture4/media/31.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/apachecn/stanford-cs224n-notes-zh/8725301180de9719f10a4241e29e00df9ee55b1d/docs/Lecture4/media/31.png
--------------------------------------------------------------------------------
/docs/Lecture4/media/32.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/apachecn/stanford-cs224n-notes-zh/8725301180de9719f10a4241e29e00df9ee55b1d/docs/Lecture4/media/32.png
--------------------------------------------------------------------------------
/docs/Lecture4/media/33.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/apachecn/stanford-cs224n-notes-zh/8725301180de9719f10a4241e29e00df9ee55b1d/docs/Lecture4/media/33.png
--------------------------------------------------------------------------------
/docs/Lecture4/media/4.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/apachecn/stanford-cs224n-notes-zh/8725301180de9719f10a4241e29e00df9ee55b1d/docs/Lecture4/media/4.png
--------------------------------------------------------------------------------
/docs/Lecture4/media/5.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/apachecn/stanford-cs224n-notes-zh/8725301180de9719f10a4241e29e00df9ee55b1d/docs/Lecture4/media/5.png
--------------------------------------------------------------------------------
/docs/Lecture4/media/6.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/apachecn/stanford-cs224n-notes-zh/8725301180de9719f10a4241e29e00df9ee55b1d/docs/Lecture4/media/6.png
--------------------------------------------------------------------------------
/docs/Lecture4/media/7.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/apachecn/stanford-cs224n-notes-zh/8725301180de9719f10a4241e29e00df9ee55b1d/docs/Lecture4/media/7.png
--------------------------------------------------------------------------------
/docs/Lecture4/media/8.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/apachecn/stanford-cs224n-notes-zh/8725301180de9719f10a4241e29e00df9ee55b1d/docs/Lecture4/media/8.png
--------------------------------------------------------------------------------
/docs/Lecture4/media/9.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/apachecn/stanford-cs224n-notes-zh/8725301180de9719f10a4241e29e00df9ee55b1d/docs/Lecture4/media/9.png
--------------------------------------------------------------------------------
/docs/Lecture5/Lecture 5.md:
--------------------------------------------------------------------------------
# I. Introduction

This chapter covers syntactic structure analysis, of which there are two main kinds: constituency parsing and dependency parsing. The chapter describes the latter in detail.
# II. Dependency Parsing

## Basic concepts

A dependency structure shows the dependency relations between words. It has two common presentations. One uses arrows to mark the dependencies, sometimes labeling each arrow with the specific grammatical relation, such as subject or object:

![](media/1.png)

The other arranges it as a tree, the dependency tree graph:

![](media/2.png)

Dependency parsing can be seen as the task of constructing, for an input sentence S = w0 w1 ... wn, the corresponding dependency tree graph. One effective way to build this tree is transition-based dependency parsing.

## Transition-based Dependency Parsing

Transition-based dependency parsing can be viewed as a state machine. For S = w0 w1 ... wn, a state consists of three parts (σ, β, A): σ is a stack of words wi from S; β is a buffer of words wi from S; and A is the set of dependency arcs built so far, each of the form (wi, r, wj), where r labels the dependency relation.

There are three kinds of transitions between states (a runnable sketch of this transition system appears at the end of the chapter):

1. SHIFT: move the first word of the buffer onto the stack.

2. LEFT-ARC: add (wj, r, wi) to the arc set A, where wi is the second-topmost word on the stack and wj the topmost, and remove wi from the stack; the top word becomes the head of the word beneath it.

3. RIGHT-ARC: add (wi, r, wj) to the arc set A, where wi is the second-topmost word on the stack and wj the topmost, and remove wj from the stack; the word beneath becomes the head of the top word.

![](media/5.png)

These three operations are applied repeatedly until the initial state reaches the final state. Since LEFT-ARC and RIGHT-ARC each come in |R| varieties (|R| being the number of relation labels r), choosing the next transition is a classification problem with 2|R| + 1 classes, which can be solved with traditional machine-learning methods such as SVMs.

## Evaluation

Once we have a dependency-parsing model, we can evaluate it. There are two metrics: LAS (labeled attachment score), under which an arc counts as correct only if both its direction and its grammatical relation label are correct, and UAS (unlabeled attachment score), under which only the arc's direction has to be correct.

![](media/3.png)

The figure illustrates LAS.

## Neural Dependency Parsing

A neural dependency parser typically uses the following input features:

1. the words on the stack and in the buffer together with their dependent words, with tokens extracted based on their stack/buffer positions (in the full model, their part-of-speech tags and arc labels are used as well):

![](media/4.png)

2. These tokens are converted to vectors and concatenated to form the input layer, which passes through several nonlinear hidden layers and finally a softmax layer that gives the probability of each transition class:

![](media/6.png)

With such a simple feed-forward neural network we can reduce feature engineering and improve accuracy at the same time.
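Here is the promised runnable sketch of the arc-standard transition system described above. The transition sequence is hard-coded; in a real parser the 2|R|+1-way classifier (an SVM or the neural network above) would choose each transition. The example sentence and relation labels are illustrative assumptions.

```python
def parse(words, transitions):
    """Apply a sequence of SHIFT / LEFT-ARC(r) / RIGHT-ARC(r) transitions."""
    stack, buffer, arcs = ["ROOT"], list(words), []
    for t in transitions:
        if t == "SHIFT":
            stack.append(buffer.pop(0))        # move first buffer word onto the stack
        elif t.startswith("LEFT-ARC"):
            head, dep = stack[-1], stack.pop(-2)
            arcs.append((head, t[9:-1], dep))  # (w_j, r, w_i): top governs second-top
        elif t.startswith("RIGHT-ARC"):
            dep = stack.pop()
            head = stack[-1]
            arcs.append((head, t[10:-1], dep)) # (w_i, r, w_j): second-top governs top
    return arcs

# "I ate fish": nsubj(ate, I), obj(ate, fish), root(ROOT, ate)
print(parse(["I", "ate", "fish"],
            ["SHIFT", "SHIFT", "LEFT-ARC(nsubj)", "SHIFT",
             "RIGHT-ARC(obj)", "RIGHT-ARC(root)"]))
```

Running it prints the three arcs; the final state (stack containing only ROOT, empty buffer) is the terminal state of the state machine.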
--------------------------------------------------------------------------------
/docs/Lecture5/cs224n-2019-lecture05-dep-parsing.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/apachecn/stanford-cs224n-notes-zh/8725301180de9719f10a4241e29e00df9ee55b1d/docs/Lecture5/cs224n-2019-lecture05-dep-parsing.pdf
--------------------------------------------------------------------------------
/docs/Lecture5/media/1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/apachecn/stanford-cs224n-notes-zh/8725301180de9719f10a4241e29e00df9ee55b1d/docs/Lecture5/media/1.png
--------------------------------------------------------------------------------
/docs/Lecture5/media/2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/apachecn/stanford-cs224n-notes-zh/8725301180de9719f10a4241e29e00df9ee55b1d/docs/Lecture5/media/2.png
--------------------------------------------------------------------------------
/docs/Lecture5/media/3.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/apachecn/stanford-cs224n-notes-zh/8725301180de9719f10a4241e29e00df9ee55b1d/docs/Lecture5/media/3.png
--------------------------------------------------------------------------------
/docs/Lecture5/media/4.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/apachecn/stanford-cs224n-notes-zh/8725301180de9719f10a4241e29e00df9ee55b1d/docs/Lecture5/media/4.png
--------------------------------------------------------------------------------
/docs/Lecture5/media/5.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/apachecn/stanford-cs224n-notes-zh/8725301180de9719f10a4241e29e00df9ee55b1d/docs/Lecture5/media/5.png
--------------------------------------------------------------------------------
/docs/Lecture5/media/6.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/apachecn/stanford-cs224n-notes-zh/8725301180de9719f10a4241e29e00df9ee55b1d/docs/Lecture5/media/6.png
--------------------------------------------------------------------------------
/docs/Lecture6/Lecture 6.md:
--------------------------------------------------------------------------------
# I. Introduction

Lecture 6 focuses on language models and the application of recurrent neural networks to language modeling.

A language model computes the probability that a particular sequence of words occurs. The probability of a sequence of m words {w1, ..., wm} is written P(w1, ..., wm). A word in a sentence is preceded by some number of words, and the behavior of a word wi changes with its position in the document; in practice P(w1, ..., wm) is usually conditioned on a window of n previous words rather than on everything that came before.

![](media/1.png)

# II. Classic language models

## The n-gram model

An n-gram is simply n consecutive words. For the passage "the students opened their __", we split the passage into consecutive chunks:

![](media/2.png)

and so on. The core idea of the model is that the probability of an n-gram should be proportional to how often it occurs, under the assumption that P(x_{t+1}) depends only on the previous n-1 words, i.e.,

![](media/3.png)

where the counts are obtained by counting occurrences of the corresponding n-grams over a large body of text.

This model has two main problems: sparsity and storage.

1. Sparsity: some numerator or denominator combinations may never appear in our text, giving them a count of zero, and sparsity gets worse as n grows.

2. Storage: we must store the counts of every n-gram, so the model's storage requirements also grow with n.

These problems cap how large n can be; but if n is too small, the model cannot capture the influence of even moderately distant words on the current word, which severely limits its ability to handle the many language tasks that need relatively long-range context to predict the current word.

## Window-based Neural Language Model

This model learns distributed representations of words, together with a probability function for words expressed in terms of those representations. The figure below shows the corresponding network architecture; in this model the input vectors are used in both the hidden layer and the output layer.

Network architecture:

![](media/5.png)

The formula below shows the parameters of the softmax function built on a standard tanh hidden layer, plus the linear term W3 x + b3 that captures all n preceding input word vectors:

![](media/6.png)

The weight matrix W1 is applied to the word vectors (solid green arrows in the figure), W2 is applied to the hidden layer (also green arrows), and W3 is applied to the word vectors (green arrows).

Compared with the n-gram model, this solves the sparsity and storage problems, but some issues remain: the window size is fixed; enlarging the window enlarges the matrix W; and x1 and x2 are multiplied by different columns of W, so no parameters are shared.

Simplified network diagram:

![](media/7.png)

## Recurrent Neural Networks (RNN)

Traditional models can condition the language model only on a bounded window of the previous n words. Recurrent neural networks are different: an RNN can condition the language model on all of the preceding words in the corpus. Its basic structure is shown below:

![](media/8.png)

Each hidden layer has some number of units; each unit applies a linear matrix operation to its input vector followed by a nonlinearity (e.g., tanh) to produce an output. At every time step the hidden layer has two inputs: the previous hidden state h_{t-1} and the current input x_t. Multiplying h_{t-1} by the weight matrix W^(hh) and x_t by W^(hx) gives the current hidden state h_t; multiplying h_t by W^(S) and applying softmax over the whole vocabulary yields the prediction ŷ of the next word, as in the formula (a code sketch appears at the end of this chapter):

![](media/9.png)

Training an RNN likewise relies on a large amount of text: at every time step t, compute the cross-entropy loss between the model's predicted output ŷ^(t) and the true value y^(t), namely x^(t+1):

![](media/10.png)

For a corpus of size T, the total loss is the average of all the cross-entropy losses:

![](media/11.png)

The inputs and outputs of each unit are shown below:

![](media/12.png)

The RNN model:

![](media/13.png)

The model's evaluation metric:

![](media/14.png)

### Summary

Advantages of RNNs:

1. They can process sequences of arbitrary length.

2. Model size does not grow with longer input sequences.

3. Computation at time step t can, in theory, use information from many steps back.

4. The same weights are applied at every time step, so inputs are processed symmetrically.

Disadvantages of RNNs:

1. Computation is slow: each time step depends on the previous one, so it cannot be parallelized.

2. In practice, vanishing and exploding gradients make it hard to exploit information from many steps back.
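To make the recurrence concrete, here is the promised minimal NumPy sketch of one forward pass of the RNN language model: h_t = tanh(W^(hh) h_{t-1} + W^(hx) x_t) and ŷ_t = softmax(W^(S) h_t). All sizes and word indices are toy assumptions.

```python
import numpy as np

rng = np.random.default_rng(0)
V, d, Dh = 8, 5, 6          # toy vocabulary, embedding and hidden sizes (assumptions)

E    = rng.standard_normal((d, V)) * 0.1    # word embeddings, one column per word
W_hh = rng.standard_normal((Dh, Dh)) * 0.1  # hidden-to-hidden weights
W_hx = rng.standard_normal((Dh, d)) * 0.1   # input-to-hidden weights
W_S  = rng.standard_normal((V, Dh)) * 0.1   # hidden-to-vocabulary weights

def softmax(z):
    e = np.exp(z - z.max())
    return e / e.sum()

def rnn_lm(word_ids):
    """Run the RNN over a sentence; return per-step next-word distributions."""
    h = np.zeros(Dh)                         # h_0
    preds = []
    for w in word_ids:                       # the SAME weights at every time step
        x = E[:, w]                          # current input word vector x_t
        h = np.tanh(W_hh @ h + W_hx @ x)     # h_t = tanh(W_hh h_{t-1} + W_hx x_t)
        preds.append(softmax(W_S @ h))       # y_hat_t over the whole vocabulary
    return preds

preds = rnn_lm([1, 4, 2, 7])
# Cross-entropy loss at step t against the true next word x_{t+1}; here we
# pretend the true word after word 1 is word 4.
print(-np.log(preds[0][4]))
```

Note how the loop makes both properties from the summary visible: arbitrary sequence length, and the sequential dependence of h_t on h_{t-1} that prevents parallelization.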
--------------------------------------------------------------------------------
/docs/Lecture6/cs224n-2019-lecture06-rnnlm.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/apachecn/stanford-cs224n-notes-zh/8725301180de9719f10a4241e29e00df9ee55b1d/docs/Lecture6/cs224n-2019-lecture06-rnnlm.pdf
--------------------------------------------------------------------------------
/docs/Lecture6/media/1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/apachecn/stanford-cs224n-notes-zh/8725301180de9719f10a4241e29e00df9ee55b1d/docs/Lecture6/media/1.png
--------------------------------------------------------------------------------
/docs/Lecture6/media/10.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/apachecn/stanford-cs224n-notes-zh/8725301180de9719f10a4241e29e00df9ee55b1d/docs/Lecture6/media/10.png
--------------------------------------------------------------------------------
/docs/Lecture6/media/11.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/apachecn/stanford-cs224n-notes-zh/8725301180de9719f10a4241e29e00df9ee55b1d/docs/Lecture6/media/11.png
--------------------------------------------------------------------------------
/docs/Lecture6/media/12.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/apachecn/stanford-cs224n-notes-zh/8725301180de9719f10a4241e29e00df9ee55b1d/docs/Lecture6/media/12.png
--------------------------------------------------------------------------------
/docs/Lecture6/media/13.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/apachecn/stanford-cs224n-notes-zh/8725301180de9719f10a4241e29e00df9ee55b1d/docs/Lecture6/media/13.png
--------------------------------------------------------------------------------
/docs/Lecture6/media/14.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/apachecn/stanford-cs224n-notes-zh/8725301180de9719f10a4241e29e00df9ee55b1d/docs/Lecture6/media/14.png
--------------------------------------------------------------------------------
/docs/Lecture6/media/2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/apachecn/stanford-cs224n-notes-zh/8725301180de9719f10a4241e29e00df9ee55b1d/docs/Lecture6/media/2.png
--------------------------------------------------------------------------------
/docs/Lecture6/media/3.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/apachecn/stanford-cs224n-notes-zh/8725301180de9719f10a4241e29e00df9ee55b1d/docs/Lecture6/media/3.png
--------------------------------------------------------------------------------
/docs/Lecture6/media/4.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/apachecn/stanford-cs224n-notes-zh/8725301180de9719f10a4241e29e00df9ee55b1d/docs/Lecture6/media/4.png
--------------------------------------------------------------------------------
/docs/Lecture6/media/5.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/apachecn/stanford-cs224n-notes-zh/8725301180de9719f10a4241e29e00df9ee55b1d/docs/Lecture6/media/5.png
--------------------------------------------------------------------------------
/docs/Lecture6/media/6.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/apachecn/stanford-cs224n-notes-zh/8725301180de9719f10a4241e29e00df9ee55b1d/docs/Lecture6/media/6.png
--------------------------------------------------------------------------------
/docs/Lecture6/media/7.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/apachecn/stanford-cs224n-notes-zh/8725301180de9719f10a4241e29e00df9ee55b1d/docs/Lecture6/media/7.png
--------------------------------------------------------------------------------
/docs/Lecture6/media/8.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/apachecn/stanford-cs224n-notes-zh/8725301180de9719f10a4241e29e00df9ee55b1d/docs/Lecture6/media/8.png
--------------------------------------------------------------------------------
/docs/Lecture6/media/9.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/apachecn/stanford-cs224n-notes-zh/8725301180de9719f10a4241e29e00df9ee55b1d/docs/Lecture6/media/9.png
--------------------------------------------------------------------------------
/docs/lecture 20/Lecture20_part1.md:
--------------------------------------------------------------------------------
I. The past of deep learning in NLP
===================================

![](media/cc8b2e483b89e4e7f585053d9ae16901.png)

Many ideas now considered absolutely core did not exist at the time: Seq2Seq, attention mechanisms, large-scale question-answering / reading-comprehension datasets, and even frameworks such as TensorFlow or PyTorch.

**Seq2Seq**

Seq2seq is an encoder-decoder network: its input is a sequence and its output is also a sequence. The encoder turns a variable-length input sequence into a fixed-length vector representation, and the decoder turns that fixed-length vector into a variable-length target sequence.

**Attention**

The essence of the attention function can be described as mapping a query to a series of key-value pairs.

II. The future of deep learning in NLP
======================================

![](media/b28cc037a458e62e70e28df3da4db868.png)

1. A key idea of this course: exploit unlabeled examples when training NLP systems.

2. Scaling up deep learning models: OpenAI and GPT-2.

3. The social impact of NLP.

4. The important role NLP will play in future research areas.

III. The growth of deep learning
================================

![](media/5ccf67442a3c6f56f661544b9718d5be.png)

The key to deep learning's rapid progress in recent years is its improved ability to scale: increase the model size and the associated datasets, and accuracy improves enormously. A great deal of neural-network research already existed in the 1980s and 1990s.

![](media/5b6b327867947bbaacf320e8183363b8.png)

![](media/8b074b58ce68a16643e0e3ba231ced83.png)

Deep learning shines in image recognition, machine translation, and games. For image recognition, the ImageNet dataset has 14 million images; machine-translation datasets usually have millions of examples; and for games one can generate as much training data as desired, simply by running the agent through the game again and again.

IV. NLP datasets
================

![](media/00594abd6ebca941e5e33b1d1ecf7242.png)

Most NLP data that exists is **English only**. Yet English is the first language of less than 10% of the world's population, so if you look across the full range of languages that exist, the small-dataset problem is only compounded.

Even while limited by data in this way, we still want to exploit deep learning's scaling and train the largest possible models. The recently successful and popular solution is to use **unlabeled data**: unlike labeled data, raw language is very easy to obtain, whereas labels may require annotation by experts such as linguists.

V. Using unlabeled data for translation
=======================================

Let us apply the idea of exploiting unlabeled data to improving NLP models on the machine-translation task.

![](media/f8a9cc51dd30a1550b288b3304e510a4.png)

Machine translation really does need fairly large datasets, datasets that NLP researchers annotated in order to train their models. Training is limited by that labeled data, but unlabeled data is easy to find: we can look at a piece of text, accurately decide which language it is in, and train a classifier to do exactly that.

![](media/a45da67f31b5f0894dd05ee257c7a457.png)

![](media/9a8bb5e3c768fa5db9036117cc7f75bb.png)

**Pre-training**

Pre-training here means pre-training via language modeling.

To translate from one language into another, collect a large corpus for each of the two languages and train two language models, one per language; finally, use those pre-trained language models as the initialization of the machine-translation system.

The encoder is initialized with the weights of the input-language model, and the decoder with the weights of the target-language model; this improves the model's performance.

![](media/70bd9c1c9f1b8aeb64495385072364cf.png)

![](media/0c13b354bd2f25497ff75e7433a8db0b.png)

The problem with pre-training: the two separate language models run over their unlabeled corpora without ever actually interacting with each other.

**Self-training**

Treat an original monolingual sentence and its machine-produced translation as if the translation had been provided by a human, and train the machine-learning model on this example in the normal way.

**Back-translation**

![](media/49490589f03b3454efff6aceed677d59.png)

Besides the translation system from the source language to the target language, also train a model from the target language back to the source language (a schematic sketch follows below).

![](media/a963ae4807b619f2b301b9566069f4bc.png)

This is Facebook's English-to-German system: they used 5 million labeled sentence pairs, and also 230 million monolingual sentences with no translations. You can see that, compared with the previous state of the art in machine translation, they obtain a 6-BLEU-point improvement.
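Here is the promised schematic sketch of one round of back-translation as data augmentation. The `StubMT` class and its `translate`/`train_step` methods are placeholders invented for illustration; the lecture does not prescribe an API, and a real system would be a full seq2seq model.

```python
class StubMT:
    """Stand-in for a seq2seq translation model (hypothetical API)."""
    def translate(self, sentence):
        return "<translation of: %s>" % sentence
    def train_step(self, src, tgt):
        pass  # a real model would take a gradient step here

def back_translation_round(fwd, bwd, parallel, mono_src, mono_tgt):
    """fwd: source->target model, bwd: target->source model.

    parallel: (src, tgt) human-translated pairs.
    mono_src/mono_tgt: monolingual sentences with no translations.
    """
    # 1) The reverse model invents source sides for monolingual target text.
    synthetic_fwd = [(bwd.translate(t), t) for t in mono_tgt]
    # 2) Train the forward model on real plus synthetic pairs; the synthetic
    #    source side may be noisy, but the target side is genuine text, which
    #    is exactly what the decoder must learn to produce.
    for src, tgt in parallel + synthetic_fwd:
        fwd.train_step(src, tgt)
    # 3) Symmetrically: the forward model invents target sides for monolingual
    #    source text, which trains the reverse model.
    synthetic_bwd = [(fwd.translate(s), s) for s in mono_src]
    for tgt, src in synthetic_bwd:
        bwd.train_step(tgt, src)

fwd, bwd = StubMT(), StubMT()
back_translation_round(fwd, bwd,
                       parallel=[("a house", "ein Haus")],
                       mono_src=["the cat sleeps"],
                       mono_tgt=["der Hund bellt"])
```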
These are English-to-German results from Facebook: they used 5 million labeled sentence pairs, plus roughly 230 million monolingual sentences with no translations. Compared with the previous state of the art in machine translation, this gives about a 6 BLEU point improvement.
--------------------------------------------------------------------------------
/docs/lecture 20/Lecture20_part2.md:
--------------------------------------------------------------------------------
1. Bilingual Data
===========

![](media/d976819e250653f03cf7a4a9439cac96.png)

The goal here: given a word in one language, find its translation, without using any labeled data.

The approach for attacking this task is called **cross-lingual embeddings**.

We want to learn word vectors for the words of both languages such that the vectors keep all the good properties we already know word vectors have, while each word's vector also ends up close to the vector of its translation.

![](media/bfa990c95fe70d27430257291a8e5793.png)

The key method for solving this problem: **word2vec**.

**Embedding spaces have a great deal of regular structure, and that regularity can be exploited to find an alignment between the two embedding spaces.**

![](media/5e21808db8af3416351121f745477256.png)

![](media/7d41ff65016bd11b9fbeb32918f13b0b.png)

This picture shows two sets of word embeddings, English words in red and Italian words in blue. Although the two vector spaces look very different from each other at first, you can see that they have a very similar structure, so we can try to rotate the set of English embeddings until it matches up with the Italian embeddings.

Mathematically, this means learning a matrix W: take the vector for cat in English, multiply it by W, and you should end up with the vector for the Italian gatto. W is constrained to be orthogonal, which geometrically means that W can only rotate the vectors in X.

2. How to Learn W
============

![](media/22fd011d4ca155b84b651df09e7aa675.png)

There are actually many techniques for learning this W matrix; one of them is called **adversarial training**.

It works as follows:

Besides trying to learn the W matrix, we also learn a model called the discriminator. It takes a vector and predicts whether that vector was originally an English word embedding or originally an Italian one.

Without a W matrix this is a very easy task for the discriminator, because the English and Italian embeddings separate cleanly. But if learning W succeeds in aligning all the embeddings, the discriminator will never be able to do well.

During training, first train the discriminator a little, making it as good as possible at telling English and Italian words apart; then train W, whose objective is to confuse the discriminator as much as possible.

![](media/d8b5c15ef6c09b4e5e6221c08a77a117.png)
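As a rough sketch of this alternating procedure (in the spirit of the Word Translation without Parallel Data paper included in this folder), here is what one update step might look like in PyTorch. The embedding dimension, learning rates, discriminator architecture, and the orthogonalization constant `beta` are illustrative assumptions, not values from the lecture.

```python
import torch
import torch.nn as nn

d = 300  # embedding dimension (assumed, e.g. fastText-sized vectors)
W = nn.Linear(d, d, bias=False)  # the mapping we want to learn
disc = nn.Sequential(nn.Linear(d, 256), nn.ReLU(), nn.Linear(256, 1))

opt_w = torch.optim.SGD(W.parameters(), lr=0.1)
opt_d = torch.optim.SGD(disc.parameters(), lr=0.1)
bce = nn.BCEWithLogitsLoss()

def adversarial_step(en_batch, it_batch, beta=0.01):
    """One alternating update: train the discriminator, then train W to fool it."""
    # 1) Discriminator step: mapped English gets label 1, Italian gets label 0.
    logits = torch.cat([disc(W(en_batch).detach()), disc(it_batch)])
    labels = torch.cat([torch.ones(len(en_batch), 1), torch.zeros(len(it_batch), 1)])
    opt_d.zero_grad()
    bce(logits, labels).backward()
    opt_d.step()

    # 2) Mapping step: update W so the discriminator labels mapped English
    #    as Italian, i.e. W is rewarded for confusing the discriminator.
    opt_w.zero_grad()
    bce(disc(W(en_batch)), torch.zeros(len(en_batch), 1)).backward()
    opt_w.step()

    # 3) Keep W approximately orthogonal, so it stays (close to) a rotation.
    with torch.no_grad():
        M = W.weight
        W.weight.copy_((1 + beta) * M - beta * M @ M.t() @ M)
```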
**The seq2seq model**

One change is made to the standard seq2seq model: we use the same encoder and the same decoder regardless of the input and output language, and the model has vector representations for both English words and French words.

The decoder does need some information about which language it should generate. The way to do this is to feed in a special token, shown in brackets, that tells the model the target language. At the bottom, a French sentence is fed in as input and the model just generates French as output, which means it simply reproduces the original input sequence.

![](media/29d3c9fa71562a0d68ed4568cfa3b8fc.png)

3. How to Train the seq2seq Model
======================

**Denoising autoencoder**

Take a sentence, corrupt it, and train the model to regenerate what the sentence actually was before it was corrupted. One intuition for why this is a useful training objective: with an encoder-decoder that has no attention, the encoder converts the entire source sentence into a single vector, and what the autoencoding objective does is make sure that vector contains all the information about the sentence, so the original sentence can be recovered from the vector the encoder produces.

![](media/36429d6dd9868d07d9d92cc712d26690.png)

We can also use sentences with no labels and no human translations at all: given, say, a French sentence, translate it into English using the current state of the model, then ask the model to translate that English back into French.

![](media/16d1c888725fe434f06485e4b8a0a47e.png)

Use the cross-lingual embeddings to initialize the machine translation model, together with the shared encoder. At the top there is just an autoencoding objective: because the embeddings look very similar across languages, and because the encoder is the same, the model's representation of this French sentence should actually be very similar to its representation of the English sentence. So when that representation is passed to the decoder, we hope to get the same output as before.

![](media/ed979ece44c470f0e90d399c89168997.png)

Another way to think about it: we want the model to encode sentences so that the representation acts like a kind of universal interlingua. In our autoencoding example and our back-translation example, the target sequence here is the same.

![](media/87ba75b9ceeb74f88f5d92abbec5c1fb.png)

Unsurprisingly, given a large amount of supervised data, a supervised machine translation model works better than an unsupervised one. But the unsupervised model still does remarkably well.

![](media/aa7868f8bd9dd5d3f8ebdd08e413d78c.png)

Another nice thing you can do with an unsupervised machine translation model is **attribute transfer**.

![](media/94d73800652ad75fbe429812a903ebab.png)

If you look at a different language pair, say English to Turkish, where the linguistics of the two languages are really quite different, these methods still work to some degree (they get maybe five BLEU points), but nowhere near as well as in the other settings.

There is still a huge gap to purely supervised learning.
--------------------------------------------------------------------------------
/docs/lecture 20/Word_Translation_without_Parallel_Data.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/apachecn/stanford-cs224n-notes-zh/8725301180de9719f10a4241e29e00df9ee55b1d/docs/lecture 20/Word_Translation_without_Parallel_Data.pdf
--------------------------------------------------------------------------------
/docs/lecture 20/lecture20_part3.md:
--------------------------------------------------------------------------------
1. BERT
=======

![](media/1fc8b424ade235725a60af4f056558b7.png)

This is regular BERT: take some English sentences in which certain words have been deleted, and ask the BERT model to fill in the blanks, i.e. predict those words.

Google has in fact already trained a multilingual BERT: concatenate corpora from a large number of different languages and train a single model on all of them. More recently, Facebook proposed an extension of this that combines the LM training objective with translation.

In this setting the model is given an English sequence and a French sequence, some words are deleted, and the model is asked to fill them in, which pushes it to better understand the relationship between the two languages.

![](media/863d03b91f0a5659e4f8e2a054936e45.png)

So, just as BERT gets used for other tasks in NLP, you can take this cross-lingual BERT and use it as the initialization of an unsupervised machine translation system, gaining about 10 BLEU points and making unsupervised machine translation genuinely workable.

2. Huge Models and GPT-2
========================

![](media/707177c98672fbd53c9be53ac13cd78c.png)

First, here are some NLP models of different sizes. A few years ago a standard medium-sized LSTM model had roughly 10 million parameters; GPT-2, from the OpenAI paper, is orders of magnitude larger than that, at about 1.5 billion parameters. (Of course, synapses in a brain and weights in a neural network are completely different things.)

![](media/7429800d68f22424891570eaecd970b3.png)

The chart shows time on the x-axis and, on a log scale on the y-axis, the number of petaFLOPs used to train each model. At least for now, the trend is that the compute used to train machine learning models is growing exponentially.

![](media/3017ff5e27e32edc63d73a563f988a25.png)

These results come from a generative adversarial network for vision that was trained on a great deal of data at very large scale, a model whose size sits somewhere between ELMo and BERT. If you are curious, see https://thispersondoesnotexist.com/.

![](media/883a1f598728ca6deb4101b097107b21.png)

This is recent work from Google, where they trained an ImageNet model with 500 million parameters. The chart shows the number of parameters, log-scaled, on the x-axis and ImageNet accuracy on the y-axis: the large models perform better, and the trend seems to be that accuracy grows with the logarithm of the model size.

![](media/49a12531f44200f697663ba7da938af0.png)

Hardware plays a large part in scaling models up and training them; in particular, more and more companies are developing hardware specifically for deep learning. The other way to scale models is to exploit parallelism.

One kind is data parallelism: each GPU holds a copy of the model, the training data is split into mini-batches, and those mini-batches are fed to the copies, so the model trains faster.

The other kind is model parallelism: here you actually split the model itself across multiple compute units.

![](media/1e5d555592b54279f9571143f2a1bc1a.png)

![](media/4698fa83c1a4f0c5fdde96eccea1648b.png)

With a truly huge language model like GPT-2, you can do language modeling and run it on the standard benchmarks. Normally, to evaluate a language model on, say, the Penn Treebank, you would first train on that dataset and then evaluate. In this case GPT-2, simply because it has seen so much text and is such a large model, outperforms the prior state of the art on a range of language modeling benchmarks even though it was never trained on their data.

![](media/52313442e024b1d78d8f7fe85bb1c83c.png)

Zero-shot learning means attempting a task without ever training on it: design a prompt to feed into the language model, then let it generate from there, hoping that it generates language relevant to the task you want to solve.

![](media/24daaa12a59b6f4adf3446cc556c055c.png)

The x-axis is model size on a log scale, the y-axis is accuracy, and the dashed lines correspond to existing work on these tasks.

![](media/b3d54f5f91d45e47215ce3a4a8219dbd.png)

Historically in NLP, if you wanted to bring world knowledge into an NLP system, you needed something like a large database of facts. A model can instead pick up some world knowledge just by reading a large amount of text, without that knowledge ever being put into it explicitly.
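To make the zero-shot recipe concrete, here is a minimal sketch using the Hugging Face transformers library; the toolkit choice is ours, not the lecture's, and the 'TL;DR:' prompt is the summarization trick reported in the GPT-2 paper.

```python
# Zero-shot summarization with a pretrained GPT-2, via prompt design:
# appending "TL;DR:" turns plain next-word prediction into summarization.
from transformers import GPT2LMHeadModel, GPT2Tokenizer

tokenizer = GPT2Tokenizer.from_pretrained("gpt2")
model = GPT2LMHeadModel.from_pretrained("gpt2")

article = "Some long news article whose summary we want ..."  # placeholder text
prompt = article + "\nTL;DR:"

input_ids = tokenizer.encode(prompt, return_tensors="pt")
output = model.generate(input_ids, max_new_tokens=60, do_sample=True, top_k=40)

# Decode only the continuation, i.e. the model's attempt at a summary.
print(tokenizer.decode(output[0][input_ids.shape[1]:], skip_special_tokens=True))
```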
--------------------------------------------------------------------------------
/docs/lecture 20/media/00594abd6ebca941e5e33b1d1ecf7242.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/apachecn/stanford-cs224n-notes-zh/8725301180de9719f10a4241e29e00df9ee55b1d/docs/lecture 20/media/00594abd6ebca941e5e33b1d1ecf7242.png
--------------------------------------------------------------------------------
/docs/lecture 20/media/0c13b354bd2f25497ff75e7433a8db0b.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/apachecn/stanford-cs224n-notes-zh/8725301180de9719f10a4241e29e00df9ee55b1d/docs/lecture 20/media/0c13b354bd2f25497ff75e7433a8db0b.png
--------------------------------------------------------------------------------
/docs/lecture 20/media/16d1c888725fe434f06485e4b8a0a47e.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/apachecn/stanford-cs224n-notes-zh/8725301180de9719f10a4241e29e00df9ee55b1d/docs/lecture 20/media/16d1c888725fe434f06485e4b8a0a47e.png
--------------------------------------------------------------------------------
/docs/lecture 20/media/1e5d555592b54279f9571143f2a1bc1a.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/apachecn/stanford-cs224n-notes-zh/8725301180de9719f10a4241e29e00df9ee55b1d/docs/lecture 20/media/1e5d555592b54279f9571143f2a1bc1a.png
--------------------------------------------------------------------------------
/docs/lecture 20/media/1fc8b424ade235725a60af4f056558b7.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/apachecn/stanford-cs224n-notes-zh/8725301180de9719f10a4241e29e00df9ee55b1d/docs/lecture 20/media/1fc8b424ade235725a60af4f056558b7.png
--------------------------------------------------------------------------------
/docs/lecture 20/media/22fd011d4ca155b84b651df09e7aa675.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/apachecn/stanford-cs224n-notes-zh/8725301180de9719f10a4241e29e00df9ee55b1d/docs/lecture 20/media/22fd011d4ca155b84b651df09e7aa675.png
--------------------------------------------------------------------------------
/docs/lecture 20/media/24daaa12a59b6f4adf3446cc556c055c.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/apachecn/stanford-cs224n-notes-zh/8725301180de9719f10a4241e29e00df9ee55b1d/docs/lecture 20/media/24daaa12a59b6f4adf3446cc556c055c.png
--------------------------------------------------------------------------------
/docs/lecture 20/media/29d3c9fa71562a0d68ed4568cfa3b8fc.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/apachecn/stanford-cs224n-notes-zh/8725301180de9719f10a4241e29e00df9ee55b1d/docs/lecture 20/media/29d3c9fa71562a0d68ed4568cfa3b8fc.png
--------------------------------------------------------------------------------
/docs/lecture 20/media/3017ff5e27e32edc63d73a563f988a25.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/apachecn/stanford-cs224n-notes-zh/8725301180de9719f10a4241e29e00df9ee55b1d/docs/lecture 20/media/3017ff5e27e32edc63d73a563f988a25.png
--------------------------------------------------------------------------------
/docs/lecture 20/media/36429d6dd9868d07d9d92cc712d26690.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/apachecn/stanford-cs224n-notes-zh/8725301180de9719f10a4241e29e00df9ee55b1d/docs/lecture 20/media/36429d6dd9868d07d9d92cc712d26690.png
--------------------------------------------------------------------------------
/docs/lecture 20/media/4698fa83c1a4f0c5fdde96eccea1648b.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/apachecn/stanford-cs224n-notes-zh/8725301180de9719f10a4241e29e00df9ee55b1d/docs/lecture 20/media/4698fa83c1a4f0c5fdde96eccea1648b.png
--------------------------------------------------------------------------------
/docs/lecture 20/media/49490589f03b3454efff6aceed677d59.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/apachecn/stanford-cs224n-notes-zh/8725301180de9719f10a4241e29e00df9ee55b1d/docs/lecture 20/media/49490589f03b3454efff6aceed677d59.png
--------------------------------------------------------------------------------
/docs/lecture 20/media/49a12531f44200f697663ba7da938af0.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/apachecn/stanford-cs224n-notes-zh/8725301180de9719f10a4241e29e00df9ee55b1d/docs/lecture 20/media/49a12531f44200f697663ba7da938af0.png -------------------------------------------------------------------------------- /docs/lecture 20/media/52313442e024b1d78d8f7fe85bb1c83c.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/apachecn/stanford-cs224n-notes-zh/8725301180de9719f10a4241e29e00df9ee55b1d/docs/lecture 20/media/52313442e024b1d78d8f7fe85bb1c83c.png -------------------------------------------------------------------------------- /docs/lecture 20/media/5b6b327867947bbaacf320e8183363b8.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/apachecn/stanford-cs224n-notes-zh/8725301180de9719f10a4241e29e00df9ee55b1d/docs/lecture 20/media/5b6b327867947bbaacf320e8183363b8.png -------------------------------------------------------------------------------- /docs/lecture 20/media/5ccf67442a3c6f56f661544b9718d5be.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/apachecn/stanford-cs224n-notes-zh/8725301180de9719f10a4241e29e00df9ee55b1d/docs/lecture 20/media/5ccf67442a3c6f56f661544b9718d5be.png -------------------------------------------------------------------------------- /docs/lecture 20/media/5e21808db8af3416351121f745477256.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/apachecn/stanford-cs224n-notes-zh/8725301180de9719f10a4241e29e00df9ee55b1d/docs/lecture 20/media/5e21808db8af3416351121f745477256.png -------------------------------------------------------------------------------- /docs/lecture 20/media/707177c98672fbd53c9be53ac13cd78c.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/apachecn/stanford-cs224n-notes-zh/8725301180de9719f10a4241e29e00df9ee55b1d/docs/lecture 20/media/707177c98672fbd53c9be53ac13cd78c.png -------------------------------------------------------------------------------- /docs/lecture 20/media/70bd9c1c9f1b8aeb64495385072364cf.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/apachecn/stanford-cs224n-notes-zh/8725301180de9719f10a4241e29e00df9ee55b1d/docs/lecture 20/media/70bd9c1c9f1b8aeb64495385072364cf.png -------------------------------------------------------------------------------- /docs/lecture 20/media/7429800d68f22424891570eaecd970b3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/apachecn/stanford-cs224n-notes-zh/8725301180de9719f10a4241e29e00df9ee55b1d/docs/lecture 20/media/7429800d68f22424891570eaecd970b3.png -------------------------------------------------------------------------------- /docs/lecture 20/media/7d41ff65016bd11b9fbeb32918f13b0b.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/apachecn/stanford-cs224n-notes-zh/8725301180de9719f10a4241e29e00df9ee55b1d/docs/lecture 20/media/7d41ff65016bd11b9fbeb32918f13b0b.png -------------------------------------------------------------------------------- /docs/lecture 20/media/863d03b91f0a5659e4f8e2a054936e45.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/apachecn/stanford-cs224n-notes-zh/8725301180de9719f10a4241e29e00df9ee55b1d/docs/lecture 20/media/863d03b91f0a5659e4f8e2a054936e45.png -------------------------------------------------------------------------------- /docs/lecture 20/media/87ba75b9ceeb74f88f5d92abbec5c1fb.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/apachecn/stanford-cs224n-notes-zh/8725301180de9719f10a4241e29e00df9ee55b1d/docs/lecture 20/media/87ba75b9ceeb74f88f5d92abbec5c1fb.png -------------------------------------------------------------------------------- /docs/lecture 20/media/883a1f598728ca6deb4101b097107b21.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/apachecn/stanford-cs224n-notes-zh/8725301180de9719f10a4241e29e00df9ee55b1d/docs/lecture 20/media/883a1f598728ca6deb4101b097107b21.png -------------------------------------------------------------------------------- /docs/lecture 20/media/8b074b58ce68a16643e0e3ba231ced83.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/apachecn/stanford-cs224n-notes-zh/8725301180de9719f10a4241e29e00df9ee55b1d/docs/lecture 20/media/8b074b58ce68a16643e0e3ba231ced83.png -------------------------------------------------------------------------------- /docs/lecture 20/media/94d73800652ad75fbe429812a903ebab.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/apachecn/stanford-cs224n-notes-zh/8725301180de9719f10a4241e29e00df9ee55b1d/docs/lecture 20/media/94d73800652ad75fbe429812a903ebab.png -------------------------------------------------------------------------------- /docs/lecture 20/media/9a8bb5e3c768fa5db9036117cc7f75bb.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/apachecn/stanford-cs224n-notes-zh/8725301180de9719f10a4241e29e00df9ee55b1d/docs/lecture 20/media/9a8bb5e3c768fa5db9036117cc7f75bb.png -------------------------------------------------------------------------------- /docs/lecture 20/media/a45da67f31b5f0894dd05ee257c7a457.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/apachecn/stanford-cs224n-notes-zh/8725301180de9719f10a4241e29e00df9ee55b1d/docs/lecture 20/media/a45da67f31b5f0894dd05ee257c7a457.png -------------------------------------------------------------------------------- /docs/lecture 20/media/a963ae4807b619f2b301b9566069f4bc.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/apachecn/stanford-cs224n-notes-zh/8725301180de9719f10a4241e29e00df9ee55b1d/docs/lecture 20/media/a963ae4807b619f2b301b9566069f4bc.png -------------------------------------------------------------------------------- /docs/lecture 20/media/aa7868f8bd9dd5d3f8ebdd08e413d78c.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/apachecn/stanford-cs224n-notes-zh/8725301180de9719f10a4241e29e00df9ee55b1d/docs/lecture 20/media/aa7868f8bd9dd5d3f8ebdd08e413d78c.png -------------------------------------------------------------------------------- /docs/lecture 20/media/b28cc037a458e62e70e28df3da4db868.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/apachecn/stanford-cs224n-notes-zh/8725301180de9719f10a4241e29e00df9ee55b1d/docs/lecture 20/media/b28cc037a458e62e70e28df3da4db868.png -------------------------------------------------------------------------------- /docs/lecture 20/media/b3d54f5f91d45e47215ce3a4a8219dbd.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/apachecn/stanford-cs224n-notes-zh/8725301180de9719f10a4241e29e00df9ee55b1d/docs/lecture 20/media/b3d54f5f91d45e47215ce3a4a8219dbd.png -------------------------------------------------------------------------------- /docs/lecture 20/media/bfa990c95fe70d27430257291a8e5793.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/apachecn/stanford-cs224n-notes-zh/8725301180de9719f10a4241e29e00df9ee55b1d/docs/lecture 20/media/bfa990c95fe70d27430257291a8e5793.png -------------------------------------------------------------------------------- /docs/lecture 20/media/cc8b2e483b89e4e7f585053d9ae16901.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/apachecn/stanford-cs224n-notes-zh/8725301180de9719f10a4241e29e00df9ee55b1d/docs/lecture 20/media/cc8b2e483b89e4e7f585053d9ae16901.png -------------------------------------------------------------------------------- /docs/lecture 20/media/d8b5c15ef6c09b4e5e6221c08a77a117.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/apachecn/stanford-cs224n-notes-zh/8725301180de9719f10a4241e29e00df9ee55b1d/docs/lecture 20/media/d8b5c15ef6c09b4e5e6221c08a77a117.png -------------------------------------------------------------------------------- /docs/lecture 20/media/d976819e250653f03cf7a4a9439cac96.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/apachecn/stanford-cs224n-notes-zh/8725301180de9719f10a4241e29e00df9ee55b1d/docs/lecture 20/media/d976819e250653f03cf7a4a9439cac96.png -------------------------------------------------------------------------------- /docs/lecture 20/media/ed979ece44c470f0e90d399c89168997.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/apachecn/stanford-cs224n-notes-zh/8725301180de9719f10a4241e29e00df9ee55b1d/docs/lecture 20/media/ed979ece44c470f0e90d399c89168997.png -------------------------------------------------------------------------------- /docs/lecture 20/media/f8a9cc51dd30a1550b288b3304e510a4.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/apachecn/stanford-cs224n-notes-zh/8725301180de9719f10a4241e29e00df9ee55b1d/docs/lecture 20/media/f8a9cc51dd30a1550b288b3304e510a4.png -------------------------------------------------------------------------------- /styles/ebook.css: -------------------------------------------------------------------------------- 1 | /* GitHub stylesheet for MarkdownPad (http://markdownpad.com) */ 2 | /* Author: Nicolas Hery - http://nicolashery.com */ 3 | /* Version: b13fe65ca28d2e568c6ed5d7f06581183df8f2ff */ 4 | /* Source: https://github.com/nicolahery/markdownpad-github */ 5 | 6 | /* RESET 7 | =============================================================================*/ 8 | 9 | html, body, div, span, applet, object, iframe, h1, h2, h3, h4, h5, h6, p, blockquote, pre, a, abbr, 
acronym, address, big, cite, code, del, dfn, em, img, ins, kbd, q, s, samp, small, strike, strong, sub, sup, tt, var, b, u, i, center, dl, dt, dd, ol, ul, li, fieldset, form, label, legend, table, caption, tbody, tfoot, thead, tr, th, td, article, aside, canvas, details, embed, figure, figcaption, footer, header, hgroup, menu, nav, output, ruby, section, summary, time, mark, audio, video { 10 | margin: 0; 11 | padding: 0; 12 | border: 0; 13 | } 14 | 15 | /* BODY 16 | =============================================================================*/ 17 | 18 | body { 19 | font-family: Helvetica, arial, freesans, clean, sans-serif; 20 | font-size: 14px; 21 | line-height: 1.6; 22 | color: #333; 23 | background-color: #fff; 24 | padding: 20px; 25 | max-width: 960px; 26 | margin: 0 auto; 27 | } 28 | 29 | body>*:first-child { 30 | margin-top: 0 !important; 31 | } 32 | 33 | body>*:last-child { 34 | margin-bottom: 0 !important; 35 | } 36 | 37 | /* BLOCKS 38 | =============================================================================*/ 39 | 40 | p, blockquote, ul, ol, dl, table, pre { 41 | margin: 15px 0; 42 | } 43 | 44 | /* HEADERS 45 | =============================================================================*/ 46 | 47 | h1, h2, h3, h4, h5, h6 { 48 | margin: 20px 0 10px; 49 | padding: 0; 50 | font-weight: bold; 51 | -webkit-font-smoothing: antialiased; 52 | } 53 | 54 | h1 tt, h1 code, h2 tt, h2 code, h3 tt, h3 code, h4 tt, h4 code, h5 tt, h5 code, h6 tt, h6 code { 55 | font-size: inherit; 56 | } 57 | 58 | h1 { 59 | font-size: 24px; 60 | border-bottom: 1px solid #ccc; 61 | color: #000; 62 | } 63 | 64 | h2 { 65 | font-size: 18px; 66 | color: #000; 67 | } 68 | 69 | h3 { 70 | font-size: 14px; 71 | } 72 | 73 | h4 { 74 | font-size: 14px; 75 | } 76 | 77 | h5 { 78 | font-size: 14px; 79 | } 80 | 81 | h6 { 82 | color: #777; 83 | font-size: 14px; 84 | } 85 | 86 | body>h2:first-child, body>h1:first-child, body>h1:first-child+h2, body>h3:first-child, body>h4:first-child, body>h5:first-child, body>h6:first-child { 87 | margin-top: 0; 88 | padding-top: 0; 89 | } 90 | 91 | a:first-child h1, a:first-child h2, a:first-child h3, a:first-child h4, a:first-child h5, a:first-child h6 { 92 | margin-top: 0; 93 | padding-top: 0; 94 | } 95 | 96 | h1+p, h2+p, h3+p, h4+p, h5+p, h6+p { 97 | margin-top: 10px; 98 | } 99 | 100 | /* LINKS 101 | =============================================================================*/ 102 | 103 | a { 104 | color: #4183C4; 105 | text-decoration: none; 106 | } 107 | 108 | a:hover { 109 | text-decoration: underline; 110 | } 111 | 112 | /* LISTS 113 | =============================================================================*/ 114 | 115 | ul, ol { 116 | padding-left: 30px; 117 | } 118 | 119 | ul li > :first-child, 120 | ol li > :first-child, 121 | ul li ul:first-of-type, 122 | ol li ol:first-of-type, 123 | ul li ol:first-of-type, 124 | ol li ul:first-of-type { 125 | margin-top: 0px; 126 | } 127 | 128 | ul ul, ul ol, ol ol, ol ul { 129 | margin-bottom: 0; 130 | } 131 | 132 | dl { 133 | padding: 0; 134 | } 135 | 136 | dl dt { 137 | font-size: 14px; 138 | font-weight: bold; 139 | font-style: italic; 140 | padding: 0; 141 | margin: 15px 0 5px; 142 | } 143 | 144 | dl dt:first-child { 145 | padding: 0; 146 | } 147 | 148 | dl dt>:first-child { 149 | margin-top: 0px; 150 | } 151 | 152 | dl dt>:last-child { 153 | margin-bottom: 0px; 154 | } 155 | 156 | dl dd { 157 | margin: 0 0 15px; 158 | padding: 0 15px; 159 | } 160 | 161 | dl dd>:first-child { 162 | margin-top: 0px; 163 | } 164 | 165 | dl 
dd>:last-child { 166 | margin-bottom: 0px; 167 | } 168 | 169 | /* CODE 170 | =============================================================================*/ 171 | 172 | pre, code, tt { 173 | font-size: 12px; 174 | font-family: Consolas, "Liberation Mono", Courier, monospace; 175 | } 176 | 177 | code, tt { 178 | margin: 0 0px; 179 | padding: 0px 0px; 180 | white-space: nowrap; 181 | border: 1px solid #eaeaea; 182 | background-color: #f8f8f8; 183 | border-radius: 3px; 184 | } 185 | 186 | pre>code { 187 | margin: 0; 188 | padding: 0; 189 | white-space: pre; 190 | border: none; 191 | background: transparent; 192 | } 193 | 194 | pre { 195 | background-color: #f8f8f8; 196 | border: 1px solid #ccc; 197 | font-size: 13px; 198 | line-height: 19px; 199 | overflow: auto; 200 | padding: 6px 10px; 201 | border-radius: 3px; 202 | } 203 | 204 | pre code, pre tt { 205 | background-color: transparent; 206 | border: none; 207 | } 208 | 209 | kbd { 210 | -moz-border-bottom-colors: none; 211 | -moz-border-left-colors: none; 212 | -moz-border-right-colors: none; 213 | -moz-border-top-colors: none; 214 | background-color: #DDDDDD; 215 | background-image: linear-gradient(#F1F1F1, #DDDDDD); 216 | background-repeat: repeat-x; 217 | border-color: #DDDDDD #CCCCCC #CCCCCC #DDDDDD; 218 | border-image: none; 219 | border-radius: 2px 2px 2px 2px; 220 | border-style: solid; 221 | border-width: 1px; 222 | font-family: "Helvetica Neue",Helvetica,Arial,sans-serif; 223 | line-height: 10px; 224 | padding: 1px 4px; 225 | } 226 | 227 | /* QUOTES 228 | =============================================================================*/ 229 | 230 | blockquote { 231 | border-left: 4px solid #DDD; 232 | padding: 0 15px; 233 | color: #777; 234 | } 235 | 236 | blockquote>:first-child { 237 | margin-top: 0px; 238 | } 239 | 240 | blockquote>:last-child { 241 | margin-bottom: 0px; 242 | } 243 | 244 | /* HORIZONTAL RULES 245 | =============================================================================*/ 246 | 247 | hr { 248 | clear: both; 249 | margin: 15px 0; 250 | height: 0px; 251 | overflow: hidden; 252 | border: none; 253 | background: transparent; 254 | border-bottom: 4px solid #ddd; 255 | padding: 0; 256 | } 257 | 258 | /* TABLES 259 | =============================================================================*/ 260 | 261 | table th { 262 | font-weight: bold; 263 | } 264 | 265 | table th, table td { 266 | border: 1px solid #ccc; 267 | padding: 6px 13px; 268 | } 269 | 270 | table tr { 271 | border-top: 1px solid #ccc; 272 | background-color: #fff; 273 | } 274 | 275 | table tr:nth-child(2n) { 276 | background-color: #f8f8f8; 277 | } 278 | 279 | /* IMAGES 280 | =============================================================================*/ 281 | 282 | img { 283 | max-width: 100% 284 | } -------------------------------------------------------------------------------- /update.sh: -------------------------------------------------------------------------------- 1 | git add -A 2 | git commit -am "$(date "+%Y-%m-%d %H:%M:%S")" 3 | git push --------------------------------------------------------------------------------