├── CS143 Compiler.md
├── CS143 text-srt
    ├── README.md
    ├── chinese
    │   ├── CHSEN_01-01-introduction-redo-correction.srt
    │   ├── CHSEN_01-02-structure-of-a-compiler-final.srt
    │   ├── CHSEN_01-03-the-economy-of-programming-languages.srt
    │   ├── CHSEN_02-01-cool-overview-final.srt
    │   ├── CHSEN_02-02-cool-example-ii-final.srt
    │   ├── CHSEN_02-03-cool-example-iii-final-correction.srt
    │   ├── CHSEN_03-01-A.srt
    │   ├── CHSEN_03-01-B+Lexical+Analysis.srt
    │   ├── CHSEN_03-02-lexical-analysis-examples-final.srt
    │   ├── CHSEN_03-03-A+Regular+Languages.srt
    │   ├── CHSEN_03-03-B.srt
    │   ├── CHSEN_03-04-formal-languages.srt
    │   ├── CHSEN_03-05-lexical-specifications-final-quizupdate.srt
    │   ├── CHSEN_04+02+finite+automata+part+1.srt
    │   ├── CHSEN_04+02+finite+automata+part+2.srt
    │   ├── CHSEN_04-01-lexical-specification.srt
    │   └── CHSEN_04-05-implementing-finite-automata-correction.srt
    └── english
    │   ├── 01-01-introduction-redo-correction.srt
    │   ├── 01-02-structure-of-a-compiler-final.srt
    │   ├── 01-03-the-economy-of-programming-languages.srt
    │   ├── 02-01-cool-overview-final.srt
    │   ├── 02-02-cool-example-ii-final.srt
    │   ├── 02-03-cool-example-iii-final-correction.srt
    │   ├── 03-01-A.srt
    │   ├── 03-01-B+Lexical+Analysis.srt
    │   ├── 03-02-lexical-analysis-examples-final.srt
    │   ├── 03-03-A+Regular+Languages.srt
    │   ├── 03-03-B.srt
    │   ├── 03-04-formal-languages.srt
    │   ├── 03-05-lexical-specifications-final-quizupdate.srt
    │   ├── 04+02+finite+automata+part+1.srt
    │   ├── 04+02+finite+automata+part+2.srt
    │   ├── 04-01-lexical-specification.srt
    │   ├── 04-03.srt
    │   ├── 04-04-nfa-to-dfa-quizupdate.srt
    │   ├── 04-05-implementing-finite-automata-correction.srt
    │   ├── 05-01-introduction-to-parsing.srt
    │   ├── 05-02-A+Context+Free+Grammars.srt
    │   ├── 05-02-B+Context+Free+Grammars.srt
    │   ├── 05-03-B+Derivations.srt
    │   ├── 05-03.srt
    │   ├── 05-04-A+Ambiguity.srt
    │   ├── 05-04-B.srt
    │   ├── 05-04-C+Ambiguity.srt
    │   ├── 06-01-error-handling.srt
    │   ├── 06-02-abstract-syntax-trees.srt
    │   ├── 06-03-recursive-descent-parsing.srt
    │   ├── 06-04-1-recursive-descent-limitations-04-1.srt
    │   ├── 06-04-recursive-descent-algorithm.srt
    │   ├── 06-05-A+Left+Recursion.srt
    │   ├── 06-05-B+Left+Recursion.srt
    │   ├── 07-01-B+Predictive+Parsing.srt
    │   ├── 07-01.srt
    │   ├── 07-02-first-sets.srt
    │   ├── 07-03-follow-sets.srt
    │   ├── 07-04-ll1-parsing-tables.srt
    │   ├── 07-05-B+Bottom-Up+Parsing.srt
    │   ├── 07-05.srt
    │   ├── 07-06-B+Shift-Reduce+Parsing.srt
    │   ├── 07-06.srt
    │   ├── 08-01-B+Handles.srt
    │   ├── 08-01.srt
    │   ├── 08-02-recognizing-handles.srt
    │   ├── 08-03-recognizing-viable-prefixes.srt
    │   ├── 08-04-valid-items.srt
    │   ├── 08-05-slr-parsing.srt
    │   ├── 08-06-slr-parsing-example.srt
    │   ├── 08-07-slr-improvements.srt
    │   ├── 08-08-slr-examples-correction.srt
    │   ├── 09-01-introduction-to-semantic-analysis.srt
    │   ├── 09-02-scope.srt
    │   ├── 09-03-symbol-tables.srt
    │   ├── 09-04-types.srt
    │   ├── 09-05-A+Type+Checking.srt
    │   ├── 09-05-B+Type+Checking.srt
    │   ├── 09-06-A+Type+Environments.srt
    │   ├── 09-06-B+Type+Environments.srt
    │   ├── 09-07-A+Subtyping.srt
    │   ├── 09-07-B+Subtyping.srt
    │   ├── 09-08-A+Typing+Methods.srt
    │   ├── 09-08-B+Typing+Methods.srt
    │   ├── 09-09-implementing-type-checking.srt
    │   ├── 10-01-A+Static+vs.+Dynamic+Typing.srt
    │   ├── 10-01-B+Static+vs.+Dynamic+Typing.srt
    │   ├── 10-02-self-type.srt
    │   ├── 10-03-A+Self+Type+Operations.srt
    │   ├── 10-03-B+Self+Type+Operations.srt
    │   ├── 10-04-self-type-usage.srt
    │   ├── 10-05-A+Self+Type+Checking.srt
    │   ├── 10-05-B+Self+Type+Checking.srt
    │   ├── 10-06-error-recovery.srt
    │   ├── 11-01-runtime-organization.srt
    │   ├── 11-02-A+Activations.srt
    │   ├── 11-02-B+Activations.srt
    │   ├── 11-03-activation-records.srt
    │   ├── 11-04-globals-and-heap.srt
    │   ├── 11-05-alignment.srt
    │   ├── 11-06-stack-machines.srt
    │   ├── 12-01-introduction-to-code-generation.srt
    │   ├── 12-02-A+Code+Generation+I.srt
    │   ├── 12-02-B+Code+Generation+I.srt
    │   ├── 12-03-A+Code+Generation+II.srt
    │   ├── 12-03-B+Code+Generation+II.srt
    │   ├── 12-04-code-generation-example.srt
    │   ├── 12-05-A+Temporaries.srt
    │   ├── 12-05-B+Temporaries.srt
    │   ├── 12-06-A+Object+Layout.srt
    │   ├── 12-06-B+Object+Layout.srt
    │   ├── 13-01-semantics-overview.srt
    │   ├── 13-02-operational-semantics.srt
    │   ├── 13-03-cool-semantics-i.srt
    │   ├── 13-04-A+Cool+Semantics+II.srt
    │   ├── 13-04-B+Cool+Semantics+II.srt
    │   ├── 14-01-intermediate-code.srt
    │   ├── 14-02-optimization-overview.srt
    │   ├── 14-03-local-optimization.srt
    │   ├── 14-04-peephole-optimization.srt
    │   ├── 15-01.srt
    │   ├── 15-02-constant-propagation.srt
    │   ├── 15-03-analysis-of-loops.srt
    │   ├── 15-04-orderings.srt
    │   ├── 15-05-A+Liveness+Analysis.srt
    │   ├── 15-05-B+Liveness+Analysis.srt
    │   ├── 16-01-register-allocation.srt
    │   ├── 16-02-A+Graph+Coloring.srt
    │   ├── 16-02-B+Graph+Coloring.srt
    │   ├── 16-03-A+Spilling.srt
    │   ├── 16-03-B+Spilling.srt
    │   ├── 16-04-managing-caches.srt
    │   ├── 17-01-automatic-memory-management.srt
    │   ├── 17-02-A+Mark+and+Sweep.srt
    │   ├── 17-02-B+Mark+and+Sweep.srt
    │   ├── 17-03-A+Stop+and+Copy.srt
    │   ├── 17-03-B+Stop+and+Copy.srt
    │   ├── 17-04-conservative-collection.srt
    │   ├── 17-05-A+Reference+Counting.srt
    │   ├── 17-05-B+Reference+Counting.srt
    │   ├── 18-01-java.srt
    │   ├── 18-02-java-arrays.srt
    │   ├── 18-03-java-exceptions.srt
    │   ├── 18-04-java-interfaces.srt
    │   ├── 18-05-java-coercions.srt
    │   ├── 18-06-java-threads.srt
    │   ├── 18-07-other-topics.srt
    │   ├── CHSEN_04-03.srt
    │   └── README.md
├── README.md
├── SSR-Android.apk
├── SSR-Windows.zip
├── fubabaxianjinliu.apk
├── src
    ├── c__.ps
    ├── demo.md
    └── gdbref.ps
├── student-dist.tar.gz
└── v2rayn.zip


/CS143 text-srt/README.md:
--------------------------------------------------------------------------------
1 | 
2 | 


--------------------------------------------------------------------------------
/CS143 text-srt/chinese/CHSEN_03-01-B+Lexical+Analysis.srt:
--------------------------------------------------------------------------------
 1 | 0
 2 | 00:00:00,900 --> 00:00:06,420
 3 | 总而言之, 词法分析实现必须做两件事。
 4 | To summarize, lexical analysis implementation has to do two things. The first
 5 | 
 6 | 1
 7 | 00:00:06,420 --> 00:00:11,089
 8 | 作业是识别与令牌对应的输入中的子字符串。
 9 | job is to recognize the substrings in the input that correspond to tokens. And
10 | 
11 | 2
12 | 00:00:11,089 --> 00:00:16,690
13 | 这里有一些编译器的术语这些子字符串被称为lexemes
14 | here's a little bit of compiler lingo these substrings are called the lexemes. So
15 | 
16 | 3
17 | 00:00:16,690 --> 00:00:21,920
18 | 这个程序的单词叫做lexemes, 然后第二个任务是
19 | the words of the program are called the lexemes. And then the second job is at for
20 | 
21 | 4
22 | 00:00:21,920 --> 00:00:28,369
23 | 每个lexeme我们必须识别它的令牌类, 然后是词汇的输出
24 | each lexeme we have to identify its token class. And then the output of the lexical
25 | 
26 | 5
27 | 00:00:28,369 --> 00:00:35,369
28 | 分析器是一系列成对的, 它们是令牌类, lexing, 好的, 还有这个
29 | analyzer is a series of pairs which are the token class. And lexing, Okay, And this
30 | 
31 | 6
32 | 00:00:35,610 --> 00:00:39,750
33 | 整件事, 其中一种叫做令牌。
34 | whole thing, one of these pairsis called A token.
35 | 


--------------------------------------------------------------------------------
/CS143 text-srt/chinese/CHSEN_03-03-B.srt:
--------------------------------------------------------------------------------
 1 | 0
 2 | 00:00:00,940 --> 00:00:05,480
 3 | 好了, 我们已经讲完了这段视频, 总结一下, 我们看了常规的
 4 | Well, we've come to the end of this video. And to summarize, we looked at regular
 5 | 
 6 | 1
 7 | 00:00:05,480 --> 00:00:11,400
 8 | 表达式, 用于定义常规语言。
 9 | expressions, which are used to define regular languages. And the regular
10 | 
11 | 2
12 | 00:00:11,400 --> 00:00:16,910
13 | 表达式是语法（这是我们写的表达式）
14 | expressions are syntax (that's the expression that we write down), and it
15 | 
16 | 3
17 | 00:00:16,910 --> 00:00:21,660
18 | 表示一组字符串这是常规语言这就是
19 | denotes a set of strings which is the regular language and that's the meaning of
20 | 
21 | 4
22 | 00:00:21,660 --> 00:00:26,870
23 | 正则表达式, 有五种正则表达式
24 | the regular expression. And there are five kinds of regular expressions in the
25 | 
26 | 5
27 | 00:00:26,870 --> 00:00:32,439
28 | 标准定义, 这是空字符串的表达式这是由
29 | standard definition. There's an expression for the empty string and that's denoted by
30 | 
31 | 6
32 | 00:00:32,439 --> 00:00:36,289
33 | 然后我们有了所有的字符串然后是3个字符串
34 | epsilon and then we have all the one character strings and then there are three
35 | 
36 | 7
37 | 00:00:36,289 --> 00:00:41,890
38 | 复合表达式——从其他规则构建新的正则表达式的方法
39 | compound expressions - ways of building new regular expressions from other regular
40 | 
41 | 8
42 | 00:00:41,890 --> 00:00:46,460
43 | 表达式——这些是联合、连接和迭代。
44 | expressions - and these are union, concatenation, and iteration.
45 | 


--------------------------------------------------------------------------------
/CS143 text-srt/english/03-01-B+Lexical+Analysis.srt:
--------------------------------------------------------------------------------
 1 | 0
 2 | 00:00:00,900 --> 00:00:06,420
 3 | To summarize, lexical analysis implementation has to do two things. The first
 4 | 
 5 | 1
 6 | 00:00:06,420 --> 00:00:11,089
 7 | job is to recognize the substrings in the input that correspond to tokens. And
 8 | 
 9 | 2
10 | 00:00:11,089 --> 00:00:16,690
11 | here's a little bit of compiler lingo these substrings are called the lexemes. So
12 | 
13 | 3
14 | 00:00:16,690 --> 00:00:21,920
15 | the words of the program are called the lexemes. And then the second job is at for
16 | 
17 | 4
18 | 00:00:21,920 --> 00:00:28,369
19 | each lexeme we have to identify its token class. And then the output of the lexical
20 | 
21 | 5
22 | 00:00:28,369 --> 00:00:35,369
23 | analyzer is a series of pairs which are the token class. And lexing, Okay, And this
24 | 
25 | 6
26 | 00:00:35,610 --> 00:00:39,750
27 | whole thing, one of these pairsis called A token.
28 | 


--------------------------------------------------------------------------------
/CS143 text-srt/english/03-03-B.srt:
--------------------------------------------------------------------------------
 1 | 0
 2 | 00:00:00,940 --> 00:00:05,480
 3 | Well, we've come to the end of this video. And to summarize, we looked at regular
 4 | 
 5 | 1
 6 | 00:00:05,480 --> 00:00:11,400
 7 | expressions, which are used to define regular languages. And the regular
 8 | 
 9 | 2
10 | 00:00:11,400 --> 00:00:16,910
11 | expressions are syntax (that's the expression that we write down), and it
12 | 
13 | 3
14 | 00:00:16,910 --> 00:00:21,660
15 | denotes a set of strings which is the regular language and that's the meaning of
16 | 
17 | 4
18 | 00:00:21,660 --> 00:00:26,870
19 | the regular expression. And there are five kinds of regular expressions in the
20 | 
21 | 5
22 | 00:00:26,870 --> 00:00:32,439
23 | standard definition. There's an expression for the empty string and that's denoted by
24 | 
25 | 6
26 | 00:00:32,439 --> 00:00:36,289
27 | epsilon and then we have all the one character strings and then there are three
28 | 
29 | 7
30 | 00:00:36,289 --> 00:00:41,890
31 | compound expressions - ways of building new regular expressions from other regular
32 | 
33 | 8
34 | 00:00:41,890 --> 00:00:46,460
35 | expressions - and these are union, concatenation, and iteration.
36 | 


--------------------------------------------------------------------------------
/CS143 text-srt/english/05-01-introduction-to-parsing.srt:
--------------------------------------------------------------------------------
  1 | 0
  2 | 00:00:03,830 --> 00:00:08,580
  3 | In this video, we're going to transition from lexical analysis to parsing and talk
  4 | 
  5 | 1
  6 | 00:00:08,580 --> 00:00:15,580
  7 | a little bit about the relationship between those two compiler phases. We've
  8 | 
  9 | 2
 10 | 00:00:16,660 --> 00:00:20,560
 11 | already talked about regular languages and it's worth mentioning that these are the
 12 | 
 13 | 3
 14 | 00:00:20,560 --> 00:00:24,810
 15 | weakest formal languages that are widely used. But they have, of course, many
 16 | 
 17 | 4
 18 | 00:00:24,810 --> 00:00:30,150
 19 | applications, some of which we saw in previous videos. The difficulty with
 20 | 
 21 | 5
 22 | 00:00:30,150 --> 00:00:35,110
 23 | regular languages is that a lot of languages are simply not regular. And
 24 | 
 25 | 6
 26 | 00:00:35,110 --> 00:00:39,699
 27 | there's some pretty important languages that can't be expressed using regular
 28 | 
 29 | 7
 30 | 00:00:39,699 --> 00:00:44,199
 31 | expressions or finite automata. So let's consider this language which is the set of
 32 | 
 33 | 8
 34 | 00:00:44,199 --> 00:00:50,269
 35 | all balanced parentheses. So some elements of this language would be at the string
 36 | 
 37 | 9
 38 | 00:00:50,269 --> 00:00:56,210
 39 | one open-paren, one close-paren, two open-parens, two close-parens, three
 40 | 
 41 | 10
 42 | 00:00:56,210 --> 00:01:02,290
 43 | open-parens, three close-parens and so on. And, you can imagine that this is actually
 44 | 
 45 | 11
 46 | 00:01:02,290 --> 00:01:05,580
 47 | something that's fairly representative of lots of programming language construct. So
 48 | 
 49 | 12
 50 | 00:01:05,580 --> 00:01:12,580
 51 | for example, any kind of nested arithmetic expression would fit into this class but
 52 | 
 53 | 13
 54 | 00:01:13,470 --> 00:01:20,470
 55 | also things like nested if and else's will have this category, this characteristic.
 56 | 
 57 | 14
 58 | 00:01:21,640 --> 00:01:26,380
 59 | And here with the nested [inaudible] it's just the f statement, the functions like
 60 | 
 61 | 15
 62 | 00:01:26,380 --> 00:01:32,490
 63 | an open-paren. Not every languages like cool which has the explicit closing fee as
 64 | 
 65 | 16
 66 | 00:01:32,490 --> 00:01:36,450
 67 | well but they're implicit in many languages and so there are lots of nesting
 68 | 
 69 | 17
 70 | 00:01:36,450 --> 00:01:43,450
 71 | structure in programming languages constructs and those cannot be handled by
 72 | 
 73 | 18
 74 | 00:01:43,690 --> 00:01:50,040
 75 | regular expressions. So this raises the question of what the regular languages can
 76 | 
 77 | 19
 78 | 00:01:50,040 --> 00:01:56,430
 79 | express. And, why they aren't sufficient for recognizing arbitrary nesting
 80 | 
 81 | 20
 82 | 00:01:56,430 --> 00:02:01,240
 83 | structure. So we can illustrate the limitations of regular languages and
 84 | 
 85 | 21
 86 | 00:02:01,240 --> 00:02:05,860
 87 | Finite Automaton by looking a simple two state machine. So let's consider this
 88 | 
 89 | 22
 90 | 00:02:05,860 --> 00:02:10,330
 91 | machine. We have one we have start state and then the other state is the accepting
 92 | 
 93 | 23
 94 | 00:02:10,330 --> 00:02:16,700
 95 | state. And, we'll have this machine. Just be a machine that we've already seen
 96 | 
 97 | 24
 98 | 00:02:16,700 --> 00:02:23,700
 99 | actually and it'll recognize strings with odd numbers of 1's. So if we see a one and
100 | 
101 | 25
102 | 00:02:25,620 --> 00:02:29,920
103 | we're in the start state, we move. We now see an odd number of 1's. We move to the
104 | 
105 | 26
106 | 00:02:29,920 --> 00:02:32,840
107 | accepting state and we stay there until we see another one. In which case, we've seen
108 | 
109 | 27
110 | 00:02:32,840 --> 00:02:35,950
111 | even number of 1's and then we're in the start state. So whenever we see an odd
112 | 
113 | 28
114 | 00:02:35,950 --> 00:02:39,540
115 | number of 1's, we're in the final state. Whenever we see an even number of 1's,
116 | 
117 | 29
118 | 00:02:39,540 --> 00:02:44,250
119 | we're in the start state. And if we feed this a fairly long string of 1's, let's,
120 | 
121 | 30
122 | 00:02:44,250 --> 00:02:49,240
123 | let's select only seven 1's in it. Then what's it going to do is going to go back
124 | 
125 | 31
126 | 00:02:49,240 --> 00:02:52,890
127 | and forth and back and forth between these states. It's gonna wind up in the final
128 | 
129 | 32
130 | 00:02:52,890 --> 00:02:57,160
131 | state when it gets to the last one so it'll accept but notice that it doesn't
132 | 
133 | 33
134 | 00:02:57,160 --> 00:03:01,950
135 | know how many times it's been to that final state. It doesn't remember the
136 | 
137 | 34
138 | 00:03:01,950 --> 00:03:04,880
139 | length of the string; it doesn't have any way of counting how many characters the
140 | 
141 | 35
142 | 00:03:04,880 --> 00:03:11,880
143 | string had in it. And in fact, all I can count here is the parity. So in general
144 | 
145 | 36
146 | 00:03:12,860 --> 00:03:19,580
147 | Finite Automata can really only express things where you can count modulus on k.
148 | 
149 | 37
150 | 00:03:19,580 --> 00:03:26,580
151 | So they can count mod k for some k where k is the number of states in the machine.
152 | 
153 | 38
154 | 00:03:27,349 --> 00:03:30,930
155 | And so, you know if I have pre-test the machine, I can keep track of whether the
156 | 
157 | 39
158 | 00:03:30,930 --> 00:03:35,629
159 | string length is divisible by three or some other similar property but I can't do
160 | 
161 | 40
162 | 00:03:35,629 --> 00:03:42,629
163 | things like count to an arbitrary i so if I need to recognize a language that
164 | 
165 | 41
166 | 00:03:42,849 --> 00:03:46,510
167 | requires counting arbitrarily high like recognizing all strings of balance
168 | 
169 | 42
170 | 00:03:46,510 --> 00:03:53,510
171 | parentheses, we can't do that with the finite set of states. So what does a
172 | 
173 | 43
174 | 00:03:53,720 --> 00:03:58,940
175 | parser do, it takes the sequence of tokens as input from the lexer and it produces a
176 | 
177 | 44
178 | 00:03:58,940 --> 00:04:05,940
179 | parse tree of the program. And for example in cool, here's an input expression that
180 | 
181 | 45
182 | 00:04:08,700 --> 00:04:14,510
183 | is input to the lexical analyzer. The lexical analyzer produces this sequence of
184 | 
185 | 46
186 | 00:04:14,510 --> 00:04:19,479
187 | tokens as its output. That's the input to the parser. Then the parser produces this
188 | 
189 | 47
190 | 00:04:19,478 --> 00:04:24,469
191 | parse tree where the nesting structure has been made explicit. So, we have the, if
192 | 
193 | 48
194 | 00:04:24,469 --> 00:04:28,460
195 | and else and then the three components: the predicate, the then branch and the
196 | 
197 | 49
198 | 00:04:28,460 --> 00:04:35,460
199 | else branch of the, if To summarize, the lexer takes a string of character as input
200 | 
201 | 50
202 | 00:04:36,150 --> 00:04:41,289
203 | and produces a string of tokens as output. That string of tok ens is the input to the
204 | 
205 | 51
206 | 00:04:41,289 --> 00:04:47,300
207 | parser which takes a string of tokens and produces a Parse Tree of the program. And
208 | 
209 | 52
210 | 00:04:47,300 --> 00:04:50,719
211 | it's worth mentioning a couple of thing here. First of all, sometimes the Parse
212 | 
213 | 53
214 | 00:04:50,719 --> 00:04:57,430
215 | Tree is only implicit. So the, a compiler may never actually build the full Parse
216 | 
217 | 54
218 | 00:04:57,430 --> 00:05:02,300
219 | Tree. We'll talk more about that later. Many compilers do build an explicit parse
220 | 
221 | 55
222 | 00:05:02,300 --> 00:05:06,699
223 | tree but many do not. The other thing that's worth mentioning is that there are
224 | 
225 | 56
226 | 00:05:06,699 --> 00:05:11,949
227 | compilers that do combine these two phases into one where everything is done by the
228 | 
229 | 57
230 | 00:05:11,949 --> 00:05:16,029
231 | parser. So, the parsing technology is generally powerful enough to express
232 | 
233 | 58
234 | 00:05:16,029 --> 00:05:21,319
235 | lexical analysis in addition to parsing. But most compilers still divide up the
236 | 
237 | 59
238 | 00:05:21,319 --> 00:05:25,729
239 | work this way because regular expressions are such a good match for lexical analysis
240 | 
241 | 60
242 | 00:05:25,729 --> 00:05:28,279
243 | and then the parsing is handled separately.
244 | 


--------------------------------------------------------------------------------
/CS143 text-srt/english/05-02-B+Context+Free+Grammars.srt:
--------------------------------------------------------------------------------
 1 | 0
 2 | 00:00:01,490 --> 00:00:04,870
 3 | Context-free grammars are a big step towards being able to say what we want in
 4 | 
 5 | 1
 6 | 00:00:04,870 --> 00:00:09,389
 7 | a parser but, we still need some other things. First of all, a context-free
 8 | 
 9 | 2
10 | 00:00:09,389 --> 00:00:15,410
11 | grammar at least as we define it so far, just gives us a yes or no answer. Yes
12 | 
13 | 3
14 | 00:00:15,410 --> 00:00:18,650
15 | something, yes a string is in the language of the Context-free grammar or no it is
16 | 
17 | 4
18 | 00:00:18,650 --> 00:00:24,130
19 | not. We also need a method for building a Parse Tree at the input. So in those cases
20 | 
21 | 5
22 | 00:00:24,130 --> 00:00:27,269
23 | where it is on the language, we need to know how it's in the language. We need the
24 | 
25 | 6
26 | 00:00:27,269 --> 00:00:32,930
27 | actual Parse Tree not just yes or no. In the cases where the string is not in the
28 | 
29 | 7
30 | 00:00:32,930 --> 00:00:35,860
31 | language, we have to be able to handle errors gracefully and give some kind of
32 | 
33 | 8
34 | 00:00:35,860 --> 00:00:40,400
35 | feedback to the programmer so we need a method for doing that. And finally if we
36 | 
37 | 9
38 | 00:00:40,400 --> 00:00:44,050
39 | have these two things we need an actual implementation of them in order to
40 | 
41 | 10
42 | 00:00:44,050 --> 00:00:50,340
43 | actually implement context-free grammars. One last comment before we wrap up this
44 | 
45 | 11
46 | 00:00:50,340 --> 00:00:54,750
47 | video. The form of the context-free grammar can be important. Tools are often
48 | 
49 | 12
50 | 00:00:54,750 --> 00:01:00,010
51 | sensitive to the particular you write the grammar and while there are many ways to
52 | 
53 | 13
54 | 00:01:00,010 --> 00:01:04,439
55 | write a grammar for the same language, only some of them may be accepted by the
56 | 
57 | 14
58 | 00:01:04,438 --> 00:01:08,770
59 | tools. And as we'll see there are cases where it's necessary to modify the grammar
60 | 
61 | 15
62 | 00:01:08,770 --> 00:01:13,000
63 | in order to get the tools to accept it. This happens actually sometimes as well
64 | 
65 | 16
66 | 00:01:13,000 --> 00:01:17,329
67 | with regular expressions but it's much less common. So normally for most regular
68 | 
69 | 17
70 | 00:01:17,329 --> 00:01:21,890
71 | expressions you would want to write the tools would be able to digest them. That's
72 | 
73 | 18
74 | 00:01:21,890 --> 00:01:26,770
75 | fine. That's not also true. That's not true of an arbitrary context-free grammar.
76 | 


--------------------------------------------------------------------------------
/CS143 text-srt/english/05-03-B+Derivations.srt:
--------------------------------------------------------------------------------
 1 | 0
 2 | 00:00:00,880 --> 00:00:04,400
 3 | So to summarize, we're not just interested in whether a string is in
 4 | 
 5 | 1
 6 | 00:00:04,400 --> 00:00:09,110
 7 | the language of a particular context-free grammar, we need to have a Parse Tree for
 8 | 
 9 | 2
10 | 00:00:09,110 --> 00:00:15,290
11 | that string. And, a derivation defines a Parse Tree but it turns out that one Parse
12 | 
13 | 3
14 | 00:00:15,290 --> 00:00:20,509
15 | Tree in general has many derivations and in particular, we're interested in the
16 | 
17 | 4
18 | 00:00:20,509 --> 00:00:24,919
19 | leftmost and rightmost derivations. These are the two kinds of derivations that are
20 | 
21 | 5
22 | 00:00:24,919 --> 00:00:27,259
23 | important in parser implementations.
24 | 


--------------------------------------------------------------------------------
/CS143 text-srt/english/05-04-A+Ambiguity.srt:
--------------------------------------------------------------------------------
  1 | 0
  2 | 00:00:03,780 --> 00:00:07,200
  3 | In this video we're going to talk about ambiguous context free grammars in
  4 | 
  5 | 1
  6 | 00:00:07,200 --> 00:00:14,200
  7 | programming languages and what to do about them. We'll begin by looking at our
  8 | 
  9 | 2
 10 | 00:00:16,120 --> 00:00:21,670
 11 | favorite grammar for expressions over + and and identifiers and we'll just look
 12 | 
 13 | 3
 14 | 00:00:21,670 --> 00:00:28,670
 15 | at the string id id + id. Now it turns out that this particular string and let's
 16 | 
 17 | 4
 18 | 00:00:29,230 --> 00:00:36,230
 19 | write it down one more time, id id + id. This string has two parse tree using this
 20 | 
 21 | 5
 22 | 00:00:38,429 --> 00:00:43,409
 23 | grammar. Let's do the Parse tree on the left first. We begin with the start symbol
 24 | 
 25 | 6
 26 | 00:00:43,409 --> 00:00:48,929
 27 | e and the first production in this derivation that gives us this Parse tree
 28 | 
 29 | 7
 30 | 00:00:48,929 --> 00:00:55,929
 31 | must be that e goes to e + e. E + e And then we replace the left most e by e e.
 32 | 
 33 | 8
 34 | 00:01:02,199 --> 00:01:07,950
 35 | We use the production e goes to e e and we still have the plus e left over and at
 36 | 
 37 | 9
 38 | 00:01:07,950 --> 00:01:11,060
 39 | this point you can see that we're going to get this parse tree. We've done with those
 40 | 
 41 | 10
 42 | 00:01:11,060 --> 00:01:16,570
 43 | two productions. We have done this much. The construction of the parse tree and the
 44 | 
 45 | 11
 46 | 00:01:16,570 --> 00:01:20,369
 47 | rest of productions are just generating these id's. So that's a three more
 48 | 
 49 | 12
 50 | 00:01:20,369 --> 00:01:27,369
 51 | productions and we can see that, you know if we do those we'll get id id + id no
 52 | 
 53 | 13
 54 | 00:01:27,990 --> 00:01:33,810
 55 | problem, alright. So now let's switch and do the derivation on the right or excuse
 56 | 
 57 | 14
 58 | 00:01:33,810 --> 00:01:38,930
 59 | me, the parse tree on the right so this begins also with e. But this time we use
 60 | 
 61 | 15
 62 | 00:01:38,930 --> 00:01:44,350
 63 | the production e goes to e e first, all right? And now we're gonna replace the
 64 | 
 65 | 16
 66 | 00:01:44,350 --> 00:01:51,350
 67 | right most e. By e goes to e + e so we have e e + e and now we've, with those
 68 | 
 69 | 17
 70 | 00:01:55,560 --> 00:02:00,200
 71 | two productions we've done this portion of the parse tree and once again with three
 72 | 
 73 | 18
 74 | 00:02:00,200 --> 00:02:07,200
 75 | more productions we can get to id id + id so there you can see we've got two
 76 | 
 77 | 19
 78 | 00:02:13,450 --> 00:02:20,239
 79 | derivations. That produced two distinct Parse trees. And just to be completely
 80 | 
 81 | 20
 82 | 00:02:20,239 --> 00:02:24,459
 83 | clear about this, in this case we're getting two different Parse trees. Each of
 84 | 
 85 | 21
 86 | 00:02:24,459 --> 00:02:29,239
 87 | these derivation, each of these Parse trees has many derivations. Each Parse
 88 | 
 89 | 22
 90 | 00:02:29,239 --> 00:02:33,370
 91 | tree has a left most derivation, a right most derivation and many other
 92 | 
 93 | 23
 94 | 00:02:33,370 --> 00:02:38,459
 95 | derivations. This situation is something different. Here we have two derivations
 96 | 
 97 | 24
 98 | 00:02:38,459 --> 00:02:43,480
 99 | that yield completely different Parse trees and that is the sign or the
100 | 
101 | 25
102 | 00:02:43,480 --> 00:02:48,099
103 | definition of an ambiguous grammar. So a grammar is ambiguous if it has more than
104 | 
105 | 26
106 | 00:02:48,099 --> 00:02:54,400
107 | one Parse tree for some string. And another way of saying the same thing is
108 | 
109 | 27
110 | 00:02:54,400 --> 00:02:59,110
111 | that there is more than one right most or left most derivation for some string. So
112 | 
113 | 28
114 | 00:02:59,110 --> 00:03:03,400
115 | if some string has two right most derivations or more or two left
116 | 
117 | 29
118 | 00:03:03,400 --> 00:03:08,810
119 | derivations or more then the, that string will have two distinct parse trees and
120 | 
121 | 30
122 | 00:03:08,810 --> 00:03:15,810
123 | that grammar will be ambiguous. Ambiguity is bad. If you have multiple parse trees
124 | 
125 | 31
126 | 00:03:17,230 --> 00:03:20,819
127 | for some program then that essentially means that you're leaving it up to the
128 | 
129 | 32
130 | 00:03:20,819 --> 00:03:25,569
131 | compiler to pick which of those two possible interpretations of the program
132 | 
133 | 33
134 | 00:03:25,569 --> 00:03:30,790
135 | you want it to generate code for and that's not a good idea. We don't like to
136 | 
137 | 34
138 | 00:03:30,790 --> 00:03:34,189
139 | have ambiguity in our programming languages and leave decisions about what
140 | 
141 | 35
142 | 00:03:34,189 --> 00:03:36,110
143 | the program means up to the compiler.
144 | 


--------------------------------------------------------------------------------
/CS143 text-srt/english/05-04-C+Ambiguity.srt:
--------------------------------------------------------------------------------
  1 | 0
  2 | 00:00:01,439 --> 00:00:06,050
  3 | Now you might think that the unambiguous if, then, else grammar is complicated and hard
  4 | 
  5 | 1
  6 | 00:00:06,050 --> 00:00:10,959
  7 | to understand and frankly I would have to agree with you. Unfortunately, it's
  8 | 
  9 | 2
 10 | 00:00:10,959 --> 00:00:15,699
 11 | impossible to automatically convert an unambiguous grammar to an ambiguous one,
 12 | 
 13 | 3
 14 | 00:00:15,699 --> 00:00:19,190
 15 | so we have to do these conversions by hand. If we're going to have unambiguous
 16 | 
 17 | 4
 18 | 00:00:19,190 --> 00:00:23,020
 19 | grammars, we have t o figure out how to write them by hand and that does result
 20 | 
 21 | 5
 22 | 00:00:23,020 --> 00:00:27,820
 23 | insignificantly more complex grammars and grammars that are hard to read and
 24 | 
 25 | 6
 26 | 00:00:27,820 --> 00:00:33,160
 27 | maintain than if we use the, well more straightforward ambiguous grammars. A
 28 | 
 29 | 7
 30 | 00:00:33,160 --> 00:00:38,670
 31 | possibility is to we'll try to somehow live with ambiguity cuz that will allow us
 32 | 
 33 | 8
 34 | 00:00:38,670 --> 00:00:43,899
 35 | more depth natural definitions. And, but then we would need some disambiguation
 36 | 
 37 | 9
 38 | 00:00:43,899 --> 00:00:48,440
 39 | mechanism. We need some way to tell which Parse tree we want when more than one
 40 | 
 41 | 10
 42 | 00:00:48,440 --> 00:00:55,229
 43 | Parse tree is possible. In fact, most practical parsing tools adopt the second
 44 | 
 45 | 11
 46 | 00:00:55,229 --> 00:00:58,989
 47 | approach. So, instead of rewriting the grammar, we used the more natural
 48 | 
 49 | 12
 50 | 00:00:58,989 --> 00:01:03,690
 51 | ambiguous grammar and the tool provide some kind of disambiguating declarations.
 52 | 
 53 | 13
 54 | 00:01:03,690 --> 00:01:09,070
 55 | The most popular form of disambiguating declarations are precedence and
 56 | 
 57 | 14
 58 | 00:01:09,070 --> 00:01:16,070
 59 | associativity declarations. So, here is a natural grammar for plus over the integers
 60 | 
 61 | 15
 62 | 00:01:16,320 --> 00:01:21,040
 63 | and this is ambiguous even just with a single infix operation, we can get
 64 | 
 65 | 16
 66 | 00:01:21,040 --> 00:01:25,650
 67 | ambiguity because this grammar doesn't tell us whether plus is left associative
 68 | 
 69 | 17
 70 | 00:01:25,650 --> 00:01:30,840
 71 | or right associative. And. So, simple solution here is to have associativity
 72 | 
 73 | 18
 74 | 00:01:30,840 --> 00:01:35,600
 75 | declarations. So here, we declare plus to be left associative and this is the
 76 | 
 77 | 19
 78 | 00:01:35,600 --> 00:01:41,360
 79 | notation used in Bison. So, Bison is a particular tool and a present left plus
 80 | 
 81 | 20
 82 | 00:01:41,360 --> 00:01:46,950
 83 | declares that plus is a left-associative operation and so that would rule out this
 84 | 
 85 | 21
 86 | 00:01:46,950 --> 00:01:51,820
 87 | particular Parse tree over here. Here [inaudible] more complex grammar
 88 | 
 89 | 22
 90 | 00:01:51,820 --> 00:01:56,770
 91 | essentially the grammar we began with the, at the start of this video. Here we have
 92 | 
 93 | 23
 94 | 00:01:56,770 --> 00:02:00,920
 95 | plus and times over the integers and again this grammar is ambiguous because it
 96 | 
 97 | 24
 98 | 00:02:00,920 --> 00:02:05,960
 99 | doesn't say what the precedence of times is with respect to plus. And the solution
100 | 
101 | 25
102 | 00:02:05,960 --> 00:02:11,830
103 | for this is to have multiple associativity declarations. We declare plus to be left
104 | 
105 | 26
106 | 00:02:11,830 --> 00:02:16,730
107 | associative and we declare times to be left associative and then the precedence
108 | 
109 | 27
110 | 00:02:16,730 --> 00:02:21,230
111 | between plus and times is given by the order. So the fact here that times appears
112 | 
113 | 28
114 | 00:02:21,230 --> 00:02:28,230
115 | after. Plus means that times has a higher precedence than plus. Just one word of
116 | 
117 | 29
118 | 00:02:33,760 --> 00:02:37,810
119 | caution, These declarations are called associativity and precedence declarations
120 | 
121 | 30
122 | 00:02:37,810 --> 00:02:42,170
123 | but that's not what's quite what's going on inside the parser. The parser doesn't
124 | 
125 | 31
126 | 00:02:42,170 --> 00:02:45,750
127 | really understand about associativity and precedence. Instead, these declarations
128 | 
129 | 32
130 | 00:02:45,750 --> 00:02:50,560
131 | tell it to make certain kinds of moves in certain situations. We won't really be
132 | 
133 | 33
134 | 00:02:50,560 --> 00:02:54,480
135 | able to explain this until we get much further into parsing technology, but just
136 | 
137 | 34
138 | 00:02:54,480 --> 00:02:59,180
139 | be aware that we have to be a little bit cautious in using these declarations.
140 | 
141 | 35
142 | 00:02:59,180 --> 00:03:02,480
143 | Usually they behave like associativity and precedence in the way you expect but there
144 | 
145 | 36
146 | 00:03:02,480 --> 00:03:07,709
147 | are some situations in which they will cause Rising behavior And is important
148 | 
149 | 37
150 | 00:03:07,709 --> 00:03:11,590
151 | that you check the behavior of your grammar after you add these declarations.
152 | 


--------------------------------------------------------------------------------
/CS143 text-srt/english/06-02-abstract-syntax-trees.srt:
--------------------------------------------------------------------------------
  1 | 0
  2 | 00:00:04,799 --> 00:00:09,030
  3 | In this video, we're gonna talk about the core data structure used in compilers, the
  4 | 
  5 | 1
  6 | 00:00:09,030 --> 00:00:16,030
  7 | abstract syntax tree. To briefly review, a parser traces the derivation of a sequence
  8 | 
  9 | 2
 10 | 00:00:18,320 --> 00:00:23,050
 11 | of tokens but this by itself Is not all that useful to the compiler because the
 12 | 
 13 | 3
 14 | 00:00:23,050 --> 00:00:27,409
 15 | rest of the compiler needs some representation of the program. It needs an
 16 | 
 17 | 4
 18 | 00:00:27,409 --> 00:00:31,529
 19 | actual data structure that tells it what the operations are in the program and how
 20 | 
 21 | 5
 22 | 00:00:31,529 --> 00:00:36,100
 23 | they're put together. Well, we know one such data structure is called a Parse Tree
 24 | 
 25 | 6
 26 | 00:00:36,100 --> 00:00:41,010
 27 | but it turns out that a Parse Tree really isn't what we wanted to work on. Instead,
 28 | 
 29 | 7
 30 | 00:00:41,010 --> 00:00:45,429
 31 | we wanted to work on something called an Abstract Syntax Tree. And the Abstract
 32 | 
 33 | 8
 34 | 00:00:45,429 --> 00:00:50,789
 35 | Syntax Tree is really just the Parse Tree but with some details ignored. We have
 36 | 
 37 | 9
 38 | 00:00:50,789 --> 00:00:55,129
 39 | abstracted a way From some of the details of the Parse Tree. And here's an
 40 | 
 41 | 10
 42 | 00:00:55,129 --> 00:01:02,129
 43 | abbreviation that you'll see, ASTs stand for Abstract Syntax Tree. So, let's look
 44 | 
 45 | 11
 46 | 00:01:02,199 --> 00:01:08,590
 47 | at the grammar. Here's the grammar for plus expressions over the integers and we
 48 | 
 49 | 12
 50 | 00:01:08,590 --> 00:01:14,640
 51 | also parenthesize expressions. And here's a string and after lexical analysis, what
 52 | 
 53 | 13
 54 | 00:01:14,640 --> 00:01:19,360
 55 | do we have? Well, we've got a sequence of tokens again with their associated lexemes
 56 | 
 57 | 14
 58 | 00:01:19,360 --> 00:01:24,690
 59 | telling us what the actual strings were. And that gets past into the parser and
 60 | 
 61 | 15
 62 | 00:01:24,690 --> 00:01:30,130
 63 | then we build a parse tree. And here's a parse tree for that expression. Now, if
 64 | 
 65 | 16
 66 | 00:01:30,130 --> 00:01:34,470
 67 | it's expressed that this representation, the parse tree is actually perfectly
 68 | 
 69 | 17
 70 | 00:01:34,470 --> 00:01:39,330
 71 | adequate for compilation. We could do our compiler using the parse tree. This is a
 72 | 
 73 | 18
 74 | 00:01:39,330 --> 00:01:43,970
 75 | faithful representation of the program. The problem is that it would be quite
 76 | 
 77 | 19
 78 | 00:01:43,970 --> 00:01:48,340
 79 | inconvenient to do that and to see this, it only point out some features of the
 80 | 
 81 | 20
 82 | 00:01:48,340 --> 00:01:52,470
 83 | parse tree. First of all you can see if the parse tree is quite robust so for
 84 | 
 85 | 21
 86 | 00:01:52,470 --> 00:01:58,680
 87 | example we have here a node e and it has only one child. So when there's only one
 88 | 
 89 | 22
 90 | 00:01:58,680 --> 00:02:03,500
 91 | successor of the, of the node, what is that really doing for us? Well, we don't
 92 | 
 93 | 23
 94 | 00:02:03,500 --> 00:02:07,880
 95 | really need the e at all, we could just put the, The five right here and, and make
 96 | 
 97 | 24
 98 | 00:02:07,880 --> 00:02:13,450
 99 | the tree smaller and similarly for the other single successor nodes. Furthermore
100 | 
101 | 25
102 | 00:02:13,450 --> 00:02:16,840
103 | these parentheses h ere, well these are very important in parsing because they
104 | 
105 | 26
106 | 00:02:16,840 --> 00:02:22,810
107 | show the association of, of this, of the arguments with respect to these two plus
108 | 
109 | 27
110 | 00:02:22,810 --> 00:02:26,810
111 | operations. It shows that this plus is nested; this plus down here is nested
112 | 
113 | 28
114 | 00:02:26,810 --> 00:02:30,870
115 | inside. Of this plus up here But once we've done the parsing, the tree structure
116 | 
117 | 29
118 | 00:02:30,870 --> 00:02:35,090
119 | shows us the same thing. We don't need to know that these were inside a parenthesis,
120 | 
121 | 30
122 | 00:02:35,090 --> 00:02:39,280
123 | that the fact that these two expressions or the argument of this plus already tells
124 | 
125 | 31
126 | 00:02:39,280 --> 00:02:43,780
127 | us all we need to know. And so, you know? All of these nodes in here are also in a
128 | 
129 | 32
130 | 00:02:43,780 --> 00:02:49,510
131 | sense redundant. We don't really need that information anymore. And so we prefer to
132 | 
133 | 33
134 | 00:02:49,510 --> 00:02:54,040
135 | do is to use something called an Abstract Syntax Tree that just compresses out all
136 | 
137 | 34
138 | 00:02:54,040 --> 00:02:59,460
139 | the junk in the Parse Tree. So here is a. Abstract syntax tree or a hypothetical
140 | 
141 | 35
142 | 00:02:59,460 --> 00:03:03,180
143 | abstract syntax tree that would represent the same thing as the parse tree on the
144 | 
145 | 36
146 | 00:03:03,180 --> 00:03:08,430
147 | previous slide and you can see here we've really just cut it down to the essential
148 | 
149 | 37
150 | 00:03:08,430 --> 00:03:13,690
151 | items. We have the two+ nodes. We have the three. Arguments and the association is
152 | 
153 | 38
154 | 00:03:13,690 --> 00:03:17,990
155 | just shown by which plus node is nested inside the other. We don't have any of the
156 | 
157 | 39
158 | 00:03:17,990 --> 00:03:22,600
159 | extraneous nonterminals. We don't have the parenthesis. Everything is much simpler
160 | 
161 | 40
162 | 00:03:22,600 --> 00:03:27,040
163 | and you can imagine that it'll be easier to write algorithms that walk over a
164 | 
165 | 41
166 | 00:03:27,040 --> 00:03:31,849
167 | structure like this rather than the, the rather, elaborate structure we had on the
168 | 
169 | 42
170 | 00:03:31,849 --> 00:03:38,099
171 | previous slide. Of course again is called an abstract syntax tree because it
172 | 
173 | 43
174 | 00:03:38,099 --> 00:03:42,870
175 | abstracts away from the concrete syntax. We suppress details of the concrete syntax
176 | 
177 | 44
178 | 00:03:42,870 --> 00:03:47,459
179 | and just keep enough information to be able to faithfully represent the program
180 | 
181 | 45
182 | 00:03:47,459 --> 00:03:48,260
183 | and compile it.
184 | 


--------------------------------------------------------------------------------
/CS143 text-srt/english/06-03-recursive-descent-parsing.srt:
--------------------------------------------------------------------------------
  1 | 0
  2 | 00:00:03,949 --> 00:00:07,839
  3 | In this video, we're going to talk about our first parsing algorithm, recursive
  4 | 
  5 | 1
  6 | 00:00:07,839 --> 00:00:14,839
  7 | descent parsing. So Recursive Descent is what is called a top-down parsing
  8 | 
  9 | 2
 10 | 00:00:16,090 --> 00:00:21,220
 11 | algorithm and you might suspect that there are also bottom-up algorithms and they are
 12 | 
 13 | 3
 14 | 00:00:21,220 --> 00:00:25,540
 15 | indeed such things but we will be talking about them later but in a top-down parsing
 16 | 
 17 | 4
 18 | 00:00:25,540 --> 00:00:29,500
 19 | algorithm, the parse tree is constructed from the top so starting with the root
 20 | 
 21 | 5
 22 | 00:00:29,500 --> 00:00:35,879
 23 | node and from left to right. And so the terminals then will be seen in the order
 24 | 
 25 | 6
 26 | 00:00:35,879 --> 00:00:40,520
 27 | that they appear in the token string. So for example, if I have this token string
 28 | 
 29 | 7
 30 | 00:00:40,520 --> 00:00:45,410
 31 | here, this is a hypothetical parse tree that I could construct and the numbers
 32 | 
 33 | 8
 34 | 00:00:45,410 --> 00:00:50,530
 35 | here correspond to the order in which the nodes of this parse tree are constructed.
 36 | 
 37 | 9
 38 | 00:00:50,530 --> 00:00:54,760
 39 | So we have to begin at the roots, that's the first thing that happens and then if
 40 | 
 41 | 10
 42 | 00:00:54,760 --> 00:00:59,489
 43 | T2 is a. Belongs here in the parse tree. That would be next thing that happened but
 44 | 
 45 | 11
 46 | 00:00:59,489 --> 00:01:04,720
 47 | then if we have a nonterminal of the next position, that will be number three and
 48 | 
 49 | 12
 50 | 00:01:04,720 --> 00:01:08,619
 51 | then if it has children, well the left most one should be going left to right
 52 | 
 53 | 13
 54 | 00:01:08,619 --> 00:01:12,780
 55 | will be the fourth thing to be generated. And then let's say the two children of
 56 | 
 57 | 14
 58 | 00:01:12,780 --> 00:01:16,920
 59 | number four are both terminals that would be the next two terminals in the input and
 60 | 
 61 | 15
 62 | 00:01:16,920 --> 00:01:21,030
 63 | so on. The next thing that'll happen is the second child of number three and then
 64 | 
 65 | 16
 66 | 00:01:21,030 --> 00:01:27,630
 67 | the last two terminals appearing in left to right order. So let's consider this
 68 | 
 69 | 17
 70 | 00:01:27,630 --> 00:01:34,039
 71 | grammar for integer expressions and let's look at a particular input, a very simple
 72 | 
 73 | 18
 74 | 00:01:34,039 --> 00:01:39,700
 75 | one, just open paren five, close paren. And now, what we're going to do is we're
 76 | 
 77 | 19
 78 | 00:01:39,700 --> 00:01:42,999
 79 | going to parse this using a recursive descent strategy. I'm not gonna actually
 80 | 
 81 | 20
 82 | 00:01:42,999 --> 00:01:47,340
 83 | show you any pseudocode or anything like that. I'm just going to walk through how
 84 | 
 85 | 21
 86 | 00:01:47,340 --> 00:01:52,189
 87 | this, how this input string would be parsed. But using this grammar and the
 88 | 
 89 | 22
 90 | 00:01:52,189 --> 00:01:57,749
 91 | Recursive Descent Algorithm and the basic idea is that we begin with a nonterminal,
 92 | 
 93 | 23
 94 | 00:01:57,749 --> 00:02:02,999
 95 | we begin with the root node and we always try the rules for nonterminal in order. So
 96 | 
 97 | 24
 98 | 00:02:02,999 --> 00:02:08,110
 99 | we will begin by starting with e goes to t and if that doesn't work, we'll try e goes
100 | 
101 | 25
102 | 00:02:08,110 --> 00:02:12,560
103 | to t + e. So, this is gonna be a top down algorithm beginning at the root. We're
104 | 
105 | 26
106 | 00:02:12,560 --> 00:02:17,569
107 | gonna work from left to right, we try the productions in order and when the
108 | 
109 | 27
110 | 00:02:17,569 --> 00:02:22,860
111 | productions fail, we may have to do some back tracking in order to try alternative
112 | 
113 | 28
114 | 00:02:22,860 --> 00:02:26,730
115 | productions. There are three parts. There's the grammar that we're using.
116 | 
117 | 29
118 | 00:02:26,730 --> 00:02:30,970
119 | There is the parse tree that we're building and initially that's just the
120 | 
121 | 30
122 | 00:02:30,970 --> 00:02:35,430
123 | root of the parse tree 3e and finally there's the input that we're processing
124 | 
125 | 31
126 | 00:02:35,430 --> 00:02:38,790
127 | and we'll indicate our position in the input, how much of the input we have read
128 | 
129 | 32
130 | 00:02:38,790 --> 00:02:44,290
131 | by this big fat red arrow and it always points to the next terminal symbol to be
132 | 
133 | 33
134 | 00:02:44,290 --> 00:02:48,409
135 | read, The next token to be read. So in this case, we're starting with an open
136 | 
137 | 34
138 | 00:02:48,409 --> 00:02:54,019
139 | paren. Okay? And also in the grammar, you can see the highlighting here the brighter
140 | 
141 | 35
142 | 00:02:54,019 --> 00:02:58,569
143 | red color indicates which production we're going to try. So, we're going to begin to
144 | 
145 | 36
146 | 00:02:58,569 --> 00:03:03,489
147 | build our Parse Tree by trying production e goes to t, and what does that mean?
148 | 
149 | 37
150 | 00:03:03,489 --> 00:03:08,420
151 | Well, that means we make t the child of e and then we continue trying to build the
152 | 
153 | 38
154 | 00:03:08,420 --> 00:03:13,560
155 | Parse Tree. Well, so remember we're going left to right and top-down so now, t is an
156 | 
157 | 39
158 | 00:03:13,560 --> 00:03:18,129
159 | unexpanded nonterminal, is the only unexpanded nonterminal so we have to work
160 | 
161 | 40
162 | 00:03:18,129 --> 00:03:22,239
163 | on it. And what are we going to do, well we're going to try a production for t and
164 | 
165 | 41
166 | 00:03:22,239 --> 00:03:26,859
167 | since we haven't tried any yet, we'll just try the first one, t goes to it. So the
168 | 
169 | 42
170 | 00:03:26,859 --> 00:03:32,040
171 | next step is to make nth a child with t and that's what our parse tree looks like.
172 | 
173 | 43
174 | 00:03:32,040 --> 00:03:36,269
175 | And now, we actually have something that we can check. We can check whether we're
176 | 
177 | 44
178 | 00:03:36,269 --> 00:03:42,329
179 | making progress. So observe that as long as we're generating nonterminals, we don't
180 | 
181 | 45
182 | 00:03:42,329 --> 00:03:47,890
183 | really know whether we're on the right track or not. We have no way to check
184 | 
185 | 46
186 | 00:03:47,890 --> 00:03:51,810
187 | whether the nonterminals that we're generating are gonna produce the, the
188 | 
189 | 47
190 | 00:03:51,810 --> 00:03:56,549
191 | input string. But once we generate a terminal symbol, then we can compare that
192 | 
193 | 48
194 | 00:03:56,549 --> 00:04:01,549
195 | with the next input token to see if they're the same and in this case,
196 | 
197 | 49
198 | 00:04:01,549 --> 00:04:05,849
199 | unfortunately they're not. So, the nth that we generated here doesn't match the
200 | 
201 | 50
202 | 00:04:05,849 --> 00:04:11,209
203 | open paren in the input and so clearly this parse, th is parsing strategy or
204 | 
205 | 51
206 | 00:04:11,209 --> 00:04:15,139
207 | this. Parse Tree that we're building isn't going to work out. So, what we're going to
208 | 
209 | 52
210 | 00:04:15,139 --> 00:04:19,709
211 | have to do is we're gonna have to back track. That means, we're gonna undo one or
212 | 
213 | 53
214 | 00:04:19,709 --> 00:04:23,700
215 | more of our decisions. We're gonna go back to our last decision point and see if
216 | 
217 | 54
218 | 00:04:23,700 --> 00:04:27,470
219 | there's another alternative to try. So what's the last decision we made, well we
220 | 
221 | 55
222 | 00:04:27,470 --> 00:04:32,580
223 | decide to use t goes to nth, so we can undo that and then we could try the next
224 | 
225 | 56
226 | 00:04:32,580 --> 00:04:39,580
227 | production for t. And that happens to be t goes to n t so expand t using that
228 | 
229 | 57
230 | 00:04:39,820 --> 00:04:44,450
231 | production and now once again, we generated a terminal in the left most
232 | 
233 | 58
234 | 00:04:44,450 --> 00:04:49,250
235 | position and so now we're able to compare that with the input and once again
236 | 
237 | 59
238 | 00:04:49,250 --> 00:04:54,880
239 | unfortunately, the nth token does not match the open paren so we have to back
240 | 
241 | 60
242 | 00:04:54,880 --> 00:05:01,530
243 | track again. So we undo that decision. And this takes us back to trying alternatives
244 | 
245 | 61
246 | 00:05:01,530 --> 00:05:08,440
247 | for t. There's one more possibility, and that's the t goes to (e). So we expand t
248 | 
249 | 62
250 | 00:05:08,440 --> 00:05:15,440
251 | using that production. And now, we can compare the token open paren. With, is
252 | 
253 | 63
254 | 00:05:17,070 --> 00:05:21,650
255 | this open paren? With the open paren in the input and they match. And so, that's
256 | 
257 | 64
258 | 00:05:21,650 --> 00:05:27,620
259 | good. That means that we're, we might be on the right track. And since they match,
260 | 
261 | 65
262 | 00:05:27,620 --> 00:05:32,810
263 | anything that we do in the future is going to have to match the different input and
264 | 
265 | 66
266 | 00:05:32,810 --> 00:05:38,400
267 | so we'll advance the input pointer. So now, where we're gonna work on next? Well,
268 | 
269 | 67
270 | 00:05:38,400 --> 00:05:42,910
271 | we have to expand this non-terminal e and we're gonna do the same thing we did
272 | 
273 | 68
274 | 00:05:42,910 --> 00:05:49,910
275 | before. We're just gonna start with the first production. So we have e goes to t
276 | 
277 | 69
278 | 00:05:49,910 --> 00:05:53,620
279 | and then we have to work on t, so we're gonna pick the first production for t and
280 | 
281 | 70
282 | 00:05:53,620 --> 00:05:59,560
283 | we have t goes to int. So now, we can compare. Is int matching int in the input?
284 | 
285 | 71
286 | 00:05:59,560 --> 00:06:04,840
287 | And if it does and so we advance the input pointer again, And now we're here and
288 | 
289 | 72
290 | 00:06:04,840 --> 00:06:09,620
291 | what's left, well we progressed to this point. We're looking at that open paren
292 | 
293 | 73
294 | 00:06:09,620 --> 00:06:15,270
295 | and that also matches. So that matches the input and now we've matched everything in
296 | 
297 | 74
298 | 00:06:15,270 --> 00:06:20,440
299 | the parse tree and our input pointer is at the end of the string and so this is
300 | 
301 | 75
302 | 00:06:20,440 --> 00:06:27,440
303 | actually a successful parse of the input, of the input string. And so that means th
304 | 
305 | 76
306 | 00:06:28,320 --> 00:06:32,020
307 | at we accept and the parser terminates successfully.
308 | 


--------------------------------------------------------------------------------
/CS143 text-srt/english/06-05-B+Left+Recursion.srt:
--------------------------------------------------------------------------------
 1 | 0
 2 | 00:00:01,250 --> 00:00:05,940
 3 | So to summarize our discussion of Recursive Descent Parsing, it's a simple in general
 4 | 
 5 | 1
 6 | 00:00:05,940 --> 00:00:10,980
 7 | parsing strategy. You can parse any Context-Free Grammar using Recursive Descent,
 8 | 
 9 | 2
10 | 00:00:10,980 --> 00:00:14,960
11 | so it's very general in that respect. It cannot work with Left-Recursive
12 | 
13 | 3
14 | 00:00:14,960 --> 00:00:19,470
15 | Grammar so you must eliminate The Left Recursion. Now in principle,
16 | 
17 | 4
18 | 00:00:19,470 --> 00:00:22,789
19 | this can be automatically. You can have Algorithms that will eliminate the
20 | 
21 | 5
22 | 00:00:22,789 --> 00:00:27,760
23 | Left Recursion. In practice, people eliminate the Left Recursion by hand and the
24 | 
25 | 6
26 | 00:00:27,760 --> 00:00:31,240
27 | reason for that is that you need to know what the grammar is that you're using
28 | 
29 | 7
30 | 00:00:31,240 --> 00:00:35,000
31 | so that you can write the semantic actions. And we haven't talked about semantic
32 | 
33 | 8
34 | 00:00:35,000 --> 00:00:41,440
35 | actions yet but we will see them shortly. And because you wanted to know exactly
36 | 
37 | 9
38 | 00:00:41,440 --> 00:00:45,329
39 | what grammar, form of grammar it has, people generally do the elimination of
40 | 
41 | 10
42 | 00:00:45,329 --> 00:00:50,399
43 | left-recursion on their own. But that's not difficult to do. And in fact, recursion
44 | 
45 | 11
46 | 00:00:50,399 --> 00:00:54,620
47 | descent is a popular strategy in practice. There are a lot to be more complicated,
48 | 
49 | 12
50 | 00:00:54,620 --> 00:00:58,140
51 | but actually compilers in fact with complicated grammars use recursive
52 | 
53 | 13
54 | 00:00:58,140 --> 00:01:02,219
55 | descent because it is so general. So, for example GCC is front-end
56 | 
57 | 14
58 | 00:01:02,219 --> 00:01:04,350
59 | is a handwritten Recursive Descent Parser.
60 | 


--------------------------------------------------------------------------------
/CS143 text-srt/english/07-05-B+Bottom-Up+Parsing.srt:
--------------------------------------------------------------------------------
 1 | 0
 2 | 00:00:00,989 --> 00:00:03,389
 3 | So, to summarize, a bottom up parser traces out a
 4 | 
 5 | 1
 6 | 00:00:03,389 --> 00:00:08,420
 7 | rightmost derivation in reverse. And it builds a parse tree by combining small parse
 8 | 
 9 | 2
10 | 00:00:08,420 --> 00:00:12,990
11 | trees into larger ones. It precedes bottom up. Instead of expanding out from the
12 | 
13 | 3
14 | 00:00:12,990 --> 00:00:17,300
15 | start symbol top down, it builds from the leaves of the tree up towards the root.
16 | 


--------------------------------------------------------------------------------
/CS143 text-srt/english/07-06-B+Shift-Reduce+Parsing.srt:
--------------------------------------------------------------------------------
  1 | 0
  2 | 00:00:01,530 --> 00:00:04,170
  3 | It turns out that this left string, this stuff to the left
  4 | 
  5 | 1
  6 | 00:00:04,170 --> 00:00:08,900
  7 | of the vertical bar can be implemented by a stack, and that's because we only do
  8 | 
  9 | 2
 10 | 00:00:08,900 --> 00:00:15,440
 11 | reduced operations immediately to the left of the vertical bar. So it's always suffix
 12 | 
 13 | 3
 14 | 00:00:15,440 --> 00:00:19,010
 15 | of the string to left of the vertical bar, where the reduction is happening so what a
 16 | 
 17 | 4
 18 | 00:00:19,010 --> 00:00:23,170
 19 | shift move then does is to push a terminal onto the stack. It reads one token of
 20 | 
 21 | 5
 22 | 00:00:23,170 --> 00:00:27,870
 23 | input, and pushed it onto the stack. And then a reduce, pops some number symbols
 24 | 
 25 | 6
 26 | 00:00:27,870 --> 00:00:31,560
 27 | off of the stack. That's the production right hand side. And then it pushed one
 28 | 
 29 | 7
 30 | 00:00:31,560 --> 00:00:38,560
 31 | non-terminal onto the stack. And that's the production, left hand side. Now it can
 32 | 
 33 | 8
 34 | 00:00:38,969 --> 00:00:43,989
 35 | happen that in a given state, that more than one action, shift or reduce, may lead
 36 | 
 37 | 9
 38 | 00:00:43,989 --> 00:00:49,069
 39 | to a valid parse. So in particular, if it's legal to shift or reduce, if you can
 40 | 
 41 | 10
 42 | 00:00:49,069 --> 00:00:52,789
 43 | do either one of those things, then we say there is a shift re duce conflict. The
 44 | 
 45 | 11
 46 | 00:00:52,789 --> 00:00:57,210
 47 | parser could either read one token of input and push it on the stack, or it
 48 | 
 49 | 12
 50 | 00:00:57,210 --> 00:01:02,870
 51 | could perform a reduction. If it's legal to reduce by two different productions,
 52 | 
 53 | 13
 54 | 00:01:02,870 --> 00:01:07,090
 55 | then there is what is called a reduce/reduce conflict. Okay. So reduce,
 56 | 
 57 | 14
 58 | 00:01:07,090 --> 00:01:12,290
 59 | reduce conflicts are, are always bad, are almost always bad. They indicate usually
 60 | 
 61 | 15
 62 | 00:01:12,290 --> 00:01:16,600
 63 | some kind of serious problem with the grammar. Shift reduce conflicts are not
 64 | 
 65 | 16
 66 | 00:01:16,600 --> 00:01:21,060
 67 | good, but they're often easier to remove. So if you have reduce, reduce conflicts
 68 | 
 69 | 17
 70 | 00:01:21,060 --> 00:01:24,740
 71 | particularly when you're building your, grammar for cool, then you're doing
 72 | 
 73 | 18
 74 | 00:01:24,740 --> 00:01:30,380
 75 | something seriously wrong. If you have shift reduce conflicts then that, that's
 76 | 
 77 | 19
 78 | 00:01:30,380 --> 00:01:34,869
 79 | to be almost expected, because of You're probably going to need to use precedence
 80 | 
 81 | 20
 82 | 00:01:34,869 --> 00:01:40,909
 83 | declarations, Remove them, and we'll talk about that some more in another video. But
 84 | 
 85 | 21
 86 | 00:01:40,909 --> 00:01:45,049
 87 | in general, if you have either one of these conflicts, it means that, there's
 88 | 
 89 | 22
 90 | 00:01:45,049 --> 00:01:48,979
 91 | some state in which the parser doesn't know what to do. And you either need to
 92 | 
 93 | 23
 94 | 00:01:48,979 --> 00:01:52,299
 95 | rewrite the grammar, or give it a hint as to what it should do in order to
 96 | 
 97 | 24
 98 | 00:01:52,299 --> 00:01:54,570
 99 | successfully parse your language.
100 | 


--------------------------------------------------------------------------------
/CS143 text-srt/english/07-06.srt:
--------------------------------------------------------------------------------
  1 | 0
  2 | 00:00:03,740 --> 00:00:07,090
  3 | In this video, we're gonna continue our discussion of bottom-up parsing with the
  4 | 
  5 | 1
  6 | 00:00:07,090 --> 00:00:14,090
  7 | main strategy used by all bottom-up parsers, so-called shift-reduce parsing.
  8 | 
  9 | 2
 10 | 00:00:15,549 --> 00:00:19,300
 11 | Here is a quick review of the most important thing that we learned last time,
 12 | 
 13 | 3
 14 | 00:00:19,300 --> 00:00:24,380
 15 | that a bottom up parser traces a right most innervations in reverse. Now this
 16 | 
 17 | 4
 18 | 00:00:24,380 --> 00:00:29,849
 19 | particular fact has an important consequence. So let's think about a state
 20 | 
 21 | 5
 22 | 00:00:29,849 --> 00:00:35,460
 23 | of a shift reduced parse where we have string alpha, beta and omega, and let's
 24 | 
 25 | 6
 26 | 00:00:35,460 --> 00:00:40,750
 27 | assume the next reduction is going to be the replaced beta by X. Okay, so remember
 28 | 
 29 | 7
 30 | 00:00:40,750 --> 00:00:47,320
 31 | we're running productions backwards. Then I claim that Omega has to be a string of
 32 | 
 33 | 8
 34 | 00:00:47,320 --> 00:00:51,410
 35 | terminals. And why is that? Well if you think about it, if this is a rightmost
 36 | 
 37 | 9
 38 | 00:00:51,410 --> 00:00:56,120
 39 | innervations in reverse, then when X is replaced when we take this if we look at
 40 | 
 41 | 10
 42 | 00:00:56,120 --> 00:00:59,670
 43 | the forward step is the, the backward step. So remember the parser is running
 44 | 
 45 | 11
 46 | 00:00:59,670 --> 00:01:04,280
 47 | this way replacing data by X. But if we think about the rightmost innervations in
 48 | 
 49 | 12
 50 | 00:01:04,280 --> 00:01:08,729
 51 | the other direction then X has to be the rightmost non-terminal, which means there
 52 | 
 53 | 13
 54 | 00:01:08,729 --> 00:01:15,729
 55 | are no non-terminals to the right of X and so all the Character, all the tokens,
 56 | 
 57 | 14
 58 | 00:01:15,859 --> 00:01:21,700
 59 | whatever it is in this string have to be terminal symbols. Now it turns out that
 60 | 
 61 | 15
 62 | 00:01:21,700 --> 00:01:26,999
 63 | those terminal symbols to the right of the right most non-terminal are exactly the
 64 | 
 65 | 16
 66 | 00:01:26,999 --> 00:01:32,689
 67 | unexamined input in bottom of parsley implementations. That is if I have alpha
 68 | 
 69 | 17
 70 | 00:01:32,689 --> 00:01:39,689
 71 | X, omega and I'm, and X is my right most to non-terminal then this is the input
 72 | 
 73 | 18
 74 | 00:01:40,359 --> 00:01:45,409
 75 | that we haven't read yet. This is unexamined Input And it's gonna be useful
 76 | 
 77 | 19
 78 | 00:01:45,409 --> 00:01:50,810
 79 | to mark, where we are, in the parse, where our, where our input focus is. And we're
 80 | 
 81 | 20
 82 | 00:01:50,810 --> 00:01:54,270
 83 | gonna do that by using a vertical bar. So we're gonna just place, drop a vertical
 84 | 
 85 | 21
 86 | 00:01:54,270 --> 00:01:59,649
 87 | bar. Between the place where we read everything to the left, and we've actually
 88 | 
 89 | 22
 90 | 00:01:59,649 --> 00:02:03,130
 91 | been working on this. So this stuff to the left here can be terminals and
 92 | 
 93 | 23
 94 | 00:02:03,130 --> 00:02:06,670
 95 | non-terminals and we, the parts that we've seen all of that stuff. And the stuff to
 96 | 
 97 | 24
 98 | 00:02:06,670 --> 00:02:10,550
 99 | the right is after the parts hasn't seen. Now we don't know what's out there,
100 | 
101 | 25
102 | 00:02:10,550 --> 00:02:14,080
103 | although we do know its all terminal symbols. An d the vertical bar is just
104 | 
105 | 26
106 | 00:02:14,080 --> 00:02:21,080
107 | gonna mark the dividing line between the two sub-strings. To implement bottom up
108 | 
109 | 27
110 | 00:02:21,110 --> 00:02:26,110
111 | parsing, it turns out we only needs two kinds of actions. Shift moves and reduce
112 | 
113 | 28
114 | 00:02:26,110 --> 00:02:29,860
115 | moves. And we've already talked somewhat about reduce moves, and so we have to
116 | 
117 | 29
118 | 00:02:29,860 --> 00:02:36,310
119 | introduce shift moves. So let's do that now. So a shift move, reads one token of
120 | 
121 | 30
122 | 00:02:36,310 --> 00:02:40,280
123 | input. And we can explain that or represent that by moving the vertical bar
124 | 
125 | 31
126 | 00:02:40,280 --> 00:02:44,790
127 | one token to the right. So if our input focus is here, and we want to read one
128 | 
129 | 32
130 | 00:02:44,790 --> 00:02:48,730
131 | more token of input, then we just move the vertical bar over. And this signifies that
132 | 
133 | 33
134 | 00:02:48,730 --> 00:02:54,010
135 | now the parser knows about that next, terminal symbol. And now we can start
136 | 
137 | 34
138 | 00:02:54,010 --> 00:02:57,340
139 | working on it. It can do things. We can match against it for the purposes of
140 | 
141 | 35
142 | 00:02:57,340 --> 00:03:01,900
143 | performing reductions. Again the stuff out here to the right of the vertical bar the
144 | 
145 | 36
146 | 00:03:01,900 --> 00:03:08,900
147 | parser hasn't seen yet. And then a reduce move is to apply an inverse production at
148 | 
149 | 37
150 | 00:03:08,970 --> 00:03:15,970
151 | the right end of the left string. So if in production a goes to xy and we have x and
152 | 
153 | 38
154 | 00:03:16,110 --> 00:03:20,840
155 | y here immediately to the left of the vertical bar. So this is our focus point,
156 | 
157 | 39
158 | 00:03:20,840 --> 00:03:25,250
159 | okay, and x and y the right hand side of the reduction is right there. Then we can
160 | 
161 | 40
162 | 00:03:25,250 --> 00:03:29,710
163 | do a reduction, we can replace that right hand side by the left hand side, and this
164 | 
165 | 41
166 | 00:03:29,710 --> 00:03:36,710
167 | is a reduce move. Here is the example from the last video, and this is exactly the
168 | 
169 | 42
170 | 00:03:37,470 --> 00:03:42,730
171 | example just showing the reduced moves, now with the vertical bar also shown. So
172 | 
173 | 43
174 | 00:03:42,730 --> 00:03:48,120
175 | this shows where the input focus was, at the point where each of the reductions was
176 | 
177 | 44
178 | 00:03:48,120 --> 00:03:51,980
179 | performed. And what's missing, of course, now we know, is the sequence of shift
180 | 
181 | 45
182 | 00:03:51,980 --> 00:03:57,010
183 | moves. So here is the sequence of shift moves and reduce moves that, take the
184 | 
185 | 46
186 | 00:03:57,010 --> 00:04:04,000
187 | initial input string, to the start symbol. So let's walk through this in more detail.
188 | 
189 | 47
190 | 00:04:04,000 --> 00:04:08,470
191 | So we're going to go step by step. And we're going to show each shift and each
192 | 
193 | 48
194 | 00:04:08,470 --> 00:04:14,740
195 | reduce move. And now in addition to our input string down here we also have a
196 | 
197 | 49
198 | 00:04:14,740 --> 00:04:19,289
199 | pointer showing where the where in the input we are. So initially we haven't seen
200 | 
201 | 50
202 | 00:04:19,289 --> 00:04:24,139
203 | any of the input, and our input pointer is to the left of the entire str ing. So the
204 | 
205 | 51
206 | 00:04:24,139 --> 00:04:29,969
207 | first move is to do a shift. And then we do another shift, and then we do another
208 | 
209 | 52
210 | 00:04:29,969 --> 00:04:35,569
211 | shift. And now just look at the example from before if you look back at that, at
212 | 
213 | 53
214 | 00:04:35,569 --> 00:04:38,979
215 | that example, you know, the next thing we need to do is to reduce. So remember we're
216 | 
217 | 54
218 | 00:04:38,979 --> 00:04:43,759
219 | only allowed to reduce to the left of the arrows. So, we can only reduce over on
220 | 
221 | 55
222 | 00:04:43,759 --> 00:04:48,370
223 | this side of the arrow. So, we always have to read enough of the input before we can
224 | 
225 | 56
226 | 00:04:48,370 --> 00:04:55,370
227 | perform a reduced move. And then we perform another reduce move, okay? And
228 | 
229 | 57
230 | 00:04:55,770 --> 00:04:59,599
231 | then it turns out the next thing to do is two shift moves and we haven't explained
232 | 
233 | 58
234 | 00:04:59,599 --> 00:05:02,979
235 | yet how we know whether to shift or reduce we're going to get there. I'm just showing
236 | 
237 | 59
238 | 00:05:02,979 --> 00:05:07,999
239 | that there exists a sequence of shift and reduce moves that succeed in parsing this
240 | 
241 | 60
242 | 00:05:07,999 --> 00:05:14,430
243 | example. Now we've shifted the entire input onto this sorry we've, we've. We
244 | 
245 | 61
246 | 00:05:14,430 --> 00:05:18,830
247 | shifted over the entire input, so there's no more input to read. And now all we can
248 | 
249 | 62
250 | 00:05:18,830 --> 00:05:23,110
251 | do is reduce moves. But fortunately, there is a sequence of reduce moves from this
252 | 
253 | 63
254 | 00:05:23,110 --> 00:05:29,599
255 | point, that we can we can perform. So, here we reduce int, and then we reduce T
256 | 
257 | 64
258 | 00:05:29,599 --> 00:05:36,599
259 | plus T. Oh, forgot, we first reduce T to E, and then we reduce T plus E back to the
260 | 
261 | 65
262 | 00:05:36,710 --> 00:05:37,569
263 | start symbol.
264 | 


--------------------------------------------------------------------------------
/CS143 text-srt/english/08-01-B+Handles.srt:
--------------------------------------------------------------------------------
  1 | 0
  2 | 00:00:01,079 --> 00:00:03,810
  3 | At this point we know enough to state the second important
  4 | 
  5 | 1
  6 | 00:00:03,810 --> 00:00:07,770
  7 | fact about bottom- up parsing. So in shift reduce parsing handles appear
  8 | 
  9 | 2
 10 | 00:00:07,770 --> 00:00:12,759
 11 | only at the top of the stack Never in sight Side, and in fact this is what justifies
 12 | 
 13 | 3
 14 | 00:00:12,759 --> 00:00:16,840
 15 | using a stack because that string to the left of our focus point we know
 16 | 
 17 | 4
 18 | 00:00:16,840 --> 00:00:19,930
 19 | that all the action will take place immediately to the left of the focus point.
 20 | 
 21 | 5
 22 | 00:00:19,930 --> 00:00:25,060
 23 | We won't have to dive down to the string to look at its [inaudible] and therefore
 24 | 
 25 | 6
 26 | 00:00:25,060 --> 00:00:31,130
 27 | the stack will be sufficient. So here's an informal proof, that handles only
 28 | 
 29 | 7
 30 | 00:00:31,130 --> 00:00:34,580
 31 | appear at the top of the stack. And this is by induction on the number of reduce
 32 | 
 33 | 8
 34 | 00:00:34,580 --> 00:00:39,070
 35 | moves. So this is true initially because the stack is empty. And so, we don't,
 36 | 
 37 | 9
 38 | 00:00:39,070 --> 00:00:42,080
 39 | you know, so the only possible reduction is at the top of the stack if there's
 40 | 
 41 | 10
 42 | 00:00:42,080 --> 00:00:48,360
 43 | an epsilon move, to make. And immediately after we reduce, the right most
 44 | 
 45 | 11
 46 | 00:00:48,360 --> 00:00:51,909
 47 | non terminal is going to be on top of the stack. So immediately after we perform
 48 | 
 49 | 12
 50 | 00:00:51,909 --> 00:00:57,580
 51 | a reduction, we have a, our stack, and then we have a, non terminal. And then our
 52 | 
 53 | 13
 54 | 00:00:57,580 --> 00:01:04,580
 55 | vertical bar, And this is the right most non terminal. And since this is the right
 56 | 
 57 | 14
 58 | 00:01:05,600 --> 00:01:10,850
 59 | most derivation that means that the next handle has to be somewhere to the right. The
 60 | 
 61 | 15
 62 | 00:01:10,850 --> 00:01:16,680
 63 | next handle has to be, It has to include something that, and you know possibly
 64 | 
 65 | 16
 66 | 00:01:16,680 --> 00:01:20,780
 67 | include some of this stuff. But it's either right here at the current focus
 68 | 
 69 | 17
 70 | 00:01:20,780 --> 00:01:25,100
 71 | point, or it's to the right, Because we can't be doing any reductions to the left
 72 | 
 73 | 18
 74 | 00:01:25,100 --> 00:01:30,120
 75 | of the rightmost non-terminal. And so it's gonna require a sequence of shift moves
 76 | 
 77 | 19
 78 | 00:01:30,120 --> 00:01:33,530
 79 | to reach the next handle. So once we have this non-terminal on top of the stack,
 80 | 
 81 | 20
 82 | 00:01:33,530 --> 00:01:37,030
 83 | it is by definition the rightmost non-terminal, and so the next handle has to
 84 | 
 85 | 21
 86 | 00:01:37,030 --> 00:01:43,979
 87 | be somewhere to the right of that. Therefore in shift reduce parsing handles
 88 | 
 89 | 22
 90 | 00:01:43,979 --> 00:01:47,690
 91 | always appear at the top of the stack. Handles are never to the left of the right
 92 | 
 93 | 23
 94 | 00:01:47,690 --> 00:01:52,450
 95 | most knot terminal and this is why shift and reduce moves are sufficient. The shift
 96 | 
 97 | 24
 98 | 00:01:52,450 --> 00:01:57,540
 99 | move only moves the vertical part to the right because we never need to move it left.
100 | 
101 | 25
102 | 00:01:57,540 --> 00:02:01,720
103 | And bottom of parsing algorithms are based on recognizing handles. So as we saw
104 | 
105 | 26
106 | 00:02:01,720 --> 00:02:05,190
107 | in the example at the beginning of this video. Just because you have a right hand
108 | 
109 | 27
110 | 00:02:05,190 --> 00:02:10,130
111 | side on top of the stack that doesn't mean that it's a handle. And so we need to be smarter
112 | 
113 | 28
114 | 00:02:10,130 --> 00:02:12,000
115 | about where we perform our
116 | 


--------------------------------------------------------------------------------
/CS143 text-srt/english/08-01.srt:
--------------------------------------------------------------------------------
  1 | 0
  2 | 00:00:03,850 --> 00:00:07,180
  3 | In this video, we're going to introduce another important concept in bottom-up
  4 | 
  5 | 1
  6 | 00:00:07,180 --> 00:00:14,180
  7 | parsing, the notion of a handle. To review, bottom up parsing is these two
  8 | 
  9 | 2
 10 | 00:00:14,580 --> 00:00:19,590
 11 | kinds of actions: we have shift moves, which just read one token of input and
 12 | 
 13 | 3
 14 | 00:00:19,590 --> 00:00:24,039
 15 | move the vertical bar one to the right, And reduced moves, which replace the right
 16 | 
 17 | 4
 18 | 00:00:24,039 --> 00:00:28,369
 19 | hand side of a production [inaudible] to the left of the vertical bar by a
 20 | 
 21 | 5
 22 | 00:00:28,369 --> 00:00:32,750
 23 | production left hand side. So in this case, the production must have been A goes
 24 | 
 25 | 6
 26 | 00:00:32,750 --> 00:00:39,750
 27 | to XY. And also reviewing what we did in the last video, the left string can be
 28 | 
 29 | 7
 30 | 00:00:39,819 --> 00:00:44,719
 31 | implemented by a stack, where the top of the stack is marked by the vertical bar.
 32 | 
 33 | 8
 34 | 00:00:44,719 --> 00:00:49,749
 35 | So shift pushes the terminal on to the stack and reduce pops zero or more symbols
 36 | 
 37 | 9
 38 | 00:00:49,749 --> 00:00:52,569
 39 | of the stack, and that's gonna be the right hand stack of some production. And
 40 | 
 41 | 10
 42 | 00:00:52,569 --> 00:00:56,989
 43 | then it's going to push one non-terminal on to the stack which is the left hand
 44 | 
 45 | 11
 46 | 00:00:56,989 --> 00:01:03,589
 47 | side of that same production. And the key question in bottom of parsing and the one
 48 | 
 49 | 12
 50 | 00:01:03,589 --> 00:01:08,310
 51 | we haven't addressed at all yet is how do we decide when to shift and when to
 52 | 
 53 | 13
 54 | 00:01:08,310 --> 00:01:14,869
 55 | reduce. So let's take a look at this example grammar. And let's think about a
 56 | 
 57 | 14
 58 | 00:01:14,869 --> 00:01:19,530
 59 | step of a parse where we've shifted one token onto the stack. We have Nth on the
 60 | 
 61 | 15
 62 | 00:01:19,530 --> 00:01:24,119
 63 | stack, and then we have times N plus N still to go that we haven't seen yet. Now
 64 | 
 65 | 16
 66 | 00:01:24,119 --> 00:01:29,520
 67 | at this point we could decide to reduce by T goes to N because we have the production
 68 | 
 69 | 17
 70 | 00:01:29,520 --> 00:01:34,520
 71 | T goes to Nth right here. And so we could then get into this particul-, potential
 72 | 
 73 | 18
 74 | 00:01:34,520 --> 00:01:38,770
 75 | state, or this particular state, where we have T on the stack and then the rest of
 76 | 
 77 | 19
 78 | 00:01:38,770 --> 00:01:42,929
 79 | the input that looks like that. A-, but you can see that this would be a mistake.
 80 | 
 81 | 20
 82 | 00:01:42,929 --> 00:01:48,450
 83 | There is no production in the grammar that begins Hence T times. There's no
 84 | 
 85 | 21
 86 | 00:01:48,450 --> 00:01:55,119
 87 | production up here that looks like T times. And therefore if we were to, to, to
 88 | 
 89 | 22
 90 | 00:01:55,119 --> 00:02:00,090
 91 | make this move, we would get stuck. We could continue to do reductions, to
 92 | 
 93 | 23
 94 | 00:02:00,090 --> 00:02:03,590
 95 | rummage around in the string. But we would never be able to get back to the start
 96 | 
 97 | 24
 98 | 00:02:03,590 --> 00:02:09,100
 99 | symbol. Because there is no way to deal a sub string that has t times something in
100 | 
101 | 25
102 | 00:02:09,100 --> 00:02:16,100
103 | it. So what that shows us is that we don't always want to reduce just because we have
104 | 
105 | 26
106 | 00:02:17,280 --> 00:02:21,950
107 | the right-hand side of a production on top of the stack. To repeat that, even if
108 | 
109 | 27
110 | 00:02:21,950 --> 00:02:25,280
111 | there's the right-hand side of some production sitting right there on top of
112 | 
113 | 28
114 | 00:02:25,280 --> 00:02:29,510
115 | the stack, it might be a mistake to do a reduction. We might want to wait and do
116 | 
117 | 29
118 | 00:02:29,510 --> 00:02:34,360
119 | our reduction someplace else. And the idea about how we decide is that we only want
120 | 
121 | 30
122 | 00:02:34,360 --> 00:02:38,720
123 | to reduce if the result can still be reduced to the start symbol. So let's take
124 | 
125 | 31
126 | 00:02:38,720 --> 00:02:44,680
127 | a look at a right most innervations. So, beginning with the start symbol, we get to
128 | 
129 | 32
130 | 00:02:44,680 --> 00:02:48,690
131 | some state after, after some number of steps where that means, just an arbitrary
132 | 
133 | 33
134 | 00:02:48,690 --> 00:02:53,590
135 | number of steps. We get to some state X is the right most non-terminal and then the
136 | 
137 | 34
138 | 00:02:53,590 --> 00:02:58,010
139 | next step is to replace X with by the right hand side of some production. And
140 | 
141 | 35
142 | 00:02:58,010 --> 00:03:02,330
143 | remember, again, with bottom up parsing, the parsers are actually going in this
144 | 
145 | 36
146 | 00:03:02,330 --> 00:03:09,010
147 | direction, okay. So, this is the reduction direction. The derivation direction, the
148 | 
149 | 37
150 | 00:03:09,010 --> 00:03:14,180
151 | production direction, Because that's the easiest way to talk about what strings are
152 | 
153 | 38
154 | 00:03:14,180 --> 00:03:17,290
155 | derived. We wanna begin with a start symbol. But the [inaudible], but the
156 | 
157 | 39
158 | 00:03:17,290 --> 00:03:22,980
159 | parser's actually going against the flow of these arrows. Anyway if this is a
160 | 
161 | 40
162 | 00:03:22,980 --> 00:03:28,570
163 | rightmost derivation Then we say that alpha beta is a handle of alpha beta
164 | 
165 | 41
166 | 00:03:28,570 --> 00:03:34,690
167 | omega. And that just means that, yes, it would be okay in this situation to reduce
168 | 
169 | 42
170 | 00:03:34,690 --> 00:03:40,710
171 | beta to X. And we could replace beta by X, because it's not a mistake. We can still,
172 | 
173 | 43
174 | 00:03:40,710 --> 00:03:45,960
175 | by some sequence of moves, get back to the start symbol. You know, by, by doing more
176 | 
177 | 44
178 | 00:03:45,960 --> 00:03:52,730
179 | reductions. So handles formulize the intuition about where it is okay to do a
180 | 
181 | 45
182 | 00:03:52,730 --> 00:03:57,600
183 | reduction. A handle is just a reduction that also allows further reduction back to
184 | 
185 | 46
186 | 00:03:57,600 --> 00:04:03,650
187 | the start symbol And we clearly only want to do reduction at handles. If we do a
188 | 
189 | 47
190 | 00:04:03,650 --> 00:04:08,260
191 | reduction at a place that is not a handle even though it looks like it's the right
192 | 
193 | 48
194 | 00:04:08,260 --> 00:04:12,370
195 | hand side or maybe actually be the right hand side of some production, that does
196 | 
197 | 49
198 | 00:04:12,370 --> 00:04:15,980
199 | not mean. That it's actually a handle, and we might, if we could reduce there, we may
200 | 
201 | 50
202 | 00:04:15,980 --> 00:04:22,980
203 | get stuck. So all we said so far is what a handle is. We've defined, a handle, We
204 | 
205 | 51
206 | 00:04:23,260 --> 00:04:27,470
207 | haven't said anything about how to find the handles. And actually, how we find the
208 | 
209 | 52
210 | 00:04:27,470 --> 00:04:31,690
211 | handles is gonna consume much of the rest of our discussion of parsing.
212 | 


--------------------------------------------------------------------------------
/CS143 text-srt/english/08-04-valid-items.srt:
--------------------------------------------------------------------------------
  1 | 0
  2 | 00:00:04,509 --> 00:00:08,709
  3 | In this video, we're going to use our example automaton for recognizing viable
  4 | 
  5 | 1
  6 | 00:00:08,709 --> 00:00:15,709
  7 | prefixes, to introduce one more idea, The idea of a valid item. To refresh your
  8 | 
  9 | 2
 10 | 00:00:16,890 --> 00:00:19,890
 11 | memory, here's where we left off last time. This is the complete
 12 | 
 13 | 3
 14 | 00:00:19,890 --> 00:00:25,199
 15 | nondeterministic automaton for recognizing the viable prefixes of the example grammar
 16 | 
 17 | 4
 18 | 00:00:25,199 --> 00:00:30,539
 19 | [sound]. And Using the standard subset of states construction, we can build a
 20 | 
 21 | 5
 22 | 00:00:30,539 --> 00:00:34,890
 23 | deterministic automaton that is equivalent to the non-deterministic automaton. So
 24 | 
 25 | 6
 26 | 00:00:34,890 --> 00:00:39,600
 27 | here's the deterministic automaton that recognizes exactly the same language. This
 28 | 
 29 | 7
 30 | 00:00:39,600 --> 00:00:44,069
 31 | automa, this deterministic automaton notices the viable prefixes, of our
 32 | 
 33 | 8
 34 | 00:00:44,069 --> 00:00:49,719
 35 | example grammar. But now notice that each state is a set of items. So there's a set
 36 | 
 37 | 9
 38 | 00:00:49,719 --> 00:00:54,670
 39 | of non-deterministic automaton states, in each of these states. And recall that what
 40 | 
 41 | 10
 42 | 00:00:54,670 --> 00:00:59,749
 43 | that means is that the non-deterministic automaton could be in any one of these
 44 | 
 45 | 11
 46 | 00:00:59,749 --> 00:01:05,650
 47 | states. And in particular, this state here is the start state because it has the item
 48 | 
 49 | 12
 50 | 00:01:05,650 --> 00:01:12,650
 51 | S prime goes to dot E. The states of this deterministic automaton are called
 52 | 
 53 | 13
 54 | 00:01:13,600 --> 00:01:18,260
 55 | variously cananugal collections of items or the cananugal collections of LR zero
 56 | 
 57 | 14
 58 | 00:01:18,260 --> 00:01:22,530
 59 | items. If you look in the dragon book it gives another way of constructing the LR
 60 | 
 61 | 15
 62 | 00:01:22,530 --> 00:01:28,050
 63 | zero items than the one that I gave. Mine is somewhat simplified but I think also a
 64 | 
 65 | 16
 66 | 00:01:28,050 --> 00:01:33,270
 67 | little easier to understand if you are seeing this for the first time. Now we
 68 | 
 69 | 17
 70 | 00:01:33,270 --> 00:01:40,090
 71 | need another definition. We'll say that a given item is valid for a viable prefix
 72 | 
 73 | 18
 74 | 00:01:40,090 --> 00:01:45,300
 75 | alpha beta. If the following is true, that beginning from the start symbol, this is
 76 | 
 77 | 19
 78 | 00:01:45,300 --> 00:01:52,159
 79 | our extra start symbol, and by a series of right-most derivation steps, we can get to
 80 | 
 81 | 20
 82 | 00:01:52,159 --> 00:01:58,050
 83 | a configuration, alpha-x-omega, and then in one step, x can go to beta-gamma. And,
 84 | 
 85 | 21
 86 | 00:01:58,050 --> 00:02:05,050
 87 | what this says is after parsing alpha and beta, after seeing. Alpha and beta on the
 88 | 
 89 | 22
 90 | 00:02:05,409 --> 00:02:10,420
 91 | stack, the valid items are the possible tops of the stack of items. That, that we
 92 | 
 93 | 23
 94 | 00:02:10,419 --> 00:02:14,639
 95 | could, that this item, could be the determination state of the
 96 | 
 97 | 24
 98 | 00:02:14,639 --> 00:02:21,450
 99 | nondeterministic automaton. A simpler way of explaining the same idea is that for a
100 | 
101 | 25
102 | 00:02:21,450 --> 00:02:27,650
103 | given viable prefix alpha the items that are valid in that prefix are exactly the
104 | 
105 | 26
106 | 00:02:27,650 --> 00:02:31,659
107 | items that are in the final state of the DFA after it reads that prefix. So these
108 | 
109 | 27
110 | 00:02:31,659 --> 00:02:38,659
111 | are the items that describe the state after you've seen the stack alpha. Now, an
112 | 
113 | 28
114 | 00:02:39,829 --> 00:02:44,920
115 | item is often valid for many, many prefixes. So, for example, the item T goes
116 | 
117 | 29
118 | 00:02:44,920 --> 00:02:51,920
119 | to open paren .e closed paren is valid for all sequences of open parens. And to see
120 | 
121 | 30
122 | 00:02:52,099 --> 00:02:56,840
123 | that, We can just look at our automaton and confirm that if we see an open paren,
124 | 
125 | 31
126 | 00:02:56,840 --> 00:03:00,329
127 | remember, this is the start state. So if we see an open paren, we take this
128 | 
129 | 32
130 | 00:03:00,329 --> 00:03:04,849
131 | transition, we wind up in this state here. And then every open paren we see, we just
132 | 
133 | 33
134 | 00:03:04,849 --> 00:03:10,310
135 | go round and round in this state. So if I have a sequence of five open parens as my
136 | 
137 | 34
138 | 00:03:10,310 --> 00:03:16,540
139 | input, then I'll have transitions one, two, three, four, five, all looping in
140 | 
141 | 35
142 | 00:03:16,540 --> 00:03:22,430
143 | this state. And notice that this item. Is, in, is one of the items in that state. And
144 | 
145 | 36
146 | 00:03:22,430 --> 00:03:27,109
147 | that just says that this item is valid for any prefix, or for, excuse me, any
148 | 
149 | 37
150 | 00:03:27,109 --> 00:03:29,030
151 | sequence of open parens.
152 | 


--------------------------------------------------------------------------------
/CS143 text-srt/english/09-01-introduction-to-semantic-analysis.srt:
--------------------------------------------------------------------------------
  1 | 0
  2 | 00:00:04,430 --> 00:00:07,879
  3 | Welcome back. In this video, we're going to give a very brief introduction, an
  4 | 
  5 | 1
  6 | 00:00:07,879 --> 00:00:14,879
  7 | overview, of what we're going to be talking about in semantic analysis. Let's
  8 | 
  9 | 2
 10 | 00:00:15,469 --> 00:00:19,869
 11 | take a moment to review where we are in our discussion with compilers. So we
 12 | 
 13 | 3
 14 | 00:00:19,869 --> 00:00:23,720
 15 | talked about lexical analysis, and from the point of view of enforcing the
 16 | 
 17 | 4
 18 | 00:00:23,720 --> 00:00:28,550
 19 | language definition the main job that lexical analyses does is detect input
 20 | 
 21 | 5
 22 | 00:00:28,550 --> 00:00:35,550
 23 | [inaudible] preemptive symbols that aren't part of our language. The next step is
 24 | 
 25 | 6
 26 | 00:00:36,480 --> 00:00:40,510
 27 | parsing. We finished talking about that too. And, again, from the point of view of
 28 | 
 29 | 7
 30 | 00:00:40,510 --> 00:00:43,649
 31 | trying to determine whether a program is well-formed or not, or whether it's a
 32 | 
 33 | 8
 34 | 00:00:43,649 --> 00:00:48,280
 35 | valid program, the job of parsing is to detect all the sentences in the language
 36 | 
 37 | 9
 38 | 00:00:48,280 --> 00:00:53,920
 39 | that are ill-formed, or that don't have a parse string. And finally What we're going
 40 | 
 41 | 10
 42 | 00:00:53,920 --> 00:00:58,469
 43 | to talk about now, what's going to occupy us for a while is semantic analysis. And
 44 | 
 45 | 11
 46 | 00:00:58,469 --> 00:01:02,780
 47 | this is the last of what are called the front end phases. So if you think of
 48 | 
 49 | 12
 50 | 00:01:02,780 --> 00:01:07,360
 51 | lexical analysis, parsing, and semantic analysis as filters that progressively
 52 | 
 53 | 13
 54 | 00:01:07,360 --> 00:01:13,450
 55 | reject more and more input strings until finally you're left after all three phases
 56 | 
 57 | 14
 58 | 00:01:13,450 --> 00:01:18,460
 59 | have run with only valid programs to compile, well semantic analysis is the
 60 | 
 61 | 15
 62 | 00:01:18,460 --> 00:01:23,650
 63 | last line of defense. It's the last one in that pipeline and its job is to catch all
 64 | 
 65 | 16
 66 | 00:01:23,650 --> 00:01:30,430
 67 | potential remaining errors in a program. Now you might ask yourself, why do we even
 68 | 
 69 | 17
 70 | 00:01:30,430 --> 00:01:34,520
 71 | need a separate semantic analysis phase? And the answer to that's very simple,
 72 | 
 73 | 18
 74 | 00:01:34,520 --> 00:01:37,860
 75 | There are, there are some features of programming languages, some kinds of
 76 | 
 77 | 19
 78 | 00:01:37,860 --> 00:01:42,630
 79 | mistakes you can make that parsings simply can't catch. Parsing we'll use in context
 80 | 
 81 | 20
 82 | 00:01:42,630 --> 00:01:47,070
 83 | free grammars is not expressive enough to describe everything that we're interested
 84 | 
 85 | 21
 86 | 00:01:47,070 --> 00:01:51,310
 87 | in, in a language definition. So, some of these language constructs are not context
 88 | 
 89 | 22
 90 | 00:01:51,310 --> 00:01:54,950
 91 | free. And the situation here is very, very similar to what it was when we switched
 92 | 
 93 | 23
 94 | 00:01:54,950 --> 00:01:58,680
 95 | from lexical analysis to parsing. Just like not everything could be done with a
 96 | 
 97 | 24
 98 | 00:01:58,680 --> 00:02:03,420
 99 | finite [inaudible]. And we wanted to have something more. Our context free grammar
100 | 
101 | 25
102 | 00:02:03,420 --> 00:02:08,299
103 | to describe additional features of our programming languages [inaudible] Grammars
104 | 
105 | 26
106 | 00:02:08,299 --> 00:02:11,409
107 | by themselves are also not enough, and there some additional features beyond
108 | 
109 | 27
110 | 00:02:11,409 --> 00:02:18,409
111 | those that can't be easily expressed using context free constructs. So what does
112 | 
113 | 28
114 | 00:02:18,469 --> 00:02:22,959
115 | semantic analysis actually do? In a case of cool C, it does checks of many
116 | 
117 | 29
118 | 00:02:22,959 --> 00:02:26,709
119 | different kinds, and that's pretty typical. So here's a list of six classes
120 | 
121 | 30
122 | 00:02:26,709 --> 00:02:30,559
123 | of checks that are done by Cool C, and let's just run through them quickly. First
124 | 
125 | 31
126 | 00:02:30,559 --> 00:02:33,489
127 | we want to check that all identifiers are declared, and we also have to check that
128 | 
129 | 32
130 | 00:02:33,489 --> 00:02:38,290
131 | any scope restrictions on those identifiers are observed. Cool C compiler
132 | 
133 | 33
134 | 00:02:38,290 --> 00:02:42,760
135 | has to do type checking, and this is actually a major function of the semantic
136 | 
137 | 34
138 | 00:02:42,760 --> 00:02:47,489
139 | analyzer in Cool. There are a number of restrictions that come from the object
140 | 
141 | 35
142 | 00:02:47,489 --> 00:02:51,449
143 | oriented nature of Cool. We have to check that the inheritance relationships between
144 | 
145 | 36
146 | 00:02:51,449 --> 00:02:56,319
147 | classes make sense. We don't want classes to be redefined; we only want one class
148 | 
149 | 37
150 | 00:02:56,319 --> 00:03:01,109
151 | definition per class. Similarly methods should only be defined once within a
152 | 
153 | 38
154 | 00:03:01,109 --> 00:03:05,279
155 | class. Cool has a number of reserved identifiers and we have to be careful that
156 | 
157 | 39
158 | 00:03:05,279 --> 00:03:09,209
159 | those aren't misused. And this is pretty typical; lots of languages have some
160 | 
161 | 40
162 | 00:03:09,209 --> 00:03:13,120
163 | reserved identifiers with special rules that have to be followed for those
164 | 
165 | 41
166 | 00:03:13,120 --> 00:03:17,159
167 | identifiers. And, actually this list is not even complete. There are a number of
168 | 
169 | 42
170 | 00:03:17,159 --> 00:03:22,230
171 | other restrictions. And we'll be talking about all of those in future videos. The
172 | 
173 | 43
174 | 00:03:22,230 --> 00:03:26,150
175 | main message here is that its medic analyzer needs to do quite a few different
176 | 
177 | 44
178 | 00:03:26,150 --> 00:03:30,979
179 | kinds of checks. These checks will vary with the language. The kinds of checks
180 | 
181 | 45
182 | 00:03:30,979 --> 00:03:36,290
183 | that cool C does are pretty typical of statically typed checked object oriented
184 | 
185 | 46
186 | 00:03:36,290 --> 00:03:40,180
187 | languages. But other families of languages will have different kinds of checks.
188 | 


--------------------------------------------------------------------------------
/CS143 text-srt/english/09-05-B+Type+Checking.srt:
--------------------------------------------------------------------------------
 1 | 0
 2 | 00:00:00,810 --> 00:00:04,060
 3 | To summarize, type checking proves facts with a form, E has type
 4 | 
 5 | 1
 6 | 00:00:04,059 --> 00:00:08,880
 7 | T And notice that this proof is on the structure of the abstract syntax tree.
 8 | 
 9 | 2
10 | 00:00:08,880 --> 00:00:15,140
11 | So, for the expression one+2, we prove something about one+2, but by first proving
12 | 
13 | 3
14 | 00:00:15,140 --> 00:00:18,660
15 | something about each of the sub-expressions. So we prove that the sub-expressions
16 | 
17 | 4
18 | 00:00:18,660 --> 00:00:22,369
19 | had type int. And then we managed to prove that the whole thing had
20 | 
21 | 5
22 | 00:00:22,369 --> 00:00:29,369
23 | type int. And so the proof has the same shape as the abstract syntax tree. You can
24 | 
25 | 6
26 | 00:00:29,369 --> 00:00:34,110
27 | look at this proof as a tree. Now the root of the tree in the case of the proof is at
28 | 
29 | 7
30 | 00:00:34,110 --> 00:00:37,580
31 | the bottom, we usually draw the abstract syntax tree with the root at the top, so this
32 | 
33 | 8
34 | 00:00:37,580 --> 00:00:41,980
35 | tree looks like this, whereas we often draw the abstract syntax tree in the
36 | 
37 | 9
38 | 00:00:41,980 --> 00:00:45,670
39 | other way around. But, the important thing here is that the proof has the shape of the
40 | 
41 | 10
42 | 00:00:45,670 --> 00:00:50,260
43 | abstract syntax tree and there's one type rule that's used for each abstract syntax
44 | 
45 | 11
46 | 00:00:50,260 --> 00:00:54,210
47 | tree node. So there's a very direct correspondence between the structure of the
48 | 
49 | 12
50 | 00:00:54,210 --> 00:00:59,160
51 | proof and the shape of the abstract syntax tree. And in general the type rule
52 | 
53 | 13
54 | 00:00:59,160 --> 00:01:03,600
55 | used for a particular node of the abstract syntax string, the hypothesis, are going to
56 | 
57 | 14
58 | 00:01:03,600 --> 00:01:08,070
59 | be the proofs of the type of E's sub-expressions. So, whatever sub-expressions
60 | 
61 | 15
62 | 00:01:08,070 --> 00:01:12,320
63 | make up E we're going to need types for them first. And the conclusion
64 | 
65 | 16
66 | 00:01:12,320 --> 00:01:16,159
67 | at, at that particular node will be the type of the entire expression
68 | 
69 | 17
70 | 00:01:16,159 --> 00:01:20,999
71 | E. And this way you can that types are computed in a bottom up pass over the abstract
72 | 
73 | 18
74 | 00:01:20,999 --> 00:01:24,020
75 | syntax string. That is I assign first types to the leads. Like here I know
76 | 
77 | 19
78 | 00:01:24,020 --> 00:01:29,710
79 | that one has type int and two has type int and then. The types flow towards the root.
80 | 
81 | 20
82 | 00:01:29,710 --> 00:01:33,700
83 | I'm able to compute then the next level abstract of syntax tree and so on And then
84 | 
85 | 21
86 | 00:01:33,700 --> 00:01:38,509
87 | once I computed the types of all the sub expression of a knob. Then I can compute the
88 | 
89 | 22
90 | 00:01:38,509 --> 00:01:40,290
91 | type at that knob.
92 | 


--------------------------------------------------------------------------------
/CS143 text-srt/english/09-06-B+Type+Environments.srt:
--------------------------------------------------------------------------------
 1 | 0
 2 | 00:00:01,770 --> 00:00:05,680
 3 | To summarize this video, The type environment gives types to the free
 4 | 
 5 | 1
 6 | 00:00:05,680 --> 00:00:09,770
 7 | identifiers in the current scope. And this is very important, because it doesn't even
 8 | 
 9 | 2
10 | 00:00:09,770 --> 00:00:13,500
11 | really makes sense to talk about type checking an expression, unless we have
12 | 
13 | 3
14 | 00:00:13,500 --> 00:00:17,940
15 | some information for the types of the free identifiers. And the type environment is
16 | 
17 | 4
18 | 00:00:17,940 --> 00:00:22,070
19 | just a way of formalizing that, of giving a name to some set of assumptions about
20 | 
21 | 5
22 | 00:00:22,070 --> 00:00:26,760
23 | what the types of those free identifiers are. Notice that the type environment is
24 | 
25 | 6
26 | 00:00:26,760 --> 00:00:30,450
27 | passed down the abstract syntax tree from the root towards the leaves. That is, as
28 | 
29 | 7
30 | 00:00:30,450 --> 00:00:33,199
31 | we pass through definitions, the type environment is extended with new
32 | 
33 | 8
34 | 00:00:33,199 --> 00:00:36,650
35 | definitions For example, [inaudible] expressions. And so the type environment
36 | 
37 | 9
38 | 00:00:36,650 --> 00:00:40,820
39 | will grow as you pass from the root of the abstract syntax tree down towards the
40 | 
41 | 10
42 | 00:00:40,820 --> 00:00:45,010
43 | leaves of the abstract syntax tree. And then the types are computed up the
44 | 
45 | 11
46 | 00:00:45,010 --> 00:00:48,420
47 | abstract syntax tree from the leaves towards the roots. So we begin at the
48 | 
49 | 12
50 | 00:00:48,420 --> 00:00:52,540
51 | leaves, get all the types of the leaf expressions, most of which are very easy.
52 | 
53 | 13
54 | 00:00:52,540 --> 00:00:56,079
55 | Things like integers and string constants have the obvious types. And we just look
56 | 
57 | 14
58 | 00:00:56,079 --> 00:01:00,320
59 | up the types of variables, in the type environment. And then we compute the types
60 | 
61 | 15
62 | 00:01:00,320 --> 00:01:03,749
63 | for the more complicated expressions in a bottom up fashion.
64 | 


--------------------------------------------------------------------------------
/CS143 text-srt/english/09-07-B+Subtyping.srt:
--------------------------------------------------------------------------------
  1 | 0
  2 | 00:00:01,319 --> 00:00:04,590
  3 | Now we can give a type-checking role for if-then-else. So
  4 | 
  5 | 1
  6 | 00:00:04,590 --> 00:00:08,210
  7 | the first thing to know is about if-then-else expressions is that they do not
  8 | 
  9 | 2
 10 | 00:00:08,210 --> 00:00:11,959
 11 | affect the environment, and if-then-else neither introduces nor removes
 12 | 
 13 | 3
 14 | 00:00:11,959 --> 00:00:16,420
 15 | any variables from the environment, so all the sub-expressions are
 16 | 
 17 | 4
 18 | 00:00:16,420 --> 00:00:21,740
 19 | typed in the same environment as the entire expression. Now, the predicate of the
 20 | 
 21 | 5
 22 | 00:00:21,740 --> 00:00:25,509
 23 | FNL30, well that should have type [inaudible]. Because that's our decision whether
 24 | 
 25 | 6
 26 | 00:00:25,509 --> 00:00:28,480
 27 | we're going to take the true branch or the false branch, But then the two
 28 | 
 29 | 7
 30 | 00:00:28,480 --> 00:00:33,570
 31 | branches can have different types. E1 just has to have some type T1, and E2 just
 32 | 
 33 | 8
 34 | 00:00:33,570 --> 00:00:37,780
 35 | has to have some type T2. So notice again what this is sayi ng. This is just saying
 36 | 
 37 | 9
 38 | 00:00:37,780 --> 00:00:41,649
 39 | that E1 and E2 do have a type check. They have to be type correct but we don't care
 40 | 
 41 | 10
 42 | 00:00:41,649 --> 00:00:45,039
 43 | what the type is. The type can be anything. And then the type of the entire
 44 | 
 45 | 11
 46 | 00:00:45,039 --> 00:00:49,809
 47 | expression is just going to be the least upper bound of T1 and T2. Because that's going
 48 | 
 49 | 12
 50 | 00:00:49,809 --> 00:00:54,469
 51 | to be the best Estimate we can give to the final type of the expression given
 52 | 
 53 | 13
 54 | 00:00:54,469 --> 00:00:59,320
 55 | that the truth branch might return T one something of type T one. And the false branch
 56 | 
 57 | 14
 58 | 00:00:59,320 --> 00:01:06,320
 59 | might return something of type T two. The rule for case expression is the most
 60 | 
 61 | 15
 62 | 00:01:06,890 --> 00:01:13,250
 63 | complex one we've seen so far. But really it's a glorified if then L's. And it's
 64 | 
 65 | 16
 66 | 00:01:13,250 --> 00:01:17,030
 67 | relatively easy to understand if we just pull it apart. So, so let's begin by
 68 | 
 69 | 17
 70 | 00:01:17,030 --> 00:01:22,390
 71 | reminding ourselves of what case does. First of all it looks at E0. It evaluates
 72 | 
 73 | 18
 74 | 00:01:22,390 --> 00:01:29,040
 75 | E0 and then it looks at the runtime type of E0. So, so it takes the dynamic class of
 76 | 
 77 | 19
 78 | 00:01:29,040 --> 00:01:33,720
 79 | E0 and then it looks at the first branch. >> Let's do that. >> And what is it
 80 | 
 81 | 20
 82 | 00:01:33,720 --> 00:01:37,520
 83 | going to do? It's going to compare the type of these [inaudible] at run time to the
 84 | 
 85 | 21
 86 | 00:01:37,520 --> 00:01:44,409
 87 | type t one, and if t one is super type. Of the run time type of E O And in fact,
 88 | 
 89 | 22
 90 | 00:01:44,409 --> 00:01:47,759
 91 | it is the smallest of all the possible branches. It's the smallest super
 92 | 
 93 | 23
 94 | 00:01:47,759 --> 00:01:52,100
 95 | type of all the possible branches. Then it's going to pick this branch. It's going
 96 | 
 97 | 24
 98 | 00:01:52,100 --> 00:01:58,039
 99 | to bind X1 to the value. It's going to give it the type T1. So combine X1 to the
100 | 
101 | 25
102 | 00:01:58,039 --> 00:02:04,090
103 | value of E O. It's going to retype it as. Type T one and it's going to evaluate E one.
104 | 
105 | 26
106 | 00:02:04,090 --> 00:02:08,899
107 | So you can see in one sense it's a, it's a glorified if then else, we're just
108 | 
109 | 27
110 | 00:02:08,899 --> 00:02:14,020
111 | picking the best matching branch, the one that, whose type, declared type, is, most
112 | 
113 | 28
114 | 00:02:14,020 --> 00:02:18,050
115 | closely matches the runtime type of E zero. And then we're going to execute that
116 | 
117 | 29
118 | 00:02:18,050 --> 00:02:23,820
119 | branch with the variable that's named in that branch bound to the type of E zero. So
120 | 
121 | 30
122 | 00:02:23,820 --> 00:02:28,220
123 | let's see how the typing works out. So first we type check E0 and we can some type
124 | 
125 | 31
126 | 00:02:28,220 --> 00:02:35,220
127 | T0, and now what's going to happen well if we select. The branch, The first branch,
128 | 
129 | 32
130 | 00:02:35,800 --> 00:02:39,660
131 | Well, then, we're going to take the environment and we're going to extend
132 | 
133 | 33
134 | 00:02:39,660 --> 00:02:44,640
135 | it with the new variable x one which is going to have type t one. And so we only take
136 | 
137 | 34
138 | 00:02:44,640 --> 00:02:50,120
139 | this branch, remember, if the runtime type of e zero matches t one, most closely
140 | 
141 | 35
142 | 00:02:50,120 --> 00:02:53,870
143 | among all the branches, but if we do take it, then we're going to execute, execute e
144 | 
145 | 36
146 | 00:02:53,870 --> 00:02:59,950
147 | one in this environment and we'll get back something of some type, t one prime, and similarly
148 | 
149 | 37
150 | 00:02:59,950 --> 00:03:04,100
151 | for all the other branches until finally the last branch, which is exactly
152 | 
153 | 38
154 | 00:03:04,100 --> 00:03:08,760
155 | the same as the first one, just with the letter n replacing the number one. And so
156 | 
157 | 39
158 | 00:03:08,760 --> 00:03:13,620
159 | since we don't know which brain to match at one time, it could be any of the branches
160 | 
161 | 40
162 | 00:03:13,620 --> 00:03:17,650
163 | that actually executes and therefore the type of the entire expression
164 | 
165 | 41
166 | 00:03:17,650 --> 00:03:23,120
167 | is just going to be least upper bound over all of the types a, of the various
168 | 
169 | 42
170 | 00:03:23,120 --> 00:03:26,510
171 | branches, and here I've just extendedly super bound from a binding area
172 | 
173 | 43
174 | 00:03:26,510 --> 00:03:30,420
175 | of the operation a, to an in area operation a, that should be clear enough,
176 | 
177 | 44
178 | 00:03:30,420 --> 00:03:33,410
179 | we're just going to take the least upper bound to all of these types.
180 | 


--------------------------------------------------------------------------------
/CS143 text-srt/english/09-08-A+Typing+Methods.srt:
--------------------------------------------------------------------------------
  1 | 0
  2 | 00:00:04,029 --> 00:00:07,439
  3 | In this video, we're going to continue our discussion of type checking and cool with
  4 | 
  5 | 1
  6 | 00:00:07,439 --> 00:00:14,439
  7 | the rules for type checking methods and method calls. So here's the situation, we
  8 | 
  9 | 2
 10 | 00:00:15,690 --> 00:00:19,829
 11 | want to type check a method call, let's say that we have a dispatch on some
 12 | 
 13 | 3
 14 | 00:00:19,829 --> 00:00:24,029
 15 | expression easier, and we're calling some method named F, and we have some arguments
 16 | 
 17 | 4
 18 | 00:00:24,029 --> 00:00:28,090
 19 | E one through E N. Well, so clearly we're gonna type check E zero, it's gonna have
 20 | 
 21 | 5
 22 | 00:00:28,090 --> 00:00:32,340
 23 | some type E zero, and similarly we're gonna type check all of the arguments and
 24 | 
 25 | 6
 26 | 00:00:32,340 --> 00:00:36,710
 27 | they're gonna have some types, and then the question is, what is the return type
 28 | 
 29 | 7
 30 | 00:00:36,710 --> 00:00:40,769
 31 | of this method call, what value, what kind of value do we get back after we call this
 32 | 
 33 | 8
 34 | 00:00:40,769 --> 00:00:46,129
 35 | method? And as you can probably see we're in a very similar situation here that we
 36 | 
 37 | 9
 38 | 00:00:46,129 --> 00:00:50,769
 39 | were in before when we were typing check the variable reference. We have this name
 40 | 
 41 | 10
 42 | 00:00:50,769 --> 00:00:56,339
 43 | F and we don't know anything about what it does, we don't know the behavior of F is
 44 | 
 45 | 11
 46 | 00:00:56,339 --> 00:01:01,059
 47 | unless we have some information about F's behavior, we can't really say what kind of
 48 | 
 49 | 12
 50 | 00:01:01,059 --> 00:01:07,600
 51 | value it is going to return. An added wrinkle in cool is that method object
 52 | 
 53 | 13
 54 | 00:01:07,600 --> 00:01:12,439
 55 | identifiers live in different name spaces. That is, it is possible in the same scope
 56 | 
 57 | 14
 58 | 00:01:12,439 --> 00:01:17,659
 59 | to have a method called foo and also an object called foo and we won't get them
 60 | 
 61 | 15
 62 | 00:01:17,659 --> 00:01:21,659
 63 | confused. They are different enough and used differently enough in the language
 64 | 
 65 | 16
 66 | 00:01:21,659 --> 00:01:24,990
 67 | that we can always tell when we're talking about the object foo and when we're
 68 | 
 69 | 17
 70 | 00:01:24,990 --> 00:01:28,909
 71 | talking about the method foo. But what this means in effect is that there's two
 72 | 
 73 | 18
 74 | 00:01:28,909 --> 00:01:34,740
 75 | different environments. One for objects and one for methods and so in the type
 76 | 
 77 | 19
 78 | 00:01:34,740 --> 00:01:38,479
 79 | rules, this is going to be reflected by having a separate mapping a separate
 80 | 
 81 | 20
 82 | 00:01:38,479 --> 00:01:44,130
 83 | method environment that's going to record the signature of each of the methods. And
 84 | 
 85 | 21
 86 | 00:01:44,130 --> 00:01:48,210
 87 | a signature, as is a standard name that you'll probably hear used in other
 88 | 
 89 | 22
 90 | 00:01:48,210 --> 00:01:54,259
 91 | contexts, but the signature of a function is just its input and output types. And so
 92 | 
 93 | 23
 94 | 00:01:54,259 --> 00:01:58,920
 95 | this table, m, is gonna take the name of a class. It's gonna take the name of a
 96 | 
 97 | 24
 98 | 00:01:58,920 --> 00:02:04,229
 99 | method in that class and is just gonna tell us what are the argument types of the
100 | 
101 | 25
102 | 00:02:04,229 --> 00:02:08,950
103 | methods. So all but the last type in the list here is one of the arguing types of
104 | 
105 | 26
106 | 00:02:08,949 --> 00:02:14,610
107 | the method and then the last type is the result type. That's the type of the return
108 | 
109 | 27
110 | 00:02:14,610 --> 00:02:18,330
111 | value. So the way we are going to write the method signature is just as a tutor or
112 | 
113 | 28
114 | 00:02:18,330 --> 00:02:23,950
115 | a list of types, the first, all but the last one taken together are the, are the
116 | 
117 | 29
118 | 00:02:23,950 --> 00:02:27,819
119 | types of the arguments in order. And then the very last one is the type of the
120 | 
121 | 30
122 | 00:02:27,819 --> 00:02:34,370
123 | result And so an entry like this in our method, environment just means that f has
124 | 
125 | 31
126 | 00:02:34,370 --> 00:02:38,360
127 | a signature that looks like this. It takes in arguments, with the respective types
128 | 
129 | 32
130 | 00:02:38,360 --> 00:02:45,360
131 | and it returns something of type t n plus one. So with the method environment added
132 | 
133 | 33
134 | 00:02:46,290 --> 00:02:50,870
135 | to our rules, now we can write a rule for dispatch. So notice, first of all, that we
136 | 
137 | 34
138 | 00:02:50,870 --> 00:02:55,720
139 | have these two mappings, one for object identifiers and one for method names on
140 | 
141 | 35
142 | 00:02:55,720 --> 00:03:01,310
143 | the left hand side of the turnstile. We have to propagate that method environment
144 | 
145 | 36
146 | 00:03:01,310 --> 00:03:06,540
147 | through all the typing for the sub expressions and for the case of method
148 | 
149 | 37
150 | 00:03:06,540 --> 00:03:12,439
151 | dispatch, we just type The, type of the expression we're dispatching to e zero,
152 | 
153 | 38
154 | 00:03:12,439 --> 00:03:17,640
155 | and all of the arguments and get types t one through t n, and then we look up the
156 | 
157 | 39
158 | 00:03:17,640 --> 00:03:24,390
159 | type of f in the class t zero. So, what class are we dispatching to? Well, that's
160 | 
161 | 40
162 | 00:03:24,390 --> 00:03:28,680
163 | gonna be to the class of e zero And so where do we look up m in our environment.
164 | 
165 | 41
166 | 00:03:28,680 --> 00:03:34,200
167 | Where there better be a method called F to find in class T0, and it must have some
168 | 
169 | 42
170 | 00:03:34,200 --> 00:03:39,430
171 | signature with the right number of arguments. And then, the actual arguments
172 | 
173 | 43
174 | 00:03:39,430 --> 00:03:44,010
175 | that we're passing, the E1 through E-N, they're types have to be sub-types of the
176 | 
177 | 44
178 | 00:03:44,010 --> 00:03:49,269
179 | declared formal parameter. So here, the signature of F. Says that, for example,
180 | 
181 | 45
182 | 00:03:49,269 --> 00:03:54,870
183 | the first argument of f has type t one prime, and so we're going to require that
184 | 
185 | 46
186 | 00:03:54,870 --> 00:04:00,409
187 | the type of e one be some type t one such that t one is a sub type of t one prime.
188 | 
189 | 47
190 | 00:04:00,409 --> 00:04:05,829
191 | And similarly for all the other arguments of the method call. And if all of that
192 | 
193 | 48
194 | 00:04:05,829 --> 00:04:09,980
195 | checks out, if that has a signature like this, and all the sub type requirements on
196 | 
197 | 49
198 | 00:04:09,980 --> 00:04:15,519
199 | the actual arguments and the formal arguments match, then we're going to say
200 | 
201 | 50
202 | 00:04:15,519 --> 00:04:21,209
203 | that the entire expression. [inaudible] Return something of type t n plus one the
204 | 
205 | 51
206 | 00:04:21,209 --> 00:04:28,030
207 | return type of the method. The typing rule for static dispatch is very similar to the
208 | 
209 | 52
210 | 00:04:28,030 --> 00:04:32,580
211 | rule for regular dispatch. So recall that, syntactically, the only thing that's
212 | 
213 | 53
214 | 00:04:32,580 --> 00:04:36,030
215 | different is that the programmer writes the name of the class at which they wish
216 | 
217 | 54
218 | 00:04:36,030 --> 00:04:42,210
219 | to run the, the method. So instead of running the method F as defined in the
220 | 
221 | 55
222 | 00:04:42,210 --> 00:04:46,120
223 | class E0, whatever that class happens to be, we're going to run whatever that
224 | 
225 | 56
226 | 00:04:46,120 --> 00:04:51,340
227 | method F happens to be in some ancestor class of the class of E0. And how is that
228 | 
229 | 57
230 | 00:04:51,340 --> 00:04:55,740
231 | expressed in the type rules? Well once again, we type E0 and all of the
232 | 
233 | 58
234 | 00:04:55,740 --> 00:05:02,000
235 | arguments. And now we require that, whatever the type was we discovered for
236 | 
237 | 59
238 | 00:05:02,000 --> 00:05:08,710
239 | E0, it has to be a sub-type of T. So T has to be an ancestor type in the class
240 | 
241 | 60
242 | 00:05:08,710 --> 00:05:14,770
243 | hierarchy of the type of E0. And moreover, that class T had better have a method
244 | 
245 | 61
246 | 00:05:14,770 --> 00:05:19,889
247 | called F. That has the right number of our units, with the right kind of types, such
248 | 
249 | 62
250 | 00:05:19,889 --> 00:05:25,650
251 | that all the type constraints work out, that the, actual argument types are sub
252 | 
253 | 63
254 | 00:05:25,650 --> 00:05:29,990
255 | types of the corresponding formal argument types, and then if all of that is true,
256 | 
257 | 64
258 | 00:05:29,990 --> 00:05:34,979
259 | we'll be able to conclude that the entire dispatch expression has a type t n plus
260 | 
261 | 65
262 | 00:05:34,979 --> 00:05:38,110
263 | one which is the return type of the method.
264 | 


--------------------------------------------------------------------------------
/CS143 text-srt/english/09-08-B+Typing+Methods.srt:
--------------------------------------------------------------------------------
  1 | 0
  2 | 00:00:01,670 --> 00:00:05,379
  3 | The method environment has to be added to all of the other type rules in our
  4 | 
  5 | 1
  6 | 00:00:05,379 --> 00:00:09,970
  7 | system. This is really easy to do, because only the dispatch rules actually care
  8 | 
  9 | 2
 10 | 00:00:09,970 --> 00:00:14,099
 11 | about what the methods are. All the rest of the rules just pass the method environment
 12 | 
 13 | 3
 14 | 00:00:14,099 --> 00:00:17,820
 15 | along. So what do I mean by that? Well, here's our rule for add, with
 16 | 
 17 | 4
 18 | 00:00:17,820 --> 00:00:21,260
 19 | just the object environment. So now what were going to do, is we're going to add a
 20 | 
 21 | 5
 22 | 00:00:21,260 --> 00:00:24,760
 23 | method environment. And the sub-expressions will be just type checked.
 24 | 
 25 | 6
 26 | 00:00:24,760 --> 00:00:31,279
 27 | In exactly the same method environment as the entire expression and all the other rules
 28 | 
 29 | 7
 30 | 00:00:31,279 --> 00:00:34,839
 31 | simply pass down the method environment from the root towards the leaves
 32 | 
 33 | 8
 34 | 00:00:34,839 --> 00:00:41,239
 35 | without changing it just as in this rule. Now it turns out that for some cases
 36 | 
 37 | 9
 38 | 00:00:41,239 --> 00:00:45,679
 39 | involving self-type, we actually need one more thing in our environment. And so, the
 40 | 
 41 | 10
 42 | 00:00:45,679 --> 00:00:50,159
 43 | actual full environment for Cool type checking consists of three things. First of
 44 | 
 45 | 11
 46 | 00:00:50,159 --> 00:00:54,559
 47 | all, there's the mapping O, that gives types to object IDs. There's this mapping
 48 | 
 49 | 12
 50 | 00:00:54,559 --> 00:00:58,389
 51 | M, that gives types to methods. And finally, we just need to know the name of
 52 | 
 53 | 13
 54 | 00:00:58,389 --> 00:01:02,959
 55 | the current class. So, whatever class the expression we're type checking actually sits
 56 | 
 57 | 14
 58 | 00:01:02,959 --> 00:01:09,030
 59 | in. So the full form of a sentence in the Cool type checking [inaudible] looks like
 60 | 
 61 | 15
 62 | 00:01:09,030 --> 00:01:12,850
 63 | this And is read as follows, under the assumption that the object identifiers
 64 | 
 65 | 16
 66 | 00:01:12,850 --> 00:01:17,399
 67 | have the types given by O. That the methods have the signatures given by M And
 68 | 
 69 | 17
 70 | 00:01:17,399 --> 00:01:24,240
 71 | that the expression sits in the class C. Then we can prove that the expression E has
 72 | 
 73 | 18
 74 | 00:01:24,240 --> 00:01:30,020
 75 | the type T. And here's an example, the ad example, the, the rule for addition again,
 76 | 
 77 | 19
 78 | 00:01:30,020 --> 00:01:35,469
 79 | written out with the full environment. So the rules that I've given
 80 | 
 81 | 20
 82 | 00:01:35,469 --> 00:01:39,329
 83 | you for type checking here are cool specific and some other languages have very
 84 | 
 85 | 21
 86 | 00:01:39,329 --> 00:01:43,090
 87 | different rules. But there are some general themes in type checking. First of
 88 | 
 89 | 22
 90 | 00:01:43,090 --> 00:01:47,189
 91 | all, type rules are defined on the structure of expressions. So they're typically
 92 | 
 93 | 23
 94 | 00:01:47,189 --> 00:01:51,590
 95 | done in this inductive fashion, where the types of an expres-, the type of
 96 | 
 97 | 24
 98 | 00:01:51,590 --> 00:01:57,109
 99 | an expression depends on the types of its sub expressions. And also the types of variables
100 | 
101 | 25
102 | 00:01:57,109 --> 00:02:01,039
103 | and more generally any free names in an expression, things like method
104 | 
105 | 26
106 | 00:02:01,039 --> 00:02:05,520
107 | names, they're going to be modeled by an environment, so we're going to have some mapping
108 | 
109 | 27
110 | 00:02:05,520 --> 00:02:10,259
111 | sitting around that's going to tell us any kind of fr-. A Russian what assumptions
112 | 
113 | 28
114 | 00:02:10,258 --> 00:02:15,849
115 | the type rules should make about the types of those names And one thing
116 | 
117 | 29
118 | 00:02:15,849 --> 00:02:18,909
119 | you've probably noticed by now but it's worth saying explicitly is that type
120 | 
121 | 30
122 | 00:02:18,909 --> 00:02:24,340
123 | rules are really very compact. The notation is not complicated but there's actually
124 | 
125 | 31
126 | 00:02:24,340 --> 00:02:27,730
127 | a lot of information in these rules. I mean you have to take some time to
128 | 
129 | 32
130 | 00:02:27,730 --> 00:02:30,760
131 | sit and read them carefully to really understand what they are saying.
132 | 


--------------------------------------------------------------------------------
/CS143 text-srt/english/09-09-implementing-type-checking.srt:
--------------------------------------------------------------------------------
  1 | 0
  2 | 00:00:05,150 --> 00:00:09,340
  3 | In this video we're going to talk about how one takes the type checking rules and
  4 | 
  5 | 1
  6 | 00:00:09,340 --> 00:00:16,340
  7 | translates them into an implementation. The high level overview of cool type
  8 | 
  9 | 2
 10 | 00:00:17,100 --> 00:00:21,080
 11 | checking is that it can be implemented in a single traversal over the abstract
 12 | 
 13 | 3
 14 | 00:00:21,080 --> 00:00:26,359
 15 | syntax tree. And there's actually two phases here. There's the top down phase,
 16 | 
 17 | 4
 18 | 00:00:26,359 --> 00:00:30,800
 19 | in which the type environment is passed down the tree. And there's a bottom up
 20 | 
 21 | 5
 22 | 00:00:30,800 --> 00:00:36,370
 23 | phase in which the types are passed back up. So we start at the root of the tree
 24 | 
 25 | 6
 26 | 00:00:36,370 --> 00:00:39,790
 27 | with an initial type environment, this type of environment is passed down
 28 | 
 29 | 7
 30 | 00:00:39,790 --> 00:00:43,300
 31 | recursively through the various nodes of the abstract syntax tree until we hit the
 32 | 
 33 | 8
 34 | 00:00:43,300 --> 00:00:47,800
 35 | leaves. And starting at the leaves we use the environment to compute the types of
 36 | 
 37 | 9
 38 | 00:00:47,800 --> 00:00:54,800
 39 | each sub-expression working our way back up the tree to the root. Let's start our
 40 | 
 41 | 10
 42 | 00:00:55,550 --> 00:00:58,710
 43 | discussion of the implementation of cool type checking with one of the simpler
 44 | 
 45 | 11
 46 | 00:00:58,710 --> 00:01:02,920
 47 | rules in the type system, the rule for addition. And let's just briefly review
 48 | 
 49 | 12
 50 | 00:01:02,920 --> 00:01:09,830
 51 | what this rule says. It says that the type check E one plus E two, we first have to
 52 | 
 53 | 13
 54 | 00:01:09,830 --> 00:01:15,810
 55 | type check E one, and then we have to type check the sub expression E two. And both
 56 | 
 57 | 14
 58 | 00:01:15,810 --> 00:01:21,620
 59 | of those sub expressions have to have type end. And if they do then we can conclude
 60 | 
 61 | 15
 62 | 00:01:21,620 --> 00:01:26,850
 63 | that the overall expression, the sum of the two sub expressions, also has type A.
 64 | 
 65 | 16
 66 | 00:01:26,850 --> 00:01:30,560
 67 | And furthermore, this type checking is carried out in some environment. In this
 68 | 
 69 | 17
 70 | 00:01:30,560 --> 00:01:34,640
 71 | case, the environment is the same for the entire expression and both sub
 72 | 
 73 | 18
 74 | 00:01:34,640 --> 00:01:39,550
 75 | expressions. Just, just to remind you, there's always an object environment for
 76 | 
 77 | 19
 78 | 00:01:39,550 --> 00:01:44,390
 79 | the object names and scope, a method environment for the methods of the various
 80 | 
 81 | 20
 82 | 00:01:44,390 --> 00:01:49,909
 83 | classes and we always need to know the current class. Now how will we implement
 84 | 
 85 | 21
 86 | 00:01:49,909 --> 00:01:55,090
 87 | this? Well we will have a recursive function called type check It takes two
 88 | 
 89 | 22
 90 | 00:01:55,090 --> 00:01:59,110
 91 | arguments, it takes an [inaudible] environment and this will be a record, I'm
 92 | 
 93 | 23
 94 | 00:01:59,110 --> 00:02:03,220
 95 | not specifying exactly how this record is declared but it is essentially going to be
 96 | 
 97 | 24
 98 | 00:02:03,220 --> 00:02:08,920
 99 | three parts m, o, and c. And it also takes an expression, and so here we are just
100 | 
101 | 25
102 | 00:02:08,919 --> 00:02:13,819
103 | doing the case here where the expression is E1 + E2. And what should the code look
104 | 
105 | 26
106 | 00:02:13,819 --> 00:02:19,459
107 | like? Well, we can pretty much just read the rule and translate directly into code,
108 | 
109 | 27
110 | 00:02:19,459 --> 00:02:23,260
111 | and this is one of the nice things about the notation for type systems, is that it
112 | 
113 | 28
114 | 00:02:23,260 --> 00:02:29,730
115 | really tells you very, very clearly how to write the implementation from the
116 | 
117 | 29
118 | 00:02:29,730 --> 00:02:34,760
119 | description. So what's the first thing we have to do? Well we have to type-check the
120 | 
121 | 30
122 | 00:02:34,760 --> 00:02:39,870
123 | sub expression E1. And we can see from the rule that the environment in which E1 is
124 | 
125 | 31
126 | 00:02:39,870 --> 00:02:44,819
127 | type checked is exactly the same as the environment of E1 plus E2. So we just pass
128 | 
129 | 32
130 | 00:02:44,819 --> 00:02:49,609
131 | whatever our original environment argument was for E1 plus E2. We pass an, an
132 | 
133 | 33
134 | 00:02:49,609 --> 00:02:54,279
135 | argument on, to a recursive call of the type check, to type check the sub
136 | 
137 | 34
138 | 00:02:54,279 --> 00:03:00,060
139 | expression E1. And that type-checking will run and it will return some type T1, and
140 | 
141 | 35
142 | 00:03:00,060 --> 00:03:04,529
143 | we don't know that T1 is an integer at this point. We're gonna have to check
144 | 
145 | 36
146 | 00:03:04,529 --> 00:03:11,529
147 | that, so we just remember what the type of E1 is. And, furthermore we type check E2,
148 | 
149 | 37
150 | 00:03:12,540 --> 00:03:17,189
151 | okay? And that also happens in the same environment, we can see that here in the
152 | 
153 | 38
154 | 00:03:17,189 --> 00:03:24,189
155 | rule. And again we'll get back some type for E2, so type T2. And then we confirm
156 | 
157 | 39
158 | 00:03:25,529 --> 00:03:32,529
159 | that both T1 and T2 are type integer. And we could have done a, the track that T1
160 | 
161 | 40
162 | 00:03:32,849 --> 00:03:39,040
163 | is, is int a, right away, right after we had the type check T1 that would be a fine
164 | 
165 | 41
166 | 00:03:39,040 --> 00:03:44,449
167 | thing to do. Here just to save space on the slide I have to clip the checks for T1
168 | 
169 | 42
170 | 00:03:44,449 --> 00:03:51,449
171 | and T2 a, on one line. And if that check succeeds. If it doesn't succeed,
172 | 
173 | 43
174 | 00:03:51,540 --> 00:03:55,859
175 | presumably there should be some code in here, to, print out an error message But
176 | 
177 | 44
178 | 00:03:55,859 --> 00:04:01,269
179 | if that, if both T1 and T2 are in fact integers, than the type of the whole
180 | 
181 | 45
182 | 00:04:01,269 --> 00:04:05,459
183 | expression is also an integer. So that's what's returned by this call, by the
184 | 
185 | 46
186 | 00:04:05,459 --> 00:04:11,249
187 | outermost call here, to the type check function. So now let's take a look at a
188 | 
189 | 47
190 | 00:04:11,249 --> 00:04:15,409
191 | somewhat more complex type checking rule and its implementation. Here's the rule
192 | 
193 | 48
194 | 00:04:15,409 --> 00:04:21,519
195 | for a net with initialization. So we're declaring a variable x, of type t. And
196 | 
197 | 49
198 | 00:04:21,519 --> 00:04:26,300
199 | that's going to be visible in the expression E1. But before we execute E1,
200 | 
201 | 50
202 | 00:04:26,300 --> 00:04:31,169
203 | we're going to initialize X to the value of E0. And then after we've evaluated the
204 | 
205 | 51
206 | 00:04:31,169 --> 00:04:37,389
207 | entire let expression, we expect to get back something of type T1 And now, for all
208 | 
209 | 52
210 | 00:04:37,389 --> 00:04:41,960
211 | of that to work out. A few things have to be satisfied and those are listed as
212 | 
213 | 53
214 | 00:04:41,960 --> 00:04:48,960
215 | premises here of the rule. First of all E0 has to have some type T0 which is a
216 | 
217 | 54
218 | 00:04:49,330 --> 00:04:54,580
219 | subtype of T. And that's to guarantee that this initialization is correct, that X can
220 | 
221 | 55
222 | 00:04:54,580 --> 00:05:01,580
223 | actually hold something of E0's type. And, for the entire expression to have type T1,
224 | 
225 | 56
226 | 00:05:01,770 --> 00:05:07,300
227 | well then E1 has to have type T1. But that type checking is carried out in an
228 | 
229 | 57
230 | 00:05:07,300 --> 00:05:12,199
231 | environment that's extended with the declaration for X. So we, so we also know
232 | 
233 | 58
234 | 00:05:12,199 --> 00:05:19,199
235 | within E1, that X has type T. So now let's write the type-checking case for this. So
236 | 
237 | 59
238 | 00:05:20,550 --> 00:05:24,509
239 | the function type check is again is gonna take an environment as argument, and now
240 | 
241 | 60
242 | 00:05:24,509 --> 00:05:31,509
243 | we're doing a case for a led with initialization. So, just reading of the,
244 | 
245 | 61
246 | 00:05:32,180 --> 00:05:35,960
247 | the rules and what the conditions are that we have to check, we can see that one of
248 | 
249 | 62
250 | 00:05:35,960 --> 00:05:39,569
251 | the first things we have to do, or one of the things we have to do, is to check that
252 | 
253 | 63
254 | 00:05:39,569 --> 00:05:44,909
255 | E zero has some type T zero. So we just have a recursive call to type check here.
256 | 
257 | 64
258 | 00:05:44,909 --> 00:05:47,870
259 | This is carried out in the same environment as the overall expression. So
260 | 
261 | 65
262 | 00:05:47,870 --> 00:05:51,539
263 | we just pass the environment on to the recursive call And now we're just type
264 | 
265 | 66
266 | 00:05:51,539 --> 00:05:58,539
267 | checking E zero and we record its type T zero. So the second premise is implemented
268 | 
269 | 67
270 | 00:05:59,740 --> 00:06:06,569
271 | like this. Now we're type checking E1 and we expect it to have some type T1, but now
272 | 
273 | 68
274 | 00:06:06,569 --> 00:06:09,590
275 | the environment is different, so we're taking the original environment, the
276 | 
277 | 69
278 | 00:06:09,590 --> 00:06:15,430
279 | overall environment of the expression and we're adding, a declaration that X has
280 | 
281 | 70
282 | 00:06:15,430 --> 00:06:19,009
283 | type T to that environment. So we're extending the environment, with an
284 | 
285 | 71
286 | 00:06:19,009 --> 00:06:25,400
287 | additional variable declaration. Okay? And so we do that type checking call, and we
288 | 
289 | 72
290 | 00:06:25,400 --> 00:06:31,629
291 | get back a type T1. Now, we have to check that T0 is a sub-type of T. So that's a,
292 | 
293 | 73
294 | 00:06:31,629 --> 00:06:37,259
295 | that's a call to some function that implements the sub-typing relationship,
296 | 
297 | 74
298 | 00:06:37,259 --> 00:06:43,479
299 | and if, if that passes, if that check passes, well then we're done. And we can
300 | 
301 | 75
302 | 00:06:43,479 --> 00:06:47,169
303 | return the type T1. And there's a little mistake here on the slide, there shouldn't
304 | 
305 | 76
306 | 00:06:47,169 --> 00:06:53,740
307 | be a semicolon there. S o we just return T1 as the type of the entire expression.
308 | 


--------------------------------------------------------------------------------
/CS143 text-srt/english/10-01-A+Static+vs.+Dynamic+Typing.srt:
--------------------------------------------------------------------------------
  1 | 0
  2 | 00:00:03,470 --> 00:00:10,470
  3 | In this video, we're gonna talk about static typing versus dynamic typing. One
  4 | 
  5 | 1
  6 | 00:00:10,540 --> 00:00:13,950
  7 | way to think about the purpose of static types system is to prevent common
  8 | 
  9 | 2
 10 | 00:00:13,950 --> 00:00:17,609
 11 | programming errors and they do this at compile times. So they do this when the
 12 | 
 13 | 3
 14 | 00:00:17,609 --> 00:00:23,230
 15 | program is compiled. And in particular they do it without knowing any input to
 16 | 
 17 | 4
 18 | 00:00:23,230 --> 00:00:26,960
 19 | the program. So the only thing that is available is the program text, and that's
 20 | 
 21 | 5
 22 | 00:00:26,960 --> 00:00:30,759
 23 | why we call them static Because they don't involve any of the dynamic behavior, the
 24 | 
 25 | 6
 26 | 00:00:30,759 --> 00:00:36,170
 27 | actual execution behavior of the program. Now any type system that is correct, any
 28 | 
 29 | 7
 30 | 00:00:36,170 --> 00:00:40,180
 31 | static type system that actually does the right thing, is going to have to disallow
 32 | 
 33 | 8
 34 | 00:00:40,180 --> 00:00:45,690
 35 | some correct programs. It can't reason completely precisely at compile time about
 36 | 
 37 | 9
 38 | 00:00:45,690 --> 00:00:49,030
 39 | everything that could happen as the program runs. Now what this means is that
 40 | 
 41 | 10
 42 | 00:00:49,030 --> 00:00:53,470
 43 | some correct programs, by that I mean some programs that would actually run correctly
 44 | 
 45 | 11
 46 | 00:00:53,470 --> 00:00:58,760
 47 | if you executed them, are going to have to be disallowed by the type checker. And so
 48 | 
 49 | 12
 50 | 00:00:58,760 --> 00:01:02,250
 51 | for this reason some people argue for a dynamic type checking instead, and this is
 52 | 
 53 | 13
 54 | 00:01:02,250 --> 00:01:07,970
 55 | type checking that's done solely when the program runs. So at run time we check
 56 | 
 57 | 14
 58 | 00:01:07,970 --> 00:01:11,920
 59 | whether the actual operations were executing are appropriate for the actual
 60 | 
 61 | 15
 62 | 00:01:11,920 --> 00:01:17,310
 63 | data that arises when the program executes. Other people say well the
 64 | 
 65 | 16
 66 | 00:01:17,310 --> 00:01:21,860
 67 | problem is really just that the type systems just aren't expressive enough and
 68 | 
 69 | 17
 70 | 00:01:21,860 --> 00:01:28,700
 71 | we should work on fancier static type checking systems. And. Over time, there's
 72 | 
 73 | 18
 74 | 00:01:28,700 --> 00:01:32,460
 75 | been a considerable development in both camps. We see a lot of new dynamically
 76 | 
 77 | 19
 78 | 00:01:32,460 --> 00:01:37,060
 79 | type checked languages, coming out, so a lot of the modern scripting like languages
 80 | 
 81 | 20
 82 | 00:01:37,060 --> 00:01:41,250
 83 | and domain specific languages have some form of dynamic type checking. Other
 84 | 
 85 | 21
 86 | 00:01:41,250 --> 00:01:46,590
 87 | people have been working on fancier and fancier type systems and actually there's
 88 | 
 89 | 22
 90 | 00:01:46,590 --> 00:01:53,140
 91 | been a lot of progress, in static checking The disadvantage of the more expressive
 92 | 
 93 | 23
 94 | 00:01:53,140 --> 00:01:57,479
 95 | text. Time checking systems they do tend to get more complicated though, and not
 96 | 
 97 | 24
 98 | 00:01:57,479 --> 00:01:59,950
 99 | all of these features that these people have develop have actually found their way
100 | 
101 | 25
102 | 00:01:59,950 --> 00:02:06,950
103 | yet into main stream languages. Now , one important idea that this discussion
104 | 
105 | 26
106 | 00:02:07,240 --> 00:02:12,130
107 | suggests is that there are two different notions of type. There is the dynamic
108 | 
109 | 27
110 | 00:02:12,130 --> 00:02:16,920
111 | type. That is the type that the object or the value that we're talking about
112 | 
113 | 28
114 | 00:02:16,920 --> 00:02:23,920
115 | actually has at run time And then there is the static type, which is the compile time
116 | 
117 | 29
118 | 00:02:24,500 --> 00:02:29,390
119 | notion, what the type checker knows about the object. And there is some relationship
120 | 
121 | 30
122 | 00:02:29,390 --> 00:02:34,230
123 | that has to exist between the static type and the dynamic type if the static type
124 | 
125 | 31
126 | 00:02:34,230 --> 00:02:41,230
127 | checker is to be correct. In this relationship can be formalized by some
128 | 
129 | 32
130 | 00:02:43,920 --> 00:02:47,720
131 | kind of a theorem that proves something like the following, what we'd like to know
132 | 
133 | 33
134 | 00:02:47,720 --> 00:02:52,270
135 | is that for every expression E, for every program expression E that you can write in
136 | 
137 | 34
138 | 00:02:52,270 --> 00:02:57,900
139 | the programming language, the static type that the compiler says that the, the
140 | 
141 | 35
142 | 00:02:57,900 --> 00:03:02,739
143 | expression is going to have is equal to the dynamic type of that expression.
144 | 
145 | 36
146 | 00:03:02,739 --> 00:03:07,510
147 | Another way is saying that if you actually run the program. Then you get something
148 | 
149 | 37
150 | 00:03:07,510 --> 00:03:11,650
151 | that is consistent with what you expected to get from the static type checker. That
152 | 
153 | 38
154 | 00:03:11,650 --> 00:03:16,420
155 | the static type checker is actually able to correctly predict what values will hap,
156 | 
157 | 39
158 | 00:03:16,420 --> 00:03:21,450
159 | will, will arise at run time. And in fact in the early days of programming languages
160 | 
161 | 40
162 | 00:03:21,450 --> 00:03:26,290
163 | these were exactly the kinds of terms we had for the very simple type systems in
164 | 
165 | 41
166 | 00:03:26,290 --> 00:03:31,640
167 | the languages at that time. Now the situations a little more complicated for a
168 | 
169 | 42
170 | 00:03:31,640 --> 00:03:37,050
171 | language like COOL. So let's take a look at the execution of a, a typical COOL
172 | 
173 | 43
174 | 00:03:37,050 --> 00:03:41,269
175 | program. So here's a couple of classes, class A and a class B that inherits from
176 | 
177 | 44
178 | 00:03:41,269 --> 00:03:47,459
179 | A. So B is going to be a subtype of A which we'll write like that. And now we
180 | 
181 | 45
182 | 00:03:47,459 --> 00:03:53,870
183 | have a declaration here of X having type A and this is the static type of X. So the
184 | 
185 | 46
186 | 00:03:53,870 --> 00:04:00,870
187 | static type of X is A. And that's what the compiler knows about X's value. And then
188 | 
189 | 47
190 | 00:04:03,260 --> 00:04:09,190
191 | here, when we execute this line of code, we can see that we assign a new A object
192 | 
193 | 48
194 | 00:04:09,190 --> 00:04:11,739
195 | to X. And the fact that it's new is not important. All that's important is the
196 | 
197 | 49
198 | 00:04:11,739 --> 00:04:16,840
199 | fact that it's an A object. And so, at this point, the dynamic type of X is also
200 | 
201 | 50
202 | 00:04:16,839 --> 00:04:22,090
203 | A. Okay. So if this line of code when it actually execu tes A, which was declared
204 | 
205 | 51
206 | 00:04:22,089 --> 00:04:27,110
207 | to have static type A, actually holds an object of class A. But a little bit later
208 | 
209 | 52
210 | 00:04:27,110 --> 00:04:31,580
211 | on, down at this line of code, the dynamic type is actually different. The dynamic
212 | 
213 | 53
214 | 00:04:31,580 --> 00:04:38,580
215 | type here of X Is going to be B. K line of code executes x holds a b object even
216 | 
217 | 54
218 | 00:04:40,990 --> 00:04:45,370
219 | though it's declared Have a different type. And this is a very, very important
220 | 
221 | 55
222 | 00:04:45,370 --> 00:04:48,930
223 | distinction to keep in mind. So there's a static type, there's a type that the
224 | 
225 | 56
226 | 00:04:48,930 --> 00:04:53,340
227 | compiler knows about, and that's invariant. X has type A It always has type
228 | 
229 | 57
230 | 00:04:53,340 --> 00:05:00,220
231 | a All the uses of x for the entire scope of our typed with class A by the compiler.
232 | 
233 | 58
234 | 00:05:00,220 --> 00:05:03,590
235 | But at run time, because we have assignments and we can assign different
236 | 
237 | 59
238 | 00:05:03,590 --> 00:05:08,660
239 | objects to x, x can actually take on objects of different types, different run
240 | 
241 | 60
242 | 00:05:08,660 --> 00:05:15,660
243 | time types. Type b, that's assigned x when the program executes.
244 | 


--------------------------------------------------------------------------------
/CS143 text-srt/english/10-01-B+Static+vs.+Dynamic+Typing.srt:
--------------------------------------------------------------------------------
 1 | 0
 2 | 00:00:02,139 --> 00:00:04,269
 3 | What this means is that the soundness theorem for the Cool type
 4 | 
 5 | 1
 6 | 00:00:04,269 --> 00:00:09,160
 7 | system is a bit more complicated than the one for simple type systems. So in the
 8 | 
 9 | 2
10 | 00:00:09,160 --> 00:00:14,869
11 | presence of sub-typing, the, the property that we want is that the static type, computed
12 | 
13 | 3
14 | 00:00:14,869 --> 00:00:20,999
15 | by the compiler for a given expression E, is going to be a correct predictor of all
16 | 
17 | 4
18 | 00:00:20,999 --> 00:00:25,230
19 | the possible dynamic types that he could have, and we do that by using the sub typing
20 | 
21 | 5
22 | 00:00:25,230 --> 00:00:29,480
23 | relationship right here. So we say that with every dynamic type E could have
24 | 
25 | 6
26 | 00:00:29,480 --> 00:00:34,980
27 | whatever types E can take on at run time, they have to be a sub type of the single static
28 | 
29 | 7
30 | 00:00:34,980 --> 00:00:41,730
31 | type that is predicted for E. What this means is that all the operations that
32 | 
33 | 8
34 | 00:00:41,730 --> 00:00:46,370
35 | can be used on an object of type C have to also be able to be used on any object of type
36 | 
37 | 9
38 | 00:00:46,370 --> 00:00:52,320
39 | C prime that's a subtype of C. So if C defines certain attributes and methods,
40 | 
41 | 10
42 | 00:00:52,320 --> 00:00:59,320
43 | then all of those attributes and methods have to be available in C prime. And therefore
44 | 
45 | 11
46 | 00:00:59,500 --> 00:01:05,379
47 | sub classes can only add attributes or methods. So whatever attributes
48 | 
49 | 12
50 | 00:01:05,379 --> 00:01:10,579
51 | and methods a sub class will have, what c, c prime in this case, those
52 | 
53 | 13
54 | 00:01:10,579 --> 00:01:16,850
55 | are all in addition to what c has. So c prime, a sub class, will never remove an attribute
56 | 
57 | 14
58 | 00:01:16,850 --> 00:01:21,600
59 | or remove a method. It will only extend or add methods and attributes to the
60 | 
61 | 15
62 | 00:01:21,600 --> 00:01:25,880
63 | class it is inheriting fro M. And note that you're allowed to redefine methods in
64 | 
65 | 16
66 | 00:01:25,880 --> 00:01:30,530
67 | cool, and in most object oriented languages, but you cannot change the type.
68 | 
69 | 17
70 | 00:01:30,530 --> 00:01:34,890
71 | So even though you can redefine the code that goes with that method, it still has to
72 | 
73 | 18
74 | 00:01:34,890 --> 00:01:39,219
75 | type check according to the original type that you declared. And so whatever type
76 | 
77 | 19
78 | 00:01:39,219 --> 00:01:43,259
79 | the method has in the first class in which it's defined, it's going to have that
80 | 
81 | 20
82 | 00:01:43,259 --> 00:01:47,289
83 | same type, that same argument and result, the same types for the method arguments,
84 | 
85 | 21
86 | 00:01:47,289 --> 00:01:51,009
87 | and the same types for the method result in all of the sub classes. And
88 | 
89 | 22
90 | 00:01:51,009 --> 00:01:56,010
91 | that's a pretty standard, design point for a lot of object oriented languages.
92 | 


--------------------------------------------------------------------------------
/CS143 text-srt/english/10-03-B+Self+Type+Operations.srt:
--------------------------------------------------------------------------------
 1 | 0
 2 | 00:00:00,530 --> 00:00:03,679
 3 | So now, let's move on to the least upper bound operation. And once again, T and
 4 | 
 5 | 1
 6 | 00:00:03,679 --> 00:00:08,500
 7 | T prime will be any types except self-type. The least upper bound of self-type
 8 | 
 9 | 2
10 | 00:00:08,500 --> 00:00:12,530
11 | with itself is just self-type, and I think that that's pret ty clear. The
12 | 
13 | 3
14 | 00:00:12,530 --> 00:00:17,550
15 | least upper bound of self-types of C and T will be the least upper bound of the class
16 | 
17 | 4
18 | 00:00:17,550 --> 00:00:22,300
19 | C and T. And once again, this is because C is the largest type, that self-type could
20 | 
21 | 5
22 | 00:00:22,300 --> 00:00:28,180
23 | be. And therefore, the largest type is guaranteed to cover both self-types of C.
24 | 
25 | 6
26 | 00:00:28,180 --> 00:00:35,180
27 | And T is going to be the least upper bound of C and T. And least of our bounds is the
28 | 
29 | 7
30 | 00:00:35,339 --> 00:00:41,449
31 | symmetric operations, so if I reverse these two arguments, the answer is the same.
32 | 
33 | 8
34 | 00:00:41,449 --> 00:00:44,649
35 | And finally if self-type is not one of the arguments at least upper bound then
36 | 
37 | 9
38 | 00:00:44,649 --> 00:00:48,809
39 | we just do what we do before. The self-type, the least upper bound definition
40 | 
41 | 10
42 | 00:00:48,809 --> 00:00:53,719
43 | excuse me, has not changed for class names for type names other than self-type.
44 | 


--------------------------------------------------------------------------------
/CS143 text-srt/english/10-04-self-type-usage.srt:
--------------------------------------------------------------------------------
  1 | 0
  2 | 00:00:04,350 --> 00:00:07,759
  3 | Now that we've seen some of the operations on self type, in this video we're going to
  4 | 
  5 | 1
  6 | 00:00:07,759 --> 00:00:14,759
  7 | talk about where self type can be used in Cool. The parser checks if self type
  8 | 
  9 | 2
 10 | 00:00:15,709 --> 00:00:19,340
 11 | appears only where types are permitted, but that's in fact a little bit too
 12 | 
 13 | 3
 14 | 00:00:19,340 --> 00:00:24,130
 15 | permissive. There are places where some other types can appear but self type
 16 | 
 17 | 4
 18 | 00:00:24,130 --> 00:00:28,330
 19 | cannot, and so the purpose of this particular video is to go over the various
 20 | 
 21 | 5
 22 | 00:00:28,330 --> 00:00:34,340
 23 | rules for the usage of self type. So let's begin with, a very simple rule. So, self
 24 | 
 25 | 6
 26 | 00:00:34,340 --> 00:00:39,010
 27 | type is not a class name so it can't appear in a class definition, can neither
 28 | 
 29 | 7
 30 | 00:00:39,010 --> 00:00:46,010
 31 | be the name of the class nor the class that is inherited from. In attribute
 32 | 
 33 | 8
 34 | 00:00:46,120 --> 00:00:50,219
 35 | declarations the type of attribute in this case we have an attribute x and is
 36 | 
 37 | 9
 38 | 00:00:50,219 --> 00:00:54,829
 39 | declared to have type t, it is okay for t to be self type so it's fine to have
 40 | 
 41 | 10
 42 | 00:00:54,829 --> 00:01:01,829
 43 | attributes that are declared to be the self type of the class. Similarly, it's
 44 | 
 45 | 11
 46 | 00:01:02,600 --> 00:01:08,799
 47 | fine to have local let down variables that have type self type. And it's fine to
 48 | 
 49 | 12
 50 | 00:01:08,799 --> 00:01:13,740
 51 | allocate a new object of type self type. And what this actually does is that it
 52 | 
 53 | 13
 54 | 00:01:13,740 --> 00:01:20,009
 55 | allocates an object that has the same dynamic type as the self object. So,
 56 | 
 57 | 14
 58 | 00:01:20,009 --> 00:01:23,670
 59 | whatever the type of the self object happens to be, which is not necessarily
 60 | 
 61 | 15
 62 | 00:01:23,670 --> 00:01:30,170
 63 | the type of the enclosing class at run time, the u t operation will create a new
 64 | 
 65 | 16
 66 | 00:01:30,170 --> 00:01:36,780
 67 | object of that dynamic type. The type named in aesthetic dispatch cannot be self
 68 | 
 69 | 17
 70 | 00:01:36,780 --> 00:01:43,780
 71 | type, again because it has to be an actual class name. Finally let's consider method
 72 | 
 73 | 18
 74 | 00:01:45,609 --> 00:01:48,189
 75 | definitions. So, here's a very simple method definition. It has one formal
 76 | 
 77 | 19
 78 | 00:01:48,189 --> 00:01:53,689
 79 | parameter X of type T and the method returns something of type T prime. And it
 80 | 
 81 | 20
 82 | 00:01:53,689 --> 00:01:58,729
 83 | turns out that only T prime, only the return type can be of type self-type. No
 84 | 
 85 | 21
 86 | 00:01:58,729 --> 00:02:04,619
 87 | argument type can be of type self-type. And to see why let's, I can show it
 88 | 
 89 | 22
 90 | 00:02:04,619 --> 00:02:09,890
 91 | actually two different ways. Why, why this has to be the case. And we'll do both
 92 | 
 93 | 23
 94 | 00:02:09,889 --> 00:02:15,500
 95 | because this is actually important. So, let's think about a dispatch to this a
 96 | 
 97 | 24
 98 | 00:02:15,500 --> 00:02:20,730
 99 | method, so let's say we have some expression e, and we call method m, and we
100 | 
101 | 25
102 | 00:02:20,730 --> 00:02:26,250
103 | have some argument e prime. And now, let's say the argument e prime As the
104 | 
105 | 26
106 | 00:02:26,250 --> 00:02:33,250
107 | type t zero. So if you recall the rule for method calls, t zero is gonna have to be a
108 | 
109 | 27
110 | 00:02:34,450 --> 00:02:39,530
111 | sub type of the type of the fall parameter. We're gonna be passing this in,
112 | 
113 | 28
114 | 00:02:39,530 --> 00:02:43,910
115 | so whatever type x is declared to have here has to be a super type of the type of
116 | 
117 | 29
118 | 00:02:43,910 --> 00:02:49,260
119 | the actual argument. So that means that t zero is going to have to be a sub type of,
120 | 
121 | 30
122 | 00:02:49,260 --> 00:02:55,230
123 | now let's assume that the argument can be of type self type. Some view that t zero
124 | 
125 | 31
126 | 00:02:55,230 --> 00:03:01,250
127 | has to be a subtype of self type this is in some class c wherever this is defined,
128 | 
129 | 32
130 | 00:03:01,250 --> 00:03:06,780
131 | and remember that we said This was always false that you couldn't have self type on
132 | 
133 | 33
134 | 00:03:06,780 --> 00:03:13,430
135 | the right hand side and a regular type on the left hand side. Because that would
136 | 
137 | 34
138 | 00:03:13,430 --> 00:03:18,510
139 | lead to problems that would, that we would never be able to prove that in general for
140 | 
141 | 35
142 | 00:03:18,510 --> 00:03:24,090
143 | a, that, that a type is actually a sub type of self type because self type can
144 | 
145 | 36
146 | 00:03:24,090 --> 00:03:28,680
147 | vary over all the sub types of the class C. So that's one way to see that we can't
148 | 
149 | 37
150 | 00:03:28,680 --> 00:03:34,450
151 | allow method parameters, to be typed self type, but it's also helpful to just think
152 | 
153 | 38
154 | 00:03:34,450 --> 00:03:40,230
155 | about executing the code or some example code and see what can go wrong. So here's
156 | 
157 | 39
158 | 00:03:40,230 --> 00:03:46,260
159 | an example. And let me just walk you through what happens if we allow a
160 | 
161 | 40
162 | 00:03:46,260 --> 00:03:52,270
163 | parameter to have type self type in this example. So there are two class
164 | 
165 | 41
166 | 00:03:52,270 --> 00:03:58,760
167 | definitions. Class A has a method comp for comparison, and it takes one argument of
168 | 
169 | 42
170 | 00:03:58,760 --> 00:04:03,100
171 | type self-type. And it returns a bull. So the idea here is that the comparison
172 | 
173 | 43
174 | 00:04:03,100 --> 00:04:07,730
175 | operation probably compares the, this parameter with the argument and returns
176 | 
177 | 44
178 | 00:04:07,730 --> 00:04:13,170
179 | true or false. Then, there's a second class B, and B is a sub-type of A, it
180 | 
181 | 45
182 | 00:04:13,170 --> 00:04:20,170
183 | inherits from A. And it has one new field B little b here of type int. And now the
184 | 
185 | 46
186 | 00:04:21,649 --> 00:04:27,460
187 | comparison function in class B is overridden has the same signature as the
188 | 
189 | 47
190 | 00:04:27,460 --> 00:04:33,370
191 | comparison function or the comp function in class A. But, the, the method body here
192 | 
193 | 48
194 | 00:04:33,370 --> 00:04:40,370
195 | accesses the field B. And now let's take a look at what happens with a piece of code
196 | 
197 | 49
198 | 00:04:40,990 --> 00:04:46,090
199 | that uses these two classes. So, here X is going to be declared to be of type A. But
200 | 
201 | 50
202 | 00:04:46,090 --> 00:04:50,750
203 | we're going to assign it something of type B. And here we're notice that there's a
204 | 
205 | 51
206 | 00:04:50,750 --> 00:04:55,370
207 | gap between the static type, which will be A, and the dynamic type, which will be B.
208 | 
209 | 52
210 | 00:04:55,370 --> 00:05:02,370
211 | And that's actually key to the problem. And now, we Invoke the cup method on X and
212 | 
213 | 53
214 | 00:05:03,340 --> 00:05:08,600
215 | we pass it a new A object. And so what happens, well this type checks just fine
216 | 
217 | 54
218 | 00:05:08,600 --> 00:05:15,130
219 | because X is in class A. X is of type A and this argument is also of type A. So if
220 | 
221 | 55
222 | 00:05:15,130 --> 00:05:18,880
223 | self-type if having an argument type self-type is ever going to work it has to
224 | 
225 | 56
226 | 00:05:18,880 --> 00:05:25,530
227 | work for this example where the two static types of the, of the dispatched of this
228 | 
229 | 57
230 | 00:05:25,530 --> 00:05:29,880
231 | parameter and the former parameter are exactly the same. So that clearly has to
232 | 
233 | 58
234 | 00:05:29,880 --> 00:05:35,050
235 | be allowed if we allow self type as the type of the argument. And now let's think
236 | 
237 | 59
238 | 00:05:35,050 --> 00:05:41,860
239 | about what happens when it actually executes Is going to invoke the comp
240 | 
241 | 60
242 | 00:05:41,860 --> 00:05:48,260
243 | method in the b class okay, Because X is of dynamic type B. And then it's going to
244 | 
245 | 61
246 | 00:05:48,260 --> 00:05:53,230
247 | take the arguments and it's going to access its B field. But the argument is of
248 | 
249 | 62
250 | 00:05:53,230 --> 00:05:57,970
251 | dynamic type A and it has no B field. And so, this is actually going to cause a
252 | 
253 | 63
254 | 00:05:57,970 --> 00:06:02,310
255 | runtime crash. So, and just to go over that one more time, Just to make sure that
256 | 
257 | 64
258 | 00:06:02,310 --> 00:06:08,560
259 | it is clear. Here X has type A ut dynamic type B. The argument has static type A and
260 | 
261 | 65
262 | 00:06:08,560 --> 00:06:14,430
263 | dynamic type A and when this method gets invoked the argument That which is of
264 | 
265 | 66
266 | 00:06:14,430 --> 00:06:21,250
267 | dynamic type A, does not have the operations, all the fields and methods, of
268 | 
269 | 67
270 | 00:06:21,250 --> 00:06:25,680
271 | the class B And results in a run time, undefined behavior at run time.
272 | 


--------------------------------------------------------------------------------
/CS143 text-srt/english/10-05-B+Self+Type+Checking.srt:
--------------------------------------------------------------------------------
  1 | 0
  2 | 00:00:01,660 --> 00:00:04,230
  3 | To wrap up this video, here are a few comments on
  4 | 
  5 | 1
  6 | 00:00:04,230 --> 00:00:09,440
  7 | implementing type checking in the presence of self-type. First of all, the extended
  8 | 
  9 | 2
 10 | 00:00:09,440 --> 00:00:15,030
 11 | subtyping and least upper bound operations can do a lot of the work. If you extend
 12 | 
 13 | 3
 14 | 00:00:15,030 --> 00:00:18,470
 15 | subtyping and least upper bound the way that we did then a lot of the rules don't
 16 | 
 17 | 4
 18 | 00:00:18,470 --> 00:00:22,000
 19 | have to change and for the most part, you don't have to do anything special for self
 20 | 
 21 | 5
 22 | 00:00:22,000 --> 00:00:27,830
 23 | type. Self-type can only be used in a very few places in the language and it's up to
 24 | 
 25 | 6
 26 | 00:00:27,830 --> 00:00:30,699
 27 | you to check that it isn't used anywhere else. Those restrictions have to be
 28 | 
 29 | 7
 30 | 00:00:30,699 --> 00:00:36,940
 31 | followed carefully. And finally, for the most part, a use of self-type always
 32 | 
 33 | 8
 34 | 00:00:36,940 --> 00:00:41,410
 35 | refers to any sub-type of the current class. There is one exception to this in
 36 | 
 37 | 9
 38 | 00:00:41,410 --> 00:00:46,649
 39 | the type checking of dispatch. In dispatch there's a method look up where we look up
 40 | 
 41 | 10
 42 | 00:00:46,649 --> 00:00:53,649
 43 | in some class C, a method F. And it's possible that, that method will have a
 44 | 
 45 | 11
 46 | 00:00:53,690 --> 00:01:00,340
 47 | return type self-type. And this class here c, may have nothing to do with the current
 48 | 
 49 | 12
 50 | 00:01:00,340 --> 00:01:04,059
 51 | class. We are dispatching here to a different class and whatever our current
 52 | 
 53 | 13
 54 | 00:01:04,059 --> 00:01:09,110
 55 | class is, this particular self type refers self type in That class, the class in
 56 | 
 57 | 14
 58 | 00:01:09,110 --> 00:01:12,090
 59 | which we're doing the look up, not whatever class in which we're doing type
 60 | 
 61 | 15
 62 | 00:01:12,090 --> 00:01:16,320
 63 | checking. And fortunately we never need to compare that self-type to any self -type
 64 | 
 65 | 16
 66 | 00:01:16,320 --> 00:01:21,120
 67 | in the current class so there's no cross talk between different kinds of self-type.
 68 | 
 69 | 17
 70 | 00:01:21,120 --> 00:01:26,600
 71 | And again this is the only place in the type checking rules where you look at a
 72 | 
 73 | 18
 74 | 00:01:26,600 --> 00:01:33,600
 75 | self-type that is not one in the current class. To summarize our discussion of
 76 | 
 77 | 19
 78 | 00:01:35,290 --> 00:01:39,080
 79 | self-type, self-type is still a research idea. It adds more expressiveness to the
 80 | 
 81 | 20
 82 | 00:01:39,080 --> 00:01:42,520
 83 | type system, and I think that's, yeah, easy to see, but you won't find self type
 84 | 
 85 | 21
 86 | 00:01:42,520 --> 00:01:47,890
 87 | in mainstream languages. Self-type by itself I think is not so important, except
 88 | 
 89 | 22
 90 | 00:01:47,890 --> 00:01:51,560
 91 | for the project that you're going to implement. Rather, the reason for
 92 | 
 93 | 23
 94 | 00:01:51,560 --> 00:01:55,440
 95 | including self-type is to illustrate that type checking can be quite subtle. It's
 96 | 
 97 | 24
 98 | 00:01:55,440 --> 00:02:00,180
 99 | not all INT plus INT equals INT. There are actually are fairly sophisticated things
100 | 
101 | 25
102 | 00:02:00,180 --> 00:02:05,880
103 | and fairly sophisticated reasoning that goes on inside type checking In practice,
104 | 
105 | 26
106 | 00:02:05,880 --> 00:02:09,780
107 | of course, there needs to be a balance between the complexity of the type-system
108 | 
109 | 27
110 | 00:02:09,780 --> 00:02:14,310
111 | and its expressiveness. So more complex type-systems are harder to learn and
112 | 
113 | 28
114 | 00:02:14,310 --> 00:02:18,770
115 | they're harder to use effectively, but they also allow you to write more programs.
116 | 


--------------------------------------------------------------------------------
/CS143 text-srt/english/11-01-runtime-organization.srt:
--------------------------------------------------------------------------------
  1 | 0
  2 | 00:00:03,730 --> 00:00:10,730
  3 | In this video we're going to begin our discussion of run time systems. Now, at
  4 | 
  5 | 1
  6 | 00:00:12,670 --> 00:00:17,410
  7 | this point we have actually covered the entire front end of the compiler which
  8 | 
  9 | 2
 10 | 00:00:17,410 --> 00:00:22,679
 11 | consists of the three phases, lexical analysis, parsing and semantic analysis
 12 | 
 13 | 3
 14 | 00:00:22,679 --> 00:00:28,730
 15 | And these three passes or these three phases together, their job is to really
 16 | 
 17 | 4
 18 | 00:00:28,730 --> 00:00:35,730
 19 | enforce the language semantics or the language definition. So, we know. After
 20 | 
 21 | 5
 22 | 00:00:38,629 --> 00:00:43,550
 23 | these three phases are done that if no errors have been generated by anyone of
 24 | 
 25 | 6
 26 | 00:00:43,550 --> 00:00:47,590
 27 | those phases then the program is actually a valid program in the programming
 28 | 
 29 | 7
 30 | 00:00:47,590 --> 00:00:51,829
 31 | language that we're compiling And at this point the compiler is going to be able to
 32 | 
 33 | 8
 34 | 00:00:51,829 --> 00:00:56,910
 35 | produce codes to produce a translation of the program that you can actually execute.
 36 | 
 37 | 9
 38 | 00:00:56,910 --> 00:01:00,440
 39 | And I should say that of course. Enforcing the language definition is just one
 40 | 
 41 | 10
 42 | 00:01:00,440 --> 00:01:05,210
 43 | purpose of the front-end. The front-end also builds the data structures that are
 44 | 
 45 | 11
 46 | 00:01:05,209 --> 00:01:10,720
 47 | needed to do co-generation as we seen but there is a real. Once we get through the
 48 | 
 49 | 12
 50 | 00:01:10,720 --> 00:01:15,720
 51 | front-end we no longer looking for errors in the program. We're no longer trying to
 52 | 
 53 | 13
 54 | 00:01:15,720 --> 00:01:19,660
 55 | figure out whether it's a valid program. Now we're really down to the point where
 56 | 
 57 | 14
 58 | 00:01:19,660 --> 00:01:25,970
 59 | we're going to generate code And that is a job at the back end. So cogeneration is
 60 | 
 61 | 15
 62 | 00:01:25,970 --> 00:01:30,570
 63 | certainly part of it. The other big part of the back end is program optimization so
 64 | 
 65 | 16
 66 | 00:01:30,570 --> 00:01:35,790
 67 | doing transformations to improve the program. But before we can talk about
 68 | 
 69 | 17
 70 | 00:01:35,790 --> 00:01:42,230
 71 | either one of those things, we need to talk about Runtime organization And why is
 72 | 
 73 | 18
 74 | 00:01:42,230 --> 00:01:46,630
 75 | that, well because we need to understand what it is we're trying to generate before
 76 | 
 77 | 19
 78 | 00:01:46,630 --> 00:01:50,320
 79 | we can talk about how we generated and have that makes sense. So first, we're
 80 | 
 81 | 20
 82 | 00:01:50,320 --> 00:01:54,870
 83 | gonna talk about what the, the translator program looks like and how it's organized
 84 | 
 85 | 21
 86 | 00:01:54,870 --> 00:01:57,950
 87 | and then we'll talk about algorithms and code generation algorithms were actually
 88 | 
 89 | 22
 90 | 00:01:57,950 --> 00:02:02,710
 91 | producing those things. And this is a well-understood area or at least some very
 92 | 
 93 | 23
 94 | 00:02:02,710 --> 00:02:06,080
 95 | standard techniques that are widely used and those are the ones we wanted to cover
 96 | 
 97 | 24
 98 | 00:02:06,080 --> 00:02:13,080
 99 | and, and encourage you to use in your project. The main thing we're going to
100 | 
101 | 25
102 | 00:02:14,629 --> 00:02:19,489
103 | cover in this sequence of videos is the management of Runtime resources and in
104 | 
105 | 26
106 | 00:02:19,489 --> 00:02:24,310
107 | particular I'm going to be stressing the correspondence and the distinction between
108 | 
109 | 27
110 | 00:02:24,310 --> 00:02:29,879
111 | static and dynamic structures. So static structures are things that exist to
112 | 
113 | 28
114 | 00:02:29,879 --> 00:02:35,099
115 | compile time and dynamic structures, those are the things that exist or happen at
116 | 
117 | 29
118 | 00:02:35,099 --> 00:02:42,099
119 | Runtime And, this is probably the most important distinction for you to try to
120 | 
121 | 30
122 | 00:02:42,120 --> 00:02:45,930
123 | understand if you really want to understand how a compiler works. What
124 | 
125 | 31
126 | 00:02:45,930 --> 00:02:49,569
127 | happens to the compile time and what happens at run time. Having a clear
128 | 
129 | 32
130 | 00:02:49,569 --> 00:02:55,140
131 | separation in your mind between what is done by the compiler and what is deferred
132 | 
133 | 33
134 | 00:02:55,140 --> 00:02:59,709
135 | to when the target program or the generated program actually runs that is
136 | 
137 | 34
138 | 00:02:59,709 --> 00:03:05,819
139 | key to really understanding how compilers work. And we'll also be talking about
140 | 
141 | 35
142 | 00:03:05,819 --> 00:03:10,670
143 | storage organization. So how memory is used to store the data structures of the
144 | 
145 | 36
146 | 00:03:10,670 --> 00:03:17,670
147 | executing program. So let's begin at the beginning. So initially there is the
148 | 
149 | 37
150 | 00:03:19,310 --> 00:03:22,230
151 | operating system and the operating system is the only thing that is running on the
152 | 
153 | 38
154 | 00:03:22,230 --> 00:03:27,670
155 | machine and when a program is invoke. When the user says he wants to run a program,
156 | 
157 | 39
158 | 00:03:27,670 --> 00:03:32,370
159 | what happens while the operating system is going to allocate space for the program
160 | 
161 | 40
162 | 00:03:32,370 --> 00:03:37,819
163 | the code for the program is going to be loaded into that space and then the
164 | 
165 | 41
166 | 00:03:37,819 --> 00:03:42,499
167 | operating system is going to execute a job to the entry point or the main function of
168 | 
169 | 42
170 | 00:03:42,499 --> 00:03:49,349
171 | the program and then your program will be off and running. So, let's take a look at
172 | 
173 | 43
174 | 00:03:49,349 --> 00:03:54,200
175 | what the organization memory looks like very roughly when the Operating System
176 | 
177 | 44
178 | 00:03:54,200 --> 00:03:59,480
179 | began execution of the compiled program. So we're gonna draw our pictures of memory
180 | 
181 | 45
182 | 00:03:59,480 --> 00:04:04,859
183 | like this. That would be just a big block and there will be a starting address at
184 | 
185 | 46
186 | 00:04:04,859 --> 00:04:08,969
187 | the, a lower address and a higher address and this is all the memory that is
188 | 
189 | 47
190 | 00:04:08,969 --> 00:04:14,799
191 | allocated to your program. Now into some portion of that space goes to code for the
192 | 
193 | 48
194 | 00:04:14,799 --> 00:04:20,060
195 | program so the actual compiled code for the program is loaded usually at one end
196 | 
197 | 49
198 | 00:04:20,060 --> 00:04:25,320
199 | of the memory space allocated to the program. And then there is a bunch. Of
200 | 
201 | 50
202 | 00:04:25,320 --> 00:04:29,850
203 | other space that is going to be used for other things and we'll talk about that in
204 | 
205 | 51
206 | 00:04:29,850 --> 00:04:36,470
207 | a minute. Before going on, I want to say a few words about this pictures of Run-time
208 | 
209 | 52
210 | 00:04:36,470 --> 00:04:40,840
211 | Organization because I'm going to be drawing a lot of them over the next few
212 | 
213 | 53
214 | 00:04:40,840 --> 00:04:47,210
215 | videos. So, it's just traditional to have memory drawn as a rectangle with the low
216 | 
217 | 54
218 | 00:04:47,210 --> 00:04:52,340
219 | address at the top and the high address at the bottom. There's nothing magic about
220 | 
221 | 55
222 | 00:04:52,340 --> 00:04:55,620
223 | that, just a convention we could adjust it easily every verse or order of the
224 | 
225 | 56
226 | 00:04:55,620 --> 00:05:01,370
227 | address, no big deal. And then we'll be drawing lines to the limit different
228 | 
229 | 57
230 | 00:05:01,370 --> 00:05:07,530
231 | regions of this memory showing different kinds of data and how they're stored in
232 | 
233 | 58
234 | 00:05:07,530 --> 00:05:12,370
235 | the memory allocated to the program. And clearly these pictures are simplifications
236 | 
237 | 59
238 | 00:05:12,370 --> 00:05:17,450
239 | if this, is a virtual memory system for example, there's no guarantee that these
240 | 
241 | 60
242 | 00:05:17,450 --> 00:05:23,370
243 | data is actually laid out contiguously but it helps to understand you know, what the
244 | 
245 | 61
246 | 00:05:23,370 --> 00:05:29,500
247 | different kinds of data are. And what the, a compiler needs to do with them to have
248 | 
249 | 62
250 | 00:05:29,500 --> 00:05:36,500
251 | simple pictures like this. So coming back to our picture of run time organization,
252 | 
253 | 63
254 | 00:05:37,580 --> 00:05:43,350
255 | we have some block memory and the first portion of that is occupied by the actual
256 | 
257 | 64
258 | 00:05:43,350 --> 00:05:46,970
259 | generated code for the program and then there was this other space and we're what
260 | 
261 | 65
262 | 00:05:46,970 --> 00:05:51,930
263 | goes to that space? Well, what goes to that space is the data for the program. So
264 | 
265 | 66
266 | 00:05:51,930 --> 00:05:57,520
267 | all the data is in the rest of the space and the tricky thing about code generation
268 | 
269 | 67
270 | 00:05:57,520 --> 00:06:01,870
271 | that the compiler is responsible for generating the code but it's also
272 | 
273 | 68
274 | 00:06:01,870 --> 00:06:06,620
275 | responsible for orchestrating the data. So the compiler has to decide what the lay of
276 | 
277 | 69
278 | 00:06:06,620 --> 00:06:11,530
279 | the data is going to be and then generate code that correctly manipulates that data
280 | 
281 | 70
282 | 00:06:11,530 --> 00:06:16,810
283 | so there are references of course in the code. To the data and the code and data
284 | 
285 | 71
286 | 00:06:16,810 --> 00:06:20,500
287 | need to be designed, the code and the layout of the data, excuse me, need to be
288 | 
289 | 72
290 | 00:06:20,500 --> 00:06:26,580
291 | designed together so that the generated program will function correctly. Now, it
292 | 
293 | 73
294 | 00:06:26,580 --> 00:06:31,870
295 | turns out that this actually more than one kind of data that the compiler is going to
296 | 
297 | 74
298 | 00:06:31,870 --> 00:06:36,520
299 | be interested in and what we'll be talking about. In the next video is the different
300 | 
301 | 75
302 | 00:06:36,520 --> 00:06:39,440
303 | kinds of data and the different distinction between the kinds of data that
304 | 
305 | 76
306 | 00:06:39,440 --> 00:06:41,490
307 | go in this data area.
308 | 


--------------------------------------------------------------------------------
/CS143 text-srt/english/11-02-B+Activations.srt:
--------------------------------------------------------------------------------
  1 | 0
  2 | 00:00:00,760 --> 00:00:04,800
  3 | To sum up our discussion of activations it's obvious I think that
  4 | 
  5 | 1
  6 | 00:00:04,800 --> 00:00:09,140
  7 | the activation tree depends on the runtime behavior of the program. So it depends on
  8 | 
  9 | 2
 10 | 00:00:09,140 --> 00:00:12,700
 11 | the runtime value who's exactly which procedures are called and what the
 12 | 
 13 | 3
 14 | 00:00:12,700 --> 00:00:17,530
 15 | activation tree turns out to be. Now, this was not illustrated in our examples but it
 16 | 
 17 | 4
 18 | 00:00:17,530 --> 00:00:22,890
 19 | should be obvious that the activation tree can be different for different inputs. And
 20 | 
 21 | 5
 22 | 00:00:22,890 --> 00:00:27,720
 23 | so the programs I showed you didn't take input and so we didn't have, every time
 24 | 
 25 | 6
 26 | 00:00:27,720 --> 00:00:31,830
 27 | you run those programs we'll get the same activation tree, playing general if
 28 | 
 29 | 7
 30 | 00:00:31,830 --> 00:00:35,010
 31 | program takes input, it will execute differently and may call different
 32 | 
 33 | 8
 34 | 00:00:35,010 --> 00:00:41,399
 35 | procedures and different orders. And finally here's perhaps the most important
 36 | 
 37 | 9
 38 | 00:00:41,399 --> 00:00:46,049
 39 | point for an implementation point of view. Since activations are properly nested, we
 40 | 
 41 | 10
 42 | 00:00:46,049 --> 00:00:53,049
 43 | can use a stack to implement of detract the currently active activations. So,
 44 | 
 45 | 11
 46 | 00:00:53,769 --> 00:00:59,219
 47 | let's see how we can use a stack to track activations. We'll use these examples that
 48 | 
 49 | 12
 50 | 00:00:59,219 --> 00:01:04,659
 51 | we looked at before. And what I'm going to do is I'm going to show the activation
 52 | 
 53 | 13
 54 | 00:01:04,659 --> 00:01:10,780
 55 | tree over here on the left and I'm going to show the stack of currently executing
 56 | 
 57 | 14
 58 | 00:01:10,780 --> 00:01:14,820
 59 | activations on the right. So the stack is not gonna keep track of the entire
 60 | 
 61 | 15
 62 | 00:01:14,820 --> 00:01:19,350
 63 | activation tree. It's only going to keep track of the activations that are
 64 | 
 65 | 16
 66 | 00:01:19,350 --> 00:01:24,810
 67 | currently running so at each step of the program, the stack should contain all of
 68 | 
 69 | 17
 70 | 00:01:24,810 --> 00:01:31,390
 71 | the currently active or currently running activations. So, the tree we already saw
 72 | 
 73 | 18
 74 | 00:01:31,390 --> 00:01:36,980
 75 | have the build and we begin by executing main so that will be the root of the tree
 76 | 
 77 | 19
 78 | 00:01:36,980 --> 00:01:42,170
 79 | And since the stack is supposed to have all of the currently running activations,
 80 | 
 81 | 20
 82 | 00:01:42,170 --> 00:01:49,170
 83 | the stack will have to have main on it. So it will begin with just the procedure main
 84 | 
 85 | 21
 86 | 00:01:49,500 --> 00:01:56,500
 87 | And now main calls g And so g becomes a child of main And over here on the stack,
 88 | 
 89 | 22
 90 | 00:01:57,250 --> 00:02:04,250
 91 | we would push g on to the stack And then G returns and what that means is that, that
 92 | 
 93 | 23
 94 | 00:02:07,050 --> 00:02:14,050
 95 | G is no longer running and so G will get popped off the stack and then, the, the
 96 | 
 97 | 24
 98 | 00:02:14,889 --> 00:02:20,319
 99 | main procedure calls F and so F will get pushed on to the stack And you can see
100 | 
101 | 25
102 | 00:02:20,319 --> 00:02:24,989
103 | here that after G finishes we can pop it off and we can push on that and we
104 | 
105 | 26
106 | 00:02:24,989 --> 00:02:29,879
107 | maintain the environment that we have a stack of the currently running
108 | 
109 | 27
110 | 00:02:29,879 --> 00:02:35,849
111 | activations. All right, then F is going to call G. I forgot to complete my tree here,
112 | 
113 | 28
114 | 00:02:35,849 --> 00:02:42,849
115 | So main calls f and then f calls g. All right, So now the stack at this point is
116 | 
117 | 29
118 | 00:02:43,670 --> 00:02:50,670
119 | main f and g. And once g finishes running, it will be Popped off of the stack because
120 | 
121 | 30
122 | 00:02:52,730 --> 00:02:58,859
123 | it is no longer executing. And then f will finish, and f will also get popped off the
124 | 
125 | 31
126 | 00:02:58,859 --> 00:03:04,019
127 | stack and finally main will finish and main will also be popped off the stack.
128 | 
129 | 32
130 | 00:03:04,019 --> 00:03:07,709
131 | And so that's the idea. So that is how we can use the stack. So essentially when a
132 | 
133 | 33
134 | 00:03:07,709 --> 00:03:11,870
135 | procedure is called we'll push an activation for that procedure on to the
136 | 
137 | 34
138 | 00:03:11,870 --> 00:03:16,840
139 | stack. And when the procedure returns, we will pop that activation off the stack.
140 | 
141 | 35
142 | 00:03:16,840 --> 00:03:23,840
143 | And because activation lifetimes are properly nested this will work out. So, to
144 | 
145 | 36
146 | 00:03:23,939 --> 00:03:29,599
147 | conclude our discussion of activations, let's return to the runtime organization
148 | 
149 | 37
150 | 00:03:29,599 --> 00:03:34,430
151 | As you may recall. We have a block of memory that is allocated to the program
152 | 
153 | 38
154 | 00:03:34,430 --> 00:03:39,569
155 | and the first portion of that block is occupied by the code for the program
156 | 
157 | 39
158 | 00:03:39,569 --> 00:03:44,919
159 | itself. And now in the rest of that memory that is allocated to the program, we are
160 | 
161 | 40
162 | 00:03:44,919 --> 00:03:48,180
163 | going to have to restore the data that the program needs to execute and one of the
164 | 
165 | 41
166 | 00:03:48,180 --> 00:03:52,799
167 | important structures that goes there is the stack of activations. So typically,
168 | 
169 | 42
170 | 00:03:52,799 --> 00:03:59,620
171 | this will start after the code area. And the stack would grow towards the other end
172 | 
173 | 43
174 | 00:03:59,620 --> 00:04:05,079
175 | of the memory space of the program and the stack will grow when procedures are called
176 | 
177 | 44
178 | 00:04:05,079 --> 00:04:09,879
179 | and it will shrink when procedures return. And as we'll see, there are other things
180 | 
181 | 45
182 | 00:04:09,879 --> 00:04:15,239
183 | that go in this data area that we are going to be discussing in the upcoming videos.
184 | 


--------------------------------------------------------------------------------
/CS143 text-srt/english/11-05-alignment.srt:
--------------------------------------------------------------------------------
  1 | 0
  2 | 00:00:04,270 --> 00:00:08,910
  3 | In this brief video we're going to talk about alignment. A very low level but very
  4 | 
  5 | 1
  6 | 00:00:08,910 --> 00:00:13,260
  7 | important detail of machine architecture that every compiler writer needs to be
  8 | 
  9 | 2
 10 | 00:00:13,260 --> 00:00:20,260
 11 | aware of. First, let's review a few properties of Contemporary machines.
 12 | 
 13 | 3
 14 | 00:00:23,020 --> 00:00:30,020
 15 | Currently, most modern machines are either 32 or 64 bit meaning you have the 32 or 64
 16 | 
 17 | 4
 18 | 00:00:30,320 --> 00:00:36,590
 19 | bits in a word and the word is actually subdivided into smaller units. We would
 20 | 
 21 | 5
 22 | 00:00:36,590 --> 00:00:42,050
 23 | say that there are eight bits in a bye and then four or eight byes in word depending
 24 | 
 25 | 6
 26 | 00:00:42,050 --> 00:00:48,579
 27 | whether it's a 32 or 64 bit machine. And other important property is that machines
 28 | 
 29 | 7
 30 | 00:00:48,579 --> 00:00:54,460
 31 | can be either byte or word addressable. Meaning that in the native language of the
 32 | 
 33 | 8
 34 | 00:00:54,460 --> 00:01:00,510
 35 | machine in machine code it may be possible to either name only entire words or it may
 36 | 
 37 | 9
 38 | 00:01:00,510 --> 00:01:07,510
 39 | be possible to reference memory at the granule area of individual bytes. They say
 40 | 
 41 | 10
 42 | 00:01:07,880 --> 00:01:14,729
 43 | that data is word aligned if it begins at a word boundary. So if we think about.
 44 | 
 45 | 11
 46 | 00:01:14,729 --> 00:01:21,389
 47 | Data in memory or the organization in the memory and is laid out into bytes. And
 48 | 
 49 | 12
 50 | 00:01:21,389 --> 00:01:28,389
 51 | let's say. That this is a 32-bit machines so that four bytes make a word and one
 52 | 
 53 | 13
 54 | 00:01:28,709 --> 00:01:34,289
 55 | word begins here and the next word begins here and if data is allocated on a word
 56 | 
 57 | 14
 58 | 00:01:34,289 --> 00:01:40,060
 59 | boundary, say, it needs more bytes then that would be a word a line a piece of
 60 | 
 61 | 15
 62 | 00:01:40,060 --> 00:01:44,240
 63 | data. If a piece of data begins in the middle of the word, so let's say for
 64 | 
 65 | 16
 66 | 00:01:44,240 --> 00:01:48,740
 67 | example that begins here, and we have some data that's allocated here, this data is
 68 | 
 69 | 17
 70 | 00:01:48,740 --> 00:01:55,169
 71 | not word aligned, doesn't begin on a word boundary And the important property or the
 72 | 
 73 | 18
 74 | 00:01:55,169 --> 00:02:00,989
 75 | important issue is that most machines have some alignment restrictions. So these
 76 | 
 77 | 19
 78 | 00:02:00,989 --> 00:02:07,219
 79 | restrictions come in one of two forms. So, on some machines, if the data is not
 80 | 
 81 | 20
 82 | 00:02:07,219 --> 00:02:11,770
 83 | properly aligned, meaning that you tried to reference data that isn't aligned the
 84 | 
 85 | 21
 86 | 00:02:11,770 --> 00:02:17,470
 87 | way the machines requires, then the machine may just fail to execute that
 88 | 
 89 | 22
 90 | 00:02:17,470 --> 00:02:23,640
 91 | instruction. Your program may hang or even the machine may hang and it's, but, the
 92 | 
 93 | 23
 94 | 00:02:23,640 --> 00:02:27,740
 95 | important thing is that program will not execute correctly. So there's a, it's
 96 | 
 97 | 24
 98 | 00:02:27,740 --> 00:02:34,740
 99 | incorrect to not have the data aligned properly. Now, there are other machines
100 | 
101 | 25
102 | 00:02:34,950 --> 00:02:39,010
103 | that well, actually al low you to put the data anywhere you like but at a
104 | 
105 | 26
106 | 00:02:39,010 --> 00:02:43,920
107 | significantly cause And maybe that accessing data that is aligned in word
108 | 
109 | 27
110 | 00:02:43,920 --> 00:02:50,220
111 | boundaries is cheaper than accessing that's on non-word boundaries And these
112 | 
113 | 28
114 | 00:02:50,220 --> 00:02:57,220
115 | performance penalties Are often dramatic so it can easily be ten times lower to
116 | 
117 | 29
118 | 00:02:59,460 --> 00:03:06,460
119 | access missile line data than to access data that has the alignment favored by
120 | 
121 | 30
122 | 00:03:07,070 --> 00:03:13,400
123 | that particular machine. So let's take a look at an example where data alignment
124 | 
125 | 31
126 | 00:03:13,400 --> 00:03:17,850
127 | issue tend to come up. One of the most common situations where we have to worry
128 | 
129 | 32
130 | 00:03:17,850 --> 00:03:23,320
131 | about the alignment is in the allocation of strings. So let's say we have this
132 | 
133 | 33
134 | 00:03:23,320 --> 00:03:29,540
135 | string, the string Hello and then we want to put it in memory. So let me draw our
136 | 
137 | 34
138 | 00:03:29,540 --> 00:03:36,540
139 | memory as a linear sequence of bytes so I'll mark out some bytes here. And let's
140 | 
141 | 35
142 | 00:03:40,500 --> 00:03:45,330
143 | assume this is a 32-bit machine so let me make the word boundaries a little bit
144 | 
145 | 36
146 | 00:03:45,330 --> 00:03:52,330
147 | heavier boundaries. So, one, two, three, four. Okay. So, there are the, the word
148 | 
149 | 37
150 | 00:03:54,330 --> 00:03:58,880
151 | boundaries And now let's say there were we are trying to have aligned data, a word
152 | 
153 | 38
154 | 00:03:58,880 --> 00:04:02,620
155 | aligned data and so allocate this string beginning in the word boundary. So, the
156 | 
157 | 39
158 | 00:04:02,620 --> 00:04:09,069
159 | each character will go on the first byte when e, then l, then l, then o. And now,
160 | 
161 | 40
162 | 00:04:09,069 --> 00:04:14,190
163 | we may have terminating null depending on how strings are implemented. And let's
164 | 
165 | 41
166 | 00:04:14,190 --> 00:04:20,799
167 | assume that we do. And this is fine placement of the strings extremely begins
168 | 
169 | 42
170 | 00:04:20,798 --> 00:04:27,180
171 | in the word boundary and. That assess by presumably any alignment restrictions of
172 | 
173 | 43
174 | 00:04:27,180 --> 00:04:34,180
175 | the machine and now the question is where does the next data item go? So we could
176 | 
177 | 44
178 | 00:04:34,240 --> 00:04:39,840
179 | begin the next data item right in the next available byte and that would be good if
180 | 
181 | 45
182 | 00:04:39,840 --> 00:04:43,729
183 | we are very concerned about not wasting memory. But, I noticed that, that data
184 | 
185 | 46
186 | 00:04:43,729 --> 00:04:48,370
187 | item will then be were aligned. We may either run into correctness or performance
188 | 
189 | 47
190 | 00:04:48,370 --> 00:04:54,060
191 | problems if the machine has restrictions on the alignment. So, the simple solution
192 | 
193 | 48
194 | 00:04:54,060 --> 00:04:58,800
195 | here is to simply skip to the next word boundary and allocate the next data item
196 | 
197 | 49
198 | 00:04:58,800 --> 00:05:05,800
199 | whenever it is on the next word beginning at the next word boundary. And what
200 | 
201 | 50
202 | 00:05:06,680 --> 00:05:13,199
203 | happens to this two bytes here, well these bytes are just junks. T hey're not used at
204 | 
205 | 51
206 | 00:05:13,199 --> 00:05:17,099
207 | all, they never reference by the program. It doesn't matter what they're value is
208 | 
209 | 52
210 | 00:05:17,099 --> 00:05:21,819
211 | because the program should never refer to them. It's just unused memory. And note
212 | 
213 | 53
214 | 00:05:21,819 --> 00:05:26,779
215 | that if we didn't have the terminating zero then there would be the terminating,
216 | 
217 | 54
218 | 00:05:26,779 --> 00:05:33,559
219 | no character then and then would be three unused bytes after the string. So to
220 | 
221 | 55
222 | 00:05:33,559 --> 00:05:37,419
223 | summarize this is the general strategy for dealing with alignment when you have
224 | 
225 | 56
226 | 00:05:37,419 --> 00:05:42,689
227 | alignment restrictions. Data begins on the boundaries, typically word boundaries that
228 | 
229 | 57
230 | 00:05:42,689 --> 00:05:49,469
231 | are required and if the particular data that you're allocating has a none integral
232 | 
233 | 58
234 | 00:05:49,469 --> 00:05:53,990
235 | length. Meaning that it doesn't end directly on the next required boundary and
236 | 
237 | 59
238 | 00:05:53,990 --> 00:05:58,699
239 | you just skip over whenever bytes are in between to get the data, the next data
240 | 
241 | 60
242 | 00:05:58,699 --> 00:06:00,569
243 | that's going to be allocated on the correct boundary.
244 | 


--------------------------------------------------------------------------------
/CS143 text-srt/english/12-02-B+Code+Generation+I.srt:
--------------------------------------------------------------------------------
  1 | 0
  2 | 00:00:01,079 --> 00:00:03,850
  3 | So next we're going to talk about code generation for if then else
  4 | 
  5 | 1
  6 | 00:00:03,850 --> 00:00:08,950
  7 | expressions And to do that we're going to need some control flow instructions. So
  8 | 
  9 | 2
 10 | 00:00:08,950 --> 00:00:13,780
 11 | we'll have, we'll need two in fact. So here's the branch equal instruction, and
 12 | 
 13 | 3
 14 | 00:00:13,780 --> 00:00:18,270
 15 | this jumps to a label if the contents of two registers are equal And then we'll
 16 | 
 17 | 4
 18 | 00:00:18,270 --> 00:00:22,150
 19 | also need an unconditional jump. So this just does an unconditional branch, not
 20 | 
 21 | 5
 22 | 00:00:22,150 --> 00:00:29,150
 23 | branch, an unconditional jump to a particular assembly instruction. So let's
 24 | 
 25 | 6
 26 | 00:00:30,640 --> 00:00:35,159
 27 | look at the code generation for the expression if E1 is equal to E2 then
 28 | 
 29 | 7
 30 | 00:00:35,159 --> 00:00:41,830
 31 | evaluate three otherwise evaluate four. So first we have to evaluate the predicate
 32 | 
 33 | 8
 34 | 00:00:41,830 --> 00:00:47,100
 35 | and in order to evaluate the predicate, we first have to evaluate E1. And by now this
 36 | 
 37 | 9
 38 | 00:00:47,100 --> 00:00:51,440
 39 | pattern for binary operation should be familiar. So we evaluate the first
 40 | 
 41 | 10
 42 | 00:00:51,440 --> 00:00:55,890
 43 | sub-expression we save the result on a stack, so we push it on to the stack. It
 44 | 
 45 | 11
 46 | 00:00:55,890 --> 00:00:59,650
 47 | takes two operations, one to save the result of the cumulate on the stack and
 48 | 
 49 | 12
 50 | 00:00:59,650 --> 00:01:06,650
 51 | the other cumulate stack later. Then we evaluate E2. Now we have evaluate d both
 52 | 
 53 | 13
 54 | 00:01:06,970 --> 00:01:12,280
 55 | of the arguments to the predicate. The result of E2 is in the accumulator and the
 56 | 
 57 | 14
 58 | 00:01:12,280 --> 00:01:16,930
 59 | result of E1 is at the top of the stack because again, the evaluation of E2 will
 60 | 
 61 | 15
 62 | 00:01:16,930 --> 00:01:22,680
 63 | preserve the stack. So now we load the value of E One back into a temporary
 64 | 
 65 | 16
 66 | 00:01:22,680 --> 00:01:27,930
 67 | register. And we pop the stack And then we can actually do the comparison. So now we
 68 | 
 69 | 17
 70 | 00:01:27,930 --> 00:01:32,390
 71 | do a branch equal. So if the value of E One is equal. Sorry this is actually the
 72 | 
 73 | 18
 74 | 00:01:32,390 --> 00:01:35,780
 75 | value of E Two and E Zero and if that's equal to the value of E One. Then we
 76 | 
 77 | 19
 78 | 00:01:35,780 --> 00:01:42,270
 79 | branch to the true branch. Otherwise we're going to fall through if there not equal.
 80 | 
 81 | 20
 82 | 00:01:42,270 --> 00:01:47,520
 83 | Okay And so we'll call that the false branch And what are we going to do if we
 84 | 
 85 | 21
 86 | 00:01:47,520 --> 00:01:51,409
 87 | fall through, if this test fails, well then we want to evaluate E4. And that will
 88 | 
 89 | 22
 90 | 00:01:51,409 --> 00:01:55,880
 91 | leave the value of E4 in the accumulator and that will be the value of the entire
 92 | 
 93 | 23
 94 | 00:01:55,880 --> 00:02:00,460
 95 | if then else in the case where the predicate is false. So when we're done,
 96 | 
 97 | 24
 98 | 00:02:00,460 --> 00:02:05,229
 99 | we're going to branch now to some code that we'll just clean up and end the if
100 | 
101 | 25
102 | 00:02:05,229 --> 00:02:10,200
103 | statement. We'll see what that does in a moment. Otherwise, we still need to
104 | 
105 | 26
106 | 00:02:10,199 --> 00:02:13,450
107 | implement the true branch, so we'll stick the label for the true branch here And
108 | 
109 | 27
110 | 00:02:13,450 --> 00:02:19,590
111 | what do we do on the true branch? We just evaluate E3. Okay And then the and if,
112 | 
113 | 28
114 | 00:02:19,590 --> 00:02:25,410
115 | Well, actually, there is no cleanup to do Because, E3 and E4 both preserve the
116 | 
117 | 29
118 | 00:02:25,410 --> 00:02:28,440
119 | stack, and they leave the result of their expressions of the accumulator. So we
120 | 
121 | 30
122 | 00:02:28,440 --> 00:02:34,190
123 | reach and if, from E3 if we executed the true branch And then, in which case, the
124 | 
125 | 31
126 | 00:02:34,190 --> 00:02:39,310
127 | value in the accumulator is the value of E3 And we reach and if, through this
128 | 
129 | 32
130 | 00:02:39,310 --> 00:02:44,090
131 | branch if we executed the false branch And then the value in the accumulator is the
132 | 
133 | 33
134 | 00:02:44,090 --> 00:02:48,680
135 | value of E4 And so this correctly implements an if then else expression.
136 | 


--------------------------------------------------------------------------------
/CS143 text-srt/english/12-03-B+Code+Generation+II.srt:
--------------------------------------------------------------------------------
  1 | 0
  2 | 00:00:00,760 --> 00:00:03,840
  3 | So to summarize the main points one very important thing is
  4 | 
  5 | 1
  6 | 00:00:03,840 --> 00:00:08,420
  7 | that the activation record has to be designed together with the code generation.
  8 | 
  9 | 2
 10 | 00:00:08,420 --> 00:00:12,300
 11 | So you have to do these things at the same time. You can't just design the
 12 | 
 13 | 3
 14 | 00:00:12,300 --> 00:00:15,400
 15 | activation record without thinking about what code you're going to generate And
 16 | 
 17 | 4
 18 | 00:00:15,400 --> 00:00:19,270
 19 | you can't just think about generating code without making some decisions about where
 20 | 
 21 | 5
 22 | 00:00:19,270 --> 00:00:23,300
 23 | the data is going to be lived. So the code and the data it manipulates, have
 24 | 
 25 | 6
 26 | 00:00:23,300 --> 00:00:29,460
 27 | to be designed simultaneously. Code generation can be done by a recursive traversal
 28 | 
 29 | 7
 30 | 00:00:29,460 --> 00:00:34,289
 31 | of the abstract syntax street, so just like type checking. Cogeneration can
 32 | 
 33 | 8
 34 | 00:00:34,289 --> 00:00:39,440
 35 | be expressed as a r ecursive tree-walk And that's a very handy way to think about
 36 | 
 37 | 9
 38 | 00:00:39,440 --> 00:00:43,829
 39 | cogeneration because it allows you to think about one case at a time without having
 40 | 
 41 | 10
 42 | 00:00:43,829 --> 00:00:48,979
 43 | to get mixed up thinking about all the different constructs at one time. >> And
 44 | 
 45 | 11
 46 | 00:00:48,979 --> 00:00:55,440
 47 | finally I recommend that you use a stack machine for your compiler. So if you're
 48 | 
 49 | 12
 50 | 00:00:55,440 --> 00:00:59,679
 51 | implementing a course project, the stack machine is the simplest discipline and
 52 | 
 53 | 13
 54 | 00:00:59,679 --> 00:01:03,959
 55 | it gives you a nice framework for think, for breaking up the project into manageable
 56 | 
 57 | 14
 58 | 00:01:03,959 --> 00:01:07,850
 59 | pieces. And because of that simplicity, I think it's a really good way
 60 | 
 61 | 15
 62 | 00:01:07,850 --> 00:01:14,850
 63 | to learn about writing compilers. Now, it is important to realize that production compilers
 64 | 
 65 | 16
 66 | 00:01:15,120 --> 00:01:20,230
 67 | do, do some different things. They're not quite as simple as, the stack
 68 | 
 69 | 17
 70 | 00:01:20,230 --> 00:01:26,320
 71 | machine cogeneration that we have outlined in the last few videos. So, the main differences,
 72 | 
 73 | 18
 74 | 00:01:26,320 --> 00:01:30,900
 75 | or, or, the main difference, is that the big emphasis in a production compiler
 76 | 
 77 | 19
 78 | 00:01:30,900 --> 00:01:35,900
 79 | is on keeping values and registers. It's much more efficient to do
 80 | 
 81 | 20
 82 | 00:01:35,900 --> 00:01:41,370
 83 | operations out of registers than to be saving and loading values from the stack And
 84 | 
 85 | 21
 86 | 00:01:41,370 --> 00:01:45,210
 87 | so, especially the values in the current activation record or current stack
 88 | 
 89 | 22
 90 | 00:01:45,210 --> 00:01:50,820
 91 | frame. It, in production compiler we try to keep those in registers instead of on the
 92 | 
 93 | 23
 94 | 00:01:50,820 --> 00:01:57,460
 95 | stack And also, typically a pressure compiler, to the extent that it has to use
 96 | 
 97 | 24
 98 | 00:01:57,460 --> 00:02:01,390
 99 | temporaries, in the activation record. These would be resolved, laid out directly
100 | 
101 | 25
102 | 00:02:01,390 --> 00:02:04,590
103 | in the activation record, not pushed and popped from the stack. That means they'd be
104 | 
105 | 26
106 | 00:02:04,590 --> 00:02:09,869
107 | assigned, pre-assigned locations in the activation record, just like, the function
108 | 
109 | 27
110 | 00:02:09,869 --> 00:02:13,920
111 | arguments in the simple language we looked at are assigned fixed positions in the activation
112 | 
113 | 28
114 | 00:02:13,920 --> 00:02:17,290
115 | record. So those temporary values would also be assigned fixed positions,
116 | 
117 | 29
118 | 00:02:17,290 --> 00:02:20,709
119 | so you could save the trouble of manipulating the stack pointer.
120 | 


--------------------------------------------------------------------------------
/CS143 text-srt/english/12-05-B+Temporaries.srt:
--------------------------------------------------------------------------------
  1 | 0
  2 | 00:00:01,380 --> 00:00:05,310
  3 | Now, that we know how many temporaries or intermediate values we need
  4 | 
  5 | 1
  6 | 00:00:05,310 --> 00:00:09,970
  7 | to evaluate a function and we also know where those intermediate value is going to
  8 | 
  9 | 2
 10 | 00:00:09,970 --> 00:00:15,130
 11 | be stored in activation record. The last thing we need to know in order to code generation
 12 | 
 13 | 3
 14 | 00:00:15,130 --> 00:00:21,499
 15 | is how many temporaries are in use at each point in the program. Change colors
 16 | 
 17 | 4
 18 | 00:00:21,499 --> 00:00:25,050
 19 | here and so the way we're going to do that is we're going to add a new argument
 20 | 
 21 | 5
 22 | 00:00:25,050 --> 00:00:29,949
 23 | to co-generation which is the position of the next available temporary.
 24 | 
 25 | 6
 26 | 00:00:29,949 --> 00:00:34,970
 27 | So as temporaries gets used up, this argument, the co-generation will change while
 28 | 
 29 | 7
 30 | 00:00:34,970 --> 00:00:39,370
 31 | other expressions to save their values and save places without stepping on
 32 | 
 33 | 8
 34 | 00:00:39,370 --> 00:00:44,460
 35 | temporaries that are already having saved by all other expressions. And as you'll see
 36 | 
 37 | 9
 38 | 00:00:44,460 --> 00:00:49,940
 39 | in a, in a moment here when we do an example, the temporary area of the activation
 40 | 
 41 | 10
 42 | 00:00:49,940 --> 00:00:52,870
 43 | r ecord is going to be used like a small fixed size stack. Essentially,
 44 | 
 45 | 11
 46 | 00:00:52,870 --> 00:00:57,030
 47 | we're going to have the same stack discipline that we had before only all
 48 | 
 49 | 12
 50 | 00:00:57,030 --> 00:01:01,239
 51 | the computation on the stack pointer, all the discussion, all the computation of
 52 | 
 53 | 13
 54 | 00:01:01,239 --> 00:01:06,049
 55 | what all sets to use has already been done by the compiler. So, what we used to do by
 56 | 
 57 | 14
 58 | 00:01:06,049 --> 00:01:11,909
 59 | pushing and popping element from the stack in the generated code allow that computation
 60 | 
 61 | 15
 62 | 00:01:11,909 --> 00:01:15,259
 63 | has been moved into the compiler and all that happens now is a bunch
 64 | 
 65 | 16
 66 | 00:01:15,259 --> 00:01:21,649
 67 | of stores and load. To fix off that from the frame pointer So let's take a look
 68 | 
 69 | 17
 70 | 00:01:21,649 --> 00:01:27,359
 71 | at how this works. Here's the code that we had for e1 + e2 under the old scheme
 72 | 
 73 | 18
 74 | 00:01:27,359 --> 00:01:32,060
 75 | where we didn't have a separate area in the activation records for temporaries.
 76 | 
 77 | 19
 78 | 00:01:32,060 --> 00:01:35,479
 79 | So we would generate a code for e1, and then we would save the results
 80 | 
 81 | 20
 82 | 00:01:35,479 --> 00:01:39,600
 83 | of e1 on the stack, and that would be done by saving the value of the accumulator under
 84 | 
 85 | 21
 86 | 00:01:39,600 --> 00:01:44,249
 87 | the stack and then we would have to adjust the stack pointer And then after we
 88 | 
 89 | 22
 90 | 00:01:44,249 --> 00:01:51,139
 91 | had evaluated the two, then we would load the results of e1 back into a temporary register,
 92 | 
 93 | 23
 94 | 00:01:51,139 --> 00:01:55,850
 95 | we could do the add And then we could pop the value off of the stack, the
 96 | 
 97 | 24
 98 | 00:01:55,850 --> 00:02:01,149
 99 | intermediate value off of the stack Down to the new scheme. Co-generations going to
100 | 
101 | 25
102 | 00:02:01,149 --> 00:02:06,039
103 | take a second argument saying what is the position of the next available temporary so
104 | 
105 | 26
106 | 00:02:06,039 --> 00:02:10,940
107 | what is the position of the next unused temporary inside of the activation record
108 | 
109 | 27
110 | 00:02:10,940 --> 00:02:17,410
111 | and so now we generate code for e1 and we pass along the argument okay because e1 may
112 | 
113 | 28
114 | 00:02:17,410 --> 00:02:23,730
115 | itself have some temporaries that it needs to store And, and then after you [inaudible]
116 | 
117 | 29
118 | 00:02:23,730 --> 00:02:29,440
119 | to evaluating, now we just do a direct store into, into the activation record
120 | 
121 | 30
122 | 00:02:29,440 --> 00:02:33,660
123 | at all set empty from the frame pointer And so now as we have to do in store,
124 | 
125 | 31
126 | 00:02:33,660 --> 00:02:38,120
127 | we have to save e1 in the activation record so we have it for later
128 | 
129 | 32
130 | 00:02:38,120 --> 00:02:42,500
131 | on but we have to do any manipulation of the stacks. So, we could place two instructions
132 | 
133 | 33
134 | 00:02:42,500 --> 00:02:48,050
135 | here by one And then we generate code for e2 but now. We just save
136 | 
137 | 34
138 | 00:02:48,050 --> 00:02:54,670
139 | the temporary value at position at, at all set empty from the frame pointer so the next
140 | 
141 | 35
142 | 00:02:54,670 --> 00:03:01,060
143 | available temporary would be of address empty o r offset, excuse me, nt+4
144 | 
145 | 36
146 | 00:03:01,060 --> 00:03:07,910
147 | And then after each was evaluating, now we have to load the value of e1 back into a temporary
148 | 
149 | 37
150 | 00:03:07,910 --> 00:03:12,940
151 | and again that was all set NT from the frame pointer of the current activation
152 | 
153 | 38
154 | 00:03:12,940 --> 00:03:17,870
155 | record and then we can do the add and once again we saved the manipulation
156 | 
157 | 39
158 | 00:03:17,870 --> 00:03:21,440
159 | of the stack pointers. So this code sequence here is two instructions
160 | 
161 | 40
162 | 00:03:21,440 --> 00:03:25,160
163 | shorter than the one we had before and this actually substantially more
164 | 
165 | 41
166 | 00:03:25,160 --> 00:03:25,410
167 | efficient.
168 | 


--------------------------------------------------------------------------------
/CS143 text-srt/english/13-04-B+Cool+Semantics+II.srt:
--------------------------------------------------------------------------------
  1 | 0
  2 | 00:00:00,890 --> 00:00:04,509
  3 | To summarize our discussion of dynamic dispatch, the body of the method is
  4 | 
  5 | 1
  6 | 00:00:04,509 --> 00:00:11,050
  7 | invoked with, within environment e. That has definitions for the formal arguments
  8 | 
  9 | 2
 10 | 00:00:11,050 --> 00:00:17,730
 11 | and the attributes of the self object and a store that's just like color store
 12 | 
 13 | 3
 14 | 00:00:17,730 --> 00:00:22,210
 15 | except that it also has the actual arguments bound to the locations allocated
 16 | 
 17 | 4
 18 | 00:00:22,210 --> 00:00:27,429
 19 | for the formal parameters. Notice in the rules that the notion of a frame or
 20 | 
 21 | 5
 22 | 00:00:27,429 --> 00:00:33,160
 23 | activation records is implicit. We don't actually build a data structure. That
 24 | 
 25 | 6
 26 | 00:00:33,160 --> 00:00:38,140
 27 | contains you know, all of the values all of the arguments and the, the return
 28 | 
 29 | 7
 30 | 00:00:38,140 --> 00:00:42,809
 31 | address and all that stuff together. That information is not gathered together in
 32 | 
 33 | 8
 34 | 00:00:42,809 --> 00:00:47,120
 35 | one place, it's a little more abstract. We don't actually have to say you know,
 36 | 
 37 | 9
 38 | 00:00:47,120 --> 00:00:50,550
 39 | whether things are allocated on the stack or on the heat and that's a good feature.
 40 | 
 41 | 10
 42 | 00:00:50,550 --> 00:00:54,260
 43 | That allows us to have, potentially have a range of implementations like all
 44 | 
 45 | 11
 46 | 00:00:54,260 --> 00:00:59,789
 47 | implement the semantics correctly. Now, we didn't do the semantics on the semantics
 48 | 
 49 | 12
 50 | 00:00:59,789 --> 00:01:06,290
 51 | dispatch but it's extremely similar. The only difference is in how the class that
 52 | 
 53 | 13
 54 | 00:01:06,290 --> 00:01:10,140
 55 | we are going to be dispatching to is looked up so in the stack dispatch you
 56 | 
 57 | 14
 58 | 00:01:10,140 --> 00:01:13,770
 59 | might be able to you know, you can nam e the class that you want to dispatch to
 60 | 
 61 | 15
 62 | 00:01:13,770 --> 00:01:18,280
 63 | this one extra line to the side where the class is being dispatched to in the formal
 64 | 
 65 | 16
 66 | 00:01:18,280 --> 00:01:24,509
 67 | rule and you can look in the manual to see how that works. So it's worth pointing out
 68 | 
 69 | 17
 70 | 00:01:24,509 --> 00:01:29,790
 71 | that while the operation of rules are very detailed, they intentionally omit some
 72 | 
 73 | 18
 74 | 00:01:29,790 --> 00:01:34,049
 75 | cases that you might think they should cover so let's take a look at our dispatch
 76 | 
 77 | 19
 78 | 00:01:34,049 --> 00:01:41,049
 79 | example again. So here notice that we look up the class of v0. So v0 is an object and
 80 | 
 81 | 20
 82 | 00:01:44,469 --> 00:01:51,469
 83 | we checked what is class tag is and then we look up in that class, the name of the
 84 | 
 85 | 21
 86 | 00:01:51,869 --> 00:01:57,619
 87 | method that we're dispatching to and we get out a definition of the method or not
 88 | 
 89 | 22
 90 | 00:01:57,619 --> 00:02:02,829
 91 | the definition of the method that we can write the rest of the rule. Now what would
 92 | 
 93 | 23
 94 | 00:02:02,829 --> 00:02:09,729
 95 | happen If there was no such method f in the class x, I mean this, this rule just
 96 | 
 97 | 24
 98 | 00:02:09,729 --> 00:02:15,750
 99 | assumes that method is in fact to define the class x, And the rule doesn't say
100 | 
101 | 25
102 | 00:02:15,750 --> 00:02:22,010
103 | anything about what to do if it turns out that this class x doesn't have any method
104 | 
105 | 26
106 | 00:02:22,010 --> 00:02:29,010
107 | f? Well, that actually can't happen. So, type-checking has already guaranteed That
108 | 
109 | 27
110 | 00:02:29,019 --> 00:02:33,430
111 | when we go to look up method as in class x it will exist. That was one of the points
112 | 
113 | 28
114 | 00:02:33,430 --> 00:02:39,040
115 | with the type checking rules was that no dynamic dispatch could ever dispatch to a
116 | 
117 | 29
118 | 00:02:39,040 --> 00:02:44,349
119 | method that wasn't defined. And so the fact that the time checking is already
120 | 
121 | 30
122 | 00:02:44,349 --> 00:02:49,500
123 | been done, it will allows us to meet some cases. So there's some checks that we
124 | 
125 | 31
126 | 00:02:49,500 --> 00:02:54,269
127 | don't have to do because we know that, that system has already effectively done
128 | 
129 | 32
130 | 00:02:54,269 --> 00:02:58,190
131 | that And the rules would only be more complicated if we didn't have type
132 | 
133 | 33
134 | 00:02:58,190 --> 00:03:02,129
135 | checking and we needed to actually say what would happen you know, all of the
136 | 
137 | 34
138 | 00:03:02,129 --> 00:03:09,129
139 | cases where type checking will work where things were not typed correct. Now there
140 | 
141 | 35
142 | 00:03:09,590 --> 00:03:13,860
143 | are some run time errors that the type checker doesn't prevent however and in
144 | 
145 | 36
146 | 00:03:13,860 --> 00:03:20,860
147 | cool there are four. One is to dispatch the void. Divisions by zero you can have a
148 | 
149 | 37
150 | 00:03:21,299 --> 00:03:24,890
151 | sub-screen in that excess out of range or you could run out of memory. You could try
152 | 
153 | 38
154 | 00:03:24,890 --> 00:03:29,379
155 | allocating new objects that do not have enough space for that. And in such cases,
156 | 
157 | 39
158 | 00:03:29,379 --> 00:03:33,629
159 | the execution has to aboard gracefully an d that means with an error message and not
160 | 
161 | 40
162 | 00:03:33,629 --> 00:03:39,400
163 | just with a segmentation fault or some other kind of hard crash and in the manual
164 | 
165 | 41
166 | 00:03:39,400 --> 00:03:45,239
167 | there some guidelines as to what a correct co-implementation should do in this four
168 | 
169 | 42
170 | 00:03:45,239 --> 00:03:52,239
171 | situations. To summarize the material in the last couple of videos the operational
172 | 
173 | 43
174 | 00:03:52,280 --> 00:03:58,049
175 | semantic rules are really very precised and detail. If you understand them then
176 | 
177 | 44
178 | 00:03:58,049 --> 00:04:03,989
179 | you really understand how to implement a correct cool compiler. So the rules are
180 | 
181 | 45
182 | 00:04:03,989 --> 00:04:09,700
183 | complete enough and give you enough detail that it really can't go wrong if you just
184 | 
185 | 46
186 | 00:04:09,700 --> 00:04:14,019
187 | implement what the rules tell you to do. So you need to read the rules very
188 | 
189 | 47
190 | 00:04:14,019 --> 00:04:18,630
191 | carefully And I'll emphasize that because there's actually quite a lot going on in
192 | 
193 | 48
194 | 00:04:18,630 --> 00:04:22,630
195 | the rules. They're written in a certain way and you know, to, to achieve a certain
196 | 
197 | 49
198 | 00:04:22,630 --> 00:04:27,070
199 | effect and I pointed out a couple of subtle things in the rules and so you
200 | 
201 | 50
202 | 00:04:27,070 --> 00:04:31,080
203 | know, you really have to actually study the rules in order to internalize what
204 | 
205 | 51
206 | 00:04:31,080 --> 00:04:37,000
207 | they mean and be able to. Implement them correctly. It's also a great way
208 | 
209 | 52
210 | 00:04:37,000 --> 00:04:40,440
211 | understanding these rules and details was actually a great way to learn quite a bit
212 | 
213 | 53
214 | 00:04:40,440 --> 00:04:45,010
215 | of the, the kind of formal thinking that goes in to the design of programming
216 | 
217 | 54
218 | 00:04:45,010 --> 00:04:49,670
219 | languages and what it means for a programming language to have a semantics
220 | 
221 | 55
222 | 00:04:49,670 --> 00:04:55,250
223 | and for implementation of something to be correct. Now having settled that, I should
224 | 
225 | 56
226 | 00:04:55,250 --> 00:04:59,480
227 | say that most languages do not have a well specified operational semantics. There are
228 | 
229 | 57
230 | 00:04:59,480 --> 00:05:03,970
231 | some there are some substantial languages and fairly realistic languages that do
232 | 
233 | 58
234 | 00:05:03,970 --> 00:05:08,780
235 | have a formal semantics but most of the language is that you're familiar with do
236 | 
237 | 59
238 | 00:05:08,780 --> 00:05:13,510
239 | not. Finally just a comment you know when portability is important, when you really
240 | 
241 | 60
242 | 00:05:13,510 --> 00:05:18,890
243 | want software that you write behave the exactly the same in different environments
244 | 
245 | 61
246 | 00:05:18,890 --> 00:05:24,740
247 | so you know if I take the same program and I move it to a different machine or a
248 | 
249 | 62
250 | 00:05:24,740 --> 00:05:29,590
251 | different operating system and I still want to kind of guarantee that this offer
252 | 
253 | 63
254 | 00:05:29,590 --> 00:05:35,970
255 | will behave as it as it you know the same on both machine or the old environment and
256 | 
257 | 64
258 | 00:05:35,970 --> 00:05:40,370
259 | the new environment then I really need some independent defin ition of what it
260 | 
261 | 65
262 | 00:05:40,370 --> 00:05:44,550
263 | means what the behavior of these programs should be. And that's where a formal
264 | 
265 | 66
266 | 00:05:44,550 --> 00:05:47,180
267 | semantics becomes a really critical.
268 | 


--------------------------------------------------------------------------------
/CS143 text-srt/english/14-04-peephole-optimization.srt:
--------------------------------------------------------------------------------
  1 | 0
  2 | 00:00:03,929 --> 00:00:07,240
  3 | In this short video, I'm going to say a few words about a variation on local
  4 | 
  5 | 1
  6 | 00:00:07,240 --> 00:00:10,690
  7 | optimization that applies directly to assembly code called Peephole
  8 | 
  9 | 2
 10 | 00:00:10,690 --> 00:00:17,690
 11 | Optimization. The basic idea here is that instead of optimizing on intermediate code
 12 | 
 13 | 3
 14 | 00:00:19,019 --> 00:00:24,039
 15 | we could do our optimizations directly on assembly code And people optimization is
 16 | 
 17 | 4
 18 | 00:00:24,039 --> 00:00:29,320
 19 | one such technique. The peephole is, stands for a short sequence of usually
 20 | 
 21 | 5
 22 | 00:00:29,320 --> 00:00:33,570
 23 | continuous instructions. So, the idea is that we have our program. We can see, we
 24 | 
 25 | 6
 26 | 00:00:33,570 --> 00:00:38,570
 27 | can think of it as a long sequence of instructions and our peephole is some
 28 | 
 29 | 7
 30 | 00:00:38,570 --> 00:00:43,050
 31 | window onto this program. So, if we have a peephole of size four, we can think of
 32 | 
 33 | 8
 34 | 00:00:43,050 --> 00:00:47,760
 35 | ourselves as staring through a small hole at the program and all we can see is a
 36 | 
 37 | 9
 38 | 00:00:47,760 --> 00:00:51,390
 39 | short sequence of four instructions and then we can optimize that sequence. So,
 40 | 
 41 | 10
 42 | 00:00:51,390 --> 00:00:56,370
 43 | then we can slide the peephole around and optimize different parts of the program
 44 | 
 45 | 11
 46 | 00:00:56,370 --> 00:01:00,309
 47 | And the, what the, what the optimizer will do is it will, you know, stare at this
 48 | 
 49 | 12
 50 | 00:01:00,309 --> 00:01:04,399
 51 | short sequence of instructions and if it knows a better sequence it will replace
 52 | 
 53 | 13
 54 | 00:01:04,399 --> 00:01:08,720
 55 | that sequence by the other one and then it will repeat this as I said. You know,
 56 | 
 57 | 14
 58 | 00:01:08,720 --> 00:01:13,619
 59 | applying other transformations to, to possibly the same or other parts of the
 60 | 
 61 | 15
 62 | 00:01:13,619 --> 00:01:19,080
 63 | assembly program. So, people optimizations are generally written as replacement
 64 | 
 65 | 16
 66 | 00:01:19,080 --> 00:01:22,630
 67 | rules. So, the we'll have the window of instructions on the left. So, it'll be
 68 | 
 69 | 17
 70 | 00:01:22,630 --> 00:01:25,920
 71 | some sequence of instructions and we'll know some other sequence of instructions
 72 | 
 73 | 18
 74 | 00:01:25,920 --> 00:01:29,799
 75 | that we would prefer on the right. So, if we see this instruction sequence on the
 76 | 
 77 | 19
 78 | 00:01:29,799 --> 00:01:35,049
 79 | left, then we'll replace by the one on the right-hand side. So, for example, if I
 80 | 
 81 | 20
 82 | 00:01:35,049 --> 00:01:41,880
 83 | have a move from register b to register a and then I move back from register a to
 84 | 
 85 | 21
 86 | 00:01:41,880 --> 00:01:47,000
 87 | register b well, that's the second move is useless, can, can just be deleted as a way
 88 | 
 89 | 22
 90 | 00:01:47,000 --> 00:01:52,159
 91 | to replace this two instruction sequence by a one instruction, instruction
 92 | 
 93 | 23
 94 | 00:01:52,159 --> 00:01:57,930
 95 | sequence. And this will work provided that there's no possible jump target here. So
 96 | 
 97 | 24
 98 | 00:01:57,930 --> 00:02:01,979
 99 | if, if there's no possibility that the code would ever jump to this instruction
100 | 
101 | 25
102 | 00:02:01,979 --> 00:02:08,450
103 | then that instruction can be removed. Another example, If I add i to the
104 | 
105 | 26
106 | 00:02:08,449 --> 00:02:13,370
107 | register a, and then I subsequently add j to the register a, I can do a constant
108 | 
109 | 27
110 | 00:02:13,370 --> 00:02:17,689
111 | folding optimization here, and combine those two add two additions into one
112 | 
113 | 28
114 | 00:02:17,689 --> 00:02:24,689
115 | addition where I add the sum of i = j to the register A. So, many but not quite all
116 | 
117 | 29
118 | 00:02:25,370 --> 00:02:29,439
119 | of the basic block optimizations that we've discussed in the last video, can be
120 | 
121 | 30
122 | 00:02:29,439 --> 00:02:36,439
123 | cast also as peephole optimizations. So, for example if we are adding zero to a
124 | 
125 | 31
126 | 00:02:37,579 --> 00:02:41,099
127 | register and we're storing it in another register, well, that can be replaced by a
128 | 
129 | 32
130 | 00:02:41,099 --> 00:02:46,879
131 | register move. If we're moving a value from the same register to itself so this
132 | 
133 | 33
134 | 00:02:46,879 --> 00:02:50,230
135 | is like a self-assignment, well, that instruction can just be deleted, replaced
136 | 
137 | 34
138 | 00:02:50,230 --> 00:02:55,019
139 | by the empty sequence of instructions. And together for those two instructions would
140 | 
141 | 35
142 | 00:02:55,019 --> 00:03:00,639
143 | be those two optimizations, excuse me, would be able to eliminate adding zero to
144 | 
145 | 36
146 | 00:03:00,639 --> 00:03:07,569
147 | a register. So, first this would get translated into a move from a to a. And
148 | 
149 | 37
150 | 00:03:07,569 --> 00:03:11,650
151 | then the move from a to a would get deleted. And as this little example
152 | 
153 | 38
154 | 00:03:11,650 --> 00:03:15,309
155 | illustrates just like with local optimizations, people optimizations have
156 | 
157 | 39
158 | 00:03:15,309 --> 00:03:22,309
159 | to be applied repeatedly to get the maximum effect. I hope this simple
160 | 
161 | 40
162 | 00:03:22,620 --> 00:03:27,689
163 | discussion has illustrated for you that many optimizations can be applied directly
164 | 
165 | 41
166 | 00:03:27,689 --> 00:03:32,219
167 | to assembly code and that there's really nothing magic about optimizing
168 | 
169 | 42
170 | 00:03:32,219 --> 00:03:36,189
171 | intermediate code. So, if you have a program written in any language, source
172 | 
173 | 43
174 | 00:03:36,189 --> 00:03:40,430
175 | language, intermediate language, assembly language. It makes sense to talk about
176 | 
177 | 44
178 | 00:03:40,430 --> 00:03:45,269
179 | doing transformations of programs written in that language to improve the behavior
180 | 
181 | 45
182 | 00:03:45,269 --> 00:03:50,359
183 | of the program. And it's also a good time here to mention that program optimization
184 | 
185 | 46
186 | 00:03:50,359 --> 00:03:56,719
187 | is really a terrible term. The compilers do not produce optimal code and it's
188 | 
189 | 47
190 | 00:03:56,719 --> 00:04:03,719
191 | purely an accident if a compiler were to somehow generate the best possible code
192 | 
193 | 48
194 | 00:04:03,939 --> 00:04:08,680
195 | for a given program. Really, what compilers do is they have a bunch of
196 | 
197 | 49
198 | 00:04:08,680 --> 00:04:11,939
199 | transformations that they know will improve the behavior of the program. And
200 | 
201 | 50
202 | 00:04:11,939 --> 00:04:16,269
203 | they'll just improve it as much as they ca N. So, really what program optimization is
204 | 
205 | 51
206 | 00:04:16,269 --> 00:04:20,780
207 | all about is program improvement. We're trying to make the program better but
208 | 
209 | 52
210 | 00:04:20,779 --> 00:04:26,360
211 | there's no guarantee that we will reach the best possible code for a given program.
212 | 


--------------------------------------------------------------------------------
/CS143 text-srt/english/15-05-B+Liveness+Analysis.srt:
--------------------------------------------------------------------------------
  1 | 0
  2 | 00:00:01,670 --> 00:00:05,600
  3 | To wrap up and summarize our discussion of the global analysis of control flow graphs,
  4 | 
  5 | 1
  6 | 00:00:05,600 --> 00:00:10,050
  7 | we've talked about two kinds of analysis in the past several videos. Constant
  8 | 
  9 | 2
 10 | 00:00:10,050 --> 00:00:15,190
 11 | propagation is what is called a forwards analysis Because information is pushed
 12 | 
 13 | 3
 14 | 00:00:15,190 --> 00:00:19,910
 15 | from the inputs to the outputs. So if you think about a control flow graph. What
 16 | 
 17 | 4
 18 | 00:00:19,910 --> 00:00:23,440
 19 | happens in control flow analysis is that information flows in this direction.
 20 | 
 21 | 5
 22 | 00:00:23,440 --> 00:00:27,710
 23 | It flows in the same direction as computation. If I have a constant up here
 24 | 
 25 | 6
 26 | 00:00:27,710 --> 00:00:32,780
 27 | x is assigned constant down here, and x is used later on and that constant will flow
 28 | 
 29 | 7
 30 | 00:00:32,780 --> 00:00:38,019
 31 | forward to the uses. Okay So information flows in the same direction as computation.
 32 | 
 33 | 8
 34 | 00:00:38,019 --> 00:00:42,569
 35 | Liveness on the other hand is a backwards analysis. Information is pushed
 36 | 
 37 | 9
 38 | 00:00:42,569 --> 00:00:48,589
 39 | from outputs back towards inputs. So here in this example and let me change
 40 | 
 41 | 10
 42 | 00:00:48,589 --> 00:00:55,589
 43 | colors. Here we see that x is live before the statement. And that liveness gets propagated
 44 | 
 45 | 11
 46 | 00:00:55,749 --> 00:00:59,709
 47 | in the other direction. It gets propagated against the control, against the
 48 | 
 49 | 12
 50 | 00:00:59,709 --> 00:01:06,709
 51 | flow, of execution, backwards towards the beginning of the program.
 52 | 
 53 | 13
 54 | 00:01:07,109 --> 00:01:11,789
 55 | So they're many other kinds of global flow analysis in the literature. The
 56 | 
 57 | 14
 58 | 00:01:11,789 --> 00:01:15,579
 59 | constant propagation analysis and the liveness analysis are two of the most important.
 60 | 
 61 | 15
 62 | 00:01:15,579 --> 00:01:18,049
 63 | There is a number of others that are also very important and many, many
 64 | 
 65 | 16
 66 | 00:01:18,049 --> 00:01:22,469
 67 | more that people have investigated. Almost all these analyses can be classified
 68 | 
 69 | 17
 70 | 00:01:22,469 --> 00:01:25,950
 71 | as either forward or backward. There are some analyses and some important
 72 | 
 73 | 18
 74 | 00:01:25,950 --> 00:01:30,380
 75 | ones that are neither forward nor backward. That information is basically pushed
 76 | 
 77 | 19
 78 | 00:01:30,380 --> 00:01:34,469
 79 | in both directions. And the other thing is that the, almost all the analyses
 80 | 
 81 | 20
 82 | 00:01:34,469 --> 00:01:38,729
 83 | in the literature that do global flow analysis anyway also follow this methodology
 84 | 
 85 | 21
 86 | 00:01:38,729 --> 00:01:43,619
 87 | of local Rules that relay information between adjacent program points.
 88 | 
 89 | 22
 90 | 00:01:43,619 --> 00:01:47,780
 91 | So it, it's the local rules part that's important. So we break down the complicate
 92 | 
 93 | 23
 94 | 00:01:47,780 --> 00:01:52,219
 95 | d problem of analyzing an entire control flow graph into a collection
 96 | 
 97 | 24
 98 | 00:01:52,219 --> 00:01:55,939
 99 | of rules that only do ver, propagate information very, very locally.
100 | 


--------------------------------------------------------------------------------
/CS143 text-srt/english/16-02-A+Graph+Coloring.srt:
--------------------------------------------------------------------------------
  1 | 0
  2 | 00:00:03,449 --> 00:00:07,779
  3 | In this video we are going to continue our discussion of register interference graphs
  4 | 
  5 | 1
  6 | 00:00:07,779 --> 00:00:13,570
  7 | and talk about how to use RIGS to come up with register assignments for procedures.
  8 | 
  9 | 2
 10 | 00:00:13,570 --> 00:00:16,770
 11 | And we're going to look at one particular technique that's popular called graph
 12 | 
 13 | 3
 14 | 00:00:16,770 --> 00:00:23,770
 15 | coloring. So first, a couple of definitions. A graph coloring is an
 16 | 
 17 | 4
 18 | 00:00:24,140 --> 00:00:27,930
 19 | assignment of colors to nodes such that the nodes connected by an edge have
 20 | 
 21 | 5
 22 | 00:00:27,930 --> 00:00:34,930
 23 | different colors. So if I have a graph let's say with, with three nodes and it's
 24 | 
 25 | 6
 26 | 00:00:35,280 --> 00:00:40,370
 27 | fully connected so every node connect to every other node. And then, and then a
 28 | 
 29 | 7
 30 | 00:00:40,370 --> 00:00:45,079
 31 | coloring of this graph would be an assignment of colors such that every pair
 32 | 
 33 | 8
 34 | 00:00:45,079 --> 00:00:48,220
 35 | of nodes are connected by an edge have a different color. So for example I could
 36 | 
 37 | 9
 38 | 00:00:48,220 --> 00:00:54,780
 39 | color this node blue and I could color this node green and I could color this
 40 | 
 41 | 10
 42 | 00:00:54,780 --> 00:01:00,489
 43 | node black, okay. And then that would be a valid coloring of the graph because each
 44 | 
 45 | 11
 46 | 00:01:00,489 --> 00:01:05,430
 47 | pair of neighbours has a different color. And then the graph is k-colorable if it
 48 | 
 49 | 12
 50 | 00:01:05,430 --> 00:01:12,430
 51 | has a coloring that uses k or fewer colors. In our problem, the colors
 52 | 
 53 | 13
 54 | 00:01:13,060 --> 00:01:17,969
 55 | corresponds to registers so we want to do is to assign colors or registers to the
 56 | 
 57 | 14
 58 | 00:01:17,969 --> 00:01:22,850
 59 | graph nodes. And we're going to let k, the number, the maximum number of colors we're
 60 | 
 61 | 15
 62 | 00:01:22,850 --> 00:01:26,170
 63 | allowed to use be the number of machine register. So the actual number of
 64 | 
 65 | 16
 66 | 00:01:26,170 --> 00:01:31,270
 67 | registers present on the architecture for which we're generating code. And then if,
 68 | 
 69 | 17
 70 | 00:01:31,270 --> 00:01:34,889
 71 | if a RIG, if a registered interference graph is k-colorable, then there's going
 72 | 
 73 | 18
 74 | 00:01:34,889 --> 00:01:41,240
 75 | to be a register assignment that uses no more than k registers. So let's take a
 76 | 
 77 | 19
 78 | 00:01:41,240 --> 00:01:46,539
 79 | look at an example rig and for this particular graph, there is no coloring. It
 80 | 
 81 | 20
 82 | 00:01:46,539 --> 00:01:52,649
 83 | turns out that it uses fewer than four colors. But there is at least one for
 84 | 
 85 | 21
 86 | 00:01:52,649 --> 00:01:59,249
 87 | coloring of this graph. And then here it is so I've used colored labels but also
 88 | 
 89 | 22
 90 | 00:01:59,249 --> 00:02:04,630
 91 | register names so that you can see what registers we might assign to each of the
 92 | 
 93 | 23
 94 | 00:02:04,630 --> 00:02:09,610
 95 | nodes. And just notice that although there are many more than four temporaries or
 96 | 
 97 | 24
 98 | 00:02:09,610 --> 00:02:14,000
 99 | four nodes in this graph, we do manage to color it with only four colors and some of
100 | 
101 | 25
102 | 00:02:14,000 --> 00:02:19,010
103 | the nodes have the same color. So for example, d and b are allocated the same
104 | 
105 | 26
106 | 00:02:19,010 --> 00:02:26,010
107 | color, as are e and a. Just to remind ourselves where this register interference
108 | 
109 | 27
110 | 00:02:26,480 --> 00:02:31,930
111 | graph came from, here is the original control flow graph again. And, once we
112 | 
113 | 28
114 | 00:02:31,930 --> 00:02:36,010
115 | have the coloring of the graph, now we can do the register assignment. We can replace
116 | 
117 | 29
118 | 00:02:36,010 --> 00:02:40,090
119 | the temporaries by their corresponding register names, and then we get this
120 | 
121 | 30
122 | 00:02:40,090 --> 00:02:46,760
123 | control flow graph. So, here we've just renamed each of the variables of the
124 | 
125 | 31
126 | 00:02:46,760 --> 00:02:51,430
127 | program with its register that it was assigned to. And now we're very close, as
128 | 
129 | 32
130 | 00:02:51,430 --> 00:02:56,379
131 | you can see, to having code that we can emit and execute on the target
132 | 
133 | 33
134 | 00:02:56,379 --> 00:02:56,879
135 | architecture.
136 | 


--------------------------------------------------------------------------------
/CS143 text-srt/english/16-03-B+Spilling.srt:
--------------------------------------------------------------------------------
 1 | 0
 2 | 00:00:01,189 --> 00:00:07,340
 3 | To summarize this video, register allocation is one of the most important jobs that a compiler
 4 | 
 5 | 1
 6 | 00:00:07,340 --> 00:00:11,740
 7 | performs. And it's really, these days they must have an any kind of reasonable
 8 | 
 9 | 2
10 | 00:00:11,740 --> 00:00:15,499
11 | production compiler. And, the reason you need it is because the inter-media
12 | 
13 | 3
14 | 00:00:15,499 --> 00:00:19,320
15 | code just generally uses too many temporaries. We're allowed to be a little
16 | 
17 | 4
18 | 00:00:19,320 --> 00:00:24,240
19 | bit sloppy with intermediate code precisely because we have good register allocation
20 | 
21 | 5
22 | 00:00:24,240 --> 00:00:27,640
23 | algorithms. And the other reason, registers are just a very important
24 | 
25 | 6
26 | 00:00:27,640 --> 00:00:32,750
27 | resource in making good user registers. Having some procedure for making
28 | 
29 | 7
30 | 00:00:32,750 --> 00:00:36,879
31 | efficient use of the registers. Leads to much, much better code in the end,
32 | 
33 | 8
34 | 00:00:36,879 --> 00:00:41,659
35 | much more efficient code. Now. The register allocation algorithm I described
36 | 
37 | 9
38 | 00:00:41,659 --> 00:00:47,960
39 | here is really targeted at risk machine. So, for risk machine reduce instruction set
40 | 
41 | 10
42 | 00:00:47,960 --> 00:00:53,089
43 | computer what kind of machine. You can pretty much take the register allocation algorithm
44 | 
45 | 11
46 | 00:00:53,089 --> 00:00:57,319
47 | that I described and if any for those machines it would work out of the box.
48 | 
49 | 12
50 | 00:00:57,319 --> 00:01:02,339
51 | CISC machines - which stands for complex instructions for computers - often
52 | 
53 | 13
54 | 00:01:02,339 --> 00:01:07,840
55 | have restrictions on how the register can be used. Certain operation can only work with
56 | 
57 | 14
58 | 00:01:07,840 --> 00:01:11,250
59 | certain registers. You may have register to different sizes that can only
60 | 
61 | 15
62 | 00:01:11,250 --> 00:01:16,420
63 | hold certain values. And so it becomes more complicated to register allocation for
64 | 
65 | 16
66 | 00:01:16,420 --> 00:01:21,140
67 | such machines. What people have done is to adapt the graph coloring procedure that
68 | 
69 | 17
70 | 00:01:21,140 --> 00:01:27,420
71 | I described here. So, the basic idea is exactly the same and you would recognize
72 | 
73 | 18
74 | 00:01:27,420 --> 00:01:30,650
75 | those algorithms is being primarily the graph color algorithms that
76 | 
77 | 19
78 | 00:01:30,650 --> 00:01:36,750
79 | we discussed. There are just additional. Steps in those algorithms and places where
80 | 
81 | 20
82 | 00:01:36,750 --> 00:01:40,340
83 | the particular constraints are what registers can be used have to be observed.
84 | 


--------------------------------------------------------------------------------
/CS143 text-srt/english/17-02-A+Mark+and+Sweep.srt:
--------------------------------------------------------------------------------
  1 | 0
  2 | 00:00:04,230 --> 00:00:07,200
  3 | In this video, we're going to talk about the first of three garbage collection
  4 | 
  5 | 1
  6 | 00:00:07,200 --> 00:00:10,870
  7 | techniques that we're going to look at in detail. First one is mark-and-sweep.
  8 | 
  9 | 2
 10 | 00:00:10,870 --> 00:00:17,870
 11 | Mark-and-sweep works in two phases. And it's called, not surprisingly, mark and
 12 | 
 13 | 3
 14 | 00:00:19,410 --> 00:00:23,769
 15 | sweep. So, the mark phase is going to trace all the reachable objects. So, when
 16 | 
 17 | 4
 18 | 00:00:23,769 --> 00:00:26,330
 19 | memory runs out and we stop to do the garbage collection, the first thing we're
 20 | 
 21 | 5
 22 | 00:00:26,330 --> 00:00:29,710
 23 | going to do is go and trace out all the reachable objects. And then the Sweep
 24 | 
 25 | 6
 26 | 00:00:29,710 --> 00:00:35,190
 27 | phase is going to collect all the garbage objects. And to support this, every object
 28 | 
 29 | 7
 30 | 00:00:35,190 --> 00:00:39,480
 31 | is going to have an extra bit somewhere in it called the mark bit. And, this is
 32 | 
 33 | 8
 34 | 00:00:39,480 --> 00:00:42,120
 35 | reserved from memory management and it's not going to be used by anything except
 36 | 
 37 | 9
 38 | 00:00:42,120 --> 00:00:46,289
 39 | the garbage collector. And initially, before we start a garbage collection, the
 40 | 
 41 | 10
 42 | 00:00:46,289 --> 00:00:50,940
 43 | mark bit of every object will always be zero. And that's going to be set to one,
 44 | 
 45 | 11
 46 | 00:00:50,940 --> 00:00:54,230
 47 | for the reachable objects in the mark phase. So, when we mark an object, we mark
 48 | 
 49 | 12
 50 | 00:00:54,230 --> 00:00:59,789
 51 | it with a And that indicates that the object is reachable. So, here is the mark
 52 | 
 53 | 13
 54 | 00:00:59,789 --> 00:01:04,360
 55 | phase. It's going to be a work list based algorithm and so initially our work list
 56 | 
 57 | 14
 58 | 00:01:04,360 --> 00:01:10,000
 59 | consists of all the roots so all the initial pointers held in registers and
 60 | 
 61 | 15
 62 | 00:01:10,000 --> 00:01:13,660
 63 | then while the work list, the to-do list is not empty, we're going to do the
 64 | 
 65 | 16
 66 | 00:01:13,660 --> 00:01:18,050
 67 | following. We pick some element v out of the to-do list we'll remove it from the
 68 | 
 69 | 17
 70 | 00:01:18,050 --> 00:01:25,050
 71 | to-do list, okay. And then, this is the crux of the algorithm. If the object v is
 72 | 
 73 | 18
 74 | 00:01:25,470 --> 00:01:30,390
 75 | not already marked then we mark it, okay. So, we say, mark bit to one and then we
 76 | 
 77 | 19
 78 | 00:01:30,390 --> 00:01:35,030
 79 | find all the pointers inside of it, alright. And we add those to our work
 80 | 
 81 | 20
 82 | 00:01:35,030 --> 00:01:39,640
 83 | list. So, everything, every point gets added to work list. Now, if v is already
 84 | 
 85 | 21
 86 | 00:01:39,640 --> 00:01:43,130
 87 | marked, well then we have already processed it and we've already add all the
 88 | 
 89 | 22
 90 | 00:01:43,130 --> 00:01:46,340
 91 | things it points to, to the work list. And so we just need to do nothing there is no
 92 | 
 93 | 23
 94 | 00:01:46,340 --> 00:01:53,340
 95 | else branch and we just drop it from the to-do list. So, once we've completed the
 96 | 
 97 | 24
 98 | 00:01:53,540 --> 00:01:57,380
 99 | mark phase and every reachable object has been marked, then the sweep phase is going
100 | 
101 | 25
102 | 00:01:57,380 --> 00:02:02,350
103 | to scan th rough the heap looking for objects that have mark bit zero. And the
104 | 
105 | 26
106 | 00:02:02,350 --> 00:02:05,280
107 | sweep phase is just going to march through all of memory. It's going to start at the
108 | 
109 | 27
110 | 00:02:05,280 --> 00:02:10,229
111 | bottom of the heap and walk over every object in the heap and check its mark bit.
112 | 
113 | 28
114 | 00:02:10,229 --> 00:02:14,280
115 | And so, any of the objects that it finds that have mark bit zero, they were not
116 | 
117 | 29
118 | 00:02:14,280 --> 00:02:18,170
119 | visited in mark phase and they're clearly not reachable. S, all those objects will
120 | 
121 | 30
122 | 00:02:18,170 --> 00:02:23,569
123 | be added to a free list. And as we go through the memory is one other detail
124 | 
125 | 31
126 | 00:02:23,569 --> 00:02:27,709
127 | that's important. Any object that has its mark bit set is gonna have its mark bit
128 | 
129 | 32
130 | 00:02:27,709 --> 00:02:33,980
131 | reset to zero. So, that way it's ready for the next garbage collection. So, here is
132 | 
133 | 33
134 | 00:02:33,980 --> 00:02:40,599
135 | the pseudo-code for the sweep phase and this will function, size of p is going to
136 | 
137 | 34
138 | 00:02:40,599 --> 00:02:46,030
139 | size of block, the size of the object that starts at pointer p, alright. And as
140 | 
141 | 35
142 | 00:02:46,030 --> 00:02:50,719
143 | you'll see this is actually, the reason that we have the size of objects encoded
144 | 
145 | 36
146 | 00:02:50,719 --> 00:02:55,269
147 | in the object in COOL. So, remember in the header for COOL objects there is a size
148 | 
149 | 37
150 | 00:02:55,269 --> 00:02:58,180
151 | field that is, so that the garbage collector as it's walking through memory
152 | 
153 | 38
154 | 00:02:58,180 --> 00:03:02,620
155 | can figure out how big the objects are. Anyway, we start at the bottom of the
156 | 
157 | 39
158 | 00:03:02,620 --> 00:03:06,659
159 | heap. And while we haven't reached the top of the heap, we do the following. We look
160 | 
161 | 40
162 | 00:03:06,659 --> 00:03:10,560
163 | at where we're pointing and then we'll always be pointing to the beginning of an
164 | 
165 | 41
166 | 00:03:10,560 --> 00:03:15,349
167 | object. So, we check to see if the mark bit of that object is one. And if it is,
168 | 
169 | 42
170 | 00:03:15,349 --> 00:03:18,769
171 | well then it was a reachable object. So, we just reset its mark bit to zero.
172 | 
173 | 43
174 | 00:03:18,769 --> 00:03:23,489
175 | Otherwise, if its mark bit was zero, then we're going to add that block of memory,
176 | 
177 | 44
178 | 00:03:23,489 --> 00:03:29,060
179 | okay, which is the size of the object to the free list. And finally, in either
180 | 
181 | 45
182 | 00:03:29,060 --> 00:03:34,120
183 | case, okay, we're going to increment p by the size of the object that it points to
184 | 
185 | 46
186 | 00:03:34,120 --> 00:03:37,819
187 | so we point to the next object. Then we'll just repeat that loop over and over again
188 | 
189 | 47
190 | 00:03:37,819 --> 00:03:41,439
191 | resetting the mark bits of things that were reached and adding things that were
192 | 
193 | 48
194 | 00:03:41,439 --> 00:03:47,749
195 | not reached for the free list until we've touched every object in the heap. Here's a
196 | 
197 | 49
198 | 00:03:47,749 --> 00:03:51,749
199 | little example. So, we're starting out here with a, a heap and we're gonna assume
200 | 
201 | 50
202 | 00:03:51,749 --> 00:03:56,579
203 | there's just one root for simplicity. And here are all the objects and initially
204 | 
205 | 51
206 | 00:03:56,579 --> 00:04:01,139
207 | their marked bits are zero and we do have a free list, an initial free list over
208 | 
209 | 52
210 | 00:04:01,139 --> 00:04:04,620
211 | here. Notice that, you know, there's a little bit of memory that is on the free
212 | 
213 | 53
214 | 00:04:04,620 --> 00:04:09,639
215 | list. Okay. So, after the mark phase, what has happened? Well, we've gone through,
216 | 
217 | 54
218 | 00:04:09,639 --> 00:04:12,569
219 | and touched all the reachable objects. So, we started with A and, of course, we set
220 | 
221 | 55
222 | 00:04:12,569 --> 00:04:16,930
223 | its mark bit to one. And then we followed pointers reachable from A, set the mark
224 | 
225 | 56
226 | 00:04:16,930 --> 00:04:20,290
227 | bit there. Follow the pointer reachable from C, set the mark bit there. And so we
228 | 
229 | 57
230 | 00:04:20,290 --> 00:04:26,970
231 | wind up A, C, and E being marked, nothing else is marked, okay. And now the sweep
232 | 
233 | 58
234 | 00:04:26,970 --> 00:04:31,600
235 | phase will go through memory, it's going to reset all the marked bits to zero. And
236 | 
237 | 59
238 | 00:04:31,600 --> 00:04:36,100
239 | as it finds unreachable objects, in this case B and D, it's going to add them to
240 | 
241 | 60
242 | 00:04:36,100 --> 00:04:40,060
243 | the free list and so what we'll wind up the free list will wind up being a linked
244 | 
245 | 61
246 | 00:04:40,060 --> 00:04:45,640
247 | list of, of, of blocks of memory that are available for future allocations.
248 | 


--------------------------------------------------------------------------------
/CS143 text-srt/english/17-03-B+Stop+and+Copy.srt:
--------------------------------------------------------------------------------
  1 | 0
  2 | 00:00:01,589 --> 00:00:05,279
  3 | So, just as was the case with mark-and-sweep, when we scan an object, we
  4 | 
  5 | 1
  6 | 00:00:05,279 --> 00:00:09,340
  7 | have to know how big it is and we also need to know where the pointers and the
  8 | 
  9 | 2
 10 | 00:00:09,340 --> 00:00:13,840
 11 | object are. So, if we think about this for a minute, let's say we're scanning this
 12 | 
 13 | 3
 14 | 00:00:13,840 --> 00:00:17,560
 15 | object, so this is our scan pointer and we want now to process all the pointers in
 16 | 
 17 | 4
 18 | 00:00:17,560 --> 00:00:20,670
 19 | it, well, we have to know where the pointers are. So, there's a pointer here
 20 | 
 21 | 5
 22 | 00:00:20,670 --> 00:00:23,820
 23 | and there's a pointer here and we'll be able to find those pointers and we don't
 24 | 
 25 | 6
 26 | 00:00:23,820 --> 00:00:28,250
 27 | want to confuse them with other fields of the object that might look like pointers.
 28 | 
 29 | 7
 30 | 00:00:28,250 --> 00:00:32,359
 31 | So, in a bit pattern for an integer could look an awful lot like a pointer. Now,
 32 | 
 33 | 8
 34 | 00:00:32,359 --> 00:00:36,300
 35 | this is not a big problem because the compiler, of course, in terms of, a lot of
 36 | 
 37 | 9
 38 | 00:00:36,300 --> 00:00:40,140
 39 | the objects in the heap and it can stor e that information somewhere communicated to
 40 | 
 41 | 10
 42 | 00:00:40,140 --> 00:00:44,100
 43 | the garbage collector so that it will be able to find the pointers. So, you can
 44 | 
 45 | 11
 46 | 00:00:44,100 --> 00:00:49,730
 47 | imagine easily a little bit of information stored with the program indicating for
 48 | 
 49 | 12
 50 | 00:00:49,730 --> 00:00:55,899
 51 | each type where the pointers are. And similarly once we've scanned this object,
 52 | 
 53 | 13
 54 | 00:00:55,899 --> 00:00:59,850
 55 | we need to be able to advance our scan pointer just past the object so that we
 56 | 
 57 | 14
 58 | 00:00:59,850 --> 00:01:03,410
 59 | can find the beginning of the next object and that's why we need to know the size,
 60 | 
 61 | 15
 62 | 00:01:03,410 --> 00:01:07,709
 63 | okay. So, we need to know that size so that the scan pointer can be moved past
 64 | 
 65 | 16
 66 | 00:01:07,709 --> 00:01:12,359
 67 | the object and we can find the beginning of the next object. Another issue is that
 68 | 
 69 | 17
 70 | 00:01:12,359 --> 00:01:15,389
 71 | whenever we do a garbage collection, I haven't mentioned this up to this point
 72 | 
 73 | 18
 74 | 00:01:15,389 --> 00:01:20,630
 75 | but it should be clear, we also have to scan and copy objects pointed to by the
 76 | 
 77 | 19
 78 | 00:01:20,630 --> 00:01:25,209
 79 | stack. And we also have to update pointers in the stack. And this can actually turn
 80 | 
 81 | 20
 82 | 00:01:25,209 --> 00:01:30,599
 83 | out to be kind of an expensive operation with stop-and-copy because, you know, you
 84 | 
 85 | 21
 86 | 00:01:30,599 --> 00:01:35,499
 87 | still have to walk the entire stack each time you do a collection in order to make
 88 | 
 89 | 22
 90 | 00:01:35,499 --> 00:01:42,090
 91 | sure that you've copied all the objects pointed to by the stack. To conclude
 92 | 
 93 | 23
 94 | 00:01:42,090 --> 00:01:46,779
 95 | stop-and-copy, I think it's fair to say, is generally believed to be the fastest
 96 | 
 97 | 24
 98 | 00:01:46,779 --> 00:01:50,679
 99 | garbage collection technique. Certainly, I believe that variations on stop-and- copy
100 | 
101 | 25
102 | 00:01:50,679 --> 00:01:56,189
103 | are the most efficient approaches known to automatic memory management. Allocation is
104 | 
105 | 26
106 | 00:01:56,189 --> 00:02:00,499
107 | very cheap, alright. So, cuz all you have to do is increment the e-pointer. So,
108 | 
109 | 27
110 | 00:02:00,499 --> 00:02:05,049
111 | you're just moving a, a, single pointer forward to allocate space. There's no
112 | 
113 | 28
114 | 00:02:05,049 --> 00:02:10,810
115 | complicated free list future verse or decisions to make about where to put the
116 | 
117 | 29
118 | 00:02:10,810 --> 00:02:13,159
119 | object, you know, you're just going to allocate it directly at the allocation
120 | 
121 | 30
122 | 00:02:13,159 --> 00:02:18,040
123 | pointer. So, you know, this, this part of memory management is, is very inexpensive.
124 | 
125 | 31
126 | 00:02:18,040 --> 00:02:22,019
127 | And at the same time, collection is also relatively cheap. And, and interestingly
128 | 
129 | 32
130 | 00:02:22,019 --> 00:02:27,689
131 | it's especially cheap if there is a lot of garbage because, because of making a copy
132 | 
133 | 33
134 | 00:02:27,689 --> 00:02:32,969
135 | of the reachable objects stop-and-copy only touches the reac hable object, It is
136 | 
137 | 34
138 | 00:02:32,969 --> 00:02:39,969
139 | not, in particular, does not touch the garbage. So, if you think about that for a
140 | 
141 | 35
142 | 00:02:41,500 --> 00:02:46,680
143 | minute, that means that the garbage collection is in stop-and-copy is order
144 | 
145 | 36
146 | 00:02:46,680 --> 00:02:52,859
147 | the size of the live objects. So, whatever the sub-graph is that you're copying,
148 | 
149 | 37
150 | 00:02:52,859 --> 00:02:59,780
151 | that's the cost of a garbage collection and that's in contrast to mark-and-sweep
152 | 
153 | 38
154 | 00:02:59,780 --> 00:03:05,030
155 | were the cost is proportional to all the memory that you're using cuz you have the
156 | 
157 | 39
158 | 00:03:05,030 --> 00:03:08,329
159 | sweep phase where you have to go through and touch every single object whether it's
160 | 
161 | 40
162 | 00:03:08,329 --> 00:03:13,890
163 | live or garbage, okay. And so, if you have a relatively lot of garbage and a
164 | 
165 | 41
166 | 00:03:13,890 --> 00:03:17,340
167 | relatively small set of live objects, stop-and-copy is actually much, much
168 | 
169 | 42
170 | 00:03:17,340 --> 00:03:22,640
171 | faster than mark-and-sweep. Now, of course the down side of stop-and-copy is that it
172 | 
173 | 43
174 | 00:03:22,640 --> 00:03:28,400
175 | moves the objects in some languages, in particular C and C++, can't allow you to
176 | 
177 | 44
178 | 00:03:28,400 --> 00:03:33,689
179 | move objects because the address that which an object lives is actually visible
180 | 
181 | 45
182 | 00:03:33,689 --> 00:03:38,079
183 | exposed in the program and is part of the semantics of the object. And so there, you
184 | 
185 | 46
186 | 00:03:38,079 --> 00:03:41,090
187 | really have to use mark-and-sweep because you're not allowed to move anything.
188 | 


--------------------------------------------------------------------------------
/CS143 text-srt/english/17-04-conservative-collection.srt:
--------------------------------------------------------------------------------
  1 | 0
  2 | 00:00:03,340 --> 00:00:06,850
  3 | In this very short video, I'm going to say a few words about a technique called
  4 | 
  5 | 1
  6 | 00:00:06,850 --> 00:00:13,850
  7 | Conservative Garbage Collection that can be used for languages like C and C++. To
  8 | 
  9 | 2
 10 | 00:00:14,000 --> 00:00:18,519
 11 | review, Automatic Memory Management relies on being able to find all the reachable
 12 | 
 13 | 3
 14 | 00:00:18,519 --> 00:00:23,660
 15 | objects. And it also needs to be able to find all the pointers in an object. Now,
 16 | 
 17 | 4
 18 | 00:00:23,660 --> 00:00:27,310
 19 | the difficultly with doing garbage collection for a language like C or C++ is
 20 | 
 21 | 5
 22 | 00:00:27,310 --> 00:00:31,820
 23 | that it's very difficult or even impossible to identify the contents of
 24 | 
 25 | 6
 26 | 00:00:31,820 --> 00:00:37,470
 27 | objects in memory with 100 percent reliability. So if we see, two words in
 28 | 
 29 | 7
 30 | 00:00:37,470 --> 00:00:42,620
 31 | memory, you know, it might be a list cell that has, a data and next field. So we see
 32 | 
 33 | 8
 34 | 00:00:42,620 --> 00:00:48,219
 35 | just two words here. And there are some bit patterns in here, 0's and 1's. Okay
 36 | 
 37 | 9
 38 | 00:00:48,219 --> 00:00:52,149
 39 | how do we know whether these are both pointers? It could be that one is a
 40 | 
 41 | 10
 42 | 00:00:52,149 --> 00:00:55,670
 43 | pointer and the, the other is not in the case of a list cell. So one of these
 44 | 
 45 | 11
 46 | 00:00:55,670 --> 00:00:59,789
 47 | fields is just data like an injure and another one is a pointer. Or it could be
 48 | 
 49 | 12
 50 | 00:00:59,789 --> 00:01:04,969
 51 | something like a binary tree node where both of these words are pointers. And
 52 | 
 53 | 13
 54 | 00:01:04,968 --> 00:01:09,670
 55 | because of this weakness really in the C and C++ type systems, we just can't
 56 | 
 57 | 14
 58 | 00:01:09,670 --> 00:01:15,240
 59 | guarantee that we know where all the pointers are. Now it turns out that it is
 60 | 
 61 | 15
 62 | 00:01:15,240 --> 00:01:19,560
 63 | possible to extend garbage collection techniques to work with languages like C
 64 | 
 65 | 16
 66 | 00:01:19,560 --> 00:01:26,259
 67 | and C++. And the basic idea, or insight, is that it's always okay to be
 68 | 
 69 | 17
 70 | 00:01:26,259 --> 00:01:28,859
 71 | conservative. And if we're not sure whether something might be used in the
 72 | 
 73 | 18
 74 | 00:01:28,859 --> 00:01:33,169
 75 | future, then we will just keep it around. And remember that graph reachability is
 76 | 
 77 | 19
 78 | 00:01:33,169 --> 00:01:38,349
 79 | already a conservative technique. What we really want is to keep around the objects
 80 | 
 81 | 20
 82 | 00:01:38,349 --> 00:01:42,700
 83 | that will just be used in the future, but the reachability in the object graph is an
 84 | 
 85 | 21
 86 | 00:01:42,700 --> 00:01:47,579
 87 | approximation to that, so because reachable objects might be used. And now,
 88 | 
 89 | 22
 90 | 00:01:47,579 --> 00:01:51,149
 91 | the problem with C and C++ is that we don't know where the pointers are. We
 92 | 
 93 | 23
 94 | 00:01:51,149 --> 00:01:54,380
 95 | don't have a guarantee from the type system about where the pointers are. And
 96 | 
 97 | 24
 98 | 00:01:54,380 --> 00:01:58,639
 99 | so the basic trick is that, if something looks like a pointer, then we will treat
100 | 
101 | 25
102 | 00:01:58,639 --> 00:02:03,179
103 | it as a pointer. All we have to do is be conservative, and if we are not sure wh
104 | 
105 | 26
106 | 00:02:03,179 --> 00:02:06,959
107 | ether a given word of memory is a pointer. Then we can just treat it as a pointer,
108 | 
109 | 27
110 | 00:02:06,959 --> 00:02:11,769
111 | and keep whatever it points to around. If we, and as long as we are not going to
112 | 
113 | 28
114 | 00:02:11,769 --> 00:02:16,450
115 | move it or change it, that would be okay. And so, how, how do we decide whether a
116 | 
117 | 29
118 | 00:02:16,450 --> 00:02:19,950
119 | particular word of memory is a pointer? Well, it should be a line, meaning, you
120 | 
121 | 30
122 | 00:02:19,950 --> 00:02:24,870
123 | know, it should end in some zeros to indicate that it was pointing, if it was a
124 | 
125 | 31
126 | 00:02:24,870 --> 00:02:28,129
127 | pointer it was pointing to a word boundary. And then, whatever pattern it
128 | 
129 | 32
130 | 00:02:28,129 --> 00:02:31,790
131 | is, if we interpret it as an address, it has to be a valid address. So, it should
132 | 
133 | 33
134 | 00:02:31,790 --> 00:02:35,430
135 | point to the data segment. And Noah said, you know, these two conditions will rule
136 | 
137 | 34
138 | 00:02:35,430 --> 00:02:42,060
139 | out all kinds of data and memory. So for example, any small integer is probably not
140 | 
141 | 35
142 | 00:02:42,060 --> 00:02:46,849
143 | going to be interpretable as a valid address in the data segment. So, you know,
144 | 
145 | 36
146 | 00:02:46,849 --> 00:02:50,579
147 | most likely, only things that are pointers, or very few things that are not
148 | 
149 | 37
150 | 00:02:50,579 --> 00:02:54,340
151 | pointers will be treated as pointers. And what we're going to do then, is, if it
152 | 
153 | 38
154 | 00:02:54,340 --> 00:02:57,950
155 | looks like a pointer, we're going to consider it to be a pointer. We'll follow
156 | 
157 | 39
158 | 00:02:57,950 --> 00:03:01,709
159 | it, and then we'll end up overestimating the set of reachable objects. We may keep
160 | 
161 | 40
162 | 00:03:01,709 --> 00:03:07,049
163 | around some stuff, that isn't reachable at all. But that's alright, it's always okay
164 | 
165 | 41
166 | 00:03:07,049 --> 00:03:12,780
167 | to keep around more stuff than necessary. Now, we still can't move the object,
168 | 
169 | 42
170 | 00:03:12,780 --> 00:03:15,290
171 | alright? Because we can't update the pointers to them. If we don't know that
172 | 
173 | 43
174 | 00:03:15,290 --> 00:03:19,469
175 | something is a pointer, we certainly don't want to change it, okay? And, you know,
176 | 
177 | 44
178 | 00:03:19,469 --> 00:03:22,760
179 | for example, if we thought something was a pointer, and it was actually an account
180 | 
181 | 45
182 | 00:03:22,760 --> 00:03:25,670
183 | number, and then we updated the pointer, when we move the object, we would just
184 | 
185 | 46
186 | 00:03:25,670 --> 00:03:30,010
187 | completely change what the program does. So, this only really works when you mark
188 | 
189 | 47
190 | 00:03:30,010 --> 00:03:30,260
191 | this way.
192 | 


--------------------------------------------------------------------------------
/CS143 text-srt/english/README.md:
--------------------------------------------------------------------------------
 1 | **CS143 编译原理课程的英文字幕, 翻译过程会持续进行。当然, 如果你有兴趣, 可以将字幕翻译后, 添加到课程视频中。如果你对这个课程有贡献的话, 请通知我, 或者在 Github 上直接通知我, 或者通过下列方式：**
 2 | 
 3 | -   QQ：884691896
 4 | -   QQmail：884691896@qq.com
 5 | -   Gmail：gg884691896@gmail.com
 6 | 
 7 | **课程链接：** https://www.bilibili.com/video/av96207540/
 8 | 
 9 | **期待你的参与！**
10 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Self-learning-Record


--------------------------------------------------------------------------------
/SSR-Android.apk:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AllenWrong/Self-learning-Record/145d014d55e1322f65450a87483e6d8a06acf1d4/SSR-Android.apk


--------------------------------------------------------------------------------
/SSR-Windows.zip:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AllenWrong/Self-learning-Record/145d014d55e1322f65450a87483e6d8a06acf1d4/SSR-Windows.zip


--------------------------------------------------------------------------------
/fubabaxianjinliu.apk:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AllenWrong/Self-learning-Record/145d014d55e1322f65450a87483e6d8a06acf1d4/fubabaxianjinliu.apk


--------------------------------------------------------------------------------
/src/demo.md:
--------------------------------------------------------------------------------
1 | de de 
2 | 


--------------------------------------------------------------------------------
/student-dist.tar.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AllenWrong/Self-learning-Record/145d014d55e1322f65450a87483e6d8a06acf1d4/student-dist.tar.gz


--------------------------------------------------------------------------------
/v2rayn.zip:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AllenWrong/Self-learning-Record/145d014d55e1322f65450a87483e6d8a06acf1d4/v2rayn.zip


--------------------------------------------------------------------------------