├── .gitignore ├── LICENSE ├── README.md ├── ch01 ├── 1.1 │ └── 1.1.md ├── 1.3 │ └── 1.3.md └── 1.6 │ └── 1.6.md ├── ch02 ├── 2.2 │ ├── 2.2.md │ └── assets │ │ ├── 2.2.1-2.png │ │ ├── 2.2.3-3.png │ │ ├── 2.2.3-4.png │ │ └── 2.2.3-5.png ├── 2.3 │ └── 2.3.md ├── 2.4 │ ├── 2.4.1.1.c │ ├── 2.4.1.2.c │ ├── 2.4.1.3.c │ └── 2.4.md ├── 2.6 │ └── 2.6.md ├── 2.8 │ └── 2.8.md └── key-point │ ├── assets │ └── dragonbook-keypoint-2.2-2.png │ └── key-point.md ├── ch03 ├── 3.1 │ └── 3.1.md ├── 3.3 │ ├── 3.3.md │ └── assets │ │ ├── 3.3.5-4-1.gif │ │ ├── 3.3.5-4-1.graphml │ │ ├── 3.3.5-4-2.gif │ │ ├── 3.3.5-4-2.graphml │ │ ├── 3.3.5-4-3.gif │ │ ├── 3.3.5-4-3.graphml │ │ ├── 3.3.5-4-4.gif │ │ ├── 3.3.5-4-4.graphml │ │ ├── 3.3.5-4.gif │ │ ├── 3.3.5-4.graphml │ │ ├── 3.3.5-5-1.gif │ │ ├── 3.3.5-5-1.graphml │ │ ├── 3.3.5-5-2.gif │ │ ├── 3.3.5-5-2.graphml │ │ ├── 3.3.5-5-3.gif │ │ ├── 3.3.5-5-3.graphml │ │ ├── 3.3.5-5-4.gif │ │ ├── 3.3.5-5-4.graphml │ │ ├── 3.3.5-5.gif │ │ └── 3.3.5-5.graphml ├── 3.4 │ ├── 3.4.md │ ├── assets │ │ ├── 3.4.1-1-dfa.gif │ │ ├── 3.4.1-1-dfa.graphml │ │ ├── 3.4.1-1-nfa.gif │ │ ├── 3.4.1-1-nfa.graphml │ │ ├── 3.4.1-1.gif │ │ ├── 3.4.1-1.graphml │ │ ├── 3.4.1-2.gif │ │ ├── 3.4.1-2.graphml │ │ ├── 3.4.1-3-nfa.gif │ │ ├── 3.4.1-3-nfa.graphml │ │ ├── 3.4.1-3.gif │ │ ├── 3.4.1-3.graphml │ │ ├── 3.4.1-4.gif │ │ └── 3.4.1-4.graphml │ └── src │ │ ├── failure-function.js │ │ └── kmp.js ├── 3.5 │ ├── 3.5.md │ └── src │ │ ├── lex.l │ │ ├── lex2.l │ │ └── lex3.l ├── 3.6 │ ├── 3.6.md │ └── assets │ │ ├── 3.6.1.gif │ │ └── 3.6.1.graphml ├── 3.7 │ ├── 3.7.md │ └── assets │ │ ├── 3.7.1-1.gif │ │ ├── 3.7.1-1.graphml │ │ ├── 3.7.1-2.gif │ │ ├── 3.7.1-2.graphml │ │ ├── 3.7.1-3.gif │ │ ├── 3.7.1-3.graphml │ │ ├── 3.7.3-1-dfa.gif │ │ ├── 3.7.3-1-nfa.gif │ │ ├── 3.7.3-2-dfa.gif │ │ ├── 3.7.3-2-nfa.gif │ │ ├── 3.7.3-3-dfa.gif │ │ ├── 3.7.3-3-nfa.gif │ │ ├── 3.7.3-4-dfa.gif │ │ ├── 3.7.3-4-dfa.graphml │ │ ├── 3.7.3-4-nfa.gif │ │ └── 3.7.3-4-nfa.graphml ├── 3.8 │ ├── 3.8.md │ └── 
assets │ │ ├── 3.8.1-dfa.gif │ │ ├── 3.8.1-dfa.graphml │ │ ├── 3.8.1-nfa.gif │ │ ├── 3.8.1-nfa.graphml │ │ ├── 3.8.2-nfa.gif │ │ ├── 3.8.2-nfa.graphml │ │ ├── 3.8.3-1.gif │ │ ├── 3.8.3-1.graphml │ │ ├── 3.8.3-2.gif │ │ └── 3.8.3-2.graphml ├── 3.9 │ ├── 3.9.md │ └── assets │ │ ├── 3.9.2-1-1.gif │ │ ├── 3.9.2-1-1.graphml │ │ ├── 3.9.2-1-2.gif │ │ ├── 3.9.2-1-2.graphml │ │ ├── 3.9.2-1-dfa.gif │ │ └── 3.9.2-1-dfa.graphml └── key-point │ └── key-point.md ├── ch04 ├── 4.2 │ ├── 4.2.md │ └── assets │ │ ├── 4.2.1.gif │ │ └── 4.2.1.graphml ├── 4.3 │ └── 4.3.md ├── 4.4 │ ├── 4.4.md │ ├── courses.engr.illinois.edu-cs373-lec14.pdf │ └── cs.columbia.edu-aho-cs3261-properties-of-cfl-121008.html ├── 4.5 │ └── 4.5.md ├── 4.6 │ ├── 4.6.md │ └── assets │ │ ├── 4.6.1-1.gif │ │ ├── 4.6.1-1.graphml │ │ ├── 4.6.1-2.gif │ │ ├── 4.6.1-2.graphml │ │ ├── 4.6.1-3.gif │ │ └── 4.6.1-3.graphml ├── 4.7 │ └── 4.7.md └── key-point │ └── key-point.md ├── ch05 ├── 5.1 │ ├── 5.1.md │ └── assets │ │ ├── 5.1.1-1.gif │ │ ├── 5.1.1-1.graphml │ │ ├── 5.1.1-2.gif │ │ ├── 5.1.1-2.graphml │ │ ├── 5.1.3-1.gif │ │ ├── 5.1.3-1.graphml │ │ ├── 5.1.3-2.gif │ │ └── 5.1.3-2.graphml ├── 5.2 │ ├── 5.2.1.js │ ├── 5.2.md │ └── assets │ │ ├── 5.2.2-1.gif │ │ └── 5.2.2-1.graphml ├── 5.3 │ └── 5.3.md ├── 5.4 │ └── 5.4.md └── 5.5 │ └── 5.5.md ├── ch06 ├── 6.1 │ ├── 6.1.md │ └── assets │ │ ├── 6.1.1.gif │ │ ├── 6.1.1.graphml │ │ ├── 6.1.2-1.gif │ │ ├── 6.1.2-1.graphml │ │ ├── 6.1.2-2.gif │ │ ├── 6.1.2-2.graphml │ │ ├── 6.1.2-3.gif │ │ └── 6.1.2-3.graphml ├── 6.2 │ ├── 6.2.md │ └── assets │ │ ├── 6.2.1.gif │ │ └── 6.2.1.graphml ├── 6.3 │ └── 6.3.md ├── 6.4 │ ├── 6.4.md │ └── assets │ │ ├── 6.4.3-1.gif │ │ ├── 6.4.3-1.graphml │ │ ├── 6.4.3-2.gif │ │ └── 6.4.3-2.graphml ├── 6.5 │ └── 6.5.md ├── 6.6 │ └── 6.6.md └── 6.7 │ ├── 6.7.md │ └── assets │ ├── 6.7.1-1.gif │ └── 6.7.1-1.graphml ├── ch07 ├── 7.2 │ ├── 7.2.6.c │ ├── 7.2.md │ └── assets │ │ ├── 7.2.1-1.gif │ │ ├── 7.2.1-1.graphml │ │ ├── 7.2.3-1.gif │ │ ├── 7.2.3-1.graphml 
│ │ ├── 7.2.3-2.gif │ │ ├── 7.2.3-2.graphml │ │ ├── 7.2.3-3.gif │ │ ├── 7.2.3-3.graphml │ │ ├── 7.2.4.gif │ │ └── 7.2.4.graphml ├── 7.3 │ ├── 7.3.md │ └── assets │ │ ├── 7.3.1-activation-stack.gif │ │ ├── 7.3.1-activation-stack.graphml │ │ ├── 7.3.1-activation-tree.gif │ │ ├── 7.3.1-activation-tree.graphml │ │ ├── 7.3.2.gif │ │ └── 7.3.2.graphml ├── 7.4 │ └── 7.4.md ├── 7.5 │ ├── 7.5.md │ └── assets │ │ ├── 7.5.1-1.gif │ │ ├── 7.5.1-1.graphml │ │ ├── 7.5.1-2.gif │ │ ├── 7.5.1-2.graphml │ │ ├── 7.5.1-3.gif │ │ ├── 7.5.1-3.graphml │ │ ├── 7.5.2.gif │ │ └── 7.5.2.graphml ├── 7.6 │ └── 7.6.md └── 7.7 │ ├── 7.7.md │ └── assets │ ├── 7.7.1-1.gif │ ├── 7.7.1-1.graphml │ ├── 7.7.1-2.gif │ └── 7.7.1-2.graphml ├── ch08 ├── 8.2 │ └── 8.2.md ├── 8.3 │ └── 8.3.md ├── 8.4 │ ├── 8.4.md │ └── assets │ │ ├── 8.4.1-2.gif │ │ ├── 8.4.1-2.graphml │ │ ├── 8.4.2-2.gif │ │ └── 8.4.2-2.graphml └── 8.5 │ ├── 8.5.md │ └── assets │ ├── 8.5.1.gif │ ├── 8.5.1.graphml │ ├── 8.5.3.gif │ ├── 8.5.3.graphml │ ├── 8.5.4.gif │ └── 8.5.4.graphml ├── ch12 ├── 12.3 │ └── 12.3.md └── 12.7 │ ├── 12.7.md │ └── assets │ ├── 12.7.1.png │ └── 12.7.2.png └── src └── lexer ├── Lexer.java ├── Num.java ├── Rel.java ├── SyntaxException.java ├── Tag.java ├── Token.java └── Word.java /.gitignore: -------------------------------------------------------------------------------- 1 | .DS_Store 2 | .project 3 | .settings 4 | .classpath 5 | build/ 6 | *.out 7 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2024 Muhammad AL-Muzahid 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies 
of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Compilers: Principles, Techniques, & Tools (purple dragon book) second edition exercise answers 2 | 3 | 4 | [![Join the chat at https://gitter.im/fool2fish/dragon-book-exercise-answers](https://badges.gitter.im/fool2fish/dragon-book-exercise-answers.svg)](https://gitter.im/fool2fish/dragon-book-exercise-answers?utm_source=badge&utm_medium=badge&utm_campaign=pr-badge&utm_content=badge) 5 | 6 | ### Something I hope you know before you go into the answers 7 | 8 | - First, please **watch** or **star** this repo; I'll be even happier if you **follow** me. 9 | - Bug reports, questions and discussion are welcome; you can post an [issue](https://github.com/fool2fish/dragon-book-practice-answer/issues/new) or [open a pull request](https://help.github.com/articles/using-pull-requests). 10 | - All graphs are drawn with [yEd](http://www.yworks.com/en/products_yed_about.html); it is simple, cross-platform and free. 11 | - There are some key-point.md files; key points and difficult points are summarized in them. 
12 | - As we know only teacher can buy the answer book, so I don't know the standard answer, but I tried my best to keep the correctness, if you found any bug, please [tell me](https://github.com/fool2fish/dragon-book-practice-answer/issues/new), thanks. 13 | 14 | ### License 15 | 16 | It is still under consideration. Anyone know which is suitable? 17 | -------------------------------------------------------------------------------- /ch01/1.1/1.1.md: -------------------------------------------------------------------------------- 1 | # Exercises for Section 1.1 2 | 3 | ### 1.1.1 4 | 5 | What is the difference between a compiler and an interpreter? 6 | 7 | #### Answer 8 | 9 | A compiler is a program that can read a program in one language - the source language - and translate it into an equivalent program in another language – the target language and report any errors in the source program that it detects during the translation process. 10 | 11 | Interpreter directly executes the operations specified in the source program on inputs supplied by the user. 12 | 13 | ### 1.1.2 14 | 15 | What are the advantages of: 16 | (a) a compiler over an interpreter 17 | (b) an interpreter over a compiler? 18 | 19 | #### Answer 20 | 21 | a. The machine-language target program produced by a compiler is usually much faster than an interpreter at mapping inputs to outputs. 22 | 23 | b. An interpreter can usually give better error diagnostics than a compiler, because it executes the source program statement by statement. 24 | 25 | ### 1.1.3 26 | 27 | What advantages are there to a language-processing system in which the compiler 28 | produces assembly language rather than machine language? 29 | 30 | #### Answer 31 | 32 | The compiler may produce an assembly-language program as its output, because 33 | assembly language is easier to produce as output and is easier to debug. 
34 | 35 | ### 1.1.4 36 | 37 | A compiler that translates a high-level language into another high-level 38 | language is called a *source-to-source* translator. What advantages are there to 39 | using C as a target language for a compiler? 40 | 41 | #### Answer 42 | 43 | For the C language there are many compilers available that compile to almost 44 | every hardware. 45 | 46 | ### 1.1.5 47 | 48 | Describe some of the tasks that an assembler needs to perform. 49 | 50 | #### Answer 51 | 52 | It translates from the assembly language to machine code. This machine code is 53 | relocatable. 54 | -------------------------------------------------------------------------------- /ch01/1.3/1.3.md: -------------------------------------------------------------------------------- 1 | # Exercises for Section 1.3 2 | 3 | ### 1.3.1 4 | 5 | Indicate which of the following terms: 6 | 7 | a. imperative 8 | b. declarative 9 | c. von Neumann 10 | d. object-oriented 11 | e. functional 12 | f. third-generation 13 | g. fourth-generation 14 | h. scripting 15 | 16 | apply to which of the following languages: 17 | 18 | 1. C 19 | 2. C++ 20 | 3. Cobol 21 | 4. Fortran 22 | 5. Java 23 | 6. Lisp 24 | 7. ML 25 | 8. Perl 26 | 9. Python 27 | 10. VB. 28 | 29 | #### Answer 30 | 31 | imperative: C, C++ 32 | 33 | object-oriented: C++, Java 34 | 35 | functional: ML 36 | 37 | scripting: Perl, Python 38 | -------------------------------------------------------------------------------- /ch01/1.6/1.6.md: -------------------------------------------------------------------------------- 1 | # Exercises for Section 1.6 2 | 3 | ### 1.6.1 4 | 5 | For the block-structured C code below, indicate the values assigned to w, x, y, and z. 6 | ``` 7 | int w, x, y, z; 8 | int i = 4; int j = 5; 9 | { 10 | int j = 7; 11 | i = 6; 12 | w = i + j; 13 | } 14 | x = i + j; 15 | { 16 | int i = 8; 17 | y = i + j; 18 | } 19 | z = i + j; 20 | ``` 21 | 22 | #### Answer 23 | 24 | w = 13, x = 11, y = 13, z = 11. 
25 | 26 | 27 | ### 1.6.2 28 | 29 | Repeat Exercise 1.6.1 for the code below. 30 | ``` 31 | int w, x, y, z; 32 | int i = 3; int j = 4; 33 | { 34 | int i = 5; 35 | w = i + j; 36 | } 37 | x = i + j; 38 | { 39 | int j = 6; 40 | i = 7; 41 | y = i + j; 42 | } 43 | z = i + j; 44 | ``` 45 | 46 | #### Answer 47 | 48 | w = 9, x = 7, y = 13, z = 11. 49 | 50 | 51 | ### 1.6.3 52 | 53 | For the block-structured code of Fig. 1.14, assuming the usual static scoping of 54 | declarations, give the scope for each of the twelve declarations. 55 | 56 | #### Answer 57 | ``` 58 | Block B1: 59 | declarations: -> scope 60 | w B1-B3-B4 61 | x B1-B2-B4 62 | y B1-B5 63 | z B1-B2-B5 64 | Block B2: 65 | declarations: -> scope 66 | x B2-B3 67 | z B2 68 | Block B3: 69 | declarations: -> scope 70 | w B3 71 | x B3 72 | Block B4: 73 | declarations: -> scope 74 | w B4 75 | x B4 76 | Block B5: 77 | declarations: -> scope 78 | y B5 79 | z B5 80 | ``` 81 | 82 | ### 1.6.4 83 | 84 | What is printed by the following C code? 85 | ``` 86 | #define a (x + 1) 87 | int x = 2; 88 | void b() { x = a; printf("%d\n", x); } 89 | void c() { int x = 1; printf("%d\n", a); } 90 | void main () { b(); c(); } 91 | ``` 92 | 93 | #### Answer 94 | 3 95 | 96 | 2 97 | -------------------------------------------------------------------------------- /ch02/2.2/2.2.md: -------------------------------------------------------------------------------- 1 | # Exercises for Section 2.2 2 | 3 | ### 2.2.1 4 | 5 | Consider the context-free grammar: 6 | 7 | S -> S S + | S S * | a 8 | 9 | 1. Show how the string `aa+a*` can be generated by this grammar. 10 | 2. Construct a parse tree for this string. 11 | 3. What language does this grammar generate? Justify your answer. 12 | 13 | #### Answer 14 | 15 | 1. `S` -> `S` S * -> `S` S + S * -> a `S` + S * -> a a + `S` * -> a a + a * 16 | 2. ![Syntax tree](https://raw.github.com/fool2fish/dragon-book-practice-answer/master/ch02/2.2/assets/2.2.1-2.png) 17 | 3. 
L = {Postfix expressions consisting of the operand a, plus signs and multiplication signs} 18 | 19 | ### 2.2.2 20 | 21 | What language is generated by the following grammars? In each case justify your answer. 22 | 23 | 1. S -> 0 S 1 | 0 1 24 | 2. S -> + S S | - S S | a 25 | 3. S -> S ( S ) S | ε 26 | 4. S -> a S b S | b S a S | ε 27 | 5. S -> a | S + S | S S | S * | ( S ) 28 | 29 | #### Answer 30 | 31 | 1. L = {0^n 1^n | n >= 1} 32 | 2. L = {Prefix expressions consisting of the operand a, plus signs and minus signs} 33 | 3. L = {Matched brackets of arbitrary arrangement and nesting, including ε} 34 | 4. L = {Strings with the same number of a's and b's, including ε} 35 | 5. L = {Regular expressions used to describe regular languages} [refer to wiki](http://en.wikipedia.org/wiki/Regular_expression) 36 | 37 | ### 2.2.3 38 | 39 | Which of the grammars in Exercise 2.2.2 are ambiguous? 40 | 41 | #### Answer 42 | 43 | 1. No 44 | 2. No 45 | 3. Yes 46 | 47 | ![ambiguous parse tree](https://raw.github.com/fool2fish/dragon-book-practice-answer/master/ch02/2.2/assets/2.2.3-3.png) 48 | 49 | 4. Yes 50 | 51 | ![ambiguous parse tree](https://raw.github.com/fool2fish/dragon-book-practice-answer/master/ch02/2.2/assets/2.2.3-4.png) 52 | 53 | 5. Yes 54 | 55 | ![ambiguous parse tree](https://raw.github.com/fool2fish/dragon-book-practice-answer/master/ch02/2.2/assets/2.2.3-5.png) 56 | 57 | 58 | ### 2.2.4 59 | 60 | Construct unambiguous context-free grammars for each of 61 | the following languages. In each case show that your grammar is correct. 62 | 63 | 1. Arithmetic expressions in postfix notation. 64 | 2. Left-associative lists of identifiers separated by commas. 65 | 3. Right-associative lists of identifiers separated by commas. 66 | 4. Arithmetic expressions of integers and identifiers with the four binary operators +, -, *, /. 67 | 5. Add unary plus and minus to the arithmetic operators of 4. 68 | 69 | #### Answer 70 | 71 | ``` 72 | 1. E -> E E op | num 73 | 74 | 2. list -> list , id | id 75 | 76 | 3. 
list -> id , list | id 77 | 78 | 4. expr -> expr + term | expr - term | term 79 | term -> term * factor | term / factor | factor 80 | factor -> id | num | (expr) 81 | 82 | 5. expr -> expr + term | expr - term | term 83 | term -> term * unary | term / unary | unary 84 | unary -> + factor | - factor | factor 85 | factor -> id | num | (expr) 86 | ``` 87 | 88 | ### 2.2.5 89 | 90 | 1. Show that all binary strings generated by the following grammar have values divisible by 3. Hint. Use induction on the number of nodes in a parse tree. 91 | 92 | num -> 11 | 1001 | num 0 | num num 93 | 94 | 2. Does the grammar generate all binary strings with values divisible by 3? 95 | 96 | #### Answer 97 | 98 | 1. Proof 99 | 100 | Any string derived from the grammar can be considered to be a sequence consisting of 11 and 1001, where each sequence element is possibly suffixed with a 0. 101 | 102 | Let `n` be the set of positions where `11` is placed. `11` is said to be at position `i` if the first `1` in `11` is at position `i`, where `i` starts at 0 and 103 | grows from least significant to most significant bit. 104 | 105 | Let `m` be the equivalent set for `1001`. 106 | 107 | The sum of any string produced by the grammar is: 108 | 109 | sum 110 | 111 | $= \sum_{i \in n} (2^1 + 2^0) \cdot 2^i + \sum_{j \in m} (2^3 + 2^0) \cdot 2^j$ 112 | 113 | $= \sum_{i \in n} 3 \cdot 2^i + \sum_{j \in m} 9 \cdot 2^j$ 114 | 115 | This is clearly divisible by 3. 116 | 117 | 118 | 2. No. Consider the string "10101", which is divisible by 3, but cannot be 119 | derived from the grammar. 120 | 121 | Readers seeking a more formal proof can read about it below: 122 | 123 | **Proof**: 124 | 125 | Every number divisible by 3 can be written in the form `3k`. We will consider `k > 0` (though it would be valid to consider `k` to be an arbitrary integer). 
126 | 127 | Note that every part of num(11, 1001 and 0) is divisible by 3, if the grammar could generate all the numbers divisible by 3, we can get a production for binary k from num's production: 128 | 129 | ``` 130 | 3k = num -> 11 | 1001 | num 0 | num num 131 | k = num/3 -> 01 | 0011 | k 0 | k k 132 | k -> 01 | 0011 | k 0 | k k 133 | ``` 134 | 135 | It is obvious that any value of `k` that has more than 2 consecutive bits set to 1 can never be produced. This can be confirmed by the example given in the beginning: 136 | 137 | 10101 is 3*7, hence, k = 7 = 111 in binary. Because 111 has more than 2 138 | consecutive 1's in binary, the grammar will never produce 21. 139 | 140 | 141 | ### 2.2.6 142 | 143 | Construct a context-free grammar for roman numerals. 144 | 145 | **Note:** we just consider a subset of roman numerals which is less than 4k. 146 | 147 | 148 | #### Answer 149 | 150 | [wikipedia: Roman_numerals](http://en.wikipedia.org/wiki/Roman_numerals) 151 | 152 | - via wikipedia, we can categorize the single roman numerals into 4 groups: 153 | 154 | ``` 155 | I, II, III | I V | V, V I, V II, V III | I X 156 | ``` 157 | 158 | then get the production: 159 | 160 | ``` 161 | digit -> smallDigit | I V | V smallDigit | I X 162 | smallDigit -> I | II | III | ε 163 | ``` 164 | 165 | 166 | 167 | - and we can find a simple way to map roman to arabic numerals. 
For example: 168 | 169 | - XII => X, II => 10 + 2 => 12 170 | - CXCIX => C, XC, IX => 100 + 90 + 9 => 199 171 | - MDCCCLXXX => M, DCCC, LXXX => 1000 + 800 + 80 => 1880 172 | 173 | - via the upper two rules, we can derive the production: 174 | 175 | romanNum -> thousand hundred ten digit 176 | 177 | thousand -> M | MM | MMM | ε 178 | 179 | hundred -> smallHundred | C D | D smallHundred | C M 180 | 181 | smallHundred -> C | CC | CCC | ε 182 | 183 | ten -> smallTen | X L | L smallTen | X C 184 | 185 | smallTen -> X | XX | XXX | ε 186 | 187 | digit -> smallDigit | I V | V smallDigit | I X 188 | 189 | smallDigit -> I | II | III | ε 190 | -------------------------------------------------------------------------------- /ch02/2.2/assets/2.2.1-2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/muhammadalmuzahid/compiler/b4cce261315be3e6a4b49d66eeb3cc8d0591bf60/ch02/2.2/assets/2.2.1-2.png -------------------------------------------------------------------------------- /ch02/2.2/assets/2.2.3-3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/muhammadalmuzahid/compiler/b4cce261315be3e6a4b49d66eeb3cc8d0591bf60/ch02/2.2/assets/2.2.3-3.png -------------------------------------------------------------------------------- /ch02/2.2/assets/2.2.3-4.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/muhammadalmuzahid/compiler/b4cce261315be3e6a4b49d66eeb3cc8d0591bf60/ch02/2.2/assets/2.2.3-4.png -------------------------------------------------------------------------------- /ch02/2.2/assets/2.2.3-5.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/muhammadalmuzahid/compiler/b4cce261315be3e6a4b49d66eeb3cc8d0591bf60/ch02/2.2/assets/2.2.3-5.png 
-------------------------------------------------------------------------------- /ch02/2.3/2.3.md: -------------------------------------------------------------------------------- 1 | # 2.3 Exercises for Section 2.3 2 | 3 | ### 2.3.1 4 | 5 | Construct a syntax-directed translation scheme that translates arithmetic 6 | expressions from infix notation into prefix notation in which an operator 7 | appears before its operands; e.g., -xy is the prefix notation for x - y. Give 8 | annotated parse trees for the inputs 9-5+2 and 9-5*2. 9 | 10 | #### Answer 11 | 12 | productions: 13 | 14 | ``` 15 | expr -> expr + term 16 | | expr - term 17 | | term 18 | term -> term * factor 19 | | term / factor 20 | | factor 21 | factor -> digit | (expr) 22 | ``` 23 | 24 | translation schemes: 25 | 26 | ``` 27 | expr -> {print("+")} expr + term 28 | | {print("-")} expr - term 29 | | term 30 | term -> {print("*")} term * factor 31 | | {print("/")} term / factor 32 | | factor 33 | factor -> digit {print(digit)} 34 | | (expr) 35 | ``` 36 | 37 | ### 2.3.2 38 | 39 | Construct a syntax-directed translation scheme that translates arithmetic 40 | expressions from postfix notation into infix notation. Give annotated parse 41 | trees for the inputs 95-2* and 952*-. 42 | 43 | #### Answer 44 | 45 | productions: 46 | 47 | ``` 48 | expr -> expr expr + 49 | | expr expr - 50 | | expr expr * 51 | | expr expr / 52 | | digit 53 | ``` 54 | 55 | translation schemes: 56 | 57 | ``` 58 | expr -> expr {print("+")} expr + 59 | | expr {print("-")} expr - 60 | | {print("(")} expr {print(")*(")} expr {print(")")} * 61 | | {print("(")} expr {print(")/(")} expr {print(")")} / 62 | | digit {print(digit)} 63 | ``` 64 | 65 | #### Another reference answer 66 | 67 | ``` 68 | E -> {print("(")} E {print(op)} E {print(")")} op | digit {print(digit)} 69 | ``` 70 | 71 | ### 2.3.3 72 | 73 | Construct a syntax-directed translation scheme that translates integers into 74 | roman numerals. 
75 | 76 | #### Answer 77 | 78 | helper function: 79 | 80 | ``` 81 | repeat(sign, times) // repeat('a',2) = 'aa' 82 | ``` 83 | 84 | translation schemes: 85 | 86 | ``` 87 | num -> thousand hundred ten digit 88 | { num.roman = thousand.roman || hundred.roman || ten.roman || digit.roman; 89 | print(num.roman)} 90 | thousand -> low {thousand.roman = repeat('M', low.v)} 91 | hundred -> low {hundred.roman = repeat('C', low.v)} 92 | | 4 {hundred.roman = 'CD'} 93 | | high {hundred.roman = 'D' || repeat('C', high.v - 5)} 94 | | 9 {hundred.roman = 'CM'} 95 | ten -> low {ten.roman = repeat('X', low.v)} 96 | | 4 {ten.roman = 'XL'} 97 | | high {ten.roman = 'L' || repeat('X', high.v - 5)} 98 | | 9 {ten.roman = 'XC'} 99 | digit -> low {digit.roman = repeat('I', low.v)} 100 | | 4 {digit.roman = 'IV'} 101 | | high {digit.roman = 'V' || repeat('I', high.v - 5)} 102 | | 9 {digit.roman = 'IX'} 103 | low -> 0 {low.v = 0} 104 | | 1 {low.v = 1} 105 | | 2 {low.v = 2} 106 | | 3 {low.v = 3} 107 | high -> 5 {high.v = 5} 108 | | 6 {high.v = 6} 109 | | 7 {high.v = 7} 110 | | 8 {high.v = 8} 111 | ``` 112 | 113 | ### 2.3.4 114 | 115 | Construct a syntax-directed translation scheme that translates roman numerals into integers. 
116 | 117 | #### Answer 118 | 119 | productions: 120 | 121 | ``` 122 | romanNum -> thousand hundred ten digit 123 | thousand -> M | MM | MMM | ε 124 | hundred -> smallHundred | C D | D smallHundred | C M 125 | smallHundred -> C | CC | CCC | ε 126 | ten -> smallTen | X L | L smallTen | X C 127 | smallTen -> X | XX | XXX | ε 128 | digit -> smallDigit | I V | V smallDigit | I X 129 | smallDigit -> I | II | III | ε 130 | ``` 131 | 132 | translation schemes: 133 | 134 | ``` 135 | romanNum -> thousand hundred ten digit {romanNum.v = thousand.v || hundred.v || ten.v || digit.v; print(romanNum.v)} 136 | thousand -> M {thousand.v = 1} 137 | | MM {thousand.v = 2} 138 | | MMM {thousand.v = 3} 139 | | ε {thousand.v = 0} 140 | hundred -> smallHundred {hundred.v = smallHundred.v} 141 | | C D {hundred.v = 4} 142 | | D smallHundred {hundred.v = 5 + smallHundred.v} 143 | | C M {hundred.v = 9} 144 | smallHundred -> C {smallHundred.v = 1} 145 | | CC {smallHundred.v = 2} 146 | | CCC {smallHundred.v = 3} 147 | | ε {smallHundred.v = 0} 148 | ten -> smallTen {ten.v = smallTen.v} 149 | | X L {ten.v = 4} 150 | | L smallTen {ten.v = 5 + smallTen.v} 151 | | X C {ten.v = 9} 152 | smallTen -> X {smallTen.v = 1} 153 | | XX {smallTen.v = 2} 154 | | XXX {smallTen.v = 3} 155 | | ε {smallTen.v = 0} 156 | digit -> smallDigit {digit.v = smallDigit.v} 157 | | I V {digit.v = 4} 158 | | V smallDigit {digit.v = 5 + smallDigit.v} 159 | | I X {digit.v = 9} 160 | smallDigit -> I {smallDigit.v = 1} 161 | | II {smallDigit.v = 2} 162 | | III {smallDigit.v = 3} 163 | | ε {smallDigit.v = 0} 164 | ``` 165 | 166 | ### 2.3.5 167 | 168 | Construct a syntax-directed translation scheme that translates postfix 169 | arithmetic expressions into equivalent prefix arithmetic expressions. 
170 | 171 | #### Answer 172 | 173 | production: 174 | 175 | ``` 176 | expr -> expr expr op | digit 177 | ``` 178 | 179 | translation scheme: 180 | 181 | ``` 182 | expr -> {print(op)} expr expr op | digit {print(digit)} 183 | ``` 184 | -------------------------------------------------------------------------------- /ch02/2.4/2.4.1.1.c: -------------------------------------------------------------------------------- 1 | /* 2 | Compile with: 3 | gcc -Wall -o parser 2.4.1.1.c 4 | Run: ./parser 5 | 6 | The code will quietly parse well-formed expressions. For bad expressions that don't 7 | follow the grammar, it will complain with "Syntax error." 8 | 9 | Some example input: 10 | > ++++aaaaa 11 | > +++--+-+-+-+-+++-a-aaaaaaaaaaaaaaaaaa 12 | > 13 | 14 | Note that you cannot use spaces. The grammar doesn't allow for that. 15 | We could change gettoken() to ignore spaces, but we'd be introducing 16 | a problem. You see, when reading the input, the last lookahead symbol 17 | left is a newline. If we ignored whitespace, we would block on the call 18 | to gettoken() in the last production, because gettoken() would ignore 19 | whitespace! This is why a lot of languages require statements to be 20 | properly ended, for example, with a semicolon. 
21 | */ 22 | 23 | #include 24 | 25 | /* The lookahead symbol */ 26 | int token; 27 | 28 | int gettoken(void) { 29 | return token = getchar(); 30 | } 31 | 32 | void s(void) { 33 | if (token == 'a') 34 | gettoken(); 35 | else if (token == '+' || token == '-') { 36 | gettoken(); 37 | s(); 38 | s(); 39 | } 40 | else { 41 | fprintf(stderr, "Syntax error.\n"); 42 | } 43 | } 44 | 45 | int main(void) { 46 | printf("Please enter expressions according to the following grammar:\n"); 47 | printf("S -> '+' S S | '-' S S | 'a'\n"); 48 | while (1) { 49 | printf("> "); 50 | fflush(stdout); 51 | gettoken(); 52 | s(); 53 | } 54 | return 0; 55 | } 56 | 57 | -------------------------------------------------------------------------------- /ch02/2.4/2.4.1.2.c: -------------------------------------------------------------------------------- 1 | /* 2 | Compile with: 3 | gcc -Wall -o parser 2.4.1.2.c 4 | Run: ./parser 5 | 6 | The grammar given cannot be used as is, since it is left recursive. 7 | Recursive descent parsers cannot be implemented with a left recursive grammar, 8 | because it would loop forever! 9 | 10 | Instead, we apply the rule mentioned earlier in the book to convert this grammar 11 | to an equivalent grammar without left recursion. The general form of the rule is: 12 | 13 | A -> Aa | B 14 | 15 | is equivalent to: 16 | 17 | A -> BR 18 | R -> aR | epsilon 19 | 20 | In our case, A is S, and B is epsilon; we get: 21 | 22 | S -> R 23 | R -> '(' S ')' S 24 | | epsilon 25 | 26 | This is the grammar that the code below implements. 
27 | */ 28 | 29 | #include 30 | 31 | /* The lookahead symbol */ 32 | int token; 33 | 34 | int gettoken(void) { 35 | return token = getchar(); 36 | } 37 | 38 | void r(void); 39 | void s(void) { 40 | r(); 41 | } 42 | 43 | void r(void) { 44 | if (token != '(') 45 | return; /* R -> epsilon */ 46 | gettoken(); 47 | s(); 48 | if (token != ')') { 49 | fprintf(stderr, "Syntax error.\n"); 50 | return; 51 | } 52 | gettoken(); 53 | s(); 54 | } 55 | 56 | int main(void) { 57 | printf("Please enter expressions according to the following grammar:\n"); 58 | printf("S -> S '(' S ')' S | epsilon\n"); 59 | while (1) { 60 | printf("> "); 61 | fflush(stdout); 62 | gettoken(); 63 | s(); 64 | } 65 | return 0; 66 | } 67 | -------------------------------------------------------------------------------- /ch02/2.4/2.4.1.3.c: -------------------------------------------------------------------------------- 1 | /* 2 | Compile with: 3 | gcc -Wall -o parser 2.4.1.3.c 4 | Run: ./parser 5 | 6 | Again, the grammar given cannot be used as provided, because it is not clear which production 7 | to use when a '0' is read. In particular, FIRST('0' S '1') is not disjoint with FIRST('0' '1'), which 8 | makes it ambiguous for a predictive parser to move forward. 9 | One solution would be to recursively try the first production, and if unsuccessfull, try the second alternative. 10 | This would still make it a recursive descent parser, but there is a more intelligent approach. 11 | 12 | The grammar can be "factored out" by bringing together those rules where FIRST is '0'. The following grammar 13 | is equivalent to the original one, with the advantage that it can be implemented using a predictive parser: 14 | 15 | S -> '0' R 16 | R -> S '1' | '1' 17 | 18 | Basically, the rules starting with '0' were merged into a single rule, and the rest of the job delegated to R. 19 | Since S always starts with a '0' now, it is trivial to implement a predictive parser for this tweaked version. 
20 | */ 21 | 22 | #include 23 | 24 | /* The lookahead symbol */ 25 | int token; 26 | 27 | int gettoken(void) { 28 | return token = getchar(); 29 | } 30 | 31 | void r(void); 32 | void s(void) { 33 | if (token != '0') 34 | fprintf(stderr, "Syntax error.\n"); 35 | else { 36 | gettoken(); 37 | r(); 38 | } 39 | } 40 | 41 | void r(void) { 42 | switch (token) { 43 | case '0': 44 | s(); 45 | if (token != '1') 46 | fprintf(stderr, "Syntax error.\n"); 47 | gettoken(); 48 | break; 49 | case '1': 50 | gettoken(); 51 | break; 52 | default: 53 | fprintf(stderr, "Syntax error.\n"); 54 | break; 55 | } 56 | } 57 | 58 | int main(void) { 59 | printf("Please enter expressions according to the following grammar:\n"); 60 | printf("S -> '0' S '1' | '0' '1'\n"); 61 | while (1) { 62 | printf("> "); 63 | fflush(stdout); 64 | gettoken(); 65 | s(); 66 | } 67 | return 0; 68 | } 69 | -------------------------------------------------------------------------------- /ch02/2.4/2.4.md: -------------------------------------------------------------------------------- 1 | ## Exercises for Section 2.4 2 | 3 | ### 2.4.1 4 | 5 | Construct recursive-descent parsers, starting with the following grammars: 6 | 7 | 1. S -> + S S | - S S | a 8 | 2. S -> S ( S ) S | ε 9 | 3. S -> 0 S 1 | 0 1 10 | 11 | ### Answer 12 | 13 | See [2.4.1.1.c](./2.4.1.1.c), [2.4.1.2.c](./2.4.1.2.c), and [2.4.1.3.c](./2.4.1.3.c) for real implementations in C. 
14 | 15 | 1) S -> + S S | - S S | a 16 | 17 | ``` 18 | void S(){ // one alternative per possible lookahead 19 | switch(lookahead){ 20 | case "+": 21 | match("+"); S(); S(); 22 | break; 23 | case "-": 24 | match("-"); S(); S(); 25 | break; 26 | case "a": 27 | match("a"); 28 | break; 29 | default: 30 | throw new SyntaxException(); 31 | } 32 | } 33 | void match(Terminal t){ // consume t if it is the lookahead, otherwise fail 34 | if(lookahead == t){ 35 | lookahead = nextTerminal(); 36 | }else{ 37 | throw new SyntaxException(); 38 | } 39 | } 40 | ``` 41 | 42 | 2) S -> S ( S ) S | ε 43 | 44 | ``` 45 | void S(){ // any lookahead other than "(" selects the ε alternative 46 | if(lookahead == "("){ 47 | match("("); S(); match(")"); S(); 48 | } 49 | } 50 | ``` 51 | 52 | 3) S -> 0 S 1 | 0 1 53 | 54 | ``` 55 | void S(){ 56 | // left-factored as S -> 0 R, R -> S 1 | 1, so one symbol of lookahead picks the alternative 57 | match("0"); 58 | if(lookahead == "0"){ 59 | S(); 60 | } 61 | match("1"); 62 | } 63 | ``` 64 | 65 | 66 | -------------------------------------------------------------------------------- /ch02/2.6/2.6.md: -------------------------------------------------------------------------------- 1 | # Exercises for Section 2.6 2 | 3 | ### 2.6.1 4 | 5 | Extend the lexical analyzer in Section 2.6.5 to remove comments, defined as 6 | follows: 7 | 8 | 1. A comment begins with // and includes all characters until the end of that line. 9 | 2. A comment begins with /* and includes all characters through the next occurrence of the character sequence */. 10 | 11 | ### 2.6.2 12 | 13 | Extend the lexical analyzer in Section 2.6.5 to recognize the relational operators <, <=, ==, !=, >=, >. 14 | 15 | ### 2.6.3 16 | 17 | Extend the lexical analyzer in Section 2.6.5 to recognize floating point numbers 18 | such as 2., 3.14, and .5. 
19 | 20 | ### Answer 21 | 22 | Source code: commit 8dd1a9a 23 | 24 | Code snippet(src/lexer/Lexer.java): 25 | 26 | ``` 27 | public Token scan() throws IOException, SyntaxException{ 28 | for(;;peek = (char)stream.read()){ 29 | if(peek == ' ' || peek == '\t'){ 30 | continue; 31 | }else if(peek == '\n'){ 32 | line = line + 1; 33 | }else{ 34 | break; 35 | } 36 | } 37 | 38 | // handle comment 39 | if(peek == '/'){ 40 | peek = (char) stream.read(); 41 | if(peek == '/'){ 42 | // single line comment 43 | for(;;peek = (char)stream.read()){ 44 | if(peek == '\n'){ 45 | break; 46 | } 47 | } 48 | }else if(peek == '*'){ 49 | // block comment 50 | char prevPeek = ' '; 51 | for(;;prevPeek = peek, peek = (char)stream.read()){ 52 | if(prevPeek == '*' && peek == '/'){ 53 | break; 54 | } 55 | } 56 | }else{ 57 | throw new SyntaxException(); 58 | } 59 | } 60 | 61 | // handle relation sign 62 | if("<=!>".indexOf(peek) > -1){ 63 | StringBuffer b = new StringBuffer(); 64 | b.append(peek); 65 | peek = (char)stream.read(); 66 | if(peek == '='){ 67 | b.append(peek); 68 | } 69 | return new Rel(b.toString()); 70 | } 71 | 72 | // handle number, no type sensitive 73 | if(Character.isDigit(peek) || peek == '.'){ 74 | Boolean isDotExist = false; 75 | StringBuffer b = new StringBuffer(); 76 | do{ 77 | if(peek == '.'){ 78 | isDotExist = true; 79 | } 80 | b.append(peek); 81 | peek = (char)stream.read(); 82 | }while(isDotExist == true ? 
Character.isDigit(peek) : Character.isDigit(peek) || peek == '.'); 83 | return new Num(new Float(b.toString())); 84 | } 85 | 86 | // handle word 87 | if(Character.isLetter(peek)){ 88 | StringBuffer b = new StringBuffer(); 89 | do{ 90 | b.append(peek); 91 | peek = (char)stream.read(); 92 | }while(Character.isLetterOrDigit(peek)); 93 | String s = b.toString(); 94 | Word w = words.get(s); 95 | if(w == null){ 96 | w = new Word(Tag.ID, s); 97 | words.put(s, w); 98 | } 99 | return w; 100 | } 101 | 102 | Token t = new Token(peek); 103 | peek = ' '; 104 | return t; 105 | } 106 | ``` 107 | -------------------------------------------------------------------------------- /ch02/2.8/2.8.md: -------------------------------------------------------------------------------- 1 | # Exercises for Section 2.8 2 | 3 | ### 2.8.1 4 | 5 | For-statements in C and Java have the form: 6 | 7 | for ( exprl ; expr2 ; expr3 ) stmt 8 | 9 | The first expression is executed before the loop; it is typically used for 10 | initializing the loop index. The second expression is a test made before each 11 | iteration of the loop; the loop is exited if the expression becomes 0. The loop 12 | itself can be thought of as the statement {stmt expr3 ; }. The third expression 13 | is executed at the end of each iteration; it is typically used to increment the 14 | loop index. The meaning of the for-statement is similar to 15 | 16 | expr1 ; while ( expr2 ) {stmt expr3 ; } 17 | 18 | Define a class For for for-statements, similar to class If in Fig. 2.43. 
19 | 20 | #### Answer 21 | 22 | ``` 23 | class For extends Stmt { 24 | Expr E1; 25 | Expr E2; 26 | Expr E3; 27 | Stmt S; 28 | public For(Expr expr1, Expr expr2, Expr expr3, Stmt stmt){ 29 | E1 = expr1; 30 | E2 = expr2; 31 | E3 = expr3; 32 | S = stmt; 33 | } 34 | public void gen(){ 35 | E1.gen(); 36 | Label start = new Label(); 37 | Label end = new Label(); 38 | emit("ifFalse " + E2.rvalue().toString() + " goto " + end); 39 | S.gen(); 40 | E3.gen(); 41 | emit("goto " + start); 42 | emit(end + ":") 43 | } 44 | } 45 | ``` 46 | 47 | ### 2.8.2 48 | 49 | The programming language C does not have a boolean type. Show how a C compiler might translate an if-statement into three-address code. 50 | 51 | #### Answer 52 | 53 | Replace 54 | 55 | ``` 56 | emit("ifFalse " + E.rvalue().toString() + " goto " + after); 57 | ``` 58 | 59 | with 60 | 61 | ``` 62 | emit("ifEqual " + E.rvalue().toString() + " 0 goto " + after); 63 | ``` 64 | 65 | or 66 | 67 | ``` 68 | emit("ifEqualZero " + E.rvalue().toString() + " goto " + after); 69 | ``` 70 | 71 | -------------------------------------------------------------------------------- /ch02/key-point/assets/dragonbook-keypoint-2.2-2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/muhammadalmuzahid/compiler/b4cce261315be3e6a4b49d66eeb3cc8d0591bf60/ch02/key-point/assets/dragonbook-keypoint-2.2-2.png -------------------------------------------------------------------------------- /ch02/key-point/key-point.md: -------------------------------------------------------------------------------- 1 | # 第2章要点 2 | 3 | ### 1. 文法、语法制导翻译方案、语法制导的翻译器 4 | 5 | 以一个仅支持个位数加减法的表达式为例 6 | 7 | 1. 文法 8 | 9 | list -> list + digit | list - digit | digit 10 | 11 | digit -> 0 | 1 | … | 9 12 | 13 | 2. 
(消除了左递归的)语法制导翻译方案 14 | 15 | expr -> term rest 16 | 17 | rest -> + term { print('+') } rest | - term { print('-') } rest | ε 18 | 19 | term -> 0 { print('0') } | 1 { print('1') } | … | 9 { print('9') } 20 | 21 | 3. 语法制导的翻译器 22 | 23 | java代码见 p46 24 | 25 | ### 2. 语法树、语法分析树 26 | 27 | 以 2 + 5 - 9 为例 28 | 29 | ![语法树和语法分析树](https://raw.github.com/fool2fish/dragon-book-practice-answer/master/ch02/key-point/assets/dragonbook-keypoint-2.2-2.png) 30 | 31 | ### 3. 正则文法、上下文无关文法、上下文相关文法? 32 | 33 | 文法缩写: 34 | 35 | - RG:[正则文法](http://en.wikipedia.org/wiki/Regular_grammar) 36 | - CFG:[上下文无关文法](http://en.wikipedia.org/wiki/Context-free_grammar) 37 | - CSG:[上下文相关文法](http://en.wikipedia.org/wiki/Context-sensitive_grammar) 38 | 39 | #### 正则文法 40 | 41 | [wiki](http://en.wikipedia.org/wiki/Regular_grammar) 42 | 43 | 正则文法在标准化之后所有产生式都应该满足下面三种情形中的一种: 44 | 45 | B -> a 46 | B -> a C 47 | B -> epsilon 48 | 49 | 关键点在于: 50 | 51 | 1. 产生式的左手边必须是一个非终结符。 52 | 2. 产生式的右手边可以什么都没有,可以有一个终结符,也可以有一个终结符加一个非终结符。 53 | 54 | 从产生式的角度看,这样的规定使得每应用一条产生规则,就可以产生出零或一个终结符,直到最后产生出我们要的那个字符串。 55 | 56 | 从匹配的角度看,这样的规定使得每应用一条规则,就可以消耗掉一个非终结符,直到整个字符串被匹配掉。 57 | 58 | 这样定义的语言所对应的自动机有一种性质:有限状态自动机。 59 | 60 | 简单来说就是只需要记录当前的一个状态,和得到下一个输入符号,就可以决定接下来的状态迁移。 61 | 62 | #### 正则文法和上下文无关文法 63 | 64 | CFG 跟 RG 最大的区别就是,产生式的右手边可以有零或多个终结符或非终结符,顺序和个数都没限制。 65 | 66 | 想像一个经典例子,括号的配对匹配: 67 | 68 | expr -> '(' expr ')' | epsilon 69 | 70 | 这个产生式里(先只看第一个子产生式),右手边有一个非终结符 expr,但它的左右两侧都有终结符,这种产生式无法被标准化为严格的 RG 。这就是CFG的一个例子。 71 | 72 | 它对应的自动机就不只要记录当前的一个状态,还得外加记录到达当前位置的历史,才可以根据下一个输入符号决定状态迁移。所谓的“历史”在这里就是存着已匹配规则的栈。 73 | 74 | CFG 对应的自动机为 PDA(下推自动机)。 75 | 76 | RG 的规定严格,对应的好处是它对应的自动机非常简单,所以可以用非常高效且简单的方式来实现。 77 | 78 | #### 上下文相关文法 79 | 80 | CSG 在 CFG的基础上进一步放宽限制。 81 | 82 | 产生式的左手边也可以有终结符和非终结符。左手边的终结符就是“上下文”的来源。也就是说匹配的时候不能光看当前匹配到哪里了,还得看当前位置的左右到底有啥(也就是上下文是啥),上下文在这条规则应用的时候并不会被消耗掉,只是“看看”。 83 | 84 | CSG 的再上一层是 PSG,phrase structure grammar。 85 | 86 | 基本上就是CSG的限制全部取消掉。 87 | 88 | 左右两边都可以有任意多个、任意顺序的终结符和非终结符。 89 | 90 | 反正不做自然语言处理的话也不会遇到这种文法,所以具体就不说了。 91 | 92 | ### 4. 
为什么有 n 个运算符的优先级,就对应 n+1 个产生式? 93 | 94 | 优先级的处理可以在纯文法层面解决,也可以在parser实现中用别的办法处理掉。 95 | 96 | 纯文法层面书上介绍的,有多少个优先级就有那么多加1个产生式。 97 | 98 | 书上介绍的四则运算的文法,会使得加减法离根比较近,乘除法离根比较远。 99 | 100 | 语法树的形状决定了节点的计算顺序,离根远的节点就会先处理,这样看起来就是乘除法先计算,也就是乘除法的优先级更高。 101 | 102 | 参考:http://rednaxelafx.iteye.com/blog/492667 103 | 104 | ### 5. 避免二义性文法的有效原则? 105 | 106 | 二义性问题主要是跟 CFG 的特性有关系的。 107 | 108 | CFG 的选择结构("|")是没有规定顺序或者说优先级的, 109 | 同时,多个规则可能会有共同前缀, 110 | 这样才会有二义性问题。 111 | 112 | PEG 是跟CFG类似的一种东西,语言的表达力上跟CFG相似。 113 | 但文法层面没有二义性,因为它的选择结构("|")是有顺序或者说有优先级的。 114 | 115 | ### 6. 避免预测分析器因左递归文法造成的无限循环 116 | 117 | 产生式: 118 | 119 | A -> A x | y 120 | 121 | 语法制导翻译伪代码片段: 122 | 123 | void A(){ 124 | switch(lookahead){ 125 | case x: 126 | A();match(x);break; 127 | case y: 128 | match(y):break; 129 | default: 130 | report("syntax error") 131 | } 132 | } 133 | 134 | 当语句符合 A x 形式时, A() 运算会陷入死循环,可以通过将产生式改为等价的非左递归形式来避免: 135 | 136 | B -> y C 137 | 138 | C -> x C | ε 139 | 140 | ### 7. 为什么在右递归的文法中,包含了左结合运算符的表达式翻译会比较困难? 141 | 142 | ### 8. 中间代码生成时的左值和右值问题。 143 | 144 | 看了书上 lvalue() 和 rvalue() 的伪代码,感觉可以做左值也可以做右值的都由 lvalue() 处理,而对于右值的处理,要么自己处理掉了,对于可以作为左值的右值则调用 lvalue()。 145 | 146 | 为什么不直接弄个 value() 就结了? 147 | -------------------------------------------------------------------------------- /ch03/3.1/3.1.md: -------------------------------------------------------------------------------- 1 | # Exercises for Section 3.1 2 | 3 | ### 3.1.1 4 | 5 | Divide the following C++ program: 6 | 7 | ``` 8 | float limitedSquare(x){float x; 9 | /* returns x-squared, nut never more than 100 */ 10 | return (x <= -10.0 || x >= 10.0) ? 100 : x*x; 11 | } 12 | ``` 13 | 14 | into appropriate lexemes, using the discussion of Section 3.1.2 as a guide. 15 | Which lexemes should get associated lexical values? What should those values be? 
16 | 17 | #### Answer 18 | 19 | ``` 20 | <float> <id, limitedSquare> <(> <id, x> <)> <{> 21 | <float> <id, x> 22 | <return> <(> <id, x> <op, "<="> <num, -10.0> <op, "||"> <id, x> <op, ">="> <num, 10.0> <)> <op, "?"> <num, 100> <op, ":"> <id, x> <op, "*"> <id, x> 23 | <}> 24 | ``` 25 | 26 | ### 3.1.2 27 | 28 | Tagged languages like HTML or XML are different from conventional programming 29 | languages in that the punctuation (tags) are either very numerous (as in HTML) 30 | or a user-definable set (as in XML). Further, tags can often have parameters. 31 | Suggest how to divide the following HTML document: 32 | 33 | ``` 34 | Here is a photo of <b>my house</b>; 35 | <p><img src="house.gif"/><br/> 36 | see <a href="morePix.html">More Picture</a> if you 37 | liked that one.</p>
38 | ``` 39 | 40 | into appropriate lexemes. Which lexemes should get associated lexical values, and what should those values be? 41 | 42 | #### Answer 43 | 44 | ``` 45 | 46 | 47 | 48 | 49 | ``` 50 | -------------------------------------------------------------------------------- /ch03/3.3/3.3.md: -------------------------------------------------------------------------------- 1 | # Exercises for Section 3.3 2 | 3 | ### 3.3.1 4 | 5 | Consult the language reference manuals to determine 6 | 7 | 1. the sets of characters that form the input alphabet (excluding those that may only appear in character strings or comments) 8 | 2. the lexical form of numerical constants, and 9 | 3. the lexical form of identifiers, 10 | 11 | for each of the following languages: 12 | 13 | 1. C 14 | 2. C++ 15 | 3. C# 16 | 4. Fortran 17 | 5. Java 18 | 6. Lisp 19 | 7. SQL 20 | 21 | ### 3.3.2 22 | 23 | Describe the languages denoted by the following regular expressions: 24 | 25 | 1. a(a|b)\*a 26 | 2. ((ε|a)b\*)\* 27 | 3. (a|b)*a(a|b)(a|b) 28 | 4. a\*ba\*ba\*ba\* 29 | 5. !! (aa|bb)\*((ab|ba)(aa|bb)\*(ab|ba)(aa|bb)\*)\* 30 | 31 | #### Answer 32 | 33 | 1. String of a's and b's that start and end with a. 34 | 2. String of a's and b's. 35 | 3. String of a's and b's that the character third from the last is a. 36 | 4. String of a's and b's that only contains three b. 37 | 5. String of a's and b's that has a even number of a and b. 38 | 39 | ### 3.3.3 40 | 41 | In a string of length n, how many of the following are there? 42 | 43 | 1. Prefixes. 44 | 2. Suffixes. 45 | 3. Proper prefixes. 46 | 4. ! Substrings. 47 | 5. ! Subsequences. 48 | 49 | #### Answer 50 | 51 | 1. n + 1 52 | 2. n + 1 53 | 3. n - 1 54 | 4. C(n+1,2) + 1 (need to count epsilon in) 55 | 5. Σ(i=0,n) C(n, i) 56 | 57 | ### 3.3.4 58 | 59 | Most languages are case sensitive, so keywords can be written only one way, and the regular expressions describing their lexeme is very simple. 
However, some languages, like SQL, are case insensitive, so a keyword can be written either in lowercase or in uppercase, or in any mixture of cases. Thus, the SQL keyword SELECT can also be written select, Select, or sElEcT, for instance. Show how to write a regular expression for a keyword in a case­ insensitive language. Illustrate the idea by writing the expression for "select" in SQL. 60 | 61 | #### Answer 62 | 63 | ``` 64 | select -> [Ss][Ee][Ll][Ee][Cc][Tt] 65 | ``` 66 | 67 | ### 3.3.5 68 | 69 | !Write regular definitions for the following languages: 70 | 71 | 1. All strings of lowercase letters that contain the five vowels in order. 72 | 2. All strings of lowercase letters in which the letters are in ascending lexicographic order. 73 | 3. Comments, consisting of a string surrounded by /* and */, without an intervening */, unless it is inside double-quotes (") 74 | 4. !! All strings of digits with no repeated digits. Hint: Try this problem first with a few digits, such as {O, 1, 2}. 75 | 5. !! All strings of digits with at most one repeated digit. 76 | 6. !! All strings of a's and b's with an even number of a's and an odd number 77 | of b's. 78 | 7. The set of Chess moves,in the informal notation,such as p-k4 or kbp*qn. 79 | 8. !! All strings of a's and b's that do not contain the substring abb. 80 | 9. All strings of a's and b's that do not contain the subsequence abb. 81 | 82 | #### Answer 83 | 84 | 1、 85 | 86 | ``` 87 | want -> other* a (other|a)* e (other|e)* i (other|i)* o (other|o)* u (other|u)* 88 | other -> [bcdfghjklmnpqrstvwxyz] 89 | ``` 90 | 91 | 2、 92 | 93 | ``` 94 | a* b* ... z* 95 | ``` 96 | 97 | 3、 98 | 99 | ``` 100 | \/\*([^*"]*|".*"|\*+[^/])*\*\/ 101 | ``` 102 | 103 | 4、 104 | 105 | ``` 106 | want -> 0|A?0?1(A0?1|01)*A?0?|A0? 107 | A -> 0?2(02)* 108 | ``` 109 | 110 | Steps: 111 | 112 | step1. 
Transition diagram 113 | 114 | ![Transition diagram](https://raw.github.com/fool2fish/dragon-book-practice-answer/master/ch03/3.3/assets/3.3.5-4.gif) 115 | 116 | step2. GNFA 117 | 118 | ![GNFA](https://raw.github.com/fool2fish/dragon-book-practice-answer/master/ch03/3.3/assets/3.3.5-4-1.gif) 119 | 120 | step3. Remove node 0 and simplify 121 | 122 | ![Remove node 0 and simplify](https://raw.github.com/fool2fish/dragon-book-practice-answer/master/ch03/3.3/assets/3.3.5-4-2.gif) 123 | 124 | step4. Remove node 2 and simplify 125 | 126 | ![Remove node 2 and simplify](https://raw.github.com/fool2fish/dragon-book-practice-answer/master/ch03/3.3/assets/3.3.5-4-3.gif) 127 | 128 | step5. Remove node 1 and simplify 129 | 130 | ![Remove node 1 and simplify](https://raw.github.com/fool2fish/dragon-book-practice-answer/master/ch03/3.3/assets/3.3.5-4-4.gif) 131 | 132 | 5、 133 | 134 | ``` 135 | want -> (FE*G|(aa)*b)(E|FE*G) 136 | E -> b(aa)*b 137 | F -> a(aa)*b 138 | G -> b(aa)*ab|a 139 | F -> ba(aa)*b 140 | ``` 141 | 142 | Steps: 143 | 144 | step1. Transition diagram 145 | 146 | ![转换图](https://raw.github.com/fool2fish/dragon-book-practice-answer/master/ch03/3.3/assets/3.3.5-5.gif) 147 | 148 | step2. GNFA 149 | 150 | ![GNFA](https://raw.github.com/fool2fish/dragon-book-practice-answer/master/ch03/3.3/assets/3.3.5-5-1.gif) 151 | 152 | step3. Remove node A and simplify 153 | 154 | ![Remove node A and simplify](https://raw.github.com/fool2fish/dragon-book-practice-answer/master/ch03/3.3/assets/3.3.5-5-2.gif) 155 | 156 | step4. Remove node D and simplify 157 | 158 | ![Remove node D and simplify](https://raw.github.com/fool2fish/dragon-book-practice-answer/master/ch03/3.3/assets/3.3.5-5-3.gif) 159 | 160 | step5. 
Remove node C and simplify 161 | 162 | ![Remove node C and simplify](https://raw.github.com/fool2fish/dragon-book-practice-answer/master/ch03/3.3/assets/3.3.5-5-4.gif) 163 | 164 | 165 | 8、 166 | 167 | ``` 168 | b*(a+b?)* 169 | ``` 170 | 171 | 9、 172 | 173 | ``` 174 | b* | b*a+ | b*a+ba* 175 | ``` 176 | 177 | ### 3.3.6 178 | 179 | Write character classes for the following sets of characters: 180 | 181 | 1. The first ten letters (up to "j") in either upper or lower case. 182 | 2. The lowercase consonants. 183 | 3. The "digits" in a hexadecimal number (choose either upper or lower case for the "digits" above 9). 184 | 4. The characters that can appear at the end of alegitimate English sentence (e.g. , exclamation point) . 185 | 186 | #### Answer 187 | 188 | 1. [A-Ja-j] 189 | 2. [bcdfghjklmnpqrstvwxzy] 190 | 3. [0-9a-f] 191 | 4. [.?!] 192 | 193 | ### 3.3.7 194 | 195 | Note that these regular expressions give all of the following symbols (operator characters) a special meaning: 196 | 197 | ``` 198 | \ " . ^ $ [ ] * + ? { } | / 199 | ``` 200 | 201 | Their special meaning must be turned off if they are needed to represent 202 | themselves in a character string. We can do so by quoting the character within a 203 | string of length one or more; e.g., the regular expression "\*\*" matches the 204 | string \*\* . We can also get the literal meaning of an operator character by 205 | preceding it by a backslash. Thus, the regular expression \\\*\\\* also matches 206 | the string \*\*. Write a regular expression that matches the string "\\. 207 | 208 | #### Answer 209 | 210 | ``` 211 | \"\\ 212 | ``` 213 | 214 | ### 3.3.9 ! 215 | 216 | The regular expression r{m, n} matches from m to n occurrences of the pattern r. 217 | For example, a [ 1 , 5] matches a string of one to five a's. Show that for every 218 | regular expression containing repetition operators of this form, there is an 219 | equivalent regular expression without repetition operators. 
220 | 221 | #### Answer 222 | 223 | r{m,n} is equals to r.(m).r | r.(m + 1).r | ... | r.(n).r 224 | 225 | ### 3.3.10 ! 226 | 227 | The operator ^ matches the left end of a line, and $ matches the right end of a 228 | line. The operator ^ is also used to introduce complemented character classes, 229 | but the context always makes it clear which meaning is intended. For example, 230 | ^[^aeiou]*$ matches any complete line that does not contain a lowercase vowel. 231 | 232 | 1. How do you tell which meaning of ^ is intended? 233 | 2. Can you always replace a regular expression using the ^ and $ operators by an equivalent expression that does not use either of these operators? 234 | 235 | #### Answer 236 | 237 | 1. if ^ is in a pair of brakets, and it is the first letter, it means complemented classes, or it means the left end of a line. 238 | 239 | 240 | -------------------------------------------------------------------------------- /ch03/3.3/assets/3.3.5-4-1.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/muhammadalmuzahid/compiler/b4cce261315be3e6a4b49d66eeb3cc8d0591bf60/ch03/3.3/assets/3.3.5-4-1.gif -------------------------------------------------------------------------------- /ch03/3.3/assets/3.3.5-4-2.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/muhammadalmuzahid/compiler/b4cce261315be3e6a4b49d66eeb3cc8d0591bf60/ch03/3.3/assets/3.3.5-4-2.gif -------------------------------------------------------------------------------- /ch03/3.3/assets/3.3.5-4-3.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/muhammadalmuzahid/compiler/b4cce261315be3e6a4b49d66eeb3cc8d0591bf60/ch03/3.3/assets/3.3.5-4-3.gif -------------------------------------------------------------------------------- /ch03/3.3/assets/3.3.5-4-4.gif: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/muhammadalmuzahid/compiler/b4cce261315be3e6a4b49d66eeb3cc8d0591bf60/ch03/3.3/assets/3.3.5-4-4.gif -------------------------------------------------------------------------------- /ch03/3.3/assets/3.3.5-4.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/muhammadalmuzahid/compiler/b4cce261315be3e6a4b49d66eeb3cc8d0591bf60/ch03/3.3/assets/3.3.5-4.gif -------------------------------------------------------------------------------- /ch03/3.3/assets/3.3.5-5-1.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/muhammadalmuzahid/compiler/b4cce261315be3e6a4b49d66eeb3cc8d0591bf60/ch03/3.3/assets/3.3.5-5-1.gif -------------------------------------------------------------------------------- /ch03/3.3/assets/3.3.5-5-2.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/muhammadalmuzahid/compiler/b4cce261315be3e6a4b49d66eeb3cc8d0591bf60/ch03/3.3/assets/3.3.5-5-2.gif -------------------------------------------------------------------------------- /ch03/3.3/assets/3.3.5-5-3.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/muhammadalmuzahid/compiler/b4cce261315be3e6a4b49d66eeb3cc8d0591bf60/ch03/3.3/assets/3.3.5-5-3.gif -------------------------------------------------------------------------------- /ch03/3.3/assets/3.3.5-5-4.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/muhammadalmuzahid/compiler/b4cce261315be3e6a4b49d66eeb3cc8d0591bf60/ch03/3.3/assets/3.3.5-5-4.gif -------------------------------------------------------------------------------- /ch03/3.3/assets/3.3.5-5.gif: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/muhammadalmuzahid/compiler/b4cce261315be3e6a4b49d66eeb3cc8d0591bf60/ch03/3.3/assets/3.3.5-5.gif -------------------------------------------------------------------------------- /ch03/3.4/3.4.md: -------------------------------------------------------------------------------- 1 | # 3.4 节的练习 2 | 3 | ### 3.4.1 4 | 5 | 给出识别练习 3.3.2 中各个正则表达式所描述的语言状态转换图。 6 | 7 | #### 解答 8 | 9 | 解答步骤:NFA -> DFA -> 最少状态的 DFA(状态转换图) 10 | 11 | 1. a(a|b)*a 12 | 13 | NFA: 14 | 15 | ![3 4 1-1-nfa](https://f.cloud.github.com/assets/340282/412343/c8b405b2-abae-11e2-8536-c7a075ad3acd.gif) 16 | 17 | DFA: 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 | 53 | 54 |
| NFA | DFA | a | b |
| --- | --- | --- | --- |
| {0} | A | B | |
| {1,2,3,5,8} | B | C | D |
| {2,3,4,5,7,8,9} | C | C | D |
| {2,3,5,6,7,8} | D | C | D |
55 | 56 | ![3 4 1-1-dfa](https://f.cloud.github.com/assets/340282/412345/d33f4a1e-abae-11e2-8d56-e6230fb5f651.gif) 57 | 58 | 59 | 最少状态的 DFA(状态转换图): 60 | 61 | 合并不可区分的状态 B 和 D 62 | 63 | ![3 4 1-1](https://f.cloud.github.com/assets/340282/155878/fd81a78c-7674-11e2-9cdc-8097e665161f.gif) 64 | 65 | 2. ((ε|a)b\*)\* 66 | 67 | ![3 4 1-2](https://f.cloud.github.com/assets/340282/2431092/f5420e04-ad19-11e3-9b6d-40549618e28c.gif) 68 | 69 | 3. (a|b)*a(a|b)(a|b) 70 | 71 | NFA: 72 | 73 | ![3 4 1-3-nfa](https://f.cloud.github.com/assets/340282/412439/3ad802f0-abb5-11e2-90d8-b8e9bf070744.gif) 74 | 75 | 76 | DFA: 77 | 78 | 79 | 80 | 81 | 82 | 83 | 84 | 85 | 86 | 87 | 88 | 89 | 90 | 91 | 92 | 93 | 94 | 95 | 96 | 97 | 98 | 99 | 100 | 101 | 102 | 103 | 104 | 105 | 106 | 107 | 108 | 109 | 110 | 111 | 112 | 113 | 114 | 115 | 116 | 117 | 118 | 119 | 120 | 121 | 122 | 123 | 124 | 125 | 126 | 127 | 128 | 129 | 130 | 131 | 132 | 133 | 134 | 135 | 136 | 137 | 138 | 139 | 140 | 141 | 142 | 143 |
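| NFA | DFA | a | b |
| --- | --- | --- | --- |
| {0,1,2,4,7} | A | B | C |
| {1,2,3,4,6,7,8,9,11} | B | D | E |
| {1,2,4,5,6,7} | C | B | C |
| {1,2,3,4,6,7,8,9,10,11,13,14,16} | D | F | G |
| {1,2,4,5,6,7,12,13,14,16} | E | H | I |
| {1,2,3,4,6,7,8,9,10,11,13,14,15,16,18} | F | F | G |
| {1,2,4,5,6,7,12,13,14,16,17,18} | G | H | I |
| {1,2,3,4,6,7,8,9,11,15,18} | H | D | E |
| {1,2,4,5,6,7,17,18} | I | B | C |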
144 | 145 | 最少状态的 DFA(状态转换图): 146 | 147 | 合并不可区分的状态 A 和 C 148 | 149 | ![3 4 1-3](https://f.cloud.github.com/assets/340282/412536/700de2e0-abbb-11e2-9f34-1a2605c8eff4.gif) 150 | 151 | 152 | 4. a\*ba\*ba\*ba\* 153 | 154 | ![3 4 1-4](https://f.cloud.github.com/assets/340282/155898/46631d86-7676-11e2-85a6-0d7c79993502.gif) 155 | 156 | ### 3.4.2 157 | 158 | 给出识别练习 3.3.5 中各个正则表达式所描述语言的状态转换图。 159 | 160 | ### 3.4.3 161 | 162 | 构造下列串的失效函数。 163 | 164 | 1. abababaab 165 | 2. aaaaaa 166 | 3. abbaabb 167 | 168 | #### 解答 169 | 170 | 代码详见:[src/failure-function.js](src/failure-function.js) 171 | 172 | 1. [ 0, 0, 1, 2, 3, 4, 5, 1, 2 ] 173 | 2. [ 0, 1, 2, 3, 4, 5 ] 174 | 3. [ 0, 0, 0, 1, 1, 2, 3 ] 175 | 176 | ### 3.4.4 ! 177 | 178 | 对 s 进行归纳,证明图 3-19 的算法正确地计算出了失效函数。 179 | 180 | **图 3-19:计算关键字 b_1b_2…b_n 的失效函数** 181 | 182 | 01 t = 0; 183 | 02 f(1) = 0; 184 | 03 for (s = 1; s < n; s ++){ 185 | 04 while( t > 0 && b_s+1 != b_t+1) t = f(t); 186 | 05 if(b_s+1 == b_t+1){ 187 | 06 t = t + 1; 188 | 07 f(s + 1) = t; 189 | 08 }else{ 190 | 09 f(s + 1) = 0; 191 | 10 } 192 | 11 } 193 | 194 | #### 证明 195 | 196 | 1. 已知 f(1) = 0 197 | 2. 在第 1 次 for 循环时,计算 f(2) 的值,当第5行代码 b_2 == b_1 成立时,代码进入到第7行得出 f(2) = 1,不成立时,则代码进入第9行得出 f(2) = 0。显然,这次循环正确的计算出了 f(2) 。 198 | 3. 假设在第 i-1 次进入循环时,也正确的计算出了 f(i),也有 f(i) = t (无论 t 是大于 0 还是等于 0) 199 | 4. 那么在第 1 次进入循环时,分两种情况进行考虑: 200 | 1. t == 0 201 | 202 | 这种情况比较简单,直接从第 5 行开始,当 b_i+1 == b_1 时,f(i+1) = 1,否则 f(i+1) = 0 203 | 2. t > 0 204 | while 循环会不断缩小 t 值,试图找出最大可能的使得 b_i+1 == b_t+1 成立的 t 值,如果找到了,则进入第 5 行执行,得到 f(i+1) = t+1;或者直到 t == 0 时也没有找到,则跳出循环,这时进入第 5 行执行,过程类似于前一种情况。 205 | 206 | ### 3.4.5 !! 207 | 208 | 说明图 3-19 中的第 4 行的复制语句 t = f(t) 最多被执行 n 次。进而说明整个算法的时间复杂度是 O(n),其中 n 是关键字长度。 209 | 210 | #### 解答 211 | 212 | 详见 matrix 的博文 [KMP算法详解](http://www.matrix67.com/blog/archives/115)。 213 | 214 | ### 3.4.6 215 | 216 | 应用 KMP 算法判断关键字 ababaa 是否为下面字符串的子串: 217 | 218 | 1. abababaab 219 | 2. 
abababbaa 220 | 221 | #### 解答 222 | 223 | 代码详见:[src/kmp.js](src/kmp.js) 224 | 225 | 1. true 226 | 2. false 227 | 228 | 229 | ### 3.4.7 !! 230 | 231 | 说明图 3-20 中的算法可以正确的表示输入关键字是否为一个给定字符串的子串。 232 | 233 | **图 3-20:KMP 算法在 O(m + n) 的时间内检测字符串 a_1a_2...a_m 中是否包含单个关键字 b_1b_2...b_n** 234 | 235 | s = 0; 236 | for(i = 1; i <= m; i ++){ 237 | while(s > 0 && a_i != b_s+1) s = f(s); 238 | if(a_i == b_s+1) s = s + 1; 239 | if(s == n) return "yes"; 240 | } 241 | return "no"; 242 | 243 | ### 3.4.8 244 | 245 | 假设已经计算得到函数 f 且它的值存储在一个以 s 为下标的数组中,说明图 3-20 中算法的时间复杂度为 O(m + n)。 246 | 247 | #### 解答 248 | 249 | 详见 matrix 的博文 [KMP算法详解](http://www.matrix67.com/blog/archives/115)。 250 | 251 | ### 3.4.9 252 | 253 | Fibonacci 字符串的定义如下: 254 | 255 | 1. s1 = b 256 | 2. s2 = a 257 | 3. 当 k > 2 时, sk = sk-1 sk-2 258 | 259 | 例如:s3 = ab, s4 = aba, s5 = abaab 260 | 261 | 1. sn 的长度是多少? 262 | 2. 构造 s6 的失效函数。 263 | 3. 构造 s7 的失效函数。 264 | 4. !! 说明任何 sn 的失效函数都可以被表示为:f(1) = f(2) = 0,且对于 2 < j <= |sn|, f(j) = j - |sk-1|,其中 k 是使得 |sk| <= j + 1 的最大整数。 265 | 5. !! 在 KMP 算法中,当我们试图确定关键字 sk 是否出现在字符串 sk+1 中,最多会连续多少次调用失效函数? 266 | 267 | #### 解答 268 | 269 | 1. 见 [维基百科](http://zh.wikipedia.org/wiki/%E6%96%90%E6%B3%A2%E9%82%A3%E5%A5%91%E6%95%B0%E5%88%97) 270 | 2. s6 = abaababa 271 | 272 | failure = [ 0, 0, 1, 1, 2, 3, 2, 3 ] 273 | 274 | 3. 
s7 = abaababaabaab 275 | 276 | failure = [ 0, 0, 1, 1, 2, 3, 2, 3, 4, 5, 6, 4, 5 ] 277 | 278 | 279 | 280 | 281 | 282 | 283 | 284 | 285 | 286 | 287 | 288 | 289 | 290 | 291 | 292 | 293 | 294 | 295 | 296 | 297 | 298 | 299 | -------------------------------------------------------------------------------- /ch03/3.4/assets/3.4.1-1-dfa.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/muhammadalmuzahid/compiler/b4cce261315be3e6a4b49d66eeb3cc8d0591bf60/ch03/3.4/assets/3.4.1-1-dfa.gif -------------------------------------------------------------------------------- /ch03/3.4/assets/3.4.1-1-nfa.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/muhammadalmuzahid/compiler/b4cce261315be3e6a4b49d66eeb3cc8d0591bf60/ch03/3.4/assets/3.4.1-1-nfa.gif -------------------------------------------------------------------------------- /ch03/3.4/assets/3.4.1-1.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/muhammadalmuzahid/compiler/b4cce261315be3e6a4b49d66eeb3cc8d0591bf60/ch03/3.4/assets/3.4.1-1.gif -------------------------------------------------------------------------------- /ch03/3.4/assets/3.4.1-2.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/muhammadalmuzahid/compiler/b4cce261315be3e6a4b49d66eeb3cc8d0591bf60/ch03/3.4/assets/3.4.1-2.gif -------------------------------------------------------------------------------- /ch03/3.4/assets/3.4.1-3-nfa.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/muhammadalmuzahid/compiler/b4cce261315be3e6a4b49d66eeb3cc8d0591bf60/ch03/3.4/assets/3.4.1-3-nfa.gif -------------------------------------------------------------------------------- /ch03/3.4/assets/3.4.1-3.gif: 
// ---- ch03/3.4/src/failure-function.js ----
module.exports = failureFunction

/**
 * Computes the KMP failure function of a keyword.
 *
 * Entry i of the result is the length of the longest proper prefix of
 * str[0..i] that is also a suffix of it.
 *
 * @param {string} str the keyword
 * @returns {number[]} one entry per character of str; [] for the empty string
 */
function failureFunction(str) {
  // An empty keyword has no positions, so it has no failure values either.
  if (str.length === 0) {
    return []
  }
  var failure = [0]
  var j = 0
  for (var i = 1; i < str.length; i++) {
    // Fall back to shorter and shorter borders until one can be extended.
    while (j > 0 && str[j] != str[i]) {
      j = failure[j - 1]
    }
    if (str[j] == str[i]) {
      j++
    }
    failure[i] = j
  }
  return failure
}

// ---- ch03/3.4/src/kmp.js ----
// (kmp.js obtains the helper with require('./failure-function'))
module.exports = kmp

/**
 * Tells whether `search` occurs in `str` as a substring, using the KMP
 * algorithm.
 *
 * @param {string} str    the text to scan
 * @param {string} search the keyword to look for
 * @returns {boolean} true when search is a substring of str; the empty
 *                    keyword is a substring of every string
 */
function kmp(str, search) {
  // Without this guard an empty keyword only "matched" when str was non-empty.
  if (search.length === 0) {
    return true
  }
  var failure = failureFunction(search)
  var s = 0 // number of keyword characters matched so far
  for (var i = 0; i < str.length; i++) {
    while (s > 0 && str[i] != search[s]) {
      s = failure[s - 1]
    }
    if (str[i] == search[s]) {
      s = s + 1
    }
    if (s == search.length) {
      return true
    }
  }
  return false
}
3.23: 7 | 8 | 1. Add the keyword while. 9 | 2. Change the comparison operators to be the C operators of that kind. 10 | 3. Allow the underscore ( _ ) as an additional letter. 11 | 4. ! Add a new pattern with token STRING. The pattern consists of a double- 12 | quote ("), any string of characters and a final double-quote. However, 13 | if a double-quote appears in the string, it must be escaped by preceding 14 | it with a backslash (\), and therefore a backslash in the string must be 15 | represented by two backslashes. The lexical value is the string 16 | without the surrounding double-quotes, and with backslashes used to 17 | escape a character removed. Strings are to be installed in a table of strings. 18 | 19 | [source](./src/lex.l) 20 | 21 | ### 3.5.2 22 | 23 | Write a Lex program that copies a file, replacing each non- 24 | empty sequence of white space by a single blank. 25 | 26 | ### 3.5.3 27 | 28 | Write a Lex program that copies a C program, replacing each 29 | instance of the keyword float by double. 30 | 31 | ### 3.5.4 ! 32 | 33 | Write a Lex program that converts a file to "Pig latin." 34 | Specifically, assume the file is a sequence of words (groups of letters) separated 35 | by whitespace. Every time you encounter a word: 36 | 37 | 1. If the first letter is a consonant, move it to the end of the word and then 38 | add ay. 39 | 2. If the first letter is a vowel, just add ay to the end of the word. 40 | 41 | All nonletters are copied intact to the output. 42 | 43 | [source](./src/lex2.l) 44 | 45 | ### 3.5.5 ! 46 | 47 | In SQL, keywords and identifiers are case-insensitive. Write 48 | a Lex program that recognizes the keywords SELECT, FROM, and WHERE (in any 49 | combination of capital and lower-case letters), and token ID, which for the 50 | purposes of this exercise you may take to be any sequence of letters and digits, 51 | beginning with a letter. 
%{
/* Manifest constants: token names returned by the scanner and the
   relational-operator codes stored in yylval. They start at 256 so that
   they cannot collide with single-character tokens. */
enum {
    LT = 256, LE, EQ, NE, GT, GE,
    IF, THEN, ELSE, ID, NUMBER, RELOP, WHILE, STRING
};

/* NOTE(review): yylval is assumed to be defined by the parser this scanner
   is linked with - confirm. */
extern int yylval;

int installID(void);
int installNum(void);
int installString(void);
%}

/* regular definitions */
delim     [ \t\n]
ws        {delim}+

/* Exercise 3.5.1 - c) the underscore counts as a letter, so it may also
   start an identifier */
letter    [A-Za-z_]
digit     [0-9]
id        {letter}({letter}|{digit})*

number    {digit}+(\.{digit}+)?(E[+-]?{digit}+)?

/* Exercise 3.5.1 - d) a double-quote, then any characters in which a
   double-quote or backslash only appears escaped by a backslash, then the
   closing double-quote */
string    \"([^"\\\n]|\\.)*\"

%%
{ws}      { /* no action and no return */ }
if        { return (IF); }
while     { return (WHILE); /* Exercise 3.5.1 - a) */ }
then      { return (THEN); }
else      { return (ELSE); }
{id}      { yylval = (int) installID(); return (ID); }
{string}  { yylval = (int) installString(); return (STRING); /* Exercise 3.5.1 - d) */ }
{number}  { yylval = (int) installNum(); return (NUMBER); }
"<"       { yylval = LT; return (RELOP); }
"<="      { yylval = LE; return (RELOP); }
"=="      { yylval = EQ; return (RELOP); /* Exercise 3.5.1 - b) */ }
"!="      { yylval = NE; return (RELOP); /* Exercise 3.5.1 - b) */ }
">"       { yylval = GT; return (RELOP); }
">="      { yylval = GE; return (RELOP); }

%%

int installID(void) {
    /* function to install the lexeme, whose first character is pointed to by
       yytext, and whose length is yyleng, into the symbol table and return a
       pointer thereto */
    return 0; /* placeholder until the symbol table exists */
}

int installNum(void) {
    /* similar to installID, but puts numerical constants into a separate
       table */
    return 0; /* placeholder */
}

int installString(void) {
    /* Should install the lexeme without its surrounding double-quotes and
       with the escaping backslashes removed into the table of strings. */
    return 0; /* placeholder */
}
*/ 47 | } 48 | 49 | -------------------------------------------------------------------------------- /ch03/3.5/src/lex2.l: -------------------------------------------------------------------------------- 1 | %{ 2 | %} 3 | 4 | delim [ \t] 5 | ws {delim}+ 6 | letter [a-zA-Z] 7 | word {letter}+ 8 | 9 | %% 10 | {ws} { printf("%s", yytext); } 11 | {word} { 12 | if (starts_vowel(yytext)) 13 | printf("%say", yytext); 14 | else 15 | printf("%s%cay", yytext+1, yytext[0]);} 16 | . { printf("%s", yytext); } 17 | %% 18 | 19 | int starts_vowel(char *c){ 20 | switch(c[0]){ 21 | case 'a': 22 | case 'e': 23 | case 'i': 24 | case 'o': 25 | case 'u': 26 | case 'A': 27 | case 'E': 28 | case 'I': 29 | case 'O': 30 | case 'U': 31 | return 1; 32 | default: 33 | return 0; 34 | } 35 | } 36 | -------------------------------------------------------------------------------- /ch03/3.5/src/lex3.l: -------------------------------------------------------------------------------- 1 | %{ 2 | 3 | %} 4 | 5 | letter [a-zA-Z] 6 | digit [0-9] 7 | 8 | /* [w-W][h-H][i-I][l-L][e-E] works too. 
*/ 9 | while (?i:while) 10 | 11 | from (?i:from) 12 | select (?i:select) 13 | id {letter}({letter}|{digit})* 14 | 15 | %% 16 | {while} { printf("WHILE KEYWORD"); return (WHILE); } 17 | {from} { printf("FROM KEYWORD"); return (FROM); } 18 | {select} { printf("SELECT KEYWORD"); return (SELECT); } 19 | {id} { printf("ID"); yytext = lowercase(yytext); yylval = (int) installID(); return (ID); } 20 | %% 21 | 22 | 23 | int installID(){ 24 | /* We've to convert the lexeme to lowercase and install it 25 | 26 | example: 27 | yytext: foO 28 | yytext = lowercase(yytext) 29 | yylval = (int) installID() 30 | 31 | yytext: Strstr 32 | yytext = lowercase(yytext) 33 | yylval = (int) installID() 34 | 35 | yytext: strstr (already exists in the symbol table) 36 | yytext = lowercase(yytext) 37 | yylval = (int) installID() 38 | 39 | */ 40 | } 41 | -------------------------------------------------------------------------------- /ch03/3.6/3.6.md: -------------------------------------------------------------------------------- 1 | # 3.6 Exercises for Section 3.6 2 | 3 | ### 3.6.1 ! 4 | 5 | Figure 3.19 in the exercises of Section 3.4 computes the failure 6 | function for the KMP algorithm. Show how, given that failure function, we 7 | can construct, from a keyword b1b2...bn an n + 1-state DFA that recognizes .*b1b2...bn, where the dot stands for "any character." Moreover, this DFA can 8 | be constructed in O(n) time. 
9 | 10 | #### Answer 11 | 12 | Take the string "abbaabb" in exercise 3.4.3-3 as example, the failure function is: 13 | 14 | - n : 1, 2, 3, 4, 5, 6, 7 15 | - f(n): 0, 0, 0, 1, 1, 2, 3 16 | 17 | The DFA is: 18 | 19 | ![3 6 1](https://f.cloud.github.com/assets/340282/441972/36094510-b130-11e2-8e22-14aba49e8213.gif) 20 | 21 | Pseudocode of building the DFA: 22 | 23 | ``` 24 | for (i = 0; i< n; i ++) { 25 | move[s[i], c] = { 26 | if ( c == b1b2…bn[i] ) { 27 | goto s[i+1] 28 | } else { 29 | goto s[f(i)] 30 | } 31 | } 32 | } 33 | ``` 34 | 35 | It is obviously that with the known f(n), this DFA can be constructed in O(n) time. 36 | 37 | ### 3.6.2 38 | 39 | Design finite automata (deterministic or nondeterministic) 40 | for each of the languages of Exercise 3.3.5. 41 | 42 | ### 3.6.3 43 | 44 | For the NFA of Fig. 3.29, indicate all the paths labeled aabb. 45 | Does the NFA accept aabb? 46 | 47 | #### Answer 48 | 49 | - (0) -a-> (1) -a-> (2) -b-> (2) -b-> ((3)) 50 | - (0) -a-> (0) -a-> (0) -b-> (0) -b-> (0) 51 | - (0) -a-> (0) -a-> (1) -b-> (1) -b-> (1) 52 | - (0) -a-> (1) -a-> (1) -b-> (1) -b-> (1) 53 | - (0) -a-> (1) -a-> (2) -b-> (2) -b-> (2) 54 | - (0) -a-> (1) -a-> (2) -b-> (2) -ε-> (0) -b-> (0) 55 | - (0) -a-> (1) -a-> (2) -ε-> (0) -b-> (0) -b-> (0) 56 | 57 | This NFA accepts "aabb" 58 | 59 | ### 3.6.4 60 | 61 | Repeat Exercise 3.6.3 for the NFA of Fig. 3.30. 62 | 63 | ### 3.6.5 64 | 65 | Give the transition tables for the NFA of: 66 | 67 | 1. Exercise 3.6.3. 2. Exercise 3.6.4. 68 | 3. Figure 3.26. 69 | 70 | #### Answer 71 | 72 | **Table 1** 73 | 74 | 75 | 76 | 77 | 78 | 79 | 80 | 81 | 82 | 83 | 84 | 85 | 86 | 87 | 88 | 89 | 90 | 91 | 92 | 93 | 94 | 95 | 96 | 97 | 98 | 99 | 100 | 101 | 102 | 103 | 104 | 105 | 106 | 107 | 108 | 109 |
stateabε
0{0,1}{0}
1{1,2}{1}
2{2}{2,3}{0}
3
110 | 111 | 112 | **Table 2** 113 | 114 | 115 | 116 | 117 | 118 | 119 | 120 | 121 | 122 | 123 | 124 | 125 | 126 | 127 | 128 | 129 | 130 | 131 | 132 | 133 | 134 | 135 | 136 | 137 | 138 | 139 | 140 | 141 | 142 | 143 | 144 | 145 | 146 | 147 | 148 | 149 |
stateabε
0{1}{3}
1{2}{0}
2{3}{1}
3{0}{2}
150 | 151 | **Table 3** 152 | 153 | 154 | 155 | 156 | 157 | 158 | 159 | 160 | 161 | 162 | 163 | 164 | 165 | 166 | 167 | 168 | 169 | 170 | 171 | 172 | 173 | 174 | 175 | 176 | 177 | 178 | 179 | 180 | 181 | 182 | 183 | 184 | 185 | 186 | 187 | 188 | 189 | 190 | 191 | 192 | 193 | 194 |
stateabε
0{1,2}
1{2}
2{2}
3{4}
4{4}
-------------------------------------------------------------------------------- /ch03/3.6/assets/3.6.1.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/muhammadalmuzahid/compiler/b4cce261315be3e6a4b49d66eeb3cc8d0591bf60/ch03/3.6/assets/3.6.1.gif -------------------------------------------------------------------------------- /ch03/3.7/3.7.md: -------------------------------------------------------------------------------- 1 | # Exercises for Section 3.7 2 | 3 | ### 3.7.1 4 | 5 | Convert to DFA's the NFA's of: 6 | 1. Fig. 3.26. 7 | 2. Fig. 3.29. 8 | 3. Fig. 3.30. 9 | 10 | #### Answer 11 | 12 | 1、 13 | 14 | **Transition table** 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | 45 |
NFA StateDFA Stateab
{0,1,3}ABC
{2}BB
{4}CC
46 | 47 | **DFA** 48 | 49 | ![3 7 1-1](https://f.cloud.github.com/assets/340282/155929/27107b6a-7679-11e2-958e-4aa0435cfcab.gif) 50 | 51 | 2、 52 | 53 | **Transition table** 54 | 55 | 56 | 57 | 58 | 59 | 60 | 61 | 62 | 63 | 64 | 65 | 66 | 67 | 68 | 69 | 70 | 71 | 72 | 73 | 74 | 75 | 76 | 77 | 78 | 79 | 80 | 81 | 82 | 83 | 84 | 85 | 86 | 87 | 88 | 89 | 90 |
NFA StateDFA Stateab
{0}ABA
{0,1}BCB
{0,1,2}CCD
{0,2,3}DCD
91 | 92 | **DFA** 93 | 94 | ![3 7 1-2](https://f.cloud.github.com/assets/340282/155931/753b9cd4-7679-11e2-8a33-131b36ef7a00.gif) 95 | 96 | 3、 97 | 98 | **Transition table** 99 | 100 | 101 | 102 | 103 | 104 | 105 | 106 | 107 | 108 | 109 | 110 | 111 | 112 | 113 | 114 | 115 | 116 | 117 |
NFA StateDFA Stateab
{0,1,2,3}AAA
118 | 119 | **DFA** 120 | 121 | ![3 7 1-3](https://f.cloud.github.com/assets/340282/155934/dc3e04d0-7679-11e2-951b-a5e49f8a7627.gif) 122 | 123 | ### 3.7.2 124 | 125 | use Algorithm 3.22 to simulate the NFA's: 126 | 127 | 1. Fig. 3.29. 2. Fig. 3.30. 128 | on input aabb. 129 | 130 | #### Answer 131 | 132 | 1. -start->{0}-a->{0,1}-a->{0,1,2}-b->{0,2,3}-b->{0,2,3} 133 | 2. -start->{0,1,2,3}-a->{0,1,2,3}-a->{0,1,2,3}-b->{0,1,2,3}-b->{0,1,2,3} 134 | 135 | 136 | ### 3.7.3 137 | 138 | Convert the following regular expressions to deterministic finite automata, using algorithms 3.23 and 3.20: 139 | 140 | 1. (a|b)\* 141 | 2. (a\*|b\*)\* 142 | 3. ((ε|a)|b\*)\* 143 | 4. (a|b)\*abb(a|b)\* 144 | 145 | #### Answer 146 | 147 | 1、 148 | 149 | **NFA** 150 | 151 | ![3 7 3-1-nfa](https://f.cloud.github.com/assets/340282/155956/dfabdba4-767b-11e2-891d-4338a045a978.gif) 152 | 153 | **Transition table** 154 | 155 | 156 | 157 | 158 | 159 | 160 | 161 | 162 | 163 | 164 | 165 | 166 | 167 | 168 | 169 | 170 | 171 | 172 | 173 | 174 | 175 | 176 | 177 | 178 | 179 | 180 | 181 | 182 | 183 | 184 |
NFA StateDFA Stateab
{0,1,2,3,7}ABC
{1,2,3,4,6,7}BBC
{1,2,3,5,6,7}CBC
185 | 186 | **DFA** 187 | 188 | ![3 7 3-1-dfa](https://f.cloud.github.com/assets/340282/155957/ed859b48-767b-11e2-8b2f-c67cb76fcaec.gif) 189 | 190 | 2、 191 | 192 | **NFA** 193 | 194 | ![3 7 3-2-nfa](https://f.cloud.github.com/assets/340282/155978/30c00c48-767e-11e2-8008-c6b39898eedc.gif) 195 | 196 | **Transition table** 197 | 198 | 199 | 200 | 201 | 202 | 203 | 204 | 205 | 206 | 207 | 208 | 209 | 210 | 211 | 212 | 213 | 214 | 215 | 216 | 217 | 218 | 219 | 220 | 221 | 222 | 223 | 224 | 225 | 226 | 227 |
NFA StateDFA Stateab
{0,1,2,3,4,5,8,9,10,11}ABC
{1,2,3,4,5,6,8,9,10,11}BBC
{1,2,3,4,5,7,8,9,10,11}CBC
228 | 229 | **DFA** 230 | 231 | ![3 7 3-2-dfa](https://f.cloud.github.com/assets/340282/155979/3a265bde-767e-11e2-91ca-41e62e8284af.gif) 232 | 233 | 3、 234 | 235 | **NFA** 236 | 237 | ![3 7 3-3-nfa](https://f.cloud.github.com/assets/340282/155981/c3f0dcc2-767e-11e2-9355-986018bab034.gif) 238 | 239 | **Transition table** 240 | 241 | 242 | 243 | 244 | 245 | 246 | 247 | 248 | 249 | 250 | 251 | 252 | 253 | 254 | 255 | 256 | 257 | 258 | 259 | 260 | 261 | 262 | 263 | 264 | 265 | 266 | 267 | 268 | 269 | 270 |
NFA StateDFA Stateab
{0,1,2,3,4,6,7,9,10}ABC
{1,2,3,4,5,6,7,9,10}BBC
{1,2,3,4,6,7,8,9,10}CBC
271 | 272 | **DFA** 273 | 274 | ![3 7 3-3-dfa](https://f.cloud.github.com/assets/340282/155982/c6e24ede-767e-11e2-9e4f-de621927cdd9.gif) 275 | 276 | 4、 277 | 278 | **NFA** 279 | 280 | ![3 7 3-4-nfa](https://f.cloud.github.com/assets/340282/156070/ec5f1872-7683-11e2-9a28-8e048b007475.gif) 281 | 282 | **Transition table** 283 | 284 | 285 | 286 | 287 | 288 | 289 | 290 | 291 | 292 | 293 | 294 | 295 | 296 | 297 | 298 | 299 | 300 | 301 | 302 | 303 | 304 | 305 | 306 | 307 | 308 | 309 | 310 | 311 | 312 | 313 | 314 | 315 | 316 | 317 | 318 | 319 | 320 | 321 | 322 | 323 | 324 | 325 | 326 | 327 | 328 | 329 | 330 | 331 | 332 | 333 | 334 | 335 | 336 | 337 | 338 | 339 | 340 | 341 | 342 | 343 | 344 | 345 | 346 | 347 | 348 | 349 |
NFA StateDFA Stateab
{0,1,2,4,7}ABC
{1,2,3,4,6,7,8}BBD
{1,2,4,5,6,7}CBC
{1,2,4,5,6,7,9}DBE
{1,2,4,5,6,7,10,11,12,14,17}EFG
{1,2,3,4,6,7,8,11,12,13,14,16,17}FFH
{1,2,4,5,6,7,11,12,13,15,16,17}GFG
{1,2,4,5,6,7,9,11,12,14,15,16,17}HFI
{1,2,4,5,6,7,10,11,12,14,15,16,17}IFG
350 | 351 | **DFA** 352 | 353 | ![3 7 3-4-dfa](https://f.cloud.github.com/assets/340282/2433196/0f7471c4-ad86-11e3-95d4-2da73c2e50ba.gif) 354 | -------------------------------------------------------------------------------- /ch03/3.7/assets/3.7.1-1.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/muhammadalmuzahid/compiler/b4cce261315be3e6a4b49d66eeb3cc8d0591bf60/ch03/3.7/assets/3.7.1-1.gif -------------------------------------------------------------------------------- /ch03/3.7/assets/3.7.1-2.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/muhammadalmuzahid/compiler/b4cce261315be3e6a4b49d66eeb3cc8d0591bf60/ch03/3.7/assets/3.7.1-2.gif -------------------------------------------------------------------------------- /ch03/3.7/assets/3.7.1-3.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/muhammadalmuzahid/compiler/b4cce261315be3e6a4b49d66eeb3cc8d0591bf60/ch03/3.7/assets/3.7.1-3.gif -------------------------------------------------------------------------------- /ch03/3.7/assets/3.7.1-3.graphml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | A 43 | 44 | 45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 | 53 | 54 | 55 | 56 | 57 | 58 | 59 | 60 | 61 | 62 | 63 | start 64 | 65 | 66 | 67 | 68 | 69 | 70 | 71 | 72 | 73 | 74 | 75 | 76 | 77 | 78 | a 79 | 80 | 81 | 82 | 83 | 84 | 85 | 86 | 87 | 88 | 89 | 90 | 91 | 92 | 93 | b 94 | 95 | 96 | 97 | 98 | 99 | 100 | 101 | 102 | 103 | 104 | 105 | 106 | 107 | 108 | 109 | 110 | -------------------------------------------------------------------------------- /ch03/3.7/assets/3.7.3-1-dfa.gif: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/muhammadalmuzahid/compiler/b4cce261315be3e6a4b49d66eeb3cc8d0591bf60/ch03/3.7/assets/3.7.3-1-dfa.gif -------------------------------------------------------------------------------- /ch03/3.7/assets/3.7.3-1-nfa.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/muhammadalmuzahid/compiler/b4cce261315be3e6a4b49d66eeb3cc8d0591bf60/ch03/3.7/assets/3.7.3-1-nfa.gif -------------------------------------------------------------------------------- /ch03/3.7/assets/3.7.3-2-dfa.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/muhammadalmuzahid/compiler/b4cce261315be3e6a4b49d66eeb3cc8d0591bf60/ch03/3.7/assets/3.7.3-2-dfa.gif -------------------------------------------------------------------------------- /ch03/3.7/assets/3.7.3-2-nfa.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/muhammadalmuzahid/compiler/b4cce261315be3e6a4b49d66eeb3cc8d0591bf60/ch03/3.7/assets/3.7.3-2-nfa.gif -------------------------------------------------------------------------------- /ch03/3.7/assets/3.7.3-3-dfa.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/muhammadalmuzahid/compiler/b4cce261315be3e6a4b49d66eeb3cc8d0591bf60/ch03/3.7/assets/3.7.3-3-dfa.gif -------------------------------------------------------------------------------- /ch03/3.7/assets/3.7.3-3-nfa.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/muhammadalmuzahid/compiler/b4cce261315be3e6a4b49d66eeb3cc8d0591bf60/ch03/3.7/assets/3.7.3-3-nfa.gif -------------------------------------------------------------------------------- /ch03/3.7/assets/3.7.3-4-dfa.gif: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/muhammadalmuzahid/compiler/b4cce261315be3e6a4b49d66eeb3cc8d0591bf60/ch03/3.7/assets/3.7.3-4-dfa.gif -------------------------------------------------------------------------------- /ch03/3.7/assets/3.7.3-4-nfa.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/muhammadalmuzahid/compiler/b4cce261315be3e6a4b49d66eeb3cc8d0591bf60/ch03/3.7/assets/3.7.3-4-nfa.gif -------------------------------------------------------------------------------- /ch03/3.8/3.8.md: -------------------------------------------------------------------------------- 1 | # Exercises for Section 3.8 2 | 3 | ### 3.8.1 4 | 5 | Suppose we have two tokens: (1) the keyword if, and (2) 6 | id­entifiers, which are strings of letters other than if. Show: 7 | 1. The NFA for these tokens, and 1. The DFA for these tokens. 8 | 9 | #### Answer 10 | 11 | 1. NFA 12 | 13 | ![3 8 1-nfa](https://f.cloud.github.com/assets/340282/448499/c5cb2ba0-b248-11e2-94f4-90d117eabdfd.gif) 14 | 15 | NOTE: this NFA has potential conflict, we can decide the matched lexeme by 1. take the longest 2. take the first listed. 16 | 17 | 2. DFA 18 | 19 | ![3 8 1-dfa](https://f.cloud.github.com/assets/340282/448502/cb3623ba-b248-11e2-8397-d15e14def501.gif) 20 | 21 | 22 | ### 3.8.2 23 | 24 | Repeat Exercise 3.8.1 for tokens consisting of (1) the keyword 25 | while, (2) the keyword when, and (3) identifiers consisting of strings of letters 26 | and digits, beginning with a letter. 27 | 28 | #### Answer 29 | 30 | 1. NFA 31 | 32 | ![3 8 2-nfa](https://f.cloud.github.com/assets/340282/448535/182bd758-b24b-11e2-8375-454b3a9dc812.gif) 33 | 34 | 2. DFA 35 | 36 | bother to paint 37 | 38 | ### 3.8.3 ! 
39 | 40 | Suppose we were to revise the definition of a DFA to allow 41 | zero or one transition out of each state on each input symbol (rather than 42 | exactly one such transition, as in the standard DFA definition). Some regular 43 | expressions would then have smaller "DFA's" than they do under the standard 44 | definition of a DFA. Give an example of one such regular expression. 45 | 46 | #### Answer 47 | 48 | Take the language defined by the regular expression "ab" as an example, and assume that the set of input symbols is {a, b}. 49 | 50 | Standard DFA 51 | 52 | ![3 8 3-1](https://f.cloud.github.com/assets/340282/451932/881521e6-b2c3-11e2-935c-0a6c8177ad87.gif) 53 | 54 | 55 | Revised DFA 56 | 57 | ![3 8 3-2](https://f.cloud.github.com/assets/340282/448895/11fa403a-b260-11e2-9f30-f6c2b6c72539.gif) 58 | 59 | The revised DFA is smaller than the standard DFA, because it does not need a dead state to absorb the transitions that lead to no match. 60 | 61 | ### 3.8.4 !! 62 | 63 | Design an algorithm to recognize Lex-lookahead patterns of 64 | the form r1/r2, where r1 and r2 are regular expressions. Show how your 65 | algorithm works on the following inputs: 66 | 67 | 1. (abcd|abc)/d 68 | 2. (a|ab)/ba 69 | 3. 
aa\*/a\* 70 | 71 | 72 | -------------------------------------------------------------------------------- /ch03/3.8/assets/3.8.1-dfa.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/muhammadalmuzahid/compiler/b4cce261315be3e6a4b49d66eeb3cc8d0591bf60/ch03/3.8/assets/3.8.1-dfa.gif -------------------------------------------------------------------------------- /ch03/3.8/assets/3.8.1-nfa.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/muhammadalmuzahid/compiler/b4cce261315be3e6a4b49d66eeb3cc8d0591bf60/ch03/3.8/assets/3.8.1-nfa.gif -------------------------------------------------------------------------------- /ch03/3.8/assets/3.8.2-nfa.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/muhammadalmuzahid/compiler/b4cce261315be3e6a4b49d66eeb3cc8d0591bf60/ch03/3.8/assets/3.8.2-nfa.gif -------------------------------------------------------------------------------- /ch03/3.8/assets/3.8.3-1.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/muhammadalmuzahid/compiler/b4cce261315be3e6a4b49d66eeb3cc8d0591bf60/ch03/3.8/assets/3.8.3-1.gif -------------------------------------------------------------------------------- /ch03/3.8/assets/3.8.3-2.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/muhammadalmuzahid/compiler/b4cce261315be3e6a4b49d66eeb3cc8d0591bf60/ch03/3.8/assets/3.8.3-2.gif -------------------------------------------------------------------------------- /ch03/3.8/assets/3.8.3-2.graphml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 0 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 
34 | 35 | 36 | 37 | 38 | 39 | 1 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 | 53 | 54 | 55 | 56 | 57 | 58 | 59 | 60 | 61 | 62 | 63 | 64 | 65 | 2 66 | 67 | 68 | 69 | 70 | 71 | 72 | 73 | 74 | 75 | 76 | 77 | 78 | 79 | 80 | 81 | a 82 | 83 | 84 | 85 | 86 | 87 | 88 | 89 | 90 | 91 | 92 | 93 | 94 | 95 | 96 | 97 | 98 | 99 | b 100 | 101 | 102 | 103 | 104 | 105 | 106 | 107 | 108 | 109 | 110 | 111 | 112 | 113 | 114 | 115 | 116 | -------------------------------------------------------------------------------- /ch03/3.9/3.9.md: -------------------------------------------------------------------------------- 1 | # Exercises for Section 3.9 2 | 3 | ### 3.9.1 4 | 5 | Extend the table of Fig. 3.58 to include the operators 6 | 7 | 1. ? 8 | 2. + 9 | 10 | #### Answer 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 |
node nnullable(n)firstpos(n)
n = c_1 ?truefirstpos(c_1)
n = c_1 +nullable(c_1)firstpos(c_1)
33 | 34 | ### 3.9.2 35 | 36 | Use Algorithm 3.36 to convert the regular expressions of 37 | Ex­ercise 3.7.3 directly to deterministic finite automata. 38 | 39 | #### Answer 40 | 41 | 1. (a|b)\* 42 | 43 | - Syntax tree 44 | 45 | ![3 9 2-1-1](https://f.cloud.github.com/assets/340282/457244/4b667ef0-b38d-11e2-9539-90718147591d.gif) 46 | 47 | - firstpos and lastpos for nodes in the syntax tree 48 | 49 | ![3 9 2-1-2](https://f.cloud.github.com/assets/340282/457267/42ee2b9a-b38f-11e2-8a13-05e62d7dca7e.gif) 50 | 51 | 52 | - The function followpos 53 | 54 | 55 | 56 | 57 | 58 | 59 | 60 | 61 | 62 | 63 | 64 | 65 | 66 | 67 | 68 | 69 | 70 | 71 | 72 | 73 | 74 | 75 |
node nfollowpos(n)
1{1, 2, 3}
2{1, 2, 3}
3
76 | 77 | - Steps 78 | 79 | The value of firstpos for the root of the tree is {1, 2, 3}, so this set is the start state of D. Call this set of states A. We compute Dtran[A, a] and Dtran[A, b]. Among the positions of A, 1 corresponds to a, while 2 corresponds to b. Thus Dtran[A, a] = followpos(1) = {1, 2, 3}, Dtran[A, b] = followpos(2) = {1, 2, 3}. Both results are the set A itself, so no new state is created and the computation ends. 80 | 81 | - DFA 82 | 83 | ![3 9 2-1-dfa](https://f.cloud.github.com/assets/340282/457270/916fb2b6-b38f-11e2-9ad2-d3445e758b5e.gif) 84 | 85 | 86 | 2. (a\*|b\*)\* 87 | 88 | 3. ((ε|a)|b\*)\* 89 | 90 | 4. (a|b)\*abb(a|b)\* 91 | 92 | 93 | ### 3.9.3 ! 94 | 95 | We can prove that two regular expressions are equivalent by 96 | showing that their minimum-state DFA's are the same up to renaming of states. 97 | Show in this way that the following regular expressions: (a|b)\*, (a\*|b\*)\*, and ((ε|a)b\*)\* are all equivalent. Note: You may have constructed the DFA's for 98 | these expressions in response to Exercise 3.7.3. 99 | 100 | #### Answer 101 | 102 | Refer to the answers of 3.7.3 and 3.9.2-1 103 | 104 | ### 3.9.4 ! 105 | 106 | Construct the minimum-state DFA's for the following regular expressions: 107 | 108 | 1. (a|b)\*a(a|b) 109 | 2. (a|b)\*a(a|b)(a|b) 110 | 3. (a|b)\*a(a|b)(a|b)(a|b) 111 | 112 | Do you see a pattern? 113 | 114 | ### 3.9.5 !! 115 | 116 | To make formal the informal claim of Example 3.25, show 117 | that any deterministic finite automaton for the regular expression 118 | 119 | (a|b)\*a(a|b)...(a|b) 120 | 121 | where (a|b) appears n - 1 times at the end, must have at least 2^n states. Hint: 122 | Observe the pattern in Exercise 3.9.4. What condition regarding the history of 123 | inputs does each state represent? 
124 | -------------------------------------------------------------------------------- /ch03/3.9/assets/3.9.2-1-1.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/muhammadalmuzahid/compiler/b4cce261315be3e6a4b49d66eeb3cc8d0591bf60/ch03/3.9/assets/3.9.2-1-1.gif -------------------------------------------------------------------------------- /ch03/3.9/assets/3.9.2-1-2.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/muhammadalmuzahid/compiler/b4cce261315be3e6a4b49d66eeb3cc8d0591bf60/ch03/3.9/assets/3.9.2-1-2.gif -------------------------------------------------------------------------------- /ch03/3.9/assets/3.9.2-1-dfa.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/muhammadalmuzahid/compiler/b4cce261315be3e6a4b49d66eeb3cc8d0591bf60/ch03/3.9/assets/3.9.2-1-dfa.gif -------------------------------------------------------------------------------- /ch03/3.9/assets/3.9.2-1-dfa.graphml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 123 44 | 45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 | 53 | 54 | 55 | 56 | 57 | 58 | 59 | 60 | 61 | 62 | 63 | 64 | 65 | start 66 | 67 | 68 | 69 | 70 | 71 | 72 | 73 | 74 | 75 | 76 | 77 | 78 | 79 | 80 | 81 | 82 | 83 | 84 | 85 | 86 | a 87 | 88 | 89 | 90 | 91 | 92 | 93 | 94 | 95 | 96 | 97 | 98 | 99 | 100 | 101 | 102 | 103 | 104 | 105 | 106 | 107 | b 108 | 109 | 110 | 111 | 112 | 113 | 114 | 115 | 116 | 117 | 118 | 119 | 120 | 121 | 122 | 123 | 124 | -------------------------------------------------------------------------------- /ch03/key-point/key-point.md: 
-------------------------------------------------------------------------------- 1 | # 第3章要点 2 | 3 | ### 1. 从 NFA、DFA 到正则表达式的转换 4 | 5 | http://courses.engr.illinois.edu/cs373/sp2009/lectures/lect_08.pdf 6 | 7 | ### 2. KMP 及其扩展算法(p87) 8 | 9 | 参考 matrix 的博文 [KMP算法详解](http://www.matrix67.com/blog/archives/115)。文中提供了例子,比较容易理解。 10 | 11 | ### 3. 字符串处理算法的效率(p103) 12 | 13 | 对于每个构造得到的 DFA 状态,我们最多必须构造 4|r| 个新状态 14 | 15 | ### 4. DFA 模拟中的时间和空间的权衡(p116) 16 | 17 | 图 3-66 表示的算法 18 | 19 | ### 5. 最小化一个 DFA 的状态数量(p115) 20 | 21 | 注意图 3-64 的第 4 行:“状态 s 和 t 在 a 上的转换都到达 Π 中的同一组”,而不是到达同一个状态。如果通过是否到达同一个状态来判定,那么如果 s 和 t 在 a 上的转换到了两个不同但不能区分的状态时,就会认为 s 和 t 是可区分的。 -------------------------------------------------------------------------------- /ch04/4.2/assets/4.2.1.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/muhammadalmuzahid/compiler/b4cce261315be3e6a4b49d66eeb3cc8d0591bf60/ch04/4.2/assets/4.2.1.gif -------------------------------------------------------------------------------- /ch04/4.3/4.3.md: -------------------------------------------------------------------------------- 1 | # 4.3 节的练习 2 | 3 | ### 4.3.1 4 | 5 | 下面是一个只包含符号 a 和 b 的正则表达式文法。它使用 + 替代表示并运算的字符 | ,以避免和文法中作为元符号使用的竖线相混淆: 6 | 7 | rexpr -> rexpr + rterm | rterm 8 | rterm -> rterm rfactor | rfactor 9 | rfactor -> rfactor * | rprimary 10 | rprimary -> a | b 11 | 12 | 1. 对这个文法提取左公因子。 13 | 2. 提取左公因子的变换能使这个文法适用于自顶向下的语法分析技术吗? 14 | 3. 提取左公因子之后,从原文法中消除左递归。 15 | 4. 得到的文法适用于自顶向下的语法分析吗? 16 | 17 | #### 解答 18 | 19 | 1. 无左公因子 20 | 2. 不适合 21 | 3. 消除左递归 22 | 23 | rexpr -> rterm A 24 | A -> + rterm A | ε 25 | rterm -> rfactor B 26 | B -> rfactor B | ε 27 | rfactor -> rprimary C 28 | C -> * C | ε 29 | rprimary -> a | b 30 | 31 | 4. 适合? 32 | 33 | ### 4.3.2 34 | 35 | 对下面的文法重复练习 4.3.1 36 | 37 | 1. 练习 4.2.1 的文法 38 | 2. 练习 4.2.2-1 的文法 39 | 3. 练习 4.2.2-3 的文法 40 | 4. 练习 4.2.2-5 的文法 41 | 5. 练习 4.2.2-7 的文法 42 | 43 | #### 解答 44 | 45 | 1. S -> S S + | S S * | a 46 | 1. 
提取左公因子 47 | 48 | S -> S S A | a 49 | A -> + | * 50 | 51 | 2. 不适合 52 | 3. 消除左递归 53 | 54 | // initial status 55 | 1)S -> S S A | a 56 | 2) A -> + | * 57 | 58 | // i = 1 59 | 1) S -> a B 60 | 2) B -> S A B | ε 61 | 3) A -> + | * 62 | 63 | // i = 2, j = 1 64 | 1) S -> a B 65 | 2) B -> a B A B | ε 66 | 3) A -> + | * 67 | 68 | // i = 3, j = 1 ~ 2 69 | // nothing changed 70 | 71 | 4. 适合 72 | 73 | 2. S -> 0 S 1 | 0 1 74 | 1. 提取左公因子 75 | 76 | S -> 0 A 77 | A -> S 1 | 1 78 | 79 | 2. 不适合,有间接左递归 80 | 3. 消除左递归 81 | 82 | // initial status 83 | 1) S -> 0 A 84 | 2) A -> S 1 | 1 85 | 86 | // i = 1 87 | // nothing changed 88 | 89 | // i = 2, j = 1 90 | 1) S -> 0 A 91 | 2) A -> 0 A 1 | 1 92 | 93 | 4. 合适 94 | 95 | 3. S -> S (S) S | ε 96 | 1. 无左公因子 97 | 2. 不合适 98 | 3. 消除左递归 99 | 100 | // initial status 101 | 1) S -> S (S) S | ε 102 | 103 | // i = 1 104 | 1) S -> A 105 | 2) A -> (S) S A | ε 106 | 107 | // i = 2, j = 1 108 | // nothing changed 109 | 4. 合适 110 | 111 | 4. S -> (L) | a 以及 L -> L, S | S 112 | 1. 无左公因子 113 | 2. 不合适 114 | 3. 消除左递归 115 | 116 | // initial status 117 | 1) S -> (L) | a 118 | 2) L -> L, S | S 119 | 120 | // i = 1 121 | // nothing changed 122 | 123 | // i = 2, j = 1 124 | 1) S -> (L) | a 125 | 2) L -> (L) A | a A 126 | 3) A -> , S A | ε 127 | 128 | // i = 3, j = 1~2 129 | // nothing changed 130 | 131 | 4. 合适 132 | 133 | ### 4.3.3 ! 
134 | 135 | 下面文法的目的是消除 4.3.2 节中讨论的 “悬空-else 二义性”: 136 | 137 | stmt -> if expr then stmt 138 | | matchedStmt 139 | matchedStmt -> if expr then matchedStmt else stmt 140 | | other 141 | 142 | 说明这个文法仍然是二义性的。 143 | 144 | #### 解答 145 | 146 | 看一段示范代码,我们通过缩进来表示代码解析的层次结构 147 | 148 | if expr 149 | then 150 | if expr 151 | then matchedStmt 152 | else 153 | if expr 154 | then matchedStmt 155 | else stmt 156 | 157 | 这段代码还可以被解析成 158 | 159 | if expr 160 | then 161 | if expr 162 | then matchedStmt 163 | else 164 | if expr 165 | then matchedStmt 166 | else stmt 167 | 168 | 所以这仍然是一个二义性的文法。原因在于 `matchedStmt -> if expr then matchedStmt else stmt` 中的最后一个 `stmt`,如果包含 `else` 语句的话,既可以认为是属于这个 `stmt` 的,也可以认为是属于包含这个 `matchedStmt` 的语句的。 -------------------------------------------------------------------------------- /ch04/4.4/courses.engr.illinois.edu-cs373-lec14.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/muhammadalmuzahid/compiler/b4cce261315be3e6a4b49d66eeb3cc8d0591bf60/ch04/4.4/courses.engr.illinois.edu-cs373-lec14.pdf -------------------------------------------------------------------------------- /ch04/4.5/4.5.md: -------------------------------------------------------------------------------- 1 | # 4.5 节的练习 2 | 3 | ### 4.5.1 4 | 5 | 对于练习 4.2.2(a)中的文法 S -> 0 S 1 | 0 1,指出下面各个最右句型的句柄。 6 | 7 | 1. 000111 8 | 2. 00S11 9 | 10 | #### 解答 11 | 12 | 1. 01 13 | 2. 0S1 14 | 15 | ### 4.5.2 16 | 17 | 对于练习 4.2.1 的文法 S -> S S + | S S \* | a 和下面各个最右句型,重复练习 4.5.1 。 18 | 19 | 1. SSS+a\*+ 20 | 2. SS+a\*a+ 21 | 3. aaa\*a++ 22 | 23 | #### 解答 24 | 25 | 1. SS+ 26 | 2. SS+ 27 | 3. a 28 | 29 | ### 4.5.3 30 | 31 | 对于下面的输入符号串和文法,说明相应的自底向上语法分析过程。 32 | 33 | 1. 练习 4.5.1 的文法的串 000111 。 34 | 2. 
练习 4.5.2 的文法的串 aaa*a++ 。 35 | 36 | #### 解答 37 | 38 | 1、 000111 39 | 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 | 53 | 54 | 55 | 56 | 57 | 58 | 59 | 60 | 61 | 62 | 63 | 64 | 65 | 66 | 67 | 68 | 69 | 70 | 71 | 72 | 73 | 74 | 75 | 76 | 77 | 78 | 79 | 80 | 81 | 82 | 83 | 84 | 85 | 86 | 87 | 88 | 89 | 90 | 91 | 92 | 93 | 94 | 95 | 96 | 97 | 98 | 99 | 100 | 101 | 102 | 103 | 104 | 105 | 106 | 107 | 108 | 109 | 110 | 111 |
输入句柄动作
$000111$移入
$000111$移入
$000111$移入
$000111$移入
$000111$01规约:S -> 01
$00S11$移入
$00S11$0S1规约:S -> 0S1
$0S1$移入
$0S1$0S1规约:S -> 0S1
$S$接受
112 | 113 | 2、 aaa*a++ 114 | 115 | 116 | 117 | 118 | 119 | 120 | 121 | 122 | 123 | 124 | 125 | 126 | 127 | 128 | 129 | 130 | 131 | 132 | 133 | 134 | 135 | 136 | 137 | 138 | 139 | 140 | 141 | 142 | 143 | 144 | 145 | 146 | 147 | 148 | 149 | 150 | 151 | 152 | 153 | 154 | 155 | 156 | 157 | 158 | 159 | 160 | 161 | 162 | 163 | 164 | 165 | 166 | 167 | 168 | 169 | 170 | 171 | 172 | 173 | 174 | 175 | 176 | 177 | 178 | 179 | 180 | 181 | 182 | 183 | 184 | 185 | 186 | 187 | 188 | 189 | 190 | 191 | 192 | 193 | 194 | 195 | 196 | 197 | 198 | 199 | 200 | 201 | 202 | 203 | 204 | 205 | 206 | 207 | 208 | 209 | 210 | 211 | 212 | 213 | 214 | 215 | 216 |
输入句柄动作
$aaa*a++$移入
$aaa*a++$a规约: S -> a
$Saa*a++$移入
$Saa*a++$a规约: S -> a
$SSa*a++$移入
$SSa*a++$a规约: S -> a
$SSS*a++$移入
$SSS*a++$SS*规约: S -> SS*
$SSa++$移入
$SSa++$a规约: S -> a
$SSS++$移入
$SSS++$SS+规约: S -> SS+
$SS+$移入
$SS+$SS+规约: S -> SS+
$S$接受
-------------------------------------------------------------------------------- /ch04/4.6/assets/4.6.1-1.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/muhammadalmuzahid/compiler/b4cce261315be3e6a4b49d66eeb3cc8d0591bf60/ch04/4.6/assets/4.6.1-1.gif -------------------------------------------------------------------------------- /ch04/4.6/assets/4.6.1-2.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/muhammadalmuzahid/compiler/b4cce261315be3e6a4b49d66eeb3cc8d0591bf60/ch04/4.6/assets/4.6.1-2.gif -------------------------------------------------------------------------------- /ch04/4.6/assets/4.6.1-3.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/muhammadalmuzahid/compiler/b4cce261315be3e6a4b49d66eeb3cc8d0591bf60/ch04/4.6/assets/4.6.1-3.gif -------------------------------------------------------------------------------- /ch04/4.7/4.7.md: -------------------------------------------------------------------------------- 1 | # 4.7 节的练习 2 | 3 | ### 4.7.1 4 | 5 | 为练习 4.2.1 的文法 S -> S S + | S S * | a 构造 6 | 7 | 1. 规范 LR 项集族 8 | 2. LALR 项集族 9 | 10 | ### 4.7.2 11 | 12 | 对练习 4.2.2-1 ~ 4.4.2-7 的各个文法重复练习 4.7.1 13 | 14 | ### ! 4.7.3 15 | 16 | 对练习 4.7.1 的文法,使用算法 4.63,根据该文法的 LR(0) 项集的内核构造出它的 LALR 项集族 17 | 18 | ### ! 4.7.4 19 | 20 | 说明下面的文法 21 | 22 | S -> A a | b A c | d c | b d a 23 | A -> d 24 | 25 | 是 LALR(1) 的,但不是 SLR(1) 的 26 | 27 | ### ! 
4.7.5 28 | 29 | 说明下面的文法 30 | 31 | S -> A a | b A c | B c | b B a 32 | A -> d 33 | B -> d 34 | 35 | 是 LR(1) 的,但不是 LALR(1) 的 36 | -------------------------------------------------------------------------------- /ch04/key-point/key-point.md: -------------------------------------------------------------------------------- 1 | # 第4章要点 2 | 3 | ### !LR(0), SLR, LR, LALR 之间的区别 4 | 5 | p157: LR(0) 自动机是如何做出移入-规约决定的?假设文法符号串 γ 使得 LR(0) 自动机从开始状态 0 运行到某个状态 j,那么如果下一个输入符号为 a 且状态 j 有一个在 a 上的转换,就移入 a,否则就进行规约。 6 | 7 | 这种方法会导致一些错误的规约,假定规约后的符号为 X,但 a 并不在 FOLLOW(X) 中,这种情况下就会有问题。所以 SLR 在这方面进行了改进。 8 | 9 | p161:构造一个 SLR 分析表时,如果 [A -> α.] 在 I_i 中,那么对于 FOLLOW(A) 中的所有 a,将 ACTION[i, a] 设置为 “规约 A -> α” 10 | 11 | SLR 一定程度上解决了错误规约的问题,但没有完全解决。因为虽然 a 在 FOLLOW(A) 中才会选择规约,但是就当前所处的状态 I_i 而言,并不是每个 FOLLOW(A) 中的终结符都可以出现在状态 I_i 中的 A 后面。 12 | 13 | p166: 用更正式一点的语言来讲,必须要为 I_i 精确地指明哪些输入符号可以跟在句柄 α 后面,从而使 α 可以被规约为 A。 14 | 15 | LR 通过在项中加入第二个分量,即向前看符号来解决这个问题。但新的问题是 LR 会使得状态表极其庞大,而 LALR 就是一种比较经济的做法,它具有和 SLR 一样多的状态。 16 | 17 | p170:一般地说,通过将具有相同核心项集的 LR 项集合并,可以得到 LALR 项集。虽然 LALR 可能会进行一些错误的规约,但最终会在输入任何新的符号之前发现这个错误。 18 | 19 | ### 消除二义性 (p134) 20 | 21 | 图 4-10,如何得出这个消除方法的? 22 | 23 | ### 消除左递归 (p135) 24 | 25 | 为什么图 4-11 的算法能消除文法中的左递归? 26 | 27 | 消除左递归需满足两个条件: 28 | 29 | 1. 不存在立即左递归,即不存在形似这样的产生式 A -> Aα 。 30 | 2. 
不存在由多步推导可产生的左递归。 31 | 32 | 算法 3~5 行循环的结果使得形如 A_i -> A_m α 的产生式一定满足 m >= i ,就消除了形如 S => Aa => Sda 这样的转换可能,也就是说由 A_m 一定推导不出以 A_i 开头的产生式,A_m α 就不存在产生 A_i 左递归的可能。 33 | 34 | **同时需要注意的是:** 只需要处理 A_i -> A_j α 这样的产生式,而不需要处理形如 A_i -> α A_j β 这样的产生式 35 | 36 | 循环完成后,第 6 行消除了替换后的产生式中的立即左递归。 37 | 38 | ### 使用 LR(0) 创建出 LALR(1) 项集的内核 (p173) 39 | 40 | 自发生成的和传播的向前看符号 41 | 42 | ### CNF 和 BNF 43 | 44 | - [Chomsky normal form](http://en.wikipedia.org/wiki/Chomsky_normal_form) 45 | - [Backus Naur Form](https://en.wikipedia.org/wiki/Backus%E2%80%93Naur_Form) 46 | 47 | 48 | -------------------------------------------------------------------------------- /ch05/5.1/5.1.md: -------------------------------------------------------------------------------- 1 | # 5.1 节的练习 2 | 3 | ### 5.1.1 4 | 5 | 对于图 5-1 中的 SDD,给出下列表达式对应的注释语法分析树 6 | 7 | 1. (3+4)\*(5+6)n 8 | 2. 1\*2\*3\*(4+5)n 9 | 3. (9+8\*(7+6)+5)\*4n 10 | 11 | #### 解答 12 | 13 | 1. (3+4)\*(5+6)n 14 | 15 | ![5 1 1-1](https://f.cloud.github.com/assets/340282/869233/d376f6d0-f7ee-11e2-9ce1-5a268c1e77c8.gif) 16 | 17 | 2. 1\*2\*3\*(4+5)n 18 | 19 | ![5 1 1-2](https://f.cloud.github.com/assets/340282/869221/d42a32be-f7ed-11e2-940d-7db7f93b75a0.gif) 20 | 21 | 22 | ### 5.1.2 23 | 24 | 扩展图 5-4 中的 SDD,使它可以像图 5-1 所示的那样处理表达式 25 | 26 | #### 解答 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 | 53 | 54 | 55 | 56 | 57 | 58 | 59 | 60 | 61 | 62 | 63 | 64 | 65 | 66 | 67 | 68 | 69 | 70 | 71 | 72 | 73 | 74 | 75 | 76 | 77 | 78 | 79 | 80 | 81 | 82 | 83 |
产生式语法规则
1)L -> EnL.val = E.val
2)E -> TE'E'.inh = T.val
E.val = E'.syn
3)E' -> +TE_1'E_1'.inh = E'.inh + T.val
E'.syn = E_1'.syn
4)E' -> εE'.syn = E'.inh
5)T -> FT'T'.inh = F.val
T.val = T'.syn
6)T' -> *FT_1'T_1'.inh = T'.inh * F.val
T'.syn = T_1'.syn
7)T' -> εT'.syn = T'.inh
8)F -> (E)F.val = E.val
9)F -> digitF.val = digit.lexval
84 | 85 | ### 5.1.3 86 | 87 | 使用你在练习 5.1.2 中得到的 SDD,重复练习 5.1.1 88 | 89 | #### 解答 90 | 91 | 1. (3+4)\*(5+6)n 92 | 93 | ![5 1 3-1](https://f.cloud.github.com/assets/340282/869333/278de5de-f7f5-11e2-9c63-c0aca2b8f843.gif) 94 | 95 | 96 | 2. 1\*2\*3\*(4+5)n 97 | 98 | ![5 1 3-2](https://f.cloud.github.com/assets/340282/883253/4a39c628-f97d-11e2-992a-4efbe81cce27.gif) 99 | 100 | 101 | -------------------------------------------------------------------------------- /ch05/5.1/assets/5.1.1-1.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/muhammadalmuzahid/compiler/b4cce261315be3e6a4b49d66eeb3cc8d0591bf60/ch05/5.1/assets/5.1.1-1.gif -------------------------------------------------------------------------------- /ch05/5.1/assets/5.1.1-2.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/muhammadalmuzahid/compiler/b4cce261315be3e6a4b49d66eeb3cc8d0591bf60/ch05/5.1/assets/5.1.1-2.gif -------------------------------------------------------------------------------- /ch05/5.1/assets/5.1.3-1.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/muhammadalmuzahid/compiler/b4cce261315be3e6a4b49d66eeb3cc8d0591bf60/ch05/5.1/assets/5.1.3-1.gif -------------------------------------------------------------------------------- /ch05/5.1/assets/5.1.3-2.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/muhammadalmuzahid/compiler/b4cce261315be3e6a4b49d66eeb3cc8d0591bf60/ch05/5.1/assets/5.1.3-2.gif -------------------------------------------------------------------------------- /ch05/5.2/5.2.1.js: -------------------------------------------------------------------------------- 1 | var preArr = [1, 2, 3, 4, 5] 2 | var postArr = [6, 7, 8, 9] 3 | 4 | function arrange(arr, pre, rt) { 5 | pre = pre || [] 6 | rt = rt || [] 7 | 8 
| arr.forEach(function(item) { 9 | var pre2 = pre.filter(function(){ 10 | return true 11 | }) 12 | pre2.push(item) 13 | 14 | if(arr.length > 1) { 15 | var arr2 = arr.filter(function(item2){ 16 | if (item !== item2) { 17 | return true 18 | } 19 | }) 20 | arrange(arr2, pre2, rt) 21 | 22 | } else { 23 | rt.push(pre2) 24 | } 25 | }) 26 | 27 | return rt 28 | } 29 | 30 | 31 | function filter(arr) { 32 | return arr.filter(function(item) { 33 | var idx1 = item.indexOf(1) 34 | var idx3 = item.indexOf(3) 35 | var idx5 = item.indexOf(5) 36 | var idx2 = item.indexOf(2) 37 | var idx4 = item.indexOf(4) 38 | 39 | if (idx1 < idx3 && idx3 < idx5 && idx2 < idx4) { 40 | return true 41 | } 42 | }) 43 | } 44 | 45 | 46 | console.log(filter(arrange(preArr)).map(function(item) { 47 | return item.concat(postArr) 48 | })) 49 | -------------------------------------------------------------------------------- /ch05/5.2/5.2.md: -------------------------------------------------------------------------------- 1 | # 5.2 节的练习 2 | 3 | ### 5.2.1 4 | 5 | 图 5-7 中的依赖图的全部拓扑顺序有哪些 6 | 7 | #### 解答 8 | 9 | [ 1, 2, 3, 4, 5, 6, 7, 8, 9 ], 10 | [ 1, 2, 3, 5, 4, 6, 7, 8, 9 ], 11 | [ 1, 2, 4, 3, 5, 6, 7, 8, 9 ], 12 | [ 1, 3, 2, 4, 5, 6, 7, 8, 9 ], 13 | [ 1, 3, 2, 5, 4, 6, 7, 8, 9 ], 14 | [ 1, 3, 5, 2, 4, 6, 7, 8, 9 ], 15 | [ 2, 1, 3, 4, 5, 6, 7, 8, 9 ], 16 | [ 2, 1, 3, 5, 4, 6, 7, 8, 9 ], 17 | [ 2, 1, 4, 3, 5, 6, 7, 8, 9 ], 18 | [ 2, 4, 1, 3, 5, 6, 7, 8, 9 ] 19 | 20 | 算法见 [5.2.1.js](5.2.1.js) 21 | 22 | ### 5.2.2 23 | 24 | 对于图 5-8 中的 SDD,给出下列表达式对应的注释语法分析树: 25 | 26 | 1. int a, b , c 27 | 2. float w, x, y, z 28 | 29 | #### 解答 30 | 31 | 1. int a, b, c 32 | 33 | ![5 2 2-1](https://f.cloud.github.com/assets/340282/890975/faf883c0-fa43-11e2-8d6c-eec2e33f771e.gif) 34 | 35 | 36 | ### 5.2.3 37 | 38 | 假设我们有一个产生式 A -> BCD。A, B, C, D 这四个非终结符号都有两个属性,综合属性 s 和继承属性 i。对于下面的每组规则,指出(1)这些规则是否满足 S 属性定义的要求(2)这些规则是否满足 L 属性定义的要求(3)是否存在和这些规则一致的求值过程? 39 | 40 | 1. A.s = B.i + C.s 41 | 2. A.s = B.i + C.s , D.i = A.i + B.s 42 | 3. 
A.s = B.s + D.s 43 | 4. ! A.s = D.i , B.i = A.s + C.s , C.i = B.s , D.i = B.i + C.i 44 | 45 | #### 解答 46 | 47 | 1. 否, ? 48 | 2. 否, 是 49 | 3. 是, 是 50 | 4. 否, 否 51 | 52 | 53 | ### 5.2.4 ! 54 | 55 | 这个文法生成了含“小数点”的二进制数: 56 | 57 | S -> L.L|L 58 | L -> LB|B 59 | B -> 0|1 60 | 61 | 设计一个 L 属性的 SDD 来计算 S.val,即输入串的十进制数值。比如,串 101.101 应该被翻译为十进制数 5.625。 62 | 63 | #### 解答 64 | 65 | 66 | 67 | 68 | 69 | 70 | 71 | 72 | 73 | 74 | 75 | 76 | 77 | 82 | 83 | 84 | 85 | 86 | 90 | 91 | 92 | 93 | 94 | 99 | 100 | 101 | 102 | 103 | 107 | 108 | 109 | 110 | 111 | 112 | 113 | 114 | 115 | 116 | 117 | 118 | 119 |
产生式语法规则
1)S -> L_1.L_2 78 | L_1.isLeft = true
79 | L_2.isLeft = false
80 | S.val = L_1.val + L_2.val 81 |
2)S -> L 87 | L.isLeft = true
88 | S.val = L.val 89 |
3)L -> L_1B 95 | L_1.isLeft = L.isLeft
96 | L.len = L_1.len + 1
97 | L.val = L.isLeft ? L_1.val * 2 + B.val : L_1.val + B.val * 2^(-L.len) 98 |
4)L -> B 104 | L.len = 1
105 | L.val = L.isLeft ? B.val : B.val/2 106 |
5)B -> 0B.val = 0
6)B -> 1B.val = 1
120 | 121 | 其中: 122 | 123 | - isLeft 为继承属性,表示节点是否在小数点的左边 124 | - len 为综合属性,表示节点包含的二进制串的长度 125 | - val 为综合属性 126 | 127 | ### 5.2.5 !! 128 | 129 | 为练习 5.2.4 中描述的文法和翻译设计一个 S 属性的 SDD。 130 | 131 | #### 解答 132 | 133 | 134 | 135 | 136 | 137 | 138 | 139 | 140 | 141 | 142 | 143 | 144 | 145 | 148 | 149 | 150 | 151 | 152 | 155 | 156 | 157 | 158 | 159 | 163 | 164 | 165 | 166 | 167 | 171 | 172 | 173 | 174 | 175 | 176 | 177 | 178 | 179 | 180 | 181 | 182 | 183 |
产生式语法规则
1)S -> L_1.L_2 146 | S.val = L_1.val + L_2.val/L_2.f 147 |
2)S -> L 153 | S.val = L.val 154 |
3)L -> L_1B 160 | L.val = L_1.val*2 + B.val
161 | L.f = L_1.f * 2 162 |
4)L -> B 168 | L.val = B.val
169 | L.f = 2 170 |
5)B -> 0B.val = 0
6)B -> 1B.val = 1
184 | 185 | ### 5.2.6 !! 186 | 187 | 使用一个自顶向下的语法分析文法上的 L 属性 SDD 来实现算法 3.23。这个算法把一个正则表达式转换为一个 NFA。假设有一个表示任意字符的词法单元 char,并且 char.lexval 是它所表示的字符。你可以假设存在一个函数 new(),该函数返回一个新的状态,也就是一个之前尚未被这个函数返回的状态。使用任何方便的表示来描述这个 NFA 的翻译。 -------------------------------------------------------------------------------- /ch05/5.2/assets/5.2.2-1.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/muhammadalmuzahid/compiler/b4cce261315be3e6a4b49d66eeb3cc8d0591bf60/ch05/5.2/assets/5.2.2-1.gif -------------------------------------------------------------------------------- /ch05/5.3/5.3.md: -------------------------------------------------------------------------------- 1 | # 5.3 节的练习 2 | 3 | ### 5.3.1 4 | 5 | 下面是涉及运算符 + 和整数或浮点运算分量的表达式的文法。区分浮点数的方法是看它有无小数点。 6 | 7 | E -> E + T | T 8 | T -> num.num | num 9 | 10 | 1. 给出一个 SDD 来确定每个项 T 和表达式 E 的类型 11 | 2. 扩展这个得到的 SDD,使得它可以把表达式转换成为后缀表达式。使用一个单目运算符 intToFloat 把一个整数转换为相等的浮点数。 12 | 13 | #### 解答 14 | 15 | 1. 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 |
产生式语法规则
1)E -> E_1 + TE.type = E_1.type === float || T.type === float ? float : int
2)E -> TE.type = T.type
3)T -> num.numT.type = float
4)T -> numT.type = int
48 | 49 | 50 | ### 5.3.2 ! 51 | 52 | 给出一个 SDD,将一个带有 + 和 * 的中缀表达式翻译成没有冗余括号的表达式。比如因为两个运算符都是左结合的,并且 * 的优先级高于 +,所以 ((a\*(b+c))\*(d)) 可翻译为 a\*(b+c)\*d 53 | 54 | #### 解答 55 | 56 | 几个属性设置: 57 | 58 | - wrapped: 表达式最外层是否有括号。 59 | - precedence: 令 +,\*,() 和单 digit 的优先级分别为 0,1,2,3。 如果表达式最外层有括号,则为去掉括号后最后被计算的运算符的优先级,否则为表达式最后被计算的运算符的优先级。 60 | - expr: 表达式。 61 | - cleanExpr: 去除了冗余括号的表达式。 62 | 63 | 64 | 65 | 66 | 67 | 68 | 69 | 70 | 71 | 72 | 73 | 74 | 75 | 78 | 79 | 80 | 81 | 82 | 88 | 89 | 90 | 91 | 92 | 98 | 99 | 100 | 101 | 102 | 108 | 109 | 110 | 111 | 112 | 118 | 119 | 120 | 121 | 122 | 128 | 129 | 130 | 131 | 132 | 138 | 139 | 140 |
产生式语法规则
1)L -> En 76 | L.cleanExpr = E.wrapped ? E.cleanExpr : E.expr 77 |
2)E -> E_1 + T 83 | E.wrapped = false
84 | E.precedence = 0
85 | E.expr = E_1.expr || "+" || T.expr
86 | E.cleanExpr = (E_1.wrapped ? E_1.cleanExpr : E_1.expr) || "+" || (T.wrapped ? T.cleanExpr : T.expr) 87 |
3)E -> T 93 | E.wrapped = T.wrapped
94 | E.precedence = T.precedence
95 | E.expr = T.expr
96 | E.cleanExpr = T.cleanExpr 97 |
4)T -> T_1 * F 103 | T.wrapped = false
104 | T.precedence = 1
105 | T.expr = T_1.expr || "*" || F.expr
106 | T.cleanExpr = (T_1.wrapped && T_1.precedence >= 1 ? T_1.cleanExpr : T_1.expr) || "*" || (F.wrapped && F.precedence >= 1 ? F.cleanExpr : F.expr) 107 |
5)T -> F 113 | T.wrapped = F.wrapped
114 | T.precedence = F.precedence
115 | T.expr = F.expr
116 | T.cleanExpr = F.cleanExpr 117 |
6)F -> (E) 123 | F.wrapped = true
124 | F.precedence = E.precedence
125 | F.expr = "(" || E.expr || ")"
126 | F.cleanExpr = E.expr 127 |
7)F -> digit 133 | F.wrapped = false
134 | F.precedence = 3
135 | F.expr = digit
136 | F.cleanExpr = digit 137 |
141 | 142 | 143 | ### 5.3.3 ! 144 | 145 | 给出一个 SDD 对 x\*(3\*x+x\*x) 这样的表达式求微分。表达式中涉及运算符 + 和 * 、变量 x 和常量。假设不进行任何简化,也就是说,比如 3\*x 将被翻译为 3\*1+0\*x。 -------------------------------------------------------------------------------- /ch05/5.4/5.4.md: -------------------------------------------------------------------------------- 1 | # 5.4 节的练习 2 | 3 | ### 5.4.1 4 | 5 | 我们在 5.4.2 节中提到可能根据语法分析栈中的 LR 状态来推导出这个状态表示了什么文法符号。我们如何推导这个信息? 6 | 7 | #### 解答 8 | 9 | 见算法 4.44 10 | 11 | ### 5.4.2 12 | 13 | 改写下面的 SDT: 14 | 15 | A -> A {a} B | A B {b} | 0 16 | B -> B {c} A | B A {d} | 1 17 | 18 | 使得基础文法变成非左递归的。 19 | 20 | ### 5.4.3 ! 21 | 22 | 下面的 SDT 计算了一个由 0 和 1 组成的串的值。它把输入的符号串当做正二进制数来解释。 23 | 24 | B -> B_1 0 {B.val = 2 * B_1.val} 25 | | B_1 1 {B.val = 2 * B_1.val + 1} 26 | | 1 {B.val = 1} 27 | 28 | 改写这个 SDT,使得基础文法不再是左递归的,但仍然可以计算出整个输入串的相同的 B.val 的值。 29 | 30 | #### 解答 31 | 32 | 提取左公因子 33 | 34 | B -> B_1 digit {B.val = 2 * B_1.val + digit.val} 35 | | 1 {B.val = 1} 36 | digit -> 0 {digit.val = 0} 37 | | 1 {digit.val = 1} 38 | 39 | 在形如 `A = A a | b` 的左递归产生式中, a 为 `digit {B.val = 2 * B_1.val + digit.val}`, b 为 `1` 40 | 41 | 消除左递归后得 42 | 43 | B -> 1 {A.i = 1} A 44 | A -> digit {A_1.i = 2 * A.i + digit.val} A_1 {A.val = A_1.val} 45 | | ε {A.val = A.i} 46 | digit -> 0 {digit.val = 0} 47 | | 1 {digit.val = 1} 48 | 49 | ### 5.4.4 ! 50 | 51 | 为下面的产生式写出一个和例 5.19 类似的 L 属性 SDD。这里的每个产生式表示一个常见的 C 语言那样的控制流结构。你可能需要生成一个三地址语句来跳转到某个标号 L,此时你可以生成语句 goto L。 52 | 53 | 1. S -> if ( C ) S_1 else S_2 54 | 2. S -> do S_1 while ( C ) 55 | 3. S -> '{' L '}'; L -> L S | ε 56 | 57 | 请注意,列表中的任何语句都可能包含一条从它的内部跳转到下一个语句的跳转指令,因此简单地为各个语句按顺序生成代码是不够的。 58 | 59 | #### 解答 60 | 61 | 1. S -> if ( C ) S_1 else S_2 62 | 63 | L_1 = new() 64 | C.false = L_1 65 | S_1.next = S.next 66 | S.code = C.code || S_1.code || label || L_1 || S_2.code 67 | 68 | 2. 
S -> do S_1 while ( C ) 69 | 70 | L_1 = new() 71 | C.true = L_1 72 | S.code = label || L_1 || S_1.code || C.code 73 | 74 | 75 | ### 5.4.5 76 | 77 | 按照例 5.19 的方法,把在练习 5.4.4 中得到的各个 SDD 转换成一个 SDT。 78 | 79 | #### 解答 80 | 81 | 1. S -> if ( C ) S_1 else S_2 82 | 83 | S -> if ( {new L_1; C.false = L_1} 84 | C ) {S_1.next = S.next} 85 | S_1 else 86 | S_2 {S.code = C.code || S_1.code || label || L_1 || S_2.code} 87 | 88 | 2. S -> do S_1 while ( C ) 89 | 90 | S -> do {new L_1} 91 | S_1 while ( {C.true = L_1} 92 | C ) {S.code = label || L_1 || S_1.code || C.code} 93 | 94 | ### 5.4.6 95 | 96 | 修改图 5.25 中的 SDD,使它包含一个综合属性 B.le,即一个方框的长度。两个方框并列后得到的方框的长度是这两个方框的长度和。然后把你的新规则加入到图 5.26 中 SDT 的合适位置上。 97 | 98 | ### 5.4.7 99 | 100 | 修改图 5.25 中的 SDD,使它包含上标,用方框之间的运算符 sup 表示。如果方框 B_2 是方框 B_1 的一个上标,那么将 B_2 的基线放在 B_1 的基线上方,两条基线的距离是 0.6 乘以 B_1 的大小。把新的产生式和规则加入到图 5.26 的 SDT 中去。 101 | 102 | #### 5.4.6 和 5.4.7 的解答 103 | 104 | 1) S -> B B.ps = 10 105 | B.wd = 106 | 107 | 2) S -> B_1 B_2 B_1.ps = B.ps 108 | B_2.ps = B.ps 109 | B.wd = B_1.wd + B_2.wd 110 | B.ht = max(B_1.ht, B_2.ht) 111 | B.dp = max(B_1.dp, B_2.dp) 112 | 113 | 3) B -> B_1 sub B_2 B_1.ps = B.ps 114 | B_2.ps = 0.7 * B.ps 115 | B.wd = B_1.wd + B_2.wd 116 | B.ht = max(B_1.ht, B_2.ht - 0.25 * B.ps) 117 | B.dp = max(B_1.dp, B_2.dp + 0.25 * B.ps) 118 | 119 | 4) B -> B_1 sup B_2 B_1.ps = B.ps 120 | B_2.ps = 0.6 * B.ps 121 | B.wd = B_1.wd + B_2.wd 122 | B.ht = max(B_1.ht, B_2.ht + 0.6 * B.ps) 123 | B.dp = max(B_1.dp, B_2.dp - 0.6 * B.ps) 124 | 125 | 5) B -> ( B_1 ) B_1.ps = B.ps 126 | B.wd = B_1.wd 127 | B.ht = B_1.ht 128 | B.dp = B_1.dp 129 | 130 | 6) B -> text B.wd = getWd(B.ps, text.lexval) 131 | B.ht = getHt(B.ps, text.lexval) 132 | B.dp = getDp(B.ps, text.lexval) 133 | 134 | 135 | -------------------------------------------------------------------------------- /ch05/5.5/5.5.md: -------------------------------------------------------------------------------- 1 | # 5.5 节的练习 2 | 3 | ### 5.5.1 4 | 5 | 按照 5.5.1 节的风格,将练习 5.4.4 中得到的每个 SDD 
实现为递归下降的语法分析器。 6 | 7 | ### 5.5.2 8 | 9 | 按照 5.5.2 节的风格,将练习 5.4.4 中得到的每个 SDD 实现为递归下降的语法分析器。 10 | 11 | ### 5.5.3 12 | 13 | 按照 5.5.3 节的风格,将练习 5.4.4 中得到的每个 SDD 和一个 LL 语法分析器一起实现。它们应该边扫描输入边生成代码。 14 | 15 | ### 5.5.4 16 | 17 | 按照 5.5.3 节的风格,将练习 5.4.4 中得到的每个 SDD 和一个 LL 语法分析器一起实现,但是代码(或者指向代码的指针)存放在栈中。 18 | 19 | ### 5.5.5 20 | 21 | 按照 5.5.4 节的风格,将练习 5.4.4 中得到的每个 SDD 和一个 LR 语法分析器一起实现。 22 | 23 | ### 5.5.6 24 | 25 | 按照 5.5.1 节 的风格实现练习 5.2.4 中得到的 SDD。按照 5.5.2 节的风格得到的实现和这个实现相比有什么不同吗? -------------------------------------------------------------------------------- /ch06/6.1/6.1.md: -------------------------------------------------------------------------------- 1 | # 6.1 节的练习 2 | 3 | ### 为下面的表达式构造 DAG 4 | 5 | ((x+y)-((x+y)*(x-y)))+((x+y)*(x-y)) 6 | 7 | #### 解答 8 | 9 | ![6.1.1](https://f.cloud.github.com/assets/340282/1062040/9c986b90-121e-11e3-9baa-a316f8f3008c.gif) 10 | 11 | 12 | ### 为下列表达式构造 DAG,且指出他们每个子表达式的值编码。假定 + 是左结合的。 13 | 14 | 1. a+b+(a+b) 15 | 2. a+b+a+b 16 | 3. a+a+(a+a+a+(a+a+a+a)) 17 | 18 | #### 解答 19 | 20 | 1. a+b+(a+b) 21 | 22 | ![6 1 2-1](https://f.cloud.github.com/assets/340282/1062041/01abd1a2-121f-11e3-8b44-b41c5a30442b.gif) 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | 48 | 49 | 50 | 51 |
1ida
2idb
3+12
4+33
52 | 53 | 2. a+b+a+b 54 | 55 | ![6 1 2-2](https://f.cloud.github.com/assets/340282/1062050/8ea74744-121f-11e3-9230-604af4a60cf7.gif) 56 | 57 | 58 | 59 | 60 | 61 | 62 | 63 | 64 | 65 | 66 | 67 | 68 | 69 | 70 | 71 | 72 | 73 | 74 | 75 | 76 | 77 | 78 | 79 | 80 | 81 | 82 | 83 | 84 | 85 | 86 | 87 | 88 | 89 | 90 |
1ida
2idb
3+12
4+31
5+42
91 | 92 | 3. a+a+(a+a+a+(a+a+a+a)) 93 | 94 | ![6 1 2-3](https://f.cloud.github.com/assets/340282/1062059/ee983d66-121f-11e3-9937-f8ec477df4ed.gif) 95 | 96 | 97 | 98 | 99 | 100 | 101 | 102 | 103 | 104 | 105 | 106 | 107 | 108 | 109 | 110 | 111 | 112 | 113 | 114 | 115 | 116 | 117 | 118 | 119 | 120 | 121 | 122 | 123 | 124 | 125 | 126 | 127 | 128 | 129 | 130 | 131 | 132 | 133 | 134 |
1ida
2+11
3+21
4+31
5+34
6+25
135 | -------------------------------------------------------------------------------- /ch06/6.1/assets/6.1.1.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/muhammadalmuzahid/compiler/b4cce261315be3e6a4b49d66eeb3cc8d0591bf60/ch06/6.1/assets/6.1.1.gif -------------------------------------------------------------------------------- /ch06/6.1/assets/6.1.2-1.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/muhammadalmuzahid/compiler/b4cce261315be3e6a4b49d66eeb3cc8d0591bf60/ch06/6.1/assets/6.1.2-1.gif -------------------------------------------------------------------------------- /ch06/6.1/assets/6.1.2-2.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/muhammadalmuzahid/compiler/b4cce261315be3e6a4b49d66eeb3cc8d0591bf60/ch06/6.1/assets/6.1.2-2.gif -------------------------------------------------------------------------------- /ch06/6.1/assets/6.1.2-3.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/muhammadalmuzahid/compiler/b4cce261315be3e6a4b49d66eeb3cc8d0591bf60/ch06/6.1/assets/6.1.2-3.gif -------------------------------------------------------------------------------- /ch06/6.2/6.2.md: -------------------------------------------------------------------------------- 1 | # 6.2 节的练习 2 | 3 | ### 6.2.1 4 | 5 | 将算数表达式 a+-(b+c) 翻译成 6 | 7 | 1. 抽象语法树 8 | 2. 四元式序列 9 | 3. 三元式序列 10 | 4. 间接三元式序列 11 | 12 | #### 解答 13 | 14 | 1. 抽象语法树 15 | 16 | ![6 2 1](https://f.cloud.github.com/assets/340282/1062210/dae8e64c-1230-11e3-9518-a128ca9a4d45.gif) 17 | 18 | 19 | 2. 四元式序列 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 | 53 | 54 |
oparg1arg2result
0+bct1
1minust1t2
2+at2t3
55 | 56 | 3. 三元式序列 57 | 58 | 59 | 60 | 61 | 62 | 63 | 64 | 65 | 66 | 67 | 68 | 69 | 70 | 71 | 72 | 73 | 74 | 75 | 76 | 77 | 78 | 79 | 80 | 81 | 82 | 83 | 84 | 85 | 86 | 87 |
oparg1arg2
0+bc
1minus(0)
2+a(1)
88 | 89 | 4. 间接三元式序列 90 | 91 | 92 | 93 | 94 | 95 | 96 | 97 | 98 | 99 | 100 | 101 | 102 | 103 | 104 | 105 | 106 | 107 | 108 | 109 | 110 | 111 | 112 | 113 | 114 | 115 | 116 | 117 | 118 | 119 | 120 |
oparg1arg2
0+bc
1minus(0)
2+a(1)
121 | 122 | 123 | 124 | 125 | 126 | 127 | 128 | 129 | 130 | 131 | 132 | 133 | 134 | 135 | 136 | 137 | 138 | 139 | 140 | 141 | 142 | 143 |
instruction
0(0)
1(1)
2(2)
144 | 145 | #### 参考 146 | 147 | - [间接三元式更详细的讲解](http://jpkc.nwpu.edu.cn/jp2005/20/kcwz/wlkc/wlkc/05/5_3_2.htm) 148 | 149 | ### 6.2.2 150 | 151 | 对下列赋值语句重复练习 6.2.1 152 | 153 | 1. a = b[i] + c[j] 154 | 2. a[i] = b\*c - b\*d 155 | 3. x = f(y+1) + 2 156 | 4. x = \*p + &y 157 | 158 | #### 解答 159 | 160 | 1. a = b[i] + c[j] 161 | 162 | - 四元式 163 | 164 | 0) =[] b i t1 165 | 1) =[] c j t2 166 | 2) + t1 t2 t3 167 | 3) = t3 a 168 | 169 | - 三元式 170 | 171 | 0) =[] b i 172 | 1) =[] c j 173 | 2) + (0) (1) 174 | 3) = a (2) 175 | 176 | - 间接三元式 177 | 178 | 0) =[] b i 179 | 1) =[] c j 180 | 2) + (0) (1) 181 | 3) = a (2) 182 | 183 | 0) 184 | 1) 185 | 2) 186 | 3) 187 | 188 | 2. a[i] = b\*c - b\*d 189 | 190 | - 四元式 191 | 192 | 0) * b c t1 193 | 1) * b d t2 194 | 2) - t1 t2 t3 195 | 3) []= a i t4 196 | 4) = t3 t4 197 | 198 | - 三元式 199 | 200 | 0) * b c 201 | 1) * b d 202 | 2) - (0) (1) 203 | 3) []= a i 204 | 4) = (3) (2) 205 | 206 | - 间接三元式 207 | 208 | 0) * b c 209 | 1) * b d 210 | 2) - (0) (1) 211 | 3) []= a i 212 | 4) = (3) (2) 213 | 214 | 0) 215 | 1) 216 | 2) 217 | 3) 218 | 4) 219 | 220 | 3. x = f(y+1) + 2 221 | 222 | - 四元式 223 | 224 | 0) + y 1 t1 225 | 1) param t1 226 | 2) call f 1 t2 227 | 3) + t2 2 t3 228 | 4) = t3 x 229 | 230 | - 三元式 231 | 232 | 0) + y 1 233 | 1) param (0) 234 | 2) call f 1 235 | 3) + (2) 2 236 | 4) = x (3) 237 | 238 | - 间接三元式 239 | 240 | 0) + y 1 241 | 1) param (0) 242 | 2) call f 1 243 | 3) + (2) 2 244 | 4) = x (3) 245 | 246 | 0) 247 | 1) 248 | 2) 249 | 3) 250 | 4) 251 | 252 | #### 参考 253 | 254 | - [数组元素的取值和赋值](http://www.mec.ac.in/resources/notes/notes/compiler/module5/intermediate.htm) 255 | 256 | ### 6.2.3 ! 
257 | 258 | 说明如何对一个三地址代码序列进行转换,使得每个被定值的变量都有唯一的变量名。 259 | 260 | -------------------------------------------------------------------------------- /ch06/6.2/assets/6.2.1.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/muhammadalmuzahid/compiler/b4cce261315be3e6a4b49d66eeb3cc8d0591bf60/ch06/6.2/assets/6.2.1.gif -------------------------------------------------------------------------------- /ch06/6.3/6.3.md: -------------------------------------------------------------------------------- 1 | # 6.3 节的练习 2 | 3 | ### 6.3.1 4 | 5 | 确定下列声明序列中各个标识符的类型和相对地址。 6 | 7 | float x; 8 | record {float x; float y;} p; 9 | record {int tag; float x; float y;} q; 10 | 11 | #### 解答 12 | 13 | SDT 14 | 15 | S -> {top = new Env(); offset = 0;} 16 | D 17 | D -> T id; {top.put(id.lexeme, T.type, offset); 18 | offset += T.width} 19 | D1 20 | D -> ε 21 | T -> int {T.type = integer; T.width = 4;} 22 | T -> float {T.type = float; T.width = 8;} 23 | T -> record '{' 24 | {Env.push(top), top = new Env(); 25 | Stack.push(offset), offset = 0;} 26 | D '}' {T.type = record(top); T.width = offset; 27 | top = Env.pop(); offset = Stack.pop();} 28 | 29 | 标识符类型和相对地址 30 | 31 | line id type offset Env 32 | 33 | 1) x float 0 1 34 | 35 | 2) x float 0 2 36 | 2) y float 8 2 37 | 2) p record() 8 1 38 | 39 | 3) tag int 0 3 40 | 3) x float 4 3 41 | 3) y float 12 3 42 | 3) q record() 24 1 43 | 44 | ### 6.3.2 ! 45 | 46 | 将图 6-18 对字段名的处理方法扩展到类和单继承的层次结构。 47 | 48 | 1. 给出类 Env 的一个实现。该实现支持符号表链,使得子类可以重定义一个字段名,也可以直接引用某个超类中的字段名。 49 | 2. 给出一个翻译方案,该方案能够为类中的字段分配连续的数据区域,这些字段中包含继承而来的域。继承而来的字段必须保持在对超类进行存储分配时获得的相对地址。 50 | 51 | 52 | 53 | 54 | 55 | 56 | 57 | 58 | 59 | 60 | 61 | 62 | -------------------------------------------------------------------------------- /ch06/6.4/6.4.md: -------------------------------------------------------------------------------- 1 | # 6.4 节的练习 2 | 3 | ### 6.4.1 4 | 5 | 向图 6-19 的翻译方案中加入对应于下列产生式的规则: 6 | 7 | 1. E -> E1 * E2 8 | 2. 
E -> +E1 9 | 10 | #### 解答 11 | 12 | 产生式 语义规则 13 | 14 | E -> E1 * E2 { E.addr = new Temp(); 15 | E.code = E1.code || E2.code || 16 | gen(E.addr '=' E1.addr '*' E2.addr); } 17 | 18 | | +E1 { E.addr = E1.addr; 19 | E.code = E1.code; } 20 | 21 | ### 6.4.2 22 | 23 | 使用图 6-20 的增量式翻译方案重复练习 6.4.1 24 | 25 | #### 解答 26 | 27 | 产生式 语义规则 28 | 29 | E -> E1 * E2 { E.addr = new Temp(); 30 | gen(E.addr '=' E1.addr '*' E2.addr); } 31 | 32 | | +E1 { E.addr = E1.addr; } 33 | 34 | ### 6.4.3 35 | 36 | 使用图 6-22 的翻译方案来翻译下列赋值语句: 37 | 38 | 1. x = a[i] + b[j] 39 | 2. x = a[i][j] + b[i][j] 40 | 3. ! x = a[b[i][j]][c[k]] 41 | 42 | #### 解答 43 | 44 | 1. x = a[i] + b[j] 45 | 46 | 语法分析树: 47 | 48 | ![6 4 3-1](https://f.cloud.github.com/assets/340282/1085302/1cba4a7e-15ca-11e3-842b-29a5d658b808.gif) 49 | 50 | 三地址代码: 51 | 52 | t_1 = i * awidth 53 | t_2 = a[t_1] 54 | t_3 = j * bwidth 55 | t_4 = b[t_3] 56 | t_5 = t_2 + t_4 57 | x = t_5 58 | 59 | 2. x = a[i][j] + b[i][j] 60 | 61 | 语法分析树: 62 | 63 | ![6 4 3-2](https://f.cloud.github.com/assets/340282/1087467/fb6b0634-1618-11e3-9ccc-2044c8c62c8b.gif) 64 | 65 | 三地址代码: 66 | 67 | t_1 = i * ai_width 68 | t_2 = j * aj_width 69 | t_3 = t_1 + t_2 70 | t_4 = a[t_3] 71 | t_5 = i * bi_width 72 | t_6 = j * bj_width 73 | t_7 = t_5 + t_6 74 | t_8 = b[t_7] 75 | t_9 = t_4 + t_8 76 | x = t_9 77 | 78 | 3. ! x = a[b[i][j]][c[k]] 79 | 80 | ### 6.4.4 ! 
81 | 82 | 修改图 6-22 的翻译方案,使之适合 Fortran 风格的数组引用,也就是说 n 维数组的引用为 id[E1, E2, …, En] 83 | 84 | #### 解答 85 | 86 | 仅需修改 L 产生式(同图 6-22 一样,未考虑消除左递归) 87 | 88 | L -> id[A] { L.addr = A.addr; 89 | global.array = top.get(id.lexeme); } 90 | 91 | A -> E { A.array = global.array; 92 | A.type = A.array.type.elem; 93 | A.addr = new Temp(); 94 | gen(A.addr '=' E.addr '*' A.type.width); } 95 | 96 | A -> A1,E { A.array = A1.array; 97 | A.type = A1.type.elem; 98 | t = new Temp(); 99 | A.addr = new Temp(); 100 | gen(t '=' E.addr '*' A.type.length); 101 | gen(A.addr '=' A1.addr '+' t); } 102 | 103 | #### 注意 104 | 105 | 令 a 表示一个 i*j 的数组,单个元素宽度为 w 106 | 107 | a.type = array(i, array(j, w)) 108 | a.type.length = i 109 | a.type.elem = array(j, w) 110 | 111 | 112 | ### 6.4.5 113 | 114 | 将公式 6.7 推广到多维数组上,并指出哪些值可以被存放到符号表中并用来计算偏移量。考虑下列情况: 115 | 116 | 1. 一个二维数组 A,按行存放。第一维的下标从 l_1 到 h_1,第二维的下标从 l_2 到 h_2。单个数组元素的宽度为 w。 117 | 2. 其他条件和 1 相同,但是采用按列存放方式。 118 | 3. !一个 k 维数组 A,按行存放,元素宽度为 w,第 j 维的下标从 l_j 到 h_j。 119 | 4. !其他条件和 3 相同,但是采用按列存放方式。 120 | 121 | #### 解答 122 | 123 | 令 n_i 为第 i 维数组的元素个数,计算公式:n_i = h_i - l_i + 1 124 | 125 | 3. A[i_1]…[i_k] = base + 126 | ( 127 | (i_1 - l_1) * n_2 * … * n_k + 128 | … + 129 | (i_{k-1} - l_{k-1}) * n_k + 130 | (i_k - l_k) 131 | ) * w 132 | 133 | 4. A[i_1]…[i_k] = base + 134 | ( 135 | (i_1 - l_1) + 136 | (i_2 - l_2) * n_1 + 137 | … + 138 | (i_k - l_k) * n_{k-1} * n_{k-2} * … * n_1 139 | ) * w 140 | 141 | ### 6.4.6 142 | 143 | 一个按行存放的整数数组 A[i, j] 的下标 i 的范围为 1~10,下标 j 的范围为 1~20。每个整数占 4 个字节。假设数组 A 从 0 字节开始存放,请给出下列元素的位置: 144 | 145 | 1. A[4, 5] 146 | 2. A[10, 8] 147 | 3. A[3, 17] 148 | 149 | #### 解答 150 | 151 | 计算公式:((i-1) * 20 + (j-1)) * 4 152 | 153 | 1. (3 * 20 + 4) * 4 = 256 154 | 2. (9 * 20 + 7) * 4 = 748 155 | 3. (2 * 20 + 16) * 4 = 224 156 | 157 | ### 6.4.7 158 | 159 | 假定 A 是按列存放的,重复练习 6.4.6 160 | 161 | #### 解答 162 | 163 | 计算公式:((j-1) \* 10 + (i-1)) * 4 164 | 165 | 1. (4 * 10 + 3) * 4 = 172 166 | 2. (7 * 10 + 9) * 4 = 316 167 | 3. 
(16 * 10 + 2) * 4 = 648 168 | 169 | ### 6.4.8 170 | 171 | 一个按行存放的实数型数组 A[i, j, k] 的下标 i 的范围为 1~4,下标 j 的范围为 0~4,且下标 k 的范围为 5~10。每个实数占 8 个字节。假设数组 A 从 0 字节开始存放,计算下列元素的位置: 172 | 173 | 1. A[3, 4, 5] 174 | 2. A[1, 2, 7] 175 | 3. A[4, 3, 9] 176 | 177 | #### 解答 178 | 179 | 计算公式:((i-1) * 5 * 6 + j * 6 + (k-5)) * 8 180 | 181 | 1. ((3-1) * 5 * 6 + 4 * 6 + (5-5)) * 8 = 672 182 | 2. ((1-1) * 5 * 6 + 2 * 6 + (7-5)) * 8 = 112 183 | 3. ((4-1) * 5 * 6 + 3 * 6 + (9-5)) * 8 = 896 184 | 185 | ### 6.4.9 186 | 187 | 假定 A 是按列存放的,重复练习 6.4.8 188 | 189 | #### 解答 190 | 191 | 计算公式:((i-1) + j * 4 + (k-5) * 5 * 4) * 8 192 | 193 | 1. ((3-1) + 4 * 4 + (5-5) * 5 * 4) * 8 = 144 194 | 2. ((1-1) + 2 * 4 + (7-5) * 5 * 4) * 8 = 384 195 | 3. ((4-1) + 3 * 4 + (9-5) * 5 * 4) * 8 = 760 196 | 197 | -------------------------------------------------------------------------------- /ch06/6.4/assets/6.4.3-1.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/muhammadalmuzahid/compiler/b4cce261315be3e6a4b49d66eeb3cc8d0591bf60/ch06/6.4/assets/6.4.3-1.gif -------------------------------------------------------------------------------- /ch06/6.4/assets/6.4.3-2.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/muhammadalmuzahid/compiler/b4cce261315be3e6a4b49d66eeb3cc8d0591bf60/ch06/6.4/assets/6.4.3-2.gif -------------------------------------------------------------------------------- /ch06/6.5/6.5.md: -------------------------------------------------------------------------------- 1 | # 6.5 节的练习 2 | 3 | ### 6.5.1 4 | 5 | 假定图 6-26 中的函数 widen 可以处理图 6-25a 的层次结构中的所有类型,翻译下列表达式。假定 c 和 d 是字符类型,s 和 t 是短整型, i 和 j 为整型, x 是浮点型。 6 | 7 | 1. x = s + c 8 | 2. i = s + c 9 | 3. x = (s + c) * (t + d) 10 | 11 | #### 解答 12 | 13 | 1. x = s + c 14 | 15 | t1 = (int) s 16 | t2 = (int) c 17 | t3 = t1 + t2 18 | x = (float) t3 19 | 20 | 2. 
i = s + c 21 | 22 | t1 = (int) s 23 | t2 = (int) c 24 | i = t1 + t2 25 | 26 | 3. x = (s + c) * (t + d) 27 | 28 | t1 = (int) s 29 | t2 = (int) c 30 | t3 = t1 + t2 31 | t4 = (int) t 32 | t5 = (int) d 33 | t6 = t4 + t5 34 | t7 = t3 * t6 35 | x = (float) t7 36 | 37 | 38 | ### 6.5.2 39 | 40 | 像 Ada 中那样,我们假设每个表达式必须具有唯一的类型,但是我们根据一个子表达式本身只能推导出一个可能类型的集合。也就是说,将函数 E1 应用于参数 E2(文法产生式为 E -> E1(E2))有如下规则: 41 | 42 | E.type = {t | 对 E2.type 中的某个 s, s -> t 在 E1.type 中} 43 | 44 | 描述一个可以确定每个子表达式的唯一类型的 SDD。它首先使用属性 type,按照自底向上的方式综合得到一个可能类型的集合。在确定了整个表达式的唯一类型之后,自顶向下地确定属性 unique 的值,这个属性表示各子表达式的类型。 -------------------------------------------------------------------------------- /ch06/6.6/6.6.md: -------------------------------------------------------------------------------- 1 | # 6.6 节的练习 2 | 3 | ### 6.6.1 4 | 5 | 在图 6-36 的语法制导定义中添加处理下列控制流构造的规则: 6 | 7 | 1. 一个 repeat 语句:repeat S while B 8 | 2. !一个 for 循环语句:for (S1; B; S2) S3 9 | 10 | #### 解答 11 | 12 | Production Syntax Rule 13 | 14 | S -> repeat S1 while B S1.next = newlabel() 15 | B.true = newlabel() 16 | B.false = S.next 17 | S.code = label(B.true) || S1.code 18 | || label(S1.next) || B.code 19 | 20 | S -> for (S1; B; S2) S3 S1.next = newlabel() 21 | B.true = newlabel() 22 | B.false = S.next 23 | S2.next = S1.next 24 | S3.next = newlabel() 25 | S.code = S1.code 26 | || label(S1.next) || B.code 27 | || label(B.true) || S3.code 28 | || label(S3.next) || S2.code 29 | || gen('goto', S1.next) 30 | 31 | ### 6.6.2 32 | 33 | 现代计算机试图在同一个时刻执行多条指令,其中包括各种分支指令。因此,当计算机投机性地预先执行某个分支,但实际控制流却进入另一个分支时,付出的代价是很大的。因此我们希望尽可能地减少分支数量。请注意,在图 6-35c 中 while 循环语句的实现中,每个迭代有两个分支:一个是从条件 B 进入到循环体中,另一个分支跳转回 B 的代码。基于尽量减少分支的考虑,我们通常更倾向于将 while(B) S 当作 if(B) {repeat S until !(B)} 来实现。给出这种翻译方法的代码布局,并修改图 6-36 中 while 循环语句的规则。 34 | 35 | #### 解答 36 | 37 | Production Syntax Rule 38 | 39 | S -> if(B) { B.true = newlabel() 40 | repeat S1 B.false = S.next 41 | until !(B) S1.next = newlabel() 42 | } S.code = B.code 43 | || label(B.true) || S1.code 44 | || label(S1.next) || B.code 45 | 
46 | ### 6.6.3! 47 | 48 | 假设 C 中存在一个异或运算。按照图 6-37 的风格写出这个运算符的代码生成规则。 49 | 50 | #### 解答 51 | 52 | B1 ^ B2 等价于 !B1 && B2 || B1 && !B2 (运算符优先级 ! > && > ||) 53 | 54 | Production Syntax Rule 55 | 56 | B -> B1 ^ B2 B1.true = newlabel() 57 | B1.false = newlabel() 58 | 59 | B2.true = B.true 60 | B2.false = B1.true 61 | 62 | b3 = newboolean() 63 | b3.code = B1.code 64 | b3.true = newlabel() 65 | b3.false = B.false 66 | 67 | b4 = newboolean() 68 | b4.code = B2.code 69 | b4.true = B.false 70 | b4.false = B.true 71 | 72 | S.code = B1.code 73 | || label(B1.false) || B2.code 74 | || label(B1.true) || b3.code 75 | || label(b3.true) || b4.code 76 | 77 | ### 6.6.4 78 | 79 | 使用 6.6.5 节中介绍的避免 goto 语句的翻译方案,翻译下列表达式: 80 | 81 | 1. if (a==b && c==d || e==f) x == 1 82 | 2. if (a==b || c==d || e==f) x == 1 83 | 3. if (a==b || c==d && e==f) x == 1 84 | 85 | #### 解答 86 | 87 | 1. if (a==b && c==d || e==f) x == 1 88 | 89 | ifFalse a==b goto L3 90 | if c==d goto L2 91 | L3: ifFalse e==f goto L1 92 | L2: x == 1 93 | L1: 94 | 95 | 2. if (a==b || c==d || e==f) x == 1 96 | 97 | if a==b goto L2 98 | if c==d goto L2 99 | ifFalse e==f goto L1 100 | L2: x==1 101 | L1: 102 | 103 | 3. 
if (a==b || c==d && e==f) x == 1 104 | 105 | if a==b goto L2 106 | ifFalse c==d goto L1 107 | ifFalse e==f goto L1 108 | L2: x==1 109 | L1: 110 | 111 | ### 6.6.5 112 | 113 | 基于图 6-36 和图 6-37 中给出的语法制导定义,给出一个翻译方案。 114 | 115 | ### 6.6.6 116 | 117 | 使用类似于图 6-39 和图 6-40 中的规则,修改图 6-36 和图 6-37 的语义规则,使之允许控制流穿越。 118 | 119 | #### 解答 120 | 121 | 仅补充书中未解答的部分 122 | 123 | Production Syntax Rule 124 | 125 | S -> if(B) S1 else S2 B.true = fall 126 | B.false = newlabel() 127 | S1.next = S.next 128 | S2.next = S.next 129 | S.code = B.code 130 | || S1.code 131 | || gen('goto', S1.next) 132 | || label(B.false) || S2.code 133 | 134 | S -> while(B) S1 begin = newlabel() 135 | B.true = fall 136 | B.false = S.next 137 | S1.next = begin 138 | S.code = label(begin) || B.code 139 | || S1.code 140 | || gen('goto', begin) 141 | 142 | S -> S1 S2 S1.next = fall 143 | S2.next = S.next 144 | S.code = S1.code || S2.code 145 | 146 | B -> B1 && B2 B1.true = fall 147 | B1.false = if B.false == fall 148 | then newlabel() 149 | else B.false 150 | B2.true = B.true 151 | B2.false = B.false 152 | B.code = if B.false == fall 153 | then B1.code || B2.code || label(B1.false) 154 | else B1.code || B2.code 155 | 156 | ### 6.6.7! 157 | 158 | 练习 6.6.6 中的语义规则产生了一些不必要的标号。修改图 6-36 中语句的规则,使之只创建必要的标号。你可以使用特殊符号 deferred 来表示还没有创建的一个标号。你的语义规则必须能生成类似于例 6.21 的代码。 159 | 160 | ### 6.6.8!! 161 | 162 | 6.6.5 节中讨论了如何使用穿越代码来尽可能减少生成的中间代码中跳转指令的数量。然而,它并没有充分考虑将一个条件替换为它的补的方法,例如将 `if a < b goto L1; goto L2` 替换成 `if a >= b goto L2; goto L1`。给出语法制导定义,它在需要时可以利用这种替换方法。 -------------------------------------------------------------------------------- /ch06/6.7/6.7.md: -------------------------------------------------------------------------------- 1 | # 6.7 节的练习 2 | 3 | ### 6.7.1 4 | 5 | 使用图 6-43 中的翻译方案翻译下列表达式。给出每个子表达式的 truelist 和 falselist。你可以假设第一条被生成的指令的地址是 100. 6 | 7 | 1. a==b && (c==d || e==f) 8 | 2. (a==b || c==d) || e==f 9 | 3. (a==b && c==d) && e==f 10 | 11 | #### 解答 12 | 13 | 1. 
a==b && (c==d || e==f) 14 | 15 | ![6 7 1-1](https://f.cloud.github.com/assets/340282/1251465/1387a7ec-2b2a-11e3-8f3b-a91b7bc31c23.gif) 16 | 17 | ### 6.7.2 18 | 19 | #### 解答 20 | 21 | 1. E3.false = i1 22 | 2. S2.next = i7 23 | 3. E4.false = i7 24 | 4. S1.next = i3 25 | 5. E2.true = i3 26 | 27 | ### 6.7.3 28 | 29 | 当使用图 6-46 中的翻译方案对图 6-47 进行翻译时,我们为每条语句创建 S.next 列表。一开始是赋值语句 S1, S2, S3,然后逐步处理越来越大的 if 语句,if-else 语句,while 语句和语句块。在图 6-47 中有 5 个这种类型的结构语句: 30 | 31 | - S4: while (E3) S1 32 | - S5: if(E4) S2 33 | - S6: 包含 S5 和 S3 的语句块 34 | - S7: if(E2) S4 else S6 35 | - S8: 整个程序 36 | 37 | 对于这些结构语句,我们可以通过一个规则用其他的 Sj.next 列表以及程序中的表达式的列表 Ek.true 和 Ek.false 构造出 Si.next。给出计算下列 next 列表的规则: 38 | 39 | 1. S4.next 40 | 2. S5.next 41 | 3. S6.next 42 | 4. S7.next 43 | 5. S8.next 44 | 45 | #### 解答 46 | 47 | (该题解答不是很肯定) 48 | 49 | 1. S4.next = S3.next 50 | 2. S5.next = S2.next 51 | 3. S6.next = S3.next 52 | 4. S7.next = S3.next 53 | 5. S8.next = E1.false -------------------------------------------------------------------------------- /ch06/6.7/assets/6.7.1-1.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/muhammadalmuzahid/compiler/b4cce261315be3e6a4b49d66eeb3cc8d0591bf60/ch06/6.7/assets/6.7.1-1.gif -------------------------------------------------------------------------------- /ch07/7.2/7.2.6.c: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | int f(int x, int *py, int **ppz) { 4 | **ppz += 1; 5 | *py += 2; 6 | x += 3; 7 | return x + *py + **ppz; 8 | 9 | } 10 | 11 | int main() { 12 | int c = 4; 13 | int *b = &c; 14 | int **a = &b; 15 | printf("%d\n", f(c, b, a)); 16 | } 17 | -------------------------------------------------------------------------------- /ch07/7.2/7.2.md: -------------------------------------------------------------------------------- 1 | # Exercises for Section 7.2 2 | 3 | ### 7.2.1 4 | 5 | Suppose that the program of Fig.7.2 uses a partition 
function that always picks a[m] as the separator v. Also, when the array a[m], … , a[n] is reordered, assume that the order is preserved as much as possible. That is, first come all the elements less than v, in their original order, then all elements equal to v, and finally all elements greater than v, in their original order. 6 | 7 | 1. Draw the activation tree when the numbers 9,8,7,6,5,4,3,2,1 are sorted. 8 | 2. What is the largest number of activation records that ever appear together 9 | on the stack? 10 | 11 | #### Answer 12 | 13 | 1. Draw the activation tree when the numbers 9,8,7,6,5,4,3,2,1 are sorted. 14 | 15 | ![7 2 1-1](https://f.cloud.github.com/assets/340282/1266950/7194fe12-2ca0-11e3-919f-2e6870287e71.gif) 16 | 17 | 2. What is the largest number of activation records that ever appear together 18 | on the stack? 19 | 20 | 9 21 | 22 | ### 7.2.2 23 | 24 | Repeat Exercise 7.2.1 when the initial order of the numbers 25 | is 1,3,5,7,9,2,4,6,8. 26 | 27 | ### 7.2.3 28 | 29 | In Fig. 7.9 is C code to compute Fibonacci numbers recur­sively. Suppose that the activation record for f includes the following elements in order: (return value, argument n, local s, local t); there will normally be other elements in the activation record as well. The questions below assume that the initial call is f(5). 30 | 31 | int f(int n) { 32 | int t, s; 33 | if (n < 2) return 1; 34 | s = f(n-1); 35 | t = f(n-2); 36 | return s+t; 37 | } 38 | 39 | Figure 7.9: Fibonacci program for Exercise 7.2.3 40 | 41 | 42 | 1. Show the complete activation tree. 43 | 2. What dose the stack and its activation records look like the first time f(1) is about to return? 44 | 3. ! What does the stack and its activation records look like the fifth time f(1) is about to return? 45 | 46 | #### Answer 47 | 48 | 1. Show the complete activation tree. 49 | 50 | ![7 2 3-1](https://f.cloud.github.com/assets/340282/1266985/e78871c4-2ca2-11e3-831c-501caa1fecde.gif) 51 | 52 | 2. 
What dose the stack and its activation records look like the first time f(1) is about to return? 53 | 54 | ![7 2 3-2](https://f.cloud.github.com/assets/340282/1267036/7d762abc-2ca5-11e3-8ad9-c62bdc30bc7f.gif) 55 | 56 | 57 | 3. ! What does the stack and its activation records look like the fifth time f(1) is about to return? 58 | 59 | ![7 2 3-3](https://f.cloud.github.com/assets/340282/1267034/6196935e-2ca5-11e3-9d4d-c9e4aa861842.gif) 60 | 61 | 62 | 63 | ### 7.2.4 64 | 65 | Here is a sketch of two C functions f and g: 66 | 67 | int f(int x){int i;...return i+1;...} 68 | int g(int y) {int j;...f(j+1). ..} 69 | 70 | That is, function g calls f. Draw the top of the stack, starting with the acti­vation record for g, after g calls f, and f is about to return. You can consider only return values, parameters, control links, and space for local variables; you do not have to consider stored state or temporary or local values not shown in the code sketch. However, you should indicate: 71 | 72 | 1. Which function creates the space on the stack for each element? 73 | 2. Which function writes the value of each element? 74 | 3. To which activation record does the element belong? 75 | 76 | #### Answer 77 | 78 | ![7 2 4](https://f.cloud.github.com/assets/340282/1267088/836d0550-2ca8-11e3-923d-757450951b13.gif) 79 | 80 | 81 | ### 7.2.5 82 | 83 | In a language that passes parameters by reference, there is a function f(x, y) that does the following: 84 | 85 | x = x + 1; 86 | y = y + 2; 87 | return x+y; 88 | 89 | If a is assigned the value 3, and then f(a, a) is called, what is returned? 
90 | 91 | #### Answer 92 | 93 | x = x + 1 -> a = a + 1 -> now a is 4 94 | y = y + 2 -> a = a + 2 -> now a is 6 95 | x + y -> a + a -> 6 + 6 -> 12 96 | 97 | f(a, a) is 12 98 | 99 | ### 7.2.6 100 | 101 | The C function f is defined by: 102 | 103 | int f(int x, *py, **ppz) { 104 | **ppz += 1; 105 | *py += 2; 106 | x += 3; 107 | return x+y+z; 108 | } 109 | 110 | Variable a is a pointer to b; variable b is a pointer to c, and c is an integer currently with value 4. If we call f(c, b, a) , what is returned? 111 | 112 | #### Answer 113 | 114 | f(c, b, a) is 21 115 | 116 | view [source code](7.2.6.c) 117 | 118 | mind that c is passed by value, so the process is: 119 | 120 | sentence x in f() x out of f() *py **ppz 121 | 122 | **ppz += 1; 4 5 5 5 123 | *py += 2; 4 7 7 7 124 | x += 3; 7 7 7 7 125 | 126 | 127 | 128 | -------------------------------------------------------------------------------- /ch07/7.2/assets/7.2.1-1.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/muhammadalmuzahid/compiler/b4cce261315be3e6a4b49d66eeb3cc8d0591bf60/ch07/7.2/assets/7.2.1-1.gif -------------------------------------------------------------------------------- /ch07/7.2/assets/7.2.3-1.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/muhammadalmuzahid/compiler/b4cce261315be3e6a4b49d66eeb3cc8d0591bf60/ch07/7.2/assets/7.2.3-1.gif -------------------------------------------------------------------------------- /ch07/7.2/assets/7.2.3-2.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/muhammadalmuzahid/compiler/b4cce261315be3e6a4b49d66eeb3cc8d0591bf60/ch07/7.2/assets/7.2.3-2.gif -------------------------------------------------------------------------------- /ch07/7.2/assets/7.2.3-3.gif: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/muhammadalmuzahid/compiler/b4cce261315be3e6a4b49d66eeb3cc8d0591bf60/ch07/7.2/assets/7.2.3-3.gif -------------------------------------------------------------------------------- /ch07/7.2/assets/7.2.4.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/muhammadalmuzahid/compiler/b4cce261315be3e6a4b49d66eeb3cc8d0591bf60/ch07/7.2/assets/7.2.4.gif -------------------------------------------------------------------------------- /ch07/7.2/assets/7.2.4.graphml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | int y 24 | -------------------- 25 | g(y) 26 | -------------------- 27 | point to caller of g 28 | -------------------- 29 | int j 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 | 53 | 54 | 55 | 56 | 57 | 58 | 59 | 60 | 61 | 62 | 63 | 64 | 65 | int x 66 | -------------------- 67 | f(x) 68 | -------------------- 69 | point to g 70 | -------------------- 71 | int i 72 | 73 | 74 | 75 | 76 | 77 | 78 | 79 | 80 | 81 | 82 | 83 | 84 | 85 | 86 | 87 | 88 | -------------------------------------------------------------------------------- /ch07/7.3/7.3.md: -------------------------------------------------------------------------------- 1 | # Exercises for Section 7.3 2 | 3 | ### 7.3.1 4 | 5 | In Fig. 7.15 is a ML function main that computes Fibonacci numbers in a nonstandard way. Function fibO will compute the nth Fibonacci number for any n >= O. Nested within in is fib1, which computes the nth Fibonacci number on the assumption n >= 2, and nested within fib1 is fib2, which assumes n >= 4. Note that neither fib1 nor fib2 need to check for the basis cases. 
Show the stack of activation records that result from a call to main, up until the time that the first call (to fib0(1)) is about to return. Show the access link in each of the activation records on the stack. 6 | 7 | 8 | fun main() { 9 | let 10 | fun fib0(n) = 11 | let 12 | fun fib1(n) = 13 | let 14 | fun fib2(n) = fib1(n-1) + fib1(n-2) 15 | in 16 | if n >= 4 then fib2(n) 17 | else fib0(n-1) + fib0(n-2) 18 | end 19 | in 20 | if n >= 2 then fib1(n) else 1 21 | end 22 | in 23 | fib0(4) 24 | end ; 25 | 26 | Figure 7.15: Nested functions computing Fibonacci numbers 27 | 28 | #### Answer 29 | 30 | activation tree: 31 | 32 | ![7 3 1-activation-tree](https://f.cloud.github.com/assets/340282/1274670/bccd7442-2dc0-11e3-9f3f-7c8122b10240.gif) 33 | 34 | activation stack when first call to fib0(1) is about to return: 35 | 36 | ![7 3 1-activation-stack](https://f.cloud.github.com/assets/340282/1274682/718986ea-2dc2-11e3-8e80-a450f8cb17d3.gif) 37 | 38 | ### 7.3.2 39 | 40 | Suppose that we implement the functions of Fig. 7.15 using a display. Show the display at the moment the first call to fib0(1) is about to return. Also, indicate the saved display entry in each of the activation records on the stack at that time. 
41 |  42 | #### Answer 43 | 44 | ![7 3 2](https://f.cloud.github.com/assets/340282/1274690/d564dbc8-2dc3-11e3-828e-4740db58898d.gif) 45 | 46 | -------------------------------------------------------------------------------- /ch07/7.3/assets/7.3.1-activation-stack.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/muhammadalmuzahid/compiler/b4cce261315be3e6a4b49d66eeb3cc8d0591bf60/ch07/7.3/assets/7.3.1-activation-stack.gif -------------------------------------------------------------------------------- /ch07/7.3/assets/7.3.1-activation-tree.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/muhammadalmuzahid/compiler/b4cce261315be3e6a4b49d66eeb3cc8d0591bf60/ch07/7.3/assets/7.3.1-activation-tree.gif -------------------------------------------------------------------------------- /ch07/7.3/assets/7.3.2.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/muhammadalmuzahid/compiler/b4cce261315be3e6a4b49d66eeb3cc8d0591bf60/ch07/7.3/assets/7.3.2.gif -------------------------------------------------------------------------------- /ch07/7.4/7.4.md: -------------------------------------------------------------------------------- 1 | # Exercises for Section 7.4 2 | 3 | ### 7.4.1 4 | 5 | Suppose the heap consists of seven chunks, starting at address 0. The sizes of the chunks, in order, are 80, 30, 60, 50, 70, 20, 40 bytes. When we place an object in a chunk, we put it at the high end if there is enough space remaining to form a smaller chunk (so that the smaller chunk can easily remain on the linked list of free space) . However , we cannot tolerate chunks of fewer that 8 bytes, so if an object is almost as large as the selected chunk, we give it the entire chunk and place the object at the low end of the chunk. 
If we request space for objects of the following sizes: 32, 64, 48, 16, in that order, what does the free space list look like after satisfying the requests, if the method of selecting chunks is 6 | 7 | 1. First fit. 8 | 2. Best fit. 9 | 10 | #### Answer 11 | 12 | values in parentheses are sizes actually in use 13 | 14 | 1. First fit. 15 | 16 | 48, 32(32), 14, 16(16), 60, 50(48), 70(64), 20, 40 17 | 18 | 2. Best fit. 19 | 20 | 80, 30, 60, 50(48), 70(64), 20(16), 8, 32(32) 21 | -------------------------------------------------------------------------------- /ch07/7.5/7.5.md: -------------------------------------------------------------------------------- 1 | # Exercises for Section 7.5 2 | 3 | ### 7.5.1 4 | 5 | What happens to the reference counts of the objects in Fig. 7.19 if: 6 | 7 | 1. The pointer from A to B is deleted. 8 | 2. The pointer from X to A is deleted. 9 | 3. The node C is deleted. 10 | 11 | ![A network of objects](https://f.cloud.github.com/assets/340282/1276364/72fc7390-2e60-11e3-8816-a5a535a9d67a.png) 12 | 13 | Figure 7.19: A network of objects 14 | 15 | #### Answer 16 | 17 | 1. The pointer from A to B is deleted. 18 | 19 | ![7 5 1-1](https://f.cloud.github.com/assets/340282/1276399/35528b7e-2e65-11e3-8056-8c54e0dae1c8.gif) 20 | 21 | 22 | 2. The pointer from X to A is deleted. 23 | 24 | ![7 5 1-2](https://f.cloud.github.com/assets/340282/1276400/42f0b468-2e65-11e3-940a-e7f7b4cce5da.gif) 25 | 26 | 27 | 3. The node C is deleted. 28 | 29 | ![7 5 1-3](https://f.cloud.github.com/assets/340282/1276401/4c9b1d14-2e65-11e3-9ae7-d4dd4f75bd40.gif) 30 | 31 | 32 | ### 7.5.2 33 | 34 | What happens to reference counts when the pointer from A to D in Fig. 7.20 is deleted? 
35 | 36 | ![Another network of objects](https://f.cloud.github.com/assets/340282/1276366/87f56964-2e60-11e3-9d88-fd56f7e2d3f4.png) 37 | 38 | Figure 7.20: Another network of objects 39 | 40 | #### Answer 41 | 42 | ![7 5 2](https://f.cloud.github.com/assets/340282/1276406/eac449f2-2e65-11e3-8e4c-def958552810.gif) 43 | -------------------------------------------------------------------------------- /ch07/7.5/assets/7.5.1-1.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/muhammadalmuzahid/compiler/b4cce261315be3e6a4b49d66eeb3cc8d0591bf60/ch07/7.5/assets/7.5.1-1.gif -------------------------------------------------------------------------------- /ch07/7.5/assets/7.5.1-2.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/muhammadalmuzahid/compiler/b4cce261315be3e6a4b49d66eeb3cc8d0591bf60/ch07/7.5/assets/7.5.1-2.gif -------------------------------------------------------------------------------- /ch07/7.5/assets/7.5.1-3.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/muhammadalmuzahid/compiler/b4cce261315be3e6a4b49d66eeb3cc8d0591bf60/ch07/7.5/assets/7.5.1-3.gif -------------------------------------------------------------------------------- /ch07/7.5/assets/7.5.2.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/muhammadalmuzahid/compiler/b4cce261315be3e6a4b49d66eeb3cc8d0591bf60/ch07/7.5/assets/7.5.2.gif -------------------------------------------------------------------------------- /ch07/7.6/7.6.md: -------------------------------------------------------------------------------- 1 | # Exercises for Section 7.6 2 | 3 | ### 7.6.1 4 | 5 | Show the steps of a mark-and-sweep garbage collector on 6 | 7 | 1. Fig. 7.19 with the pointer A to B deleted. 8 | 2. Fig. 7.19 with the pointer A to C deleted. 
9 | 3. Fig. 7.20 with the pointer A to D deleted. 10 | 4. Fig. 7.20 with the object B deleted. 11 | 12 | #### Answer 13 | 14 | 1. Fig. 7.19 with the pointer A to B deleted. 15 | 16 | before: A.reached = … = I.reached = 0 17 | Unscanned = [] 18 | 19 | 20 | line1: A.reached = 1 21 | Unscanned.push(A) 22 | 23 | line2~7: 24 | 25 | loop1: Unscanned.shift() 26 | C.reached = 1 27 | Unscanned.push( C ) 28 | 29 | loop2: Unscanned.shift() 30 | F.reached = 1 31 | Unscanned.push(F) 32 | 33 | loop3: Unscanned.shift() 34 | H.reached = 1 35 | Unscanned.push(H) 36 | 37 | loop4: Unscanned.shift() 38 | I.reached = 1 39 | Unscanned.push(I) 40 | 41 | loop5: Unscanned.shift() 42 | G.reached = 1 43 | Unscanned.push(G) 44 | 45 | loop6: Unscanned.shift() 46 | E.reached = 1 47 | Unscanned.push(E) 48 | 49 | loop7: Unscanned.shift() 50 | // no more objects are added to the Unscanned list 51 | // it is now empty, so the loop ends 52 | 53 | line8: Free = [] 54 | 55 | line9~11: Free = [B, D] 56 | A.reached = C.reached = E.reached = … = I.reached = 0 57 | 58 | 59 | ### 7.6.2 60 | 61 | The Baker mark-and-sweep algorithm moves objects among four lists: Free, Unreached, Unscanned, and Scanned. For each of the object networks of Exercise 7.6.1, indicate for each object the sequence of lists on which it finds itself from just before garbage collection begins until just after it finishes. 62 | 63 | #### Answer 64 | 65 | 1. Fig. 7.19 with the pointer A to B deleted. 
66 | 67 | line1: Free = [] // assume it is empty 68 | Unreached = [A, B, C, D, E, F, G, H, I] 69 | Unscanned = [] 70 | Scanned = [] 71 | 72 | line2: Unscanned = [A] 73 | Unreached = [B, C, D, E, F, G, H, I] 74 | 75 | line3~7: 76 | 77 | loop1: Scanned = [A] 78 | Unscanned = [C] 79 | Unreached = [B, D, E, F, G, H, I] 80 | 81 | loop2: Scanned = [A, C] 82 | Unscanned = [F] 83 | Unreached = [B, D, E, G, H, I] 84 | 85 | loop3: Scanned = [A, C, F] 86 | Unscanned = [H] 87 | Unreached = [B, D, E, G, I] 88 | 89 | loop4: Scanned = [A, C, F, H] 90 | Unscanned = [I] 91 | Unreached = [B, D, E, G] 92 | 93 | loop5: Scanned = [A, C, F, H, I] 94 | Unscanned = [G] 95 | Unreached = [B, D, E] 96 | 97 | loop6: Scanned = [A, C, F, H, I, G] 98 | Unscanned = [E] 99 | Unreached = [B, D] 100 | 101 | loop7: Scanned = [A, C, F, H, I, G, E] 102 | Unscanned = [] 103 | Unreached = [B, D] 104 | 105 | line8: Free = [B, D] 106 | 107 | line9: Unreached = [A, C, F, H, I, G, E] 108 | 109 | 110 | ### 7.6.3 111 | 112 | Suppose we perform a mark-and-compact garbage collection on each of the networks of Exercise 7.6.1. Also, suppose that 113 | 114 | 1. Each object has size 100 bytes, and 115 | 2. Initially, the nine objects in the heap are arranged in alphabetical order, 116 | starting at byte 0 of the heap. 117 | 118 | What is the address of each object after garbage collection? 119 | 120 | #### Answer 121 | 122 | 1. Fig. 7.19 with the pointer A to B deleted. 123 | 124 | A(0), C(100), E(200), F(300), G(400), H(500), I(600) 125 | 126 | ### 7.6.4 127 | 128 | Suppose we execute Cheney's copying garbage collection al­gorithm on each of the networks of Exercise 7.6.1. Also, suppose that 129 | 130 | 1. Each object has size 100 bytes, 131 | 2. The unscanned list is managed as a queue, and when an object has more than one pointer, the reached objects are added to the queue in alpha­ betical order, and 132 | 3. The From semispace starts at location 0, and the To semispace starts at location 10,000. 
133 | 134 | What is the value of NewLocation(o) for each object o that remains after garbage collection? 135 | 136 | #### Answer 137 | 138 | 1. Fig. 7.19 with the pointer A to B deleted. 139 | 140 | A(10000), C(10100), F(10200), H(10300), I(10400), G(10500), E(10600) 141 | 142 | 143 | 144 | -------------------------------------------------------------------------------- /ch07/7.7/7.7.md: -------------------------------------------------------------------------------- 1 | # Exercises for Section 7.7 2 | 3 | ### 7.7.1 4 | 5 | Suppose that the network of objects from Fig.7.20 is managed by an incremental algorithm that uses the four lists Unreached, Unscanned, Scanned, and Free, as in Baker's algorithm. To be specific, the Unscanned list is managed as a queue, and when more than one object is to be placed on this list due to the scanning of one object, we do so in alphabetical order. Suppose also that we use write barriers to assure that no reachable object is made garbage. Starting with A and B on the Unscanned list, suppose the following events occur: 6 | 7 | 8 | 1. A is scanned. 9 | 2. The pointer A -> D is rewritten to be A -> H. 10 | 3. B is scanned. 11 | 4. D is scanned. 12 | 5. The pointer B -> C is rewritten to be B -> I. 13 | 14 | Simulate the entire incremental garbage collection, assuming no more pointers are rewritten. Which objects are garbage? Which objects are placed on the Free list? 15 | 16 | #### Answer 17 | 18 | 19 | 0. init 20 | 21 | ![Another network of objects](https://f.cloud.github.com/assets/340282/1276366/87f56964-2e60-11e3-9d88-fd56f7e2d3f4.png) 22 | 23 | Free = [] 24 | Unreached = [C, D, E, F, G, H, I] 25 | Unscanned = [A, B] 26 | Scanned = [] 27 | 28 | 1. A is scanned. 29 | 30 | Unreached = [C, F, G, H, I] 31 | Unscanned = [B, D, E] 32 | Scanned = [A] 33 | 34 | 35 | 2. The pointer A -> D is rewritten to be A -> H. 
36 | 37 | ![7 7 1-1](https://f.cloud.github.com/assets/340282/1313843/079b394a-3263-11e3-8659-a54bcc5ea3d8.gif) 38 | 39 | Unreached = [C, F, G, I] 40 | Unscanned = [B, D, E, H] 41 | Scanned = [A] 42 | 43 | 3. B is scanned. 44 | 45 | Unreached = [F, G, I] 46 | Unscanned = [D, E, H, C] 47 | Scanned = [A, B] 48 | 49 | 4. D is scanned. 50 | 51 | Unreached = [F, G, I] 52 | Unscanned = [E, H, C] 53 | Scanned = [A, B, D] 54 | 55 | 5. The pointer B -> C is rewritten to be B -> I. 56 | 57 | 58 | ![7 7 1-2](https://f.cloud.github.com/assets/340282/1313847/144a01e4-3263-11e3-8037-b09e2c3b03f4.gif) 59 | 60 | Unreached = [F, G] 61 | Unscanned = [E, H, C, I] 62 | Scanned = [A, B, D] 63 | 64 | 65 | 6. E is scanned. 66 | 67 | Unreached = [F, G] 68 | Unscanned = [H, C, I] 69 | Scanned = [A, B, D, E] 70 | 71 | 7. H is scanned. 72 | 73 | Unreached = [F, G] 74 | Unscanned = [C, I] 75 | Scanned = [A, B, D, E, H] 76 | 77 | 8. C is scanned. 78 | 79 | Unreached = [F, G] 80 | Unscanned = [I] 81 | Scanned = [A, B, D, E, H, C] 82 | 83 | 9. I is scanned. 84 | 85 | Unreached = [F, G] 86 | Unscanned = [] 87 | Scanned = [A, B, D, E, H, C, I] 88 | 89 | 10. end 90 | 91 | Free = [F, G] 92 | Unreached = [A, B, D, E, H, C, I] 93 | Unscanned = [] 94 | Scanned = [] 95 | 96 | 97 | So `[C, D, F, G]` are garbage, but only `[F, G]` are placed on the Free list. 98 | 99 | 100 | ### 7.7.2 101 | 102 | Repeat Exercise 7.7.1 on the assumption that 103 | 104 | 1. Events (2) and (5) are interchanged in order. 105 | 2. Events (2) and (5) occur before (1), (3), and (4). 106 | 107 | 108 | #### Answer 109 | 110 | 111 | 1. Events (2) and (5) are interchanged in order. 112 | 113 | omit 114 | 115 | 2. Events (2) and (5) occur before (1), (3), and (4). 116 | 117 | 0. init 118 | 119 | ![Another network of objects](https://f.cloud.github.com/assets/340282/1276366/87f56964-2e60-11e3-9d88-fd56f7e2d3f4.png) 120 | 121 | Free = [] 122 | Unreached = [C, D, E, F, G, H, I] 123 | Unscanned = [A, B] 124 | Scanned = [] 125 | 126 | 1. 
The pointer A -> D is rewritten to be A -> H. 127 | 128 | ![7 7 1-1](https://f.cloud.github.com/assets/340282/1313843/079b394a-3263-11e3-8659-a54bcc5ea3d8.gif) 129 | 130 | Unreached = [C, D, E, F, G, I] 131 | Unscanned = [A, B, H] 132 | 133 | 2. The pointer B -> C is rewritten to be B -> I. 134 | 135 | 136 | ![7 7 1-2](https://f.cloud.github.com/assets/340282/1313847/144a01e4-3263-11e3-8037-b09e2c3b03f4.gif) 137 | 138 | Unreached = [C, D, E, F, G] 139 | Unscanned = [A, B, H, I] 140 | 141 | 3. A is scanned. 142 | 143 | Unreached = [C, D, F, G] 144 | Unscanned = [B, H, I, E] 145 | Scanned = [A] 146 | 147 | 4. B is scanned. 148 | 149 | Unreached = [C, D, F, G] 150 | Unscanned = [H, I, E] 151 | Scanned = [A, B] 152 | 153 | 5. H is scanned. 154 | 155 | Unreached = [C, D, F, G] 156 | Unscanned = [I, E] 157 | Scanned = [A, B, H] 158 | 159 | 6. I is scanned. 160 | 161 | Unreached = [C, D, F, G] 162 | Unscanned = [E] 163 | Scanned = [A, B, H, I] 164 | 165 | 7. E is scanned. 166 | 167 | Unreached = [C, D, F, G] 168 | Unscanned = [] 169 | Scanned = [A, B, H, I, E] 170 | 171 | 8. end 172 | 173 | Free = [C, D, F, G] 174 | Unreached = [A, B, H, I, E] 175 | Unscanned = [] 176 | Scanned = [] 177 | 178 | so, `[C, D, F, G]` is garbage, Free list also is `[C, D, F, G]`. 179 | 180 | ### 7.7.3 181 | 182 | Suppose the heap consists of exactly the nine cars on three trains shown in Fig. 7.30 (i.e., ignore the ellipses). Object o in car 11 has references from cars 12, 23, and 32. When we garbage collect car 11, where might o wind up? 183 | 184 | #### Answer 185 | 186 | if any room in trains 2 and 3 187 | o can go in some existing car of either trains 2 and 3. 188 | else 189 | o can go in a new, last car of either trains 2 and 3. 190 | 191 | 192 | 193 | ### 7.7.4 194 | 195 | Repeat Exercise 7.7.3 for the cases that o has 196 | 197 | 1. Only references from cars 22 and 31. 198 | 2. No references other than from car 11. 199 | 200 | #### Answer 201 | 202 | 1. 
Only references from cars 22 and 31. 203 | 204 | The same with Exercise 7.7.3. 205 | 206 | 2. No references other than from car 11. 207 | 208 | if there is room in car 12 209 | o can go in car 12 210 | else if there is room in other cars of train 1 211 | o can go in any car has room 212 | else 213 | o can go in a new, last car of train 1 214 | 215 | ### 7.7.5 216 | 217 | Suppose the heap consists of exactly the nine cars on three trains shown in Fig. 7.30 (i.e., ignore the ellipses). We are currently in panic mode. Object o1 in car 11 has only one reference, from object o2 in car 12. That reference is rewritten. When we garbage collect car 11, what could happen to o1? 218 | 219 | #### Answer 220 | 221 | It is not important which train we move it to, as long as it is not the first train? 222 | 223 | 224 | 225 | 226 | 227 | 228 | 229 | 230 | 231 | 232 | -------------------------------------------------------------------------------- /ch07/7.7/assets/7.7.1-1.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/muhammadalmuzahid/compiler/b4cce261315be3e6a4b49d66eeb3cc8d0591bf60/ch07/7.7/assets/7.7.1-1.gif -------------------------------------------------------------------------------- /ch07/7.7/assets/7.7.1-2.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/muhammadalmuzahid/compiler/b4cce261315be3e6a4b49d66eeb3cc8d0591bf60/ch07/7.7/assets/7.7.1-2.gif -------------------------------------------------------------------------------- /ch08/8.2/8.2.md: -------------------------------------------------------------------------------- 1 | # Exercises for Section 8.2 2 | 3 | ### 8.2.1 4 | 5 | Generate code for the following three-address statements assuming all variables are stored in memory locations. 6 | 7 | 1. x = 1 8 | 2. x = a 9 | 3. x = a + 1 10 | 4. x = a + b 11 | 5. 
The two statements 12 | - x = b * c 13 | - y = a + x 14 | 15 | #### answer 16 | 17 | 1. LD R1, #1 18 | ST x, R1 19 | 20 | 2. LD R1, a 21 | ST x, R1 22 | 23 | 3. LD R1, a 24 | ADD R1, R1, #1 25 | ST x, R1 26 | 27 | 4. LD R1, a 28 | LD R2, b 29 | ADD R1, R1, R2 30 | ST x, R1 31 | 32 | 5. LD R1, b 33 | LD R2, c 34 | MUL R1, R1, R2 35 | LD R3, a 36 | ADD R3, R3, R1 37 | ST y, R3 38 | 39 | Note:第 5 小题,可以在生成的汇编码第三行后插入 `ST x, R1` 和 `LD R1, x` 两句,这两句属于冗余代码(redundant store-load)。使用简易代码生成策略很容易生成这种冗余代码,慢是慢一些但是也是正确的,有专门处理这种问题的优化(redundant store-load elimination),所以生不生成在这题的答案里感觉都行。 40 | 41 | ### 8.2.2 42 | 43 | Generate code for the following three-address statements assuming a and b are arrays whose elements are 4-byte values. 44 | 45 | 1. The four-statement sequence 46 | 47 | x = a[i] 48 | y = b[j] 49 | a[i] = y 50 | b[j] = x 51 | 52 | 2. The three-statement sequence 53 | 54 | x = a[i] 55 | y = b[i] 56 | z = x * y 57 | 58 | 3. The three-statement sequence 59 | 60 | x = a[i] 61 | y = b[x] 62 | a[i] = y 63 | 64 | #### answer 65 | 66 | 1. LD R1, i 67 | MUL R1, R1, #4 68 | LD R2, a(R1) 69 | LD R3, j 70 | MUL R3, R3, #4 71 | LD R4, b(R3) 72 | ST a(R1), R4 73 | ST b(R3), R2 74 | 75 | 2. LD R1, i 76 | MUL R1, R1, #4 77 | LD R2, a(R1) 78 | LD R1, b(R1) 79 | MUL R1, R2, R1 80 | ST z, R1 81 | 82 | 3. 
LD R1, i 83 | MUL R1, R1, #4 84 | LD R2, a(R1) 85 | MUL R2, R2, #4 86 | LD R2, b(R2) 87 | ST a(R1), R2 88 | 89 | ### 8.2.3 90 | 91 | Generate code for the following three-address sequence assuming that p and q are in memory locations: 92 | 93 | y = *q 94 | q = q + 4 95 | *p = y 96 | p = p + 4 97 | 98 | #### answer 99 | 100 | LD R1, q 101 | LD R2, 0(R1) 102 | ADD R1, R1, #4 103 | ST q, R1 104 | LD R1, p 105 | ST 0(R1), R2 106 | ADD R1, R1, #4 107 | ST p, R1 108 | 109 | ### 8.2.4 110 | 111 | Generate code for the following sequence assuming that x, y, and z are in memory locations: 112 | 113 | if x < y goto L1 114 | z = 0 115 | goto L2 116 | L1: z = 1 117 | 118 | #### answer 119 | 120 | LD R1, x 121 | LD R2, y 122 | SUB R1, R1, R2 123 | BLTZ R1, L1 124 | LD R1, #0 125 | ST z, R1 126 | BR L2 127 | L1: LD R1, #1 128 | ST z, R1 129 | 130 | Note:实际生成代码时会把标签对应到具体的数字地址上,但这小节还没到那一步,把原本题目里的标签名拿来随便写写就好啦。 131 | 132 | ### 8.2.5 133 | 134 | Generate code for the following sequence assuming that n is in a memory location: 135 | 136 | s = 0 137 | i = 0 138 | L1: if i > n goto L2 139 | s = s + i 140 | i = i + 1 141 | goto L1 142 | L2: 143 | 144 | #### answer 145 | 146 | Long version: 147 | 148 | LD R1, #0 149 | ST s, R1 150 | ST i, R1 151 | L1: LD R1, i 152 | LD R2, n 153 | SUB R2, R1, R2 154 | BGTZ R2, L2 155 | LD R2, s 156 | ADD R2, R2, R1 157 | ST s, R2 158 | ADD R1, R1, #1 159 | ST i, R1 160 | BR L1 161 | L2: 162 | 163 | Short version: 164 | 165 | LD R2, #0 166 | LD R1, R2 167 | LD R3, n 168 | L1: SUB R4, R1, R3 169 | BGTZ R4, L2 170 | ADD R2, R2, R1 171 | ADD R1, R1, #1 172 | BR L1 173 | L2: 174 | 175 | Note:短版本的优化 1)消除冗余存-读 2)循环不变代码外提 3)然后外加寄存器分配 176 | 177 | ### 8.2.6 178 | 179 | Determine the costs of the following instruction sequences: 180 | 181 | 1. LD R0, y 182 | LD R1, z 183 | ADD R0, R0, R1 184 | ST x, R0 185 | 186 | 2. LD R0, i 187 | MUL R0, R0, 8 188 | LD R1, a(R0) 189 | ST b, R1 190 | 191 | 3. 
LD R0, c 192 | LD R1, i 193 | MUL R1, R1, 8 194 | ST a(R1),R0 195 | 196 | 4. LD R0, p 197 | LD R1, 0(R0) 198 | ST x, R1 199 | 200 | 5. LD R0, p 201 | LD R1, x 202 | ST 0(R0), R1 203 | 204 | 6. LD R0, x 205 | LD R1, y 206 | SUB R0, R0, R1 207 | BLTZ *R3, R0 208 | 209 | #### answer 210 | 211 | 1. 2 + 2 + 1 + 2 = 7 212 | 2. 2 + 2 + 2 + 2 = 8 213 | 3. 2 + 2 + 2 + 2 = 8 214 | 4. 2 + 2 + 2 = 6 215 | 5. 2 + 2 + 2 = 6 216 | 6. 2 + 2 + 1 + 1 = 6 217 | 218 | Note:这本书用的指令集没明确定义所有指令的细节,但看起来所谓用变量名来指定内存地址实际上隐含着这些变量是静态分配的假设,也就是说在真正生成完的指令里这些变量名都会被替换为它们对应的数字形式的地址常量,而地址存在指令后的一个额外的word里,这就算多一单位的开销。 219 | 220 | --- 221 | 222 | ### Note 223 | 224 | 1. 很明显本节内容写得非常随意,推荐数字常量是应该都加#前缀的,除了放在地址里用。比如 `LD R1, #1` 和 `ADD R1, R1, #1`。 225 | 226 | 2. 本书中 Ri 表示第 i 号寄存器。 227 | 228 | 1. 在翻译成汇编码的过程中,是可以随意指定 i 的值(比如 R3, R4, R1000)呢还是会有某种限制? 229 | 230 | 回答:现在暂时随意。等后面说寄存器个数有限制的时候再考虑有限制的情况。 231 | 232 | 2. 另外,如果代码中所示的 R1 在后面的代码中用不着了,那么新的值是不是可以被加载到 R1 中?如果可以的话,如何知道之前的 R1 用不着了? 233 | 234 | 回答:可以覆盖。至于如何知道前面的值死了就要看 def-use 链。这是优化的重要问题。例如9.2.5小节讲 live variable 就跟这个有关。 235 | 236 | 3. b = a[i] 对应的汇编码: 237 | 238 | ``` 239 | LD R1, i 240 | MUL R1, R1, 8 241 | LD R2, a(R1) 242 | ... 243 | ``` 244 | 245 | 其中 a 为什么不需要先 load 到寄存器? 246 | 247 | 回答:这里隐含一个假设:变量是静态分配存储的。后面涉及不是静态变量的时候情况会有变化。 248 | -------------------------------------------------------------------------------- /ch08/8.3/8.3.md: -------------------------------------------------------------------------------- 1 | # Exercises for Section 8.3 2 | 3 | ### 8.3.1 4 | 5 | Generate code for the following three-address statements assuming stack 6 | allocation where register SP points to the top of the stack. 
7 | 8 | ``` 9 | call p 10 | call q 11 | return 12 | call r 13 | return 14 | return 15 | ``` 16 | 17 | #### Answer 18 | 19 | ``` 20 | 100: LD SP, #stackStart 21 | 108: ADD SP, SP, #psize 22 | 116: ST *SP, #132 23 | 124: BR pStart 24 | 132: SUB SP, SP, #psize 25 | 140: ADD SP, SP, #qsize 26 | 148: ST *SP, #164 27 | 156: BR qStart 28 | 164: SUB SP, SP, #qsize 29 | 172: BR **SP 30 | ``` 31 | 32 | 33 | ### 8.3.2 34 | 35 | Generate code for the following three-address statements assuming stack 36 | allocation where register SP points to the top of the stack. 37 | 38 | 1. x = 1 39 | 2. x=a 40 | 3. x = a + 1 41 | 4. x = a+b 42 | 5. The two statements 43 | - x = b * c 44 | - y = a + x 45 | 46 | ### 8.3.3 47 | 48 | Generate code for the following three-address statements again assuming stack 49 | allocation and assuming a and b are arrays whose elements are 4-byte values. 50 | 51 | 1. The four-statement sequence 52 | 53 | ``` 54 | x = a[i] 55 | y = b[j] 56 | a[i] = y 57 | b[j] = x 58 | ``` 59 | 2. The three-statement sequence 60 | 61 | ``` 62 | x = a[i] 63 | y = b[i] 64 | z = x * y 65 | ``` 66 | 67 | 3. The three-statement sequence 68 | 69 | ``` 70 | x = a[i] 71 | y = b[x] 72 | a[i] = y 73 | ``` 74 | --- 75 | 76 | ## Note 77 | 78 | #### 1. 指令长度 79 | 80 | ``` 81 | 120: ST 364, #140 82 | 132: BR 200 83 | 140: ACTION2 84 | ``` 85 | 图 8-4 部分代码 86 | 87 | - 每行指令前面的标号代表了这行代码的起始位置(即偏移量),和下一行指令的标号差代表这行指令的长度。 88 | - 第一行有 1 个指令和 2 个常量,所以指令长度是 12,同理第二行有 1 个指令和 1 个常量,所以长度为 8. 
89 | 90 | 91 | ``` 92 | 100: LD SP, #600 93 | 108: ACTION1 94 | 128: ADD SP, SP, #msize 95 | 136: ST *SP, #152 96 | ``` 97 | 图 8-6 部分代码 98 | 99 | - 由于 SP 不占空间,所以上图中的几行指令长度均为 8。 100 | -------------------------------------------------------------------------------- /ch08/8.4/assets/8.4.1-2.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/muhammadalmuzahid/compiler/b4cce261315be3e6a4b49d66eeb3cc8d0591bf60/ch08/8.4/assets/8.4.1-2.gif -------------------------------------------------------------------------------- /ch08/8.4/assets/8.4.2-2.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/muhammadalmuzahid/compiler/b4cce261315be3e6a4b49d66eeb3cc8d0591bf60/ch08/8.4/assets/8.4.2-2.gif -------------------------------------------------------------------------------- /ch08/8.5/8.5.md: -------------------------------------------------------------------------------- 1 | # Exercises for Section 8.5 2 | 3 | ### 8.5.1 4 | 5 | Construct the DAG for the basic block 6 | 7 | ``` 8 | d = b * c 9 | e = a + b 10 | b = b * c 11 | a = e - d 12 | ``` 13 | 14 | #### Answer 15 | 16 | ![8 5 1](https://f.cloud.github.com/assets/340282/1357594/df203a26-379c-11e3-970b-349a410c6cb5.gif) 17 | 18 | ### 8.5.2 19 | 20 | Simplify the three-address code of Exercise 8.5.1, assuming 21 | 22 | 1. Only a is live on exit from the block. 23 | 2. a, b, and c are live on exit from the block. 24 | 25 | #### Answer 26 | 27 | 1. Only a is live on exit from the block. 28 | 29 | ``` 30 | e = a + b 31 | d = b * c 32 | a = e - d 33 | ``` 34 | 35 | 2. a, b, and c are live on exit from the block. 36 | 37 | ``` 38 | e = a + b 39 | b = b * c 40 | a = e - b 41 | ``` 42 | 43 | ### 8.5.3 44 | 45 | Construct the basic block for the code in block B6 of Fig. 8.9. Do not forget to include the comparison i <= 10. 
46 | 47 | #### Answer 48 | 49 | ![8 5 3](https://f.cloud.github.com/assets/340282/1365769/12f90536-388c-11e3-9892-643783ee9915.gif) 50 | 51 | #### 疑问 52 | 53 | - “Construct the basic block” 被翻译成 “构造 DAG”,是这个意思吗? 54 | - 如何为一个 “if goto” 语句 construct the basic block? 55 | 56 | 57 | ### 8.5.4 58 | 59 | Construct the DAG for the code in block B3 of Fig. 8.9. 60 | 61 | #### Answer 62 | 63 | ![8 5 4](https://f.cloud.github.com/assets/340282/1365782/396accf2-388e-11e3-946b-7154333ba871.gif) 64 | 65 | ### 8.5.5 66 | 67 | Extend Algorithm 8.7 to process three-address statements of the form 68 | 69 | 1. a[i] = b 70 | 2. a = b[i] 71 | 3. a = *b 72 | 4. *a = b 73 | 74 | ### 8.5.6 75 | 76 | Construct the DAG for the basic block 77 | 78 | ``` 79 | a[i] = b 80 | *p = c 81 | d = a[j] 82 | e = *p 83 | *p = a[i] 84 | ``` 85 | 86 | on the assumption that 87 | 88 | 1. p can point anywhere. 89 | 2. p can point only to b or d. 90 | 91 | #### 疑问 92 | 93 | 8.5.6 节讲指针赋值这里又没有 demo 啊!!! 94 | 95 | - `*p = c` 和 `c = *p`翻译成 DAG 是不是这样的:![screen shot 2013-10-19 at 4 27 34 pm](https://f.cloud.github.com/assets/340282/1365867/563bfc66-3898-11e3-9b2b-f536f294e165.png) 96 | - `*p = a[i]` 这样的语句用 DAG 如何表示? 97 | - 8.5.6 节讲到:the operator =* must take all nodes that are currently associated with identifiers as arguments。这句话在 DAG 中如何表示? 98 | 99 | ### 8.5.7 ! 100 | 101 | If a pointer or array expression, such as a[i] or \*p is assigned and then used, without the possibility of being changed in the interim, we can take advantage of the situation to simplify the DAG. For example, in the code of Exercise 8.5.6, since p is not assigned between the second and fourth statements, the statement e = *p can be replaced by e = c, regardless of what p points to. Revise the DAG-construction algorithm to take advantage of such situations, and apply your algorithm to the code of Exercise 8.5.6. 
102 | 103 | ### 8.5.8 104 | 105 | Suppose a basic block is formed from the C assignment statements 106 | 107 | ``` 108 | x = a + b + c + d + e + f; 109 | y = a + c + e; 110 | ``` 111 | 1. Give the three-address statements (only one addition per statement) for this block. 112 | 2. Use the associative and commutative laws to modify the block to use the fewest possible number of instructions, assuming both x and y are live on exit from the block. 113 | 114 | #### Answer 115 | 116 | 1. three-address statements 117 | 118 | ``` 119 | t1 = a + b 120 | t2 = t1 + c 121 | t3 = t2 + d 122 | t4 = t3 + e 123 | t5 = t4 + f 124 | x = t5 125 | t6 = a + c 126 | t7 = t6 + e 127 | y = t7 128 | ``` 129 | 130 | 2. optimized statements 131 | 132 | ``` 133 | t1 = a + c 134 | t2 = t1 + e 135 | y = t2 136 | t3 = t2 + b 137 | t4 = t3 + d 138 | t5 = t4 + f 139 | x = t5 140 | ``` 141 | -------------------------------------------------------------------------------- /ch08/8.5/assets/8.5.1.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/muhammadalmuzahid/compiler/b4cce261315be3e6a4b49d66eeb3cc8d0591bf60/ch08/8.5/assets/8.5.1.gif -------------------------------------------------------------------------------- /ch08/8.5/assets/8.5.3.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/muhammadalmuzahid/compiler/b4cce261315be3e6a4b49d66eeb3cc8d0591bf60/ch08/8.5/assets/8.5.3.gif -------------------------------------------------------------------------------- /ch08/8.5/assets/8.5.4.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/muhammadalmuzahid/compiler/b4cce261315be3e6a4b49d66eeb3cc8d0591bf60/ch08/8.5/assets/8.5.4.gif -------------------------------------------------------------------------------- /ch12/12.3/12.3.md: 
-------------------------------------------------------------------------------- 1 | # Exercises for Section 12.3 2 | 3 | ### 12.3.1 4 | 5 | 6 | #### Answer 7 | 1) *kill(I,D,X)* :- *defines(I,X) & defines(D,X)* 8 | 2) *out(D,D,X)* :- *defines(D,X)* 9 | 3) *out(I,D,X)* :- *in(I,D,X) & NOT kill(I,D,X)* 10 | 4) *in(I,D,X)* :- *out(J,D,X) & pred(J,I)* 11 | ### 12.3.2 12 | 13 | 14 | #### Answer 15 | 1) *kill(I,X,O,Y)* :- *defines(I,X) OR defines(I,Y)* 16 | 2) *out(I,X,O,Y)* :- *eval(I,X,O,Y)* 17 | 3) *out(I,X,O,Y)* :- *in(I,X,O,Y) & NOT kill(I,X,O,Y)* 18 | 4) *in(I,X,O,Y)* :- *out(J,X,O,Y) & pred(J,I)* 19 | 20 | ### 12.3.3 21 | #### Answer 22 | 1) *out(I,X)* :- *in(J,X) & pred(I,J)* 23 | 2) *in(I,X)* :- *use(I,X) & NOT defines(I,X)* 24 | 25 | ### 12.3.5 26 | #### Answer 27 | a) 28 | 29 | R1: Path(1,2) Path(2,3) Path(3,4) Path(4,1) Path(4,5) Path(5,6) 30 | R2: Path(1,3) Path(2,4) Path(3,1) Path(3,5) Path(4,6) 31 | R3: Path(1,4) Path(1,5) Path(2,1) Path(2,5) Path(3,6) Path(4,3) Path(2,6) Path(3,2) 32 | R4: Path(1,6) Path(4,2) 33 | 34 | b) Same as above 35 | -------------------------------------------------------------------------------- /ch12/12.7/12.7.md: -------------------------------------------------------------------------------- 1 | # Exercises for Section 12.7 2 | 3 | ### 12.7.1 4 | 5 | Using the encoding of symbols in Example 12.28, develop a BDD that represents 6 | the relation consisting of the tuples (b,b), (c,a), and (b,a). You may order the 7 | boolean variables in whatever way gives you the most succinct BDD. 8 | 9 | #### Answer 10 | 11 | ![12.7.1](https://raw.github.com/fangang190/dragon-book-exercise-answers/master/ch12/12.7/assets/12.7.1.png) 12 | 13 | 14 | 15 | ### 12.7.2 16 | 17 | As a function of n, how many nodes are there in the most succinct BDD that 18 | represents the exclusive-or function on n variables. That is, the functions is 19 | true if an odd number of the n variables are true and false if an even number 20 | are true. 
21 | 22 | #### Answer 23 | 24 | For each variable, we put it on one layer. Their is only two possible arrangement left 0 or left 1. So for each variable, we need at most two nodes for it. And we need only one node for the first variable. So the answer is: 25 | *2n-1* 26 | For example, when n is 4: 27 | ![12.7.2](https://raw.github.com/fangang190/dragon-book-exercise-answers/master/ch12/12.7/assets/12.7.2.png) 28 | 29 | ### 12.7.3 30 | 31 | Modify Algorithm 12.29 so it produces the intersection (logical AND) of two 32 | BDD's. 33 | 34 | #### Answer 35 | 36 | There are two places to modify: 37 | 38 | a. BASIS: Zero variables. The BDD's must both be leaves, labeled either 0 or 1. The output is the leaf labeled 1 if `both` input are 1, or the leaf labeled 0 if `either` is 0. 39 | b. INDEUCTION 2. ...The first of these BDD's represents the function that is true for all truth assignments that have y1 = 0 and that make `both` of the give BDD's true. 40 | -------------------------------------------------------------------------------- /ch12/12.7/assets/12.7.1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/muhammadalmuzahid/compiler/b4cce261315be3e6a4b49d66eeb3cc8d0591bf60/ch12/12.7/assets/12.7.1.png -------------------------------------------------------------------------------- /ch12/12.7/assets/12.7.2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/muhammadalmuzahid/compiler/b4cce261315be3e6a4b49d66eeb3cc8d0591bf60/ch12/12.7/assets/12.7.2.png -------------------------------------------------------------------------------- /src/lexer/Lexer.java: -------------------------------------------------------------------------------- 1 | package lexer; 2 | 3 | import java.io.*; 4 | import java.util.*; 5 | 6 | public class Lexer { 7 | private int line = 1; 8 | private char peek = ' '; 9 | private InputStream stream; 10 | private Hashtable 
words = new Hashtable(); 11 | 12 | public Lexer(InputStream stream){ 13 | this.stream = stream; 14 | reserve(new Word(Tag.TRUE, "true")); 15 | reserve(new Word(Tag.FALSE, "false")); 16 | } 17 | 18 | private void reserve(Word t){ 19 | words.put(t.lexeme, t); 20 | } 21 | 22 | public Token scan() throws IOException, SyntaxException{ 23 | for(;;peek = (char)stream.read()){ 24 | if(peek == ' ' || peek == '\t'){ 25 | continue; 26 | }else if(peek == '\n'){ 27 | line = line + 1; 28 | }else{ 29 | break; 30 | } 31 | } 32 | 33 | // handle comment 34 | if(peek == '/'){ 35 | peek = (char) stream.read(); 36 | if(peek == '/'){ 37 | // single line comment 38 | for(;;peek = (char)stream.read()){ 39 | if(peek == '\n'){ 40 | break; 41 | } 42 | } 43 | }else if(peek == '*'){ 44 | // block comment 45 | char prevPeek = ' '; 46 | for(;;prevPeek = peek, peek = (char)stream.read()){ 47 | if(prevPeek == '*' && peek == '/'){ 48 | break; 49 | } 50 | } 51 | }else{ 52 | throw new SyntaxException(); 53 | } 54 | } 55 | 56 | // handle relation sign 57 | if("<=!>".indexOf(peek) > -1){ 58 | StringBuffer b = new StringBuffer(); 59 | b.append(peek); 60 | peek = (char)stream.read(); 61 | if(peek == '='){ 62 | b.append(peek); 63 | } 64 | return new Rel(b.toString()); 65 | } 66 | 67 | // handle number, no type sensitive 68 | if(Character.isDigit(peek) || peek == '.'){ 69 | Boolean isDotExist = false; 70 | StringBuffer b = new StringBuffer(); 71 | do{ 72 | if(peek == '.'){ 73 | isDotExist = true; 74 | } 75 | b.append(peek); 76 | peek = (char)stream.read(); 77 | }while(isDotExist == true ? 
Character.isDigit(peek) : Character.isDigit(peek) || peek == '.'); 78 | return new Num(new Float(b.toString())); 79 | } 80 | 81 | // handle word 82 | if(Character.isLetter(peek)){ 83 | StringBuffer b = new StringBuffer(); 84 | do{ 85 | b.append(peek); 86 | peek = (char)stream.read(); 87 | }while(Character.isLetterOrDigit(peek)); 88 | String s = b.toString(); 89 | Word w = words.get(s); 90 | if(w == null){ 91 | w = new Word(Tag.ID, s); 92 | words.put(s, w); 93 | } 94 | return w; 95 | } 96 | 97 | Token t = new Token(peek); 98 | peek = ' '; 99 | return t; 100 | } 101 | } 102 | -------------------------------------------------------------------------------- /src/lexer/Num.java: -------------------------------------------------------------------------------- 1 | package lexer; 2 | 3 | public class Num extends Token { 4 | public final float value; 5 | public Num(float v){ 6 | super(Tag.NUM); 7 | value = v; 8 | } 9 | } 10 | -------------------------------------------------------------------------------- /src/lexer/Rel.java: -------------------------------------------------------------------------------- 1 | package lexer; 2 | 3 | public class Rel extends Token{ 4 | public final String lexeme; 5 | public Rel(String s){ 6 | super(Tag.REL); 7 | lexeme = new String(s); 8 | } 9 | } 10 | -------------------------------------------------------------------------------- /src/lexer/SyntaxException.java: -------------------------------------------------------------------------------- 1 | package lexer; 2 | 3 | public class SyntaxException extends Exception { 4 | 5 | } 6 | -------------------------------------------------------------------------------- /src/lexer/Tag.java: -------------------------------------------------------------------------------- 1 | package lexer; 2 | 3 | public class Tag { 4 | public final static int 5 | NUM = 256, 6 | ID = 257, 7 | TRUE = 258, 8 | FALSE = 259, 9 | REL = 260; 10 | } 11 | 
/**
 * A lexical token identified solely by an integer tag.
 */
public class Token {
    /** Integer code of this token; assigned once in the constructor. */
    public final int tag;

    /**
     * Creates a token carrying the given tag.
     *
     * @param t tag value to store
     */
    public Token(int t) {
        this.tag = t;
    }
}