├── .settings └── org.eclipse.core.resources.prefs ├── src ├── parsing │ ├── recursive_descent │ │ ├── NameList.g │ │ ├── Test.java │ │ ├── Parser.java │ │ └── ListParser.java │ ├── memoize │ │ ├── List.g │ │ ├── Test.java │ │ ├── BacktrackParser.java │ │ └── Parser.java │ ├── multi │ │ ├── NameList.g │ │ ├── Test.java │ │ ├── Parser.java │ │ └── LookaheadParser.java │ ├── backtrack │ │ ├── NameListWithParallelAssign.g │ │ ├── Test.java │ │ ├── Parser.java │ │ └── BacktrackParser.java │ ├── exception │ │ ├── NoViableAltException.java │ │ ├── RecognitionException.java │ │ ├── MismatchedTokenException.java │ │ └── PreviousParseFailedException.java │ └── lexer │ │ ├── Test.java │ │ ├── Lexer.java │ │ ├── Token.java │ │ └── ListLexer.java └── IR │ ├── Hetero │ ├── ExprNode.java │ ├── IntNode.java │ ├── VectorNode.java │ ├── Test.java │ ├── HeteroAST.java │ ├── AddNode.java │ └── ListNode.java │ ├── RuleNode.java │ ├── Normalized │ ├── IntNode.java │ ├── VectorNode.java │ ├── AddNode.java │ ├── Test.java │ └── ExprNode.java │ ├── TokenNode.java │ ├── Homo │ ├── Test.java │ └── AST.java │ └── ParseTree.java ├── .gitignore ├── .classpath ├── .project └── README.md /.settings/org.eclipse.core.resources.prefs: -------------------------------------------------------------------------------- 1 | eclipse.preferences.version=1 2 | encoding/=UTF-8 3 | -------------------------------------------------------------------------------- /src/parsing/recursive_descent/NameList.g: -------------------------------------------------------------------------------- 1 | list : '[' elements ']' ; // 匹配方括号内的列表 2 | elements : element (',' element)* ; // 匹配中间有逗号的列表 3 | element : NAME | list ; // 一个element要么是NAME,要么是嵌套的列表 -------------------------------------------------------------------------------- /src/parsing/memoize/List.g: -------------------------------------------------------------------------------- 1 | stat : list EOF 2 | | list '=' list 3 | ; 4 | list : '[' elements ']' ; 5 | elements : element (',' element)* ; 6 | element : NAME '=' NAME | NAME | list ; -------------------------------------------------------------------------------- /src/IR/Hetero/ExprNode.java: -------------------------------------------------------------------------------- 1 | package IR.Hetero; 2 | 3 | import parsing.lexer.Token; 4 | 5 | public class ExprNode extends HeteroAST { 6 | public ExprNode(Token t) { 7 | super(t); 8 | } 9 | } 10 | -------------------------------------------------------------------------------- /src/parsing/multi/NameList.g: -------------------------------------------------------------------------------- 1 | list : '[' elements ']' ; // 匹配方括号内的列表 2 | elements : element (',' element)* ; // 匹配中间有逗号的列表 3 | element : NAME '=' NAME // 匹配a=b这样的赋值语句 4 | | NAME 5 | | list 6 | ; -------------------------------------------------------------------------------- /src/parsing/backtrack/NameListWithParallelAssign.g: -------------------------------------------------------------------------------- 1 | stat : list EOF | assign EOF ; 2 | assign : list '=' list ; 3 | list : '[' elements ']' ; 4 | elements : element (',' element)* ; 5 | element : NAME '=' NAME | NAME | list ; -------------------------------------------------------------------------------- /src/IR/Hetero/IntNode.java: -------------------------------------------------------------------------------- 1 | package IR.Hetero; 2 | 3 | import parsing.lexer.Token; 4 | 5 | public class IntNode extends ExprNode { 6 | 7 | public IntNode(Token t) { 8 | super(t); 9 | } 10 | 11 | } 12 
| -------------------------------------------------------------------------------- /src/IR/RuleNode.java: -------------------------------------------------------------------------------- 1 | package IR; 2 | 3 | public class RuleNode extends ParseTree { 4 | 5 | public String value; 6 | 7 | public RuleNode(String value) { 8 | this.value = value; 9 | } 10 | 11 | } 12 | -------------------------------------------------------------------------------- /src/IR/Normalized/IntNode.java: -------------------------------------------------------------------------------- 1 | package IR.Normalized; 2 | 3 | import parsing.lexer.Token; 4 | 5 | public class IntNode extends ExprNode { 6 | public IntNode(Token t) { 7 | super(t); 8 | evalType = tINTEGER; 9 | } 10 | } 11 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *.class 2 | 3 | # Mobile Tools for Java (J2ME) 4 | .mtj.tmp/ 5 | 6 | # Package Files # 7 | *.jar 8 | *.war 9 | *.ear 10 | 11 | # virtual machine crash logs, see http://www.java.com/en/download/help/error_hotspot.xml 12 | hs_err_pid* 13 | /bin/ 14 | -------------------------------------------------------------------------------- /src/IR/TokenNode.java: -------------------------------------------------------------------------------- 1 | package IR; 2 | 3 | import parsing.lexer.Token; 4 | 5 | public class TokenNode extends ParseTree { 6 | 7 | public Token value; 8 | 9 | public TokenNode(Token value) { 10 | this.value = value; 11 | } 12 | 13 | } 14 | -------------------------------------------------------------------------------- /.classpath: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | -------------------------------------------------------------------------------- /src/parsing/exception/NoViableAltException.java: -------------------------------------------------------------------------------- 1 | package parsing.exception; 2 | 3 | public class NoViableAltException extends RecognitionException { 4 | 5 | private static final long serialVersionUID = -7904010744795487060L; 6 | 7 | public NoViableAltException(String msg) { 8 | super(msg); 9 | } 10 | 11 | } 12 | -------------------------------------------------------------------------------- /src/parsing/multi/Test.java: -------------------------------------------------------------------------------- 1 | package parsing.multi; 2 | 3 | import parsing.lexer.ListLexer; 4 | 5 | public class Test { 6 | public static void main(String args[]) { 7 | ListLexer lexer = new ListLexer(args[0]); 8 | LookaheadParser parser = new LookaheadParser(lexer, 2); 9 | parser.list(); 10 | } 11 | } 12 | -------------------------------------------------------------------------------- /src/parsing/exception/RecognitionException.java: -------------------------------------------------------------------------------- 1 | package parsing.exception; 2 | 3 | public class RecognitionException extends Exception { 4 | private static final long serialVersionUID = 7211846070499983580L; 5 | 6 | public RecognitionException() { 7 | } 8 | 9 | public RecognitionException(String msg) { 10 | super(msg); // forward the detail message so getMessage() is not lost 11 | } 12 | 13 | } 14 | -------------------------------------------------------------------------------- /src/IR/Normalized/VectorNode.java: -------------------------------------------------------------------------------- 1 | package IR.Normalized; 2 | 3 | import java.util.List; 4 | 5 | import parsing.lexer.Token; 6 | 7 | 
public class VectorNode extends ExprNode { 8 | public VectorNode(Token t, List<ExprNode> elements) { 9 | super(t); 10 | evalType = tVECTOR; 11 | for (ExprNode e : elements) { 12 | addChild(e); 13 | } 14 | } 15 | } 16 | -------------------------------------------------------------------------------- /src/IR/Normalized/AddNode.java: -------------------------------------------------------------------------------- 1 | package IR.Normalized; 2 | 3 | import parsing.lexer.Token; 4 | 5 | public class AddNode extends ExprNode { 6 | public AddNode(ExprNode left, Token addToken, ExprNode right) { 7 | super(addToken); 8 | addChild(left); 9 | addChild(right); 10 | } 11 | 12 | public int getEvalType() { 13 | return super.getEvalType(); 14 | } 15 | } 16 | -------------------------------------------------------------------------------- /src/IR/Hetero/VectorNode.java: -------------------------------------------------------------------------------- 1 | package IR.Hetero; 2 | 3 | import java.util.ArrayList; 4 | import java.util.List; 5 | 6 | import parsing.lexer.Token; 7 | 8 | public class VectorNode extends ExprNode { 9 | List<ExprNode> elements = new ArrayList<ExprNode>(); 10 | 11 | public VectorNode(Token t, List<ExprNode> elements) { 12 | super(t); 13 | this.elements = elements; 14 | } 15 | } 16 | -------------------------------------------------------------------------------- /src/parsing/memoize/Test.java: -------------------------------------------------------------------------------- 1 | package parsing.memoize; 2 | 3 | import parsing.exception.RecognitionException; 4 | import parsing.lexer.ListLexer; 5 | 6 | public class Test { 7 | public static void main(String args[]) throws RecognitionException { 8 | ListLexer lexer = new ListLexer(args[0]); 9 | BacktrackParser parser = new BacktrackParser(lexer); 10 | parser.stat(); 11 | } 12 | } 13 | -------------------------------------------------------------------------------- /src/parsing/backtrack/Test.java: -------------------------------------------------------------------------------- 1 | package parsing.backtrack; 2 | 3 | import parsing.exception.RecognitionException; 4 | import parsing.lexer.ListLexer; 5 | 6 | public class Test { 7 | public static void main(String args[]) throws RecognitionException { 8 | ListLexer lexer = new ListLexer(args[0]); 9 | BacktrackParser parser = new BacktrackParser(lexer); 10 | parser.stat(); 11 | } 12 | } 13 | -------------------------------------------------------------------------------- /src/parsing/exception/MismatchedTokenException.java: -------------------------------------------------------------------------------- 1 | package parsing.exception; 2 | 3 | public class MismatchedTokenException extends RecognitionException { 4 | 5 | private static final long serialVersionUID = 7817191432293370895L; 6 | 7 | public MismatchedTokenException() { 8 | super(); 9 | } 10 | 11 | public MismatchedTokenException(String msg) { 12 | super(msg); 13 | } 14 | } 15 | -------------------------------------------------------------------------------- /src/parsing/exception/PreviousParseFailedException.java: -------------------------------------------------------------------------------- 1 | package parsing.exception; 2 | 3 | public class PreviousParseFailedException extends RecognitionException { 4 | 5 | private static final long serialVersionUID = -7266107606570387545L; 6 | 7 | public PreviousParseFailedException() { 8 | super(); 9 | } 10 | 11 | public PreviousParseFailedException(String msg) { 12 | super(msg); 13 | } 14 | } 15 | 
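The four exception classes above form a small hierarchy so that a rule method can signal a specific kind of failure while callers only need to catch the common RecognitionException base type. A minimal sketch of that usage pattern follows; the Demo class and its element() method are illustrative only and are not files in this repository.

    package parsing.exception;

    // Hypothetical usage sketch (not part of the repository).
    public class Demo {

        // Stand-in for a parser rule method: a real parser would inspect its
        // lookahead token here and throw when the input cannot be matched.
        static void element() throws RecognitionException {
            throw new MismatchedTokenException("expecting NAME found <']',RBACK>");
        }

        public static void main(String[] args) {
            try {
                element();
            } catch (RecognitionException e) {
                // getMessage() carries the detail text because each constructor
                // in the hierarchy forwards msg to its superclass
                System.out.println("syntax error: " + e.getMessage());
            }
        }
    }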
-------------------------------------------------------------------------------- /.project: -------------------------------------------------------------------------------- 1 | 2 | 3 | Implementation_Patterns 4 | 5 | 6 | 7 | 8 | 9 | org.eclipse.jdt.core.javabuilder 10 | 11 | 12 | 13 | 14 | 15 | org.eclipse.jdt.core.javanature 16 | 17 | 18 | -------------------------------------------------------------------------------- /src/IR/Normalized/Test.java: -------------------------------------------------------------------------------- 1 | package IR.Normalized; 2 | 3 | import parsing.lexer.Token; 4 | 5 | public class Test { 6 | public static void main(String[] args) { 7 | Token plus = new Token(Token.PLUS, "+"); 8 | Token one = new Token(Token.INT, "1"); 9 | Token two = new Token(Token.INT, "2"); 10 | ExprNode root = new AddNode(new IntNode(one), plus, new IntNode(two)); 11 | System.out.println(root.toStringTree()); 12 | } 13 | 14 | } 15 | -------------------------------------------------------------------------------- /src/IR/Hetero/Test.java: -------------------------------------------------------------------------------- 1 | package IR.Hetero; 2 | 3 | import parsing.lexer.Token; 4 | 5 | public class Test { 6 | public static void main(String[] args) { 7 | Token plus = new Token(Token.PLUS, "+"); 8 | Token one = new Token(Token.INT, "1"); 9 | Token two = new Token(Token.INT, "2"); 10 | ExprNode root = new AddNode(new IntNode(one), plus, new IntNode(two)); 11 | System.out.println("1+2 tree: " + root.toStringTree()); 12 | } 13 | 14 | } 15 | -------------------------------------------------------------------------------- /src/parsing/lexer/Test.java: -------------------------------------------------------------------------------- 1 | package parsing.lexer; 2 | 3 | public class Test { 4 | public static void main(String[] args) { 5 | ListLexer lexer = new ListLexer(args[0]); 6 | Token t = lexer.nextToken(); 7 | while (t.type != Token.EOF_TYPE) { 8 | System.out.println(t); 9 | t = lexer.nextToken(); 10 | } 11 | System.out.println(t);// EOF 12 | } 13 | } 14 | 15 | /* 16 | * input: java Test '[a, b ]' 17 | * 18 | * output: 19 | * <'[',LBACK> 20 | * <'a',NAME> 21 | * <',',COMMA> 22 | * <'b',NAME> 23 | * <']',RBACK> 24 | * <'',> 25 | */ -------------------------------------------------------------------------------- /src/IR/Hetero/HeteroAST.java: -------------------------------------------------------------------------------- 1 | package IR.Hetero; 2 | 3 | import parsing.lexer.Token; 4 | 5 | /** 6 | * 不规则异性AST节点类型 7 | */ 8 | public abstract class HeteroAST { 9 | Token token; 10 | 11 | public HeteroAST() { 12 | } 13 | 14 | public HeteroAST(Token t) { 15 | this.token = t; 16 | } 17 | 18 | public HeteroAST(int tokenType) { 19 | this.token = new Token(tokenType); 20 | } 21 | 22 | /** 23 | * 返回节点的文本表示 24 | */ 25 | public String toString() { 26 | return token.toText(); 27 | } 28 | 29 | /** 30 | * 返回树的文本表示,默认返回节点的文本表示 31 | */ 32 | public String toStringTree() { 33 | return toString(); 34 | } 35 | } 36 | -------------------------------------------------------------------------------- /src/IR/Homo/Test.java: -------------------------------------------------------------------------------- 1 | package IR.Homo; 2 | 3 | import parsing.lexer.Token; 4 | 5 | public class Test { 6 | public static void main(String[] arg) { 7 | Token plus = new Token(Token.PLUS, "+"); 8 | Token one = new Token(Token.INT, "1"); 9 | Token two = new Token(Token.INT, "2"); 10 | 11 | AST root = new AST(plus); 12 | root.addChild(new AST(one)); 
13 | root.addChild(new AST(two)); 14 | 15 | System.out.println("1+2 tree: " + root.toStringTree()); 16 | 17 | AST list = new AST(); // 空节点是某个列表的根节点 18 | list.addChild(new AST(one)); 19 | list.addChild(new AST(two)); 20 | System.out.println("1 and 2 in list: " + list.toStringTree()); 21 | 22 | } 23 | } 24 | -------------------------------------------------------------------------------- /src/parsing/recursive_descent/Test.java: -------------------------------------------------------------------------------- 1 | package parsing.recursive_descent; 2 | 3 | import parsing.lexer.ListLexer; 4 | 5 | public class Test { 6 | public static void main(String args[]) { 7 | ListLexer lexer = new ListLexer(args[0]); 8 | ListParser parser = new ListParser(lexer); 9 | parser.list(); 10 | } 11 | } 12 | 13 | /* 14 | * input: "[a, ]" 15 | * output: Exception in thread "main" java.lang.Error: expecting name or list; found <']',RBACK> at 16 | * parsing.recursive_descent.ListParser.element(ListParser.java:40) at 17 | * parsing.recursive_descent.ListParser.elements(ListParser.java:27) at 18 | * parsing.recursive_descent.ListParser.list(ListParser.java:16) at 19 | * parsing.recursive_descent.Test.main(Test.java:9) 20 | */ 21 | -------------------------------------------------------------------------------- /src/IR/ParseTree.java: -------------------------------------------------------------------------------- 1 | package IR; 2 | 3 | import java.util.ArrayList; 4 | import java.util.List; 5 | 6 | import parsing.lexer.Token; 7 | 8 | /** 9 | * 解析树——节点都是此类的实例,实际上并没有Node节点 10 | */ 11 | public abstract class ParseTree { 12 | public List children; 13 | 14 | public RuleNode addChild(String value) { 15 | RuleNode r = new RuleNode(value); 16 | addChild(r); 17 | return r; 18 | } 19 | 20 | public TokenNode addChild(Token value) { 21 | TokenNode t = new TokenNode(value); 22 | addChild(t); 23 | return t; 24 | } 25 | 26 | public void addChild(ParseTree t) { 27 | if (children == null) { 28 | children = new ArrayList(); 29 | } 30 | children.add(t); 31 | } 32 | } 33 | -------------------------------------------------------------------------------- /src/parsing/recursive_descent/Parser.java: -------------------------------------------------------------------------------- 1 | package parsing.recursive_descent; 2 | 3 | import parsing.lexer.Lexer; 4 | import parsing.lexer.Token; 5 | 6 | public class Parser { 7 | Lexer input; // 输入的词法单元 8 | Token lookahead; // 当前的向前看符号 9 | 10 | public Parser(Lexer input) { 11 | this.input = input; 12 | lookahead = input.nextToken(); 13 | } 14 | 15 | /** 16 | * 如果向前看词法类型能匹配x,那么就忽略并返回;否则报错 17 | * 18 | * @param x 19 | * 需要匹配的词法类型 20 | */ 21 | public void match(int x) { 22 | if (lookahead.type == x) 23 | consume(); 24 | else 25 | throw new Error("expecting " + Token.getTokenName(x) + "; found " 26 | + lookahead); 27 | } 28 | 29 | public void consume() { 30 | lookahead = input.nextToken(); 31 | } 32 | } 33 | -------------------------------------------------------------------------------- /src/IR/Hetero/AddNode.java: -------------------------------------------------------------------------------- 1 | package IR.Hetero; 2 | 3 | import parsing.lexer.Token; 4 | 5 | public class AddNode extends ExprNode { 6 | ExprNode left, right; // 不规则的子节点,有自定义的名字 7 | 8 | public AddNode(ExprNode left, Token addToken, ExprNode right) { 9 | super(addToken); 10 | this.left = left; 11 | this.right = right; 12 | } 13 | 14 | public String toStringTree() { 15 | if (left == null || right == null) { 16 | return this.toString(); 17 | } 18 
| StringBuilder buf = new StringBuilder(); 19 | buf.append("("); 20 | buf.append(this.toString()); 21 | buf.append(" "); 22 | buf.append(left.toStringTree()); 23 | buf.append(" "); 24 | buf.append(right.toStringTree()); 25 | buf.append(")"); 26 | return buf.toString(); 27 | } 28 | 29 | } 30 | -------------------------------------------------------------------------------- /src/IR/Normalized/ExprNode.java: -------------------------------------------------------------------------------- 1 | package IR.Normalized; 2 | 3 | import parsing.lexer.Token; 4 | import IR.Homo.AST; 5 | 6 | public class ExprNode extends AST { 7 | public static final int tINVALID = 0;// invalid expression 8 | public static final int tINTEGER = 1;// integer expression 9 | public static final int tVECTOR = 2;// vector expression 10 | 11 | /** 12 | * Records the node's expression type (integer or vector) 13 | */ 14 | int evalType; 15 | 16 | public int getEvalType() { 17 | return evalType; 18 | } 19 | 20 | public ExprNode(Token payload) { 21 | super(payload); 22 | } 23 | 24 | /** 25 | * Return the node's text; if the evaluation type is known, append it 26 | */ 27 | public String toString() { 28 | if (evalType != tINVALID) { 29 | return super.toString() + "<" 30 | + (evalType == tINTEGER ? "tINTEGER" : "tVECTOR") + ">"; 31 | } 32 | return super.toString(); 33 | } 34 | 35 | } 36 | -------------------------------------------------------------------------------- /src/IR/Hetero/ListNode.java: -------------------------------------------------------------------------------- 1 | package IR.Hetero; 2 | 3 | import java.util.ArrayList; 4 | import java.util.List; 5 | 6 | /** 7 | * A flat tree, equivalent to one whose root is nil: (nil child1 child2 ...) 8 | */ 9 | public class ListNode extends HeteroAST { 10 | List<HeteroAST> elements = new ArrayList<HeteroAST>(); 11 | 12 | public ListNode(List<HeteroAST> elements) { 13 | this.elements = elements; 14 | } 15 | 16 | public String toStringTree() { 17 | if (elements == null || elements.size() == 0) { 18 | return this.toString(); 19 | } 20 | StringBuilder buf = new StringBuilder(); 21 | for (int i = 0; elements != null && i < elements.size(); i++) { 22 | HeteroAST t = elements.get(i); 23 | if (i > 0) 24 | buf.append(" "); 25 | buf.append(t.toStringTree()); 26 | } 27 | return buf.toString(); 28 | } 29 | } 30 | -------------------------------------------------------------------------------- /src/parsing/lexer/Lexer.java: -------------------------------------------------------------------------------- 1 | package parsing.lexer; 2 | 3 | public abstract class Lexer { 4 | public static final int INVALID_TOKEN_TYPE = 0; 5 | public static final char EOF = (char) -1;// the EOF character, i.e. end of input 6 | 7 | String input; // the input string 8 | int p = 0;// index of the current input character 9 | char c; // the current character 10 | 11 | public Lexer(String input) { 12 | this.input = input; 13 | c = input.charAt(p); // prime the lookahead character 14 | } 15 | 16 | /** 17 | * Advance one character; detect end of input 18 | */ 19 | public void consume() { 20 | p++; 21 | if (p >= input.length()) 22 | c = EOF; 23 | else 24 | c = input.charAt(p); 25 | } 26 | 27 | /** 28 | * Ensure x is the next character in the input stream 29 | */ 30 | public void match(char x) { 31 | if (c == x) 32 | consume(); 33 | else 34 | throw new Error("expecting " + x + "; found " + c); 35 | } 36 | 37 | public abstract Token nextToken(); 38 | } 39 | -------------------------------------------------------------------------------- /src/parsing/recursive_descent/ListParser.java: -------------------------------------------------------------------------------- 1 | package parsing.recursive_descent; 2 | 3 | import parsing.lexer.Lexer; 4 | import parsing.lexer.Token; 5 | 6 | public class ListParser extends Parser { 7 | public ListParser(Lexer input) { 8 | super(input); 9 | } 10 | 11 | /** 12 | * list : '[' elements 
']' ; 13 | */ 14 | public void list() { 15 | match(Token.LBRACK); 16 | elements(); 17 | match(Token.RBRACK); 18 | } 19 | 20 | /** 21 | * elements : element (',' element)* ; 22 | */ 23 | void elements() { 24 | element(); 25 | while (lookahead.type == Token.COMMA) { 26 | match(Token.COMMA); 27 | element(); 28 | } 29 | } 30 | 31 | /** 32 | * element : NAME | list ; 33 | */ 34 | void element() { 35 | if (lookahead.type == Token.NAME) 36 | match(Token.NAME); 37 | else if (lookahead.type == Token.LBRACK) 38 | list(); 39 | else 40 | throw new Error("expecting name or list; found " + lookahead); 41 | } 42 | } 43 | -------------------------------------------------------------------------------- /src/parsing/multi/Parser.java: -------------------------------------------------------------------------------- 1 | package parsing.multi; 2 | 3 | import parsing.lexer.Lexer; 4 | import parsing.lexer.Token; 5 | 6 | public class Parser { 7 | Lexer input; // source of tokens 8 | Token[] lookahead; // circular lookahead buffer 9 | int k; // number of lookahead symbols 10 | int p = 0; // position in the circular buffer to fill with the next token 11 | 12 | public Parser(Lexer input, int k) { 13 | this.input = input; 14 | this.k = k; 15 | lookahead = new Token[k]; // allocate the lookahead buffer 16 | for (int i = 1; i <= k; i++) { 17 | consume(); // prime the buffer with k lookahead tokens 18 | } 19 | } 20 | 21 | public Token LT(int i) { 22 | return lookahead[(p + i - 1) % k];// circular fetch 23 | } 24 | 25 | public int LA(int i) { 26 | return LT(i).type; 27 | } 28 | 29 | public void match(int x) { 30 | if (LA(1) == x) 31 | consume(); 32 | else 33 | throw new Error("expecting " + Token.getTokenName(x) + "; found " 34 | + LT(1)); 35 | } 36 | 37 | public void consume() { 38 | lookahead[p] = input.nextToken(); // store the token in the next slot 39 | p = (p + 1) % k; // advance the index, wrapping around 40 | } 41 | } 42 | -------------------------------------------------------------------------------- /src/parsing/multi/LookaheadParser.java: -------------------------------------------------------------------------------- 1 | package parsing.multi; 2 | 3 | import parsing.lexer.Lexer; 4 | import parsing.lexer.Token; 5 | 6 | public class LookaheadParser extends Parser { 7 | public LookaheadParser(Lexer input, int k) { 8 | super(input, k); 9 | } 10 | 11 | /** 12 | * list : '[' elements ']' ; 13 | */ 14 | public void list() { 15 | match(Token.LBRACK); 16 | elements(); 17 | match(Token.RBRACK); 18 | } 19 | 20 | /** 21 | * elements : element (',' element)* ; 22 | */ 23 | void elements() { 24 | element(); 25 | while (LA(1) == Token.COMMA) { 26 | match(Token.COMMA); 27 | element(); 28 | } 29 | } 30 | 31 | /** 32 | * element : NAME '=' NAME | NAME | list ; 33 | */ 34 | void element() { 35 | if (LA(1) == Token.NAME && LA(2) == Token.EQUALS) { 36 | match(Token.NAME); 37 | match(Token.EQUALS); 38 | match(Token.NAME); 39 | } else if (LA(1) == Token.NAME) 40 | match(Token.NAME); 41 | else if (LA(1) == Token.LBRACK) 42 | list(); 43 | else 44 | throw new Error("expecting name or list; found " + LT(1)); 45 | } 46 | } 47 | -------------------------------------------------------------------------------- /src/parsing/lexer/Token.java: -------------------------------------------------------------------------------- 1 | package parsing.lexer; 2 | 3 | public class Token { 4 | public static int EOF_TYPE = 1;// the EOF token type 5 | public static int NAME = 2; 6 | public static int COMMA = 3; 7 | public static int LBRACK = 4; 8 | public static int RBRACK = 5; 9 | public static int EQUALS = 6; 10 | public static int PLUS = 7; 11 | public static int MINUS = 8; 12 | public static int INT = 101; 13 | public static String[] tokenNames = { "n/a", "<EOF>", "NAME", "COMMA", 14 | "LBACK", "RBACK", "EQUALS", "PLUS", "MINUS" }; 15 | 16 | public static String[] tokenName_Type = { "n/a", "INT" }; // basic data types 17 | 18 | public static String getTokenName(int x) { 19 | if (x > 100) { 20 | return tokenName_Type[x - 100]; 21 | } 22 | return tokenNames[x]; 23 | } 24 | 25 | public int type; 26 | public String text; 27 | 28 | public Token(int type, String text) { 29 | this.type = type; 30 | this.text = text; 31 | } 32 | 33 | public Token(int type) { 34 | this.type = type; 35 | } 36 | 37 | @Override 38 | public String toString() { 39 | String tname = getTokenName(type); 40 | return "<'" + text + "'," + tname + ">"; 41 | } 42 | 43 | /** 44 | * Return only the token's text 45 | */ 46 | public String toText() { 47 | return text; 48 | } 49 | } 50 | -------------------------------------------------------------------------------- /src/parsing/lexer/ListLexer.java: -------------------------------------------------------------------------------- 1 | package parsing.lexer; 2 | 3 | public class ListLexer extends Lexer { 4 | 5 | public ListLexer(String input) { 6 | super(input); 7 | } 8 | 9 | @Override 10 | public Token nextToken() { 11 | while (c != EOF) { 12 | switch (c) { 13 | case ' ': 14 | case '\t': 15 | case '\n': 16 | case '\r': 17 | WS(); 18 | continue; 19 | case ',': 20 | consume(); 21 | return new Token(Token.COMMA, ","); 22 | case '[': 23 | consume(); 24 | return new Token(Token.LBRACK, "["); 25 | case ']': 26 | consume(); 27 | return new Token(Token.RBRACK, "]"); 28 | case '=': 29 | consume(); 30 | return new Token(Token.EQUALS, "="); 31 | default: 32 | if (isLETTER()) 33 | return NAME(); 34 | throw new Error("invalid character: " + c); 35 | } 36 | } 37 | return new Token(Token.EOF_TYPE, "<EOF>"); 38 | } 39 | 40 | void WS() { 41 | while (c == ' ' || c == '\t' || c == '\n' || c == '\r') 42 | consume(); 43 | } 44 | 45 | Token NAME() { 46 | StringBuilder buf = new StringBuilder(); 47 | do { 48 | buf.append(c); 49 | consume(); 50 | } while (isLETTER()); 51 | return new Token(Token.NAME, buf.toString()); 52 | } 53 | 54 | boolean isLETTER() { 55 | return c >= 'a' && c <= 'z' || c >= 'A' && c <= 'Z'; 56 | } 57 | 58 | } 59 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | Language_Implementation_Patterns 2 | ================================ 3 | 4 | Example programs for Language Implementation Patterns 5 | 6 | ## parsing.lexer 7 | The lexer: breaks the input into Tokens for the parsing stages that follow. 8 | 9 | ## parsing.recursive_descent 10 | Recursive-descent parsing: each rule method matches the grammar based on the next Token it sees. Suitable for LL(1) grammars. 11 | 12 | Example grammar: 13 | 14 | list : '[' elements ']' ; // match a bracketed list 15 | elements : element (',' element)* ; // match a comma-separated list of elements 16 | element : NAME | list ; // an element is either a NAME or a nested list 17 | 18 | Test program: prints nothing on a successful match and throws an exception on failure. 19 | 20 | ## parsing.multi 21 | To handle grammars whose alternatives have overlapping First sets (non-LL(1) grammars), decisions are made by looking ahead 1 to k Tokens. 22 | 23 | Example grammar: 24 | 25 | list : '[' elements ']' ; // match a bracketed list 26 | elements : element (',' element)* ; // match a comma-separated list of elements 27 | element : NAME '=' NAME // match an assignment such as a=b 28 | | NAME 29 | | list 30 | ; 31 | 32 | Test program: prints nothing on a successful match and throws an exception on failure. 33 | 34 | ## parsing.backtrack 35 | DFS-like speculation: try one alternative; if the attempt succeeds, parse it for real, and if it fails, rewind and try the next alternative. When every alternative has been tried without success, the match fails and an exception is thrown. 36 | 37 | Example grammar: 38 | 39 | stat : list EOF | assign EOF ; 40 | assign : list '=' list ; 41 | list : '[' elements ']' ; 42 | elements : element (',' element)* ; 43 | element : NAME '=' NAME | NAME | list ; 44 | 45 | Test program: prints nothing on a successful match and throws an exception on failure. 46 | 47 | ## parsing.memoize 48 | Backtracking with memoization: while matching, record where each rule succeeded or failed, so that the next attempt of the same rule at the same position can reuse the recorded result instead of re-parsing. 49 | 50 | Example grammar: 
51 | 52 | stat : list EOF 53 | | list '=' list 54 | ; 55 | list : '[' elements ']' ; 56 | elements : element (',' element)* ; 57 | element : NAME '=' NAME | NAME | list ; 58 | 59 | Test program: prints a rough trace of the matching process and throws an exception on failure. 60 | 61 | ## IR.Homo 62 | Homogeneous AST: the AST is implemented with a single node type and a normalized child list. 63 | A node has two key fields: the original token and the list of children. 64 | 65 | ## IR.Normalized 66 | Normalized heterogeneous AST: the AST uses multiple node types, with children held in a normalized list. 67 | 68 | ## IR.Hetero 69 | Irregular heterogeneous AST: the AST uses multiple node types, with children held in irregular, node-specific fields. 70 | 71 | -------------------------------------------------------------------------------- /src/IR/Homo/AST.java: -------------------------------------------------------------------------------- 1 | package IR.Homo; 2 | 3 | import java.util.ArrayList; 4 | import java.util.List; 5 | 6 | import parsing.lexer.Token; 7 | 8 | public class AST { 9 | Token token; // the token this node was created from 10 | List<AST> children;// normalized list of child nodes 11 | 12 | /** 13 | * Create an empty (nil) node used as a list root 14 | */ 15 | public AST() { 16 | } 17 | 18 | /** 19 | * Create a node from an existing token 20 | * 21 | * @param token 22 | * the token 23 | */ 24 | public AST(Token token) { 25 | this.token = token; 26 | } 27 | 28 | /** 29 | * Create a node from a token type; mainly used for imaginary nodes 30 | * 31 | * @param tokenType 32 | * the token type 33 | */ 34 | public AST(int tokenType) { 35 | this.token = new Token(tokenType); 36 | } 37 | 38 | /** 39 | * Nodes of the same type make an external visitor run the same code 40 | */ 41 | public int getNodeType() { 42 | return token.type; 43 | } 44 | 45 | public void addChild(Token token) { 46 | AST t = new AST(token); 47 | addChild(t); 48 | } 49 | 50 | public void addChild(AST t) { 51 | if (children == null) { 52 | children = new ArrayList<AST>(); 53 | } 54 | children.add(t); 55 | } 56 | 57 | public boolean isNil() { 58 | return token == null; 59 | } 60 | 61 | /** 62 | * Text form of a single node 63 | */ 64 | public String toString() { 65 | return token != null ? token.toText() : "nil"; 66 | } 67 | 68 | /** 69 | * Text form of the whole tree 70 | */ 71 | public String toStringTree() { 72 | if (children == null || children.size() == 0) { 73 | return this.toString(); 74 | } 75 | StringBuilder buf = new StringBuilder(); 76 | if (!isNil()) { 77 | buf.append("("); 78 | buf.append(this.toString()); 79 | buf.append(" "); 80 | } 81 | for (int i = 0; i < children.size(); i++) { 82 | AST t = children.get(i); 83 | if (i > 0) 84 | buf.append(' '); 85 | buf.append(t.toStringTree()); 86 | } 87 | if (!isNil()) { 88 | buf.append(")"); 89 | } 90 | return buf.toString(); 91 | } 92 | } 93 | -------------------------------------------------------------------------------- /src/parsing/backtrack/Parser.java: -------------------------------------------------------------------------------- 1 | package parsing.backtrack; 2 | 3 | import java.util.ArrayList; 4 | import java.util.List; 5 | 6 | import parsing.exception.MismatchedTokenException; 7 | import parsing.lexer.Lexer; 8 | import parsing.lexer.Token; 9 | 10 | public class Parser { 11 | Lexer input; // source of tokens 12 | List<Integer> markers;// stack of marker positions used for rewinding 13 | List<Token> lookahead;// dynamically sized lookahead buffer 14 | int p = 0;// index of the current lookahead token 15 | 16 | public Parser(Lexer input) { 17 | this.input = input; 18 | markers = new ArrayList<Integer>(); 19 | lookahead = new ArrayList<Token>(); 20 | } 21 | 22 | public Token LT(int i) { 23 | sync(i); 24 | return lookahead.get(p + i - 1); 25 | } 26 | 27 | public int LA(int i) { 28 | return LT(i).type; 29 | } 30 | 31 | public void match(int x) throws MismatchedTokenException { 32 | if (LA(1) == x) 33 | consume(); 34 | else 35 | throw new MismatchedTokenException("expecting " 36 | + Token.getTokenName(x) + " found " + LT(1)); 37 | } 38 | 39 | /** 40 | * Make sure there are i tokens from the current position p onward 41 | * 42 | * @param i 43 | * the number of tokens 44 | */ 45 | public void sync(int 
i) { 46 | if (p + i - 1 > (lookahead.size() - 1)) // 词法单元是否越界 47 | { 48 | int n = (p + i - 1) - (lookahead.size() - 1); // 需要的词法单元的个数 49 | fill(n); 50 | } 51 | } 52 | 53 | /** 54 | * 加入n个词法单元 55 | * 56 | * @param n 57 | * 需加入的词法单元的个数 58 | */ 59 | public void fill(int n) { 60 | for (int i = 1; i <= n; i++) { 61 | lookahead.add(input.nextToken()); 62 | } 63 | } 64 | 65 | public void consume() { 66 | p++; 67 | // 非推断状态,而且到达向前看缓冲区的末尾 68 | if (p == lookahead.size() && !isSpeculating()) { 69 | // 到了末尾,应该重新从0开始填入新的词法单元 70 | p = 0; 71 | lookahead.clear(); 72 | } 73 | sync(1);// 取一个新的词法单元 74 | } 75 | 76 | public int mark() { 77 | markers.add(p); 78 | return p; 79 | } 80 | 81 | public void release() { 82 | int marker = markers.get(markers.size() - 1); 83 | markers.remove(markers.size() - 1); 84 | seek(marker); 85 | } 86 | 87 | public void seek(int index) { 88 | p = index; 89 | } 90 | 91 | /** 92 | * 当前是否在推断状态 93 | * 94 | * @return 是否是推断状态 95 | */ 96 | public boolean isSpeculating() { 97 | return markers.size() > 0; 98 | } 99 | } 100 | -------------------------------------------------------------------------------- /src/parsing/backtrack/BacktrackParser.java: -------------------------------------------------------------------------------- 1 | package parsing.backtrack; 2 | 3 | import parsing.exception.NoViableAltException; 4 | import parsing.exception.RecognitionException; 5 | import parsing.lexer.Lexer; 6 | import parsing.lexer.Token; 7 | 8 | public class BacktrackParser extends Parser { 9 | 10 | public BacktrackParser(Lexer input) { 11 | super(input); 12 | } 13 | 14 | /** 15 | * stat : list EOF | assign EOF ; 16 | */ 17 | public void stat() throws RecognitionException { 18 | // 尝试解析选项 1: list EOF 19 | if (speculate_stat_alt1()) { 20 | list(); 21 | match(Token.EOF_TYPE); 22 | // 尝试解析选项2: assign EOF 23 | } else if (speculate_stat_alt2()) { 24 | assign(); 25 | match(Token.EOF_TYPE); 26 | // 两个都不匹配,出错 27 | } else 28 | throw new NoViableAltException("expection stat found " + LT(1)); 29 | } 30 | 31 | public boolean speculate_stat_alt1() { 32 | boolean success = true; 33 | mark(); // 标记当前位置 34 | try { 35 | list(); 36 | match(Token.EOF_TYPE); 37 | } catch (RecognitionException e) { 38 | success = false; 39 | } 40 | release(); // 回溯 41 | return success; 42 | } 43 | 44 | public boolean speculate_stat_alt2() { 45 | boolean success = true; 46 | mark(); // 标记当前位置 47 | try { 48 | assign(); 49 | match(Token.EOF_TYPE); 50 | } catch (RecognitionException e) { 51 | success = false; 52 | } 53 | release(); // 回溯 54 | return success; 55 | } 56 | 57 | /** 58 | * assign : list '=' list ; 59 | */ 60 | public void assign() throws RecognitionException { 61 | list(); 62 | match(Token.EQUALS); 63 | list(); 64 | } 65 | 66 | /** 67 | * list : '[' elements ']' ; 68 | */ 69 | public void list() throws RecognitionException { 70 | match(Token.LBRACK); 71 | elements(); 72 | match(Token.RBRACK); 73 | } 74 | 75 | /** 76 | * elements : element (',' element)* ; 77 | */ 78 | void elements() throws RecognitionException { 79 | element(); 80 | while (LA(1) == Token.COMMA) { 81 | match(Token.COMMA); 82 | element(); 83 | } 84 | } 85 | 86 | /** 87 | * element : NAME '=' NAME | NAME | list ; 88 | */ 89 | void element() throws RecognitionException { 90 | if (LA(1) == Token.NAME && LA(2) == Token.EQUALS) { 91 | match(Token.NAME); 92 | match(Token.EQUALS); 93 | match(Token.NAME); 94 | } else if (LA(1) == Token.NAME) 95 | match(Token.NAME); 96 | else if (LA(1) == Token.LBRACK) 97 | list(); 98 | else 99 | throw new Error("expecting name or list; 
found " + LT(1)); 100 | } 101 | } 102 | -------------------------------------------------------------------------------- /src/parsing/memoize/BacktrackParser.java: -------------------------------------------------------------------------------- 1 | package parsing.memoize; 2 | 3 | import java.util.HashMap; 4 | import java.util.Map; 5 | 6 | import parsing.exception.NoViableAltException; 7 | import parsing.exception.RecognitionException; 8 | import parsing.lexer.Lexer; 9 | import parsing.lexer.Token; 10 | 11 | public class BacktrackParser extends Parser { 12 | 13 | Map list_memo; 14 | 15 | public BacktrackParser(Lexer input) { 16 | super(input); 17 | list_memo = new HashMap(); 18 | } 19 | 20 | /** 21 | * stat : list EOF | assign EOF ; 22 | */ 23 | public void stat() throws RecognitionException { 24 | // 尝试解析选项 1: list EOF 25 | if (speculate_stat_alt1()) { 26 | list(); 27 | match(Token.EOF_TYPE); 28 | // 尝试解析选项2: assign EOF 29 | } else if (speculate_stat_alt2()) { 30 | assign(); 31 | match(Token.EOF_TYPE); 32 | // 两个都不匹配,出错 33 | } else 34 | throw new NoViableAltException("expection stat found " + LT(1)); 35 | } 36 | 37 | public boolean speculate_stat_alt1() { 38 | System.out.println("attempt alternative 1"); 39 | boolean success = true; 40 | mark(); // 标记当前位置 41 | try { 42 | list(); 43 | match(Token.EOF_TYPE); 44 | } catch (RecognitionException e) { 45 | success = false; 46 | } 47 | release(); // 回溯 48 | return success; 49 | } 50 | 51 | public boolean speculate_stat_alt2() { 52 | System.out.println("attempt alternative 2"); 53 | boolean success = true; 54 | mark(); // 标记当前位置 55 | try { 56 | assign(); 57 | match(Token.EOF_TYPE); 58 | } catch (RecognitionException e) { 59 | success = false; 60 | } 61 | release(); // 回溯 62 | return success; 63 | } 64 | 65 | /** 66 | * assign : list '=' list ; 67 | */ 68 | public void assign() throws RecognitionException { 69 | list(); 70 | match(Token.EQUALS); 71 | list(); 72 | } 73 | 74 | /** 75 | * list : '[' elements ']' ; 76 | */ 77 | public void list() throws RecognitionException { 78 | boolean failed = false; 79 | int startTokenIndex = index(); // 获取当前词法单元的位置 80 | if (isSpeculating() && alreadyParsedRule(list_memo)) 81 | return; 82 | // 之前没有在tokenIndex处解析过 83 | try { 84 | _list(); 85 | } catch (RecognitionException re) { 86 | failed = true; 87 | throw re; 88 | } finally { 89 | if (isSpeculating()) { 90 | // 回溯记录解析结果 91 | memoize(list_memo, startTokenIndex, failed); 92 | } 93 | } 94 | } 95 | 96 | /** 97 | * list : '[' elements ']' ; 98 | */ 99 | public void _list() throws RecognitionException { 100 | System.out.println("parsing list rule at token index: " + index()); 101 | match(Token.LBRACK); 102 | elements(); 103 | match(Token.RBRACK); 104 | } 105 | 106 | /** 107 | * elements : element (',' element)* ; 108 | */ 109 | void elements() throws RecognitionException { 110 | element(); 111 | while (LA(1) == Token.COMMA) { 112 | match(Token.COMMA); 113 | element(); 114 | } 115 | } 116 | 117 | /** 118 | * element : NAME '=' NAME | NAME | list ; 119 | */ 120 | void element() throws RecognitionException { 121 | if (LA(1) == Token.NAME && LA(2) == Token.EQUALS) { 122 | match(Token.NAME); 123 | match(Token.EQUALS); 124 | match(Token.NAME); 125 | } else if (LA(1) == Token.NAME) 126 | match(Token.NAME); 127 | else if (LA(1) == Token.LBRACK) 128 | list(); 129 | else 130 | throw new Error("expecting name or list; found " + LT(1)); 131 | } 132 | 133 | @Override 134 | public void clearMemo() { 135 | list_memo.clear(); 136 | } 137 | } 138 | 
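A minimal driver sketch for exercising the memoizing parser above end to end; the Demo class below is illustrative only and is not a file in this repository. It feeds two inputs accepted by the stat rule through BacktrackParser, which is what parsing.memoize.Test does with its single command-line argument.

    package parsing.memoize;

    import parsing.exception.RecognitionException;
    import parsing.lexer.ListLexer;

    // Hypothetical driver, not part of the repository.
    public class Demo {
        public static void main(String[] args) throws RecognitionException {
            String[] inputs = { "[a, b]", "[a, b] = [c, d]" };
            for (String input : inputs) {
                System.out.println("input: " + input);
                // each input gets a fresh lexer/parser pair because the parser keeps
                // its lookahead buffer and memo table for the lifetime of the instance
                BacktrackParser parser = new BacktrackParser(new ListLexer(input));
                parser.stat(); // prints the speculation trace; throws on a syntax error
            }
        }
    }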
-------------------------------------------------------------------------------- /src/parsing/memoize/Parser.java: -------------------------------------------------------------------------------- 1 | package parsing.memoize; 2 | 3 | import java.util.ArrayList; 4 | import java.util.List; 5 | import java.util.Map; 6 | 7 | import parsing.exception.MismatchedTokenException; 8 | import parsing.exception.PreviousParseFailedException; 9 | import parsing.lexer.Lexer; 10 | import parsing.lexer.Token; 11 | 12 | public abstract class Parser { 13 | public static final int FAILED = -1; // 表示上一次解析失败 14 | 15 | Lexer input; // 词法单元的来源 16 | List markers;// 栈,存放用于记录位置的位标(标记) 17 | List lookahead;// 大小可变的缓冲区 18 | int p = 0;// 当前向前看词法单元的下标 19 | 20 | public Parser(Lexer input) { 21 | this.input = input; 22 | markers = new ArrayList(); 23 | lookahead = new ArrayList(); 24 | } 25 | 26 | public Token LT(int i) { 27 | sync(i); 28 | return lookahead.get(p + i - 1); 29 | } 30 | 31 | public int LA(int i) { 32 | return LT(i).type; 33 | } 34 | 35 | public void match(int x) throws MismatchedTokenException { 36 | if (LA(1) == x) 37 | consume(); 38 | else 39 | throw new MismatchedTokenException("expecting " 40 | + Token.getTokenName(x) + " found " + LT(1)); 41 | } 42 | 43 | /** 44 | * 确保当前位置p之后有i个词法单元 45 | * 46 | * @param i 47 | * 词法单元的个数 48 | */ 49 | public void sync(int i) { 50 | if (p + i - 1 > (lookahead.size() - 1)) // 词法单元是否越界 51 | { 52 | int n = (p + i - 1) - (lookahead.size() - 1); // 需要的词法单元的个数 53 | fill(n); 54 | } 55 | } 56 | 57 | /** 58 | * 加入n个词法单元 59 | * 60 | * @param n 61 | * 需加入的词法单元的个数 62 | */ 63 | public void fill(int n) { 64 | for (int i = 1; i <= n; i++) { 65 | lookahead.add(input.nextToken()); 66 | } 67 | } 68 | 69 | public void consume() { 70 | p++; 71 | // 非推断状态,而且到达向前看缓冲区的末尾 72 | if (p == lookahead.size() && !isSpeculating()) { 73 | // 到了末尾,应该重新从0开始填入新的词法单元 74 | p = 0; 75 | lookahead.clear(); 76 | clearMemo(); // 清除相关记录 77 | } 78 | sync(1);// 取一个新的词法单元 79 | } 80 | 81 | public int mark() { 82 | markers.add(p); 83 | return p; 84 | } 85 | 86 | public void release() { 87 | int marker = markers.get(markers.size() - 1); 88 | markers.remove(markers.size() - 1); 89 | seek(marker); 90 | } 91 | 92 | public void seek(int index) { 93 | p = index; 94 | } 95 | 96 | /** 97 | * 当前是否在推断状态״̬ 98 | * 99 | * @return 是否是推断状态 100 | */ 101 | public boolean isSpeculating() { 102 | return markers.size() > 0; 103 | } 104 | 105 | /** 106 | * 判断在当前位置是否解析过这个规则
107 | * If there is no record, the rule has not been parsed here before. A return value of FAILED means the previous attempt failed. 108 | * A value greater than or equal to 0 is a token-buffer index and means the previous attempt succeeded. 109 | */ 110 | public boolean alreadyParsedRule(Map<Integer, Integer> memoization) 111 | throws PreviousParseFailedException { 112 | Integer memoI = memoization.get(index()); 113 | if (memoI == null) { 114 | return false; 115 | } 116 | int memo = memoI.intValue(); 117 | System.out.println("parsed list before at index " + index() 118 | + "; skip ahead to token index " + memo + ": " 119 | + lookahead.get(memo).text); 120 | if (memo == FAILED) 121 | throw new PreviousParseFailedException(); 122 | 123 | seek(memo); 124 | return true; 125 | } 126 | 127 | /** 128 | * While backtracking, record the intermediate parse result: 129 | * FAILED on failure, or the token index where the successful match stopped. 130 | */ 131 | public void memoize(Map<Integer, Integer> memoization, int startTokenIndex, 132 | boolean failed) { 133 | int stopTokenIndex = failed ? FAILED : index(); 134 | memoization.put(startTokenIndex, stopTokenIndex); 135 | } 136 | 137 | public abstract void clearMemo(); 138 | 139 | /** 140 | * Return the current position in the input stream 141 | */ 142 | public int index() { 143 | return p; 144 | } 145 | } 146 | --------------------------------------------------------------------------------
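For a parallel-assignment input such as [a, b] = [c, d] (token indexes start at 0, so '=' sits at index 5 and the second '[' at index 6), the memoizing parser defined by the two classes above behaves like this: speculating on alternative 1, it parses the leading list, memoizes that the list rule succeeded starting at index 0, and then fails to match EOF at the '='. Speculating on alternative 2, the assign rule calls list at index 0 again, finds the memo entry and seeks straight to index 5 without re-parsing, matches the rest and succeeds; the statement is then parsed once more for real, with no memo lookups because memoization is only consulted while speculating. The console output produced by the println calls in the sources above looks roughly like this:

    attempt alternative 1
    parsing list rule at token index: 0
    attempt alternative 2
    parsed list before at index 0; skip ahead to token index 5: =
    parsing list rule at token index: 6
    parsing list rule at token index: 0
    parsing list rule at token index: 6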