├── code ├── chapter6 │ ├── yacc │ │ ├── .gitignore │ │ ├── Makefile │ │ ├── y.tab.h │ │ ├── token.l │ │ └── calculator.y │ ├── antlr │ │ ├── .mvn │ │ │ └── wrapper │ │ │ │ ├── maven-wrapper.jar │ │ │ │ ├── maven-wrapper.properties │ │ │ │ └── MavenWrapperDownloader.java │ │ ├── src │ │ │ ├── main │ │ │ │ ├── java │ │ │ │ │ └── com │ │ │ │ │ │ └── github │ │ │ │ │ │ └── asciidwango │ │ │ │ │ │ └── parser_book │ │ │ │ │ │ └── ch5 │ │ │ │ │ │ └── PetitXML.java │ │ │ │ └── antlr4 │ │ │ │ │ └── com │ │ │ │ │ └── github │ │ │ │ │ └── asciidwango │ │ │ │ │ └── parser_book │ │ │ │ │ └── ch5 │ │ │ │ │ ├── PetitXML.g4 │ │ │ │ │ ├── Expression.g4 │ │ │ │ │ └── LRExpression.g4 │ │ │ └── test │ │ │ │ └── java │ │ │ │ └── com │ │ │ │ └── github │ │ │ │ └── asciidwango │ │ │ │ └── parser_book │ │ │ │ └── ch5 │ │ │ │ ├── ANTLRExpressionParserTest.java │ │ │ │ ├── ANTLRLRExpressionParserTest.java │ │ │ │ └── ANTLRPetitXMLParserTest.java │ │ ├── pom.xml │ │ └── mvnw.cmd │ ├── jcomb │ │ ├── src │ │ │ ├── main │ │ │ │ └── java │ │ │ │ │ └── com │ │ │ │ │ └── github │ │ │ │ │ └── kmizu │ │ │ │ │ └── jcomb │ │ │ │ │ ├── Pair.java │ │ │ │ │ ├── Result.java │ │ │ │ │ ├── JParser.java │ │ │ │ │ └── JComb.java │ │ │ └── test │ │ │ │ └── java │ │ │ │ └── com │ │ │ │ └── github │ │ │ │ └── kmizu │ │ │ │ └── jcomb │ │ │ │ ├── JCombTest.java │ │ │ │ └── JCombMathematicalExpressionTest.java │ │ └── pom.xml │ └── javacc │ │ ├── src │ │ ├── test │ │ │ └── java │ │ │ │ └── com │ │ │ │ └── github │ │ │ │ └── kmizu │ │ │ │ └── calculator │ │ │ │ └── CalculatorTest.java │ │ └── main │ │ │ └── java │ │ │ └── com │ │ │ └── github │ │ │ └── kmizu │ │ │ └── calculator │ │ │ └── Calculator.jj │ │ └── pom.xml ├── chapter3 │ ├── gradle │ │ └── wrapper │ │ │ ├── gradle-wrapper.jar │ │ │ └── gradle-wrapper.properties │ ├── src │ │ ├── main │ │ │ └── java │ │ │ │ └── parser │ │ │ │ ├── JsonParser.java │ │ │ │ ├── JsonTokenizer.java │ │ │ │ ├── ParseException.java │ │ │ │ ├── TokenizerException.java │ │ │ │ ├── 
ParseResult.java │ │ │ │ ├── Pair.java │ │ │ │ ├── Token.java │ │ │ │ ├── Ast.java │ │ │ │ ├── SimpleJsonParser.java │ │ │ │ ├── SimpleJsonTokenizer.java │ │ │ │ └── PegJsonParser.java │ │ └── test │ │ │ └── java │ │ │ └── parser │ │ │ ├── PegJsonParserTest.java │ │ │ ├── SimpleJsonParserTest.java │ │ │ └── SimpleJsonTokzenizerTest.java │ ├── build.gradle │ ├── gradlew.bat │ └── gradlew └── chapter5 │ ├── gradle │ └── wrapper │ │ ├── gradle-wrapper.jar │ │ └── gradle-wrapper.properties │ ├── src │ ├── main │ │ └── java │ │ │ └── parser │ │ │ ├── ll1 │ │ │ ├── Grammar.java │ │ │ ├── Rule.java │ │ │ ├── Expression.java │ │ │ ├── Main.java │ │ │ └── LL1Recognizer.java │ │ │ ├── lr0 │ │ │ ├── Grammar.java │ │ │ ├── Rule.java │ │ │ ├── Expression.java │ │ │ ├── LR0Item.java │ │ │ ├── LR0ItemSet.java │ │ │ ├── Main.java │ │ │ └── LR0Recognizer.java │ │ │ ├── slr1 │ │ │ ├── Grammar.java │ │ │ ├── Rule.java │ │ │ ├── Expression.java │ │ │ ├── LR0Item.java │ │ │ ├── LR0ItemSet.java │ │ │ └── Main.java │ │ │ ├── Elements.java │ │ │ ├── Rule.java │ │ │ └── Dyck.java │ └── test │ │ └── java │ │ └── parser │ │ ├── RuleTest.java │ │ ├── slr1 │ │ ├── SLR1ParserFollowTest.java │ │ ├── SLR1ParserSimpleTest.java │ │ └── SLR1ParserTest.java │ │ ├── lr0 │ │ └── LR0RecognizerTest.java │ │ └── ll1 │ │ └── LL1RecognizerTest.java │ ├── build.gradle │ ├── gradlew.bat │ └── gradlew ├── pdf ├── parser_book-20250616.pdf ├── parser_book-20250621.pdf ├── parser_book-20250628.pdf ├── parser_book-20250713.pdf ├── parser_book-20251014.pdf └── parser_book-20251203.pdf ├── PLAN.md ├── .gitignore ├── templates └── minimal_japanese.latex ├── PURPOSE.md ├── contents ├── references.md └── chapter8.md ├── metadata.yaml ├── README.md └── JAPANESE_FONT_SETUP.md /code/chapter6/yacc/.gitignore: -------------------------------------------------------------------------------- 1 | parser 2 | -------------------------------------------------------------------------------- /pdf/parser_book-20250616.pdf: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/kahei/parser_book/HEAD/pdf/parser_book-20250616.pdf -------------------------------------------------------------------------------- /pdf/parser_book-20250621.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kahei/parser_book/HEAD/pdf/parser_book-20250621.pdf -------------------------------------------------------------------------------- /pdf/parser_book-20250628.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kahei/parser_book/HEAD/pdf/parser_book-20250628.pdf -------------------------------------------------------------------------------- /pdf/parser_book-20250713.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kahei/parser_book/HEAD/pdf/parser_book-20250713.pdf -------------------------------------------------------------------------------- /pdf/parser_book-20251014.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kahei/parser_book/HEAD/pdf/parser_book-20251014.pdf -------------------------------------------------------------------------------- /pdf/parser_book-20251203.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kahei/parser_book/HEAD/pdf/parser_book-20251203.pdf -------------------------------------------------------------------------------- /code/chapter3/gradle/wrapper/gradle-wrapper.jar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kahei/parser_book/HEAD/code/chapter3/gradle/wrapper/gradle-wrapper.jar -------------------------------------------------------------------------------- 
/code/chapter5/gradle/wrapper/gradle-wrapper.jar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kahei/parser_book/HEAD/code/chapter5/gradle/wrapper/gradle-wrapper.jar -------------------------------------------------------------------------------- /code/chapter6/antlr/.mvn/wrapper/maven-wrapper.jar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kahei/parser_book/HEAD/code/chapter6/antlr/.mvn/wrapper/maven-wrapper.jar -------------------------------------------------------------------------------- /code/chapter6/jcomb/src/main/java/com/github/kmizu/jcomb/Pair.java: -------------------------------------------------------------------------------- 1 | package com.github.kmizu.jcomb; 2 | 3 | public record Pair(A a, B b) {} 4 | -------------------------------------------------------------------------------- /code/chapter6/jcomb/src/main/java/com/github/kmizu/jcomb/Result.java: -------------------------------------------------------------------------------- 1 | package com.github.kmizu.jcomb; 2 | 3 | record Result(V value, String rest){} 4 | -------------------------------------------------------------------------------- /PLAN.md: -------------------------------------------------------------------------------- 1 | # 執筆計画(最新版) 2 | 3 | - ステータス(2025年06月2日) 4 | - 第1章~第8章までひととおり執筆完了 5 | - 各章をセルフレビューしている 6 | - 2025年6/8(日)中に初稿完成 7 | - 2025年6/9(月)~2025年7/6(日)までレビュー期間 -------------------------------------------------------------------------------- /code/chapter5/src/main/java/parser/ll1/Grammar.java: -------------------------------------------------------------------------------- 1 | package parser.ll1; 2 | import java.util.List; 3 | 4 | public record Grammar(String start, List rules) {} -------------------------------------------------------------------------------- /code/chapter5/src/main/java/parser/lr0/Grammar.java: 
-------------------------------------------------------------------------------- 1 | package parser.lr0; 2 | import java.util.List; 3 | 4 | public record Grammar(String start, List rules) {} -------------------------------------------------------------------------------- /code/chapter5/src/main/java/parser/ll1/Rule.java: -------------------------------------------------------------------------------- 1 | package parser.ll1; 2 | 3 | import java.util.List; 4 | 5 | public record Rule(String name, List body) {} -------------------------------------------------------------------------------- /code/chapter5/src/main/java/parser/lr0/Rule.java: -------------------------------------------------------------------------------- 1 | package parser.lr0; 2 | 3 | import java.util.List; 4 | 5 | public record Rule(String name, List body) {} -------------------------------------------------------------------------------- /code/chapter6/yacc/Makefile: -------------------------------------------------------------------------------- 1 | all: parser lexer 2 | gcc -o parser y.tab.c lex.yy.c 3 | lexer: lex.yy.c 4 | flex token.l 5 | parser: y.tab.c 6 | yacc calculator.y 7 | -------------------------------------------------------------------------------- /code/chapter5/src/main/java/parser/slr1/Grammar.java: -------------------------------------------------------------------------------- 1 | package parser.slr1; 2 | import java.util.List; 3 | 4 | public record Grammar(String start, List rules) {} 5 | -------------------------------------------------------------------------------- /code/chapter5/src/main/java/parser/slr1/Rule.java: -------------------------------------------------------------------------------- 1 | package parser.slr1; 2 | 3 | import java.util.List; 4 | 5 | public record Rule(String name, List body) {} 6 | -------------------------------------------------------------------------------- /code/chapter3/src/main/java/parser/JsonParser.java: 
-------------------------------------------------------------------------------- 1 | package parser; 2 | public interface JsonParser { 3 | public ParseResult parse(String input); 4 | } 5 | -------------------------------------------------------------------------------- /code/chapter3/src/main/java/parser/JsonTokenizer.java: -------------------------------------------------------------------------------- 1 | package parser; 2 | 3 | public interface JsonTokenizer { 4 | boolean moveNext(); 5 | Token current(); 6 | } 7 | -------------------------------------------------------------------------------- /code/chapter3/src/main/java/parser/ParseException.java: -------------------------------------------------------------------------------- 1 | package parser; 2 | 3 | class ParseException extends RuntimeException { 4 | public ParseException(String message) { 5 | super(message); 6 | } 7 | } 8 | -------------------------------------------------------------------------------- /code/chapter6/yacc/y.tab.h: -------------------------------------------------------------------------------- 1 | #ifndef _yy_defines_h_ 2 | #define _yy_defines_h_ 3 | 4 | #define NUM 257 5 | #define EOL 258 6 | #define UMINUS 259 7 | 8 | extern int yylval; 9 | 10 | #endif /* _yy_defines_h_ */ 11 | -------------------------------------------------------------------------------- /code/chapter3/src/main/java/parser/TokenizerException.java: -------------------------------------------------------------------------------- 1 | package parser; 2 | 3 | public class TokenizerException extends RuntimeException { 4 | public TokenizerException(String message) { 5 | super(message); 6 | } 7 | } 8 | -------------------------------------------------------------------------------- /code/chapter6/antlr/.mvn/wrapper/maven-wrapper.properties: -------------------------------------------------------------------------------- 1 | 
distributionUrl=https://repo.maven.apache.org/maven2/org/apache/maven/apache-maven/3.6.1/apache-maven-3.6.1-bin.zip 2 | wrapperUrl=https://repo.maven.apache.org/maven2/io/takari/maven-wrapper/0.5.5/maven-wrapper-0.5.5.jar 3 | -------------------------------------------------------------------------------- /code/chapter5/src/main/java/parser/Elements.java: -------------------------------------------------------------------------------- 1 | package parser; 2 | public class Elements { 3 | public sealed interface Element permits NonTerminal, Terminal {} 4 | public record NonTerminal(char name) implements Element {} 5 | public record Terminal(char value) implements Element {} 6 | } -------------------------------------------------------------------------------- /code/chapter5/src/main/java/parser/ll1/Expression.java: -------------------------------------------------------------------------------- 1 | package parser.ll1; 2 | 3 | public sealed interface Expression permits Expression.NonTerminal, Expression.Terminal { 4 | record NonTerminal(String name) implements Expression {} 5 | record Terminal(String value) implements Expression {} 6 | } -------------------------------------------------------------------------------- /code/chapter5/src/main/java/parser/lr0/Expression.java: -------------------------------------------------------------------------------- 1 | package parser.lr0; 2 | 3 | public sealed interface Expression permits Expression.NonTerminal, Expression.Terminal { 4 | record NonTerminal(String name) implements Expression {} 5 | record Terminal(String value) implements Expression {} 6 | } -------------------------------------------------------------------------------- /code/chapter3/gradle/wrapper/gradle-wrapper.properties: -------------------------------------------------------------------------------- 1 | #Tue Oct 18 12:03:44 JST 2022 2 | distributionBase=GRADLE_USER_HOME 3 | distributionUrl=https\://services.gradle.org/distributions/gradle-7.3.1-bin.zip 4 | 
distributionPath=wrapper/dists 5 | zipStorePath=wrapper/dists 6 | zipStoreBase=GRADLE_USER_HOME 7 | -------------------------------------------------------------------------------- /code/chapter5/gradle/wrapper/gradle-wrapper.properties: -------------------------------------------------------------------------------- 1 | #Tue Oct 18 12:03:44 JST 2022 2 | distributionBase=GRADLE_USER_HOME 3 | distributionUrl=https\://services.gradle.org/distributions/gradle-8.13-bin.zip 4 | distributionPath=wrapper/dists 5 | zipStorePath=wrapper/dists 6 | zipStoreBase=GRADLE_USER_HOME 7 | -------------------------------------------------------------------------------- /code/chapter5/src/main/java/parser/slr1/Expression.java: -------------------------------------------------------------------------------- 1 | package parser.slr1; 2 | 3 | public sealed interface Expression permits Expression.NonTerminal, Expression.Terminal { 4 | record NonTerminal(String name) implements Expression {} 5 | record Terminal(String value) implements Expression {} 6 | } 7 | -------------------------------------------------------------------------------- /code/chapter3/src/main/java/parser/ParseResult.java: -------------------------------------------------------------------------------- 1 | package parser; 2 | public class ParseResult { 3 | public final T value; 4 | public final String next; 5 | public ParseResult(T value, String next) { 6 | this.value = value; 7 | this.next = next; 8 | } 9 | } -------------------------------------------------------------------------------- /code/chapter3/build.gradle: -------------------------------------------------------------------------------- 1 | plugins { 2 | id 'java' 3 | id 'idea' 4 | id 'eclipse' 5 | } 6 | 7 | sourceCompatibility = 11 8 | targetCompatibility = 11 9 | 10 | repositories { 11 | mavenCentral() 12 | } 13 | 14 | dependencies { 15 | testImplementation('org.junit.jupiter:junit-jupiter:5.6.2') 16 | } 17 | 
-------------------------------------------------------------------------------- /code/chapter6/antlr/src/main/java/com/github/asciidwango/parser_book/ch5/PetitXML.java: -------------------------------------------------------------------------------- 1 | package com.github.asciidwango.parser_book.ch5; 2 | 3 | import java.util.List; 4 | 5 | public class PetitXML { 6 | public static record Element(String name, List children) { 7 | public Element(String name) { this(name, List.of()); } 8 | } 9 | } 10 | -------------------------------------------------------------------------------- /code/chapter6/yacc/token.l: -------------------------------------------------------------------------------- 1 | %{ 2 | #include "y.tab.h" 3 | %} 4 | 5 | %% 6 | [0-9]+ { yylval = atoi(yytext); return NUM; } 7 | [-+*/()] { return yytext[0]; } 8 | [ \t] { /* ignore whitespce */ } 9 | "\r\n" { return EOL; } 10 | "\r" { return EOL; } 11 | "\n" { return EOL; } 12 | . { printf("Invalid character: %s\n", yytext); } 13 | %% 14 | -------------------------------------------------------------------------------- /code/chapter6/jcomb/src/main/java/com/github/kmizu/jcomb/JParser.java: -------------------------------------------------------------------------------- 1 | package com.github.kmizu.jcomb; 2 | import java.util.function.*; 3 | 4 | interface JParser { 5 | Result parse(String input); 6 | 7 | default JParser map(Function f) { 8 | return (input) -> { 9 | var result = this.parse(input); 10 | if (result == null) return null; 11 | return new Result<>(f.apply(result.value()), result.rest()); 12 | }; 13 | } 14 | } 15 | -------------------------------------------------------------------------------- /code/chapter5/build.gradle: -------------------------------------------------------------------------------- 1 | plugins { 2 | id 'java' 3 | id 'idea' 4 | id 'eclipse' 5 | } 6 | 7 | java { 8 | sourceCompatibility = JavaVersion.VERSION_21 9 | targetCompatibility = JavaVersion.VERSION_21 10 | } 11 | 12 | 
repositories { 13 | mavenCentral() 14 | } 15 | 16 | dependencies { 17 | testImplementation('org.junit.jupiter:junit-jupiter:5.6.2') 18 | } 19 | 20 | test { 21 | useJUnitPlatform() 22 | testLogging { 23 | showStandardStreams true 24 | events 'started', 'skipped', 'passed', 'failed' 25 | exceptionFormat 'full' 26 | } 27 | } 28 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Pandoc/LaTeX generated files 2 | build/*.pdf 3 | build/*.html 4 | build/*.tex 5 | build/*.log 6 | build/*.aux 7 | build/*.toc 8 | build/*.out 9 | book.md 10 | pandoc/build/ 11 | pandoc/src/book.md 12 | 13 | # Java/Gradle build files 14 | *.class 15 | target/ 16 | classes/ 17 | .gradle/ 18 | build/ 19 | bin/ 20 | 21 | # C generated files 22 | y.tab.c 23 | y.tab.h 24 | lex.yy.c 25 | a.out 26 | 27 | # IDE files 28 | .idea/ 29 | .metals/ 30 | .vscode/ 31 | .scala-build/ 32 | .claude/ 33 | *.iml 34 | 35 | # OS generated files 36 | .DS_Store 37 | Thumbs.db 38 | *.swp 39 | *~ 40 | 41 | # Claude Code 42 | .claude/ 43 | CLAUDE.md -------------------------------------------------------------------------------- /code/chapter6/antlr/src/main/antlr4/com/github/asciidwango/parser_book/ch5/PetitXML.g4: -------------------------------------------------------------------------------- 1 | grammar PetitXML; 2 | @parser::header { 3 | import static com.github.asciidwango.parser_book.ch5.PetitXML.*; 4 | import java.util.*; 5 | } 6 | 7 | root returns [Element e] 8 | : v=element {$e = $v.e;} 9 | ; 10 | 11 | element returns [Element e] 12 | : ('<' begin=NAME '>' es=elements '' {$begin.text.equals($end.text)}? 
13 | {$e = new Element($begin.text, $es.es);}) 14 | | ('<' name=NAME '/>' {$e = new Element($name.text);}) 15 | ; 16 | 17 | elements returns [List es] 18 | : { $es = new ArrayList<>();} (element {$es.add($element.e);})* 19 | ; 20 | 21 | LT: '<'; 22 | GT: '>'; 23 | SLASH: '/'; 24 | NAME: [a-zA-Z_][a-zA-Z0-9]* ; 25 | 26 | WS : [ \t\n\r]+ -> skip ; 27 | -------------------------------------------------------------------------------- /code/chapter6/antlr/src/main/antlr4/com/github/asciidwango/parser_book/ch5/Expression.g4: -------------------------------------------------------------------------------- 1 | grammar Expression; 2 | 3 | expression returns [int e] 4 | : v=additive {$e = $v.e;} 5 | ; 6 | 7 | additive returns [int e = 0;] 8 | : l=multitive {$e = $l.e;} ( 9 | '+' r=multitive {$e = $e + $r.e;} 10 | | '-' r=multitive {$e = $e - $r.e;} 11 | )* 12 | ; 13 | 14 | multitive returns [int e = 0;] 15 | : l=primary {$e = $l.e;} ( 16 | '*' r=primary {$e = $e * $r.e;} 17 | | '/' r=primary {$e = $e / $r.e;} 18 | )* 19 | ; 20 | 21 | primary returns [int e] 22 | : n=NUMBER {$e = Integer.parseInt($n.getText());} 23 | | '(' x=expression ')' {$e = $x.e;} 24 | ; 25 | 26 | LP : '(' ; 27 | RP : ')' ; 28 | NUMBER : INT ; 29 | fragment INT : '0' | [1-9] [0-9]* ; // no leading zeros 30 | WS : [ \t\n\r]+ -> skip ; 31 | -------------------------------------------------------------------------------- /templates/minimal_japanese.latex: -------------------------------------------------------------------------------- 1 | \documentclass[a4paper]{ltjsarticle} 2 | \usepackage{luatexja} 3 | \usepackage{amsmath,amssymb} 4 | \usepackage{hyperref} 5 | \usepackage{graphicx} 6 | \usepackage{listings} 7 | \usepackage{xcolor} 8 | \usepackage{longtable,booktabs,array} 9 | 10 | \providecommand{\tightlist}{% 11 | \setlength{\itemsep}{0pt}\setlength{\parskip}{0pt}} 12 | 13 | % For code blocks 14 | \usepackage{fancyvrb} 15 | \DefineVerbatimEnvironment{Highlighting}{Verbatim}{commandchars=\\\{\}} 16 | 17 | 
% Highlighting commands 18 | $if(highlighting-macros)$ 19 | $highlighting-macros$ 20 | $endif$ 21 | 22 | $if(title)$ 23 | \title{$title$} 24 | $endif$ 25 | $if(author)$ 26 | \author{$author$} 27 | $endif$ 28 | $if(date)$ 29 | \date{$date$} 30 | $endif$ 31 | 32 | \begin{document} 33 | 34 | $if(title)$ 35 | \maketitle 36 | $endif$ 37 | 38 | $if(toc)$ 39 | \tableofcontents 40 | \newpage 41 | $endif$ 42 | 43 | $body$ 44 | 45 | \end{document} -------------------------------------------------------------------------------- /code/chapter6/antlr/src/main/antlr4/com/github/asciidwango/parser_book/ch5/LRExpression.g4: -------------------------------------------------------------------------------- 1 | grammar LRExpression; 2 | 3 | expression returns [int e] 4 | : v=additive {$e = $v.e;} 5 | ; 6 | 7 | additive returns [int e] 8 | : l=additive op='+' r=multitive {$e = $l.e + $r.e;} 9 | | l=additive op='-' r=multitive {$e = $l.e - $r.e;} 10 | | v=multitive {$e = $v.e;} 11 | ; 12 | 13 | multitive returns [int e] 14 | : l=multitive op='*' r=primary {$e = $l.e * $r.e;} 15 | | l=multitive op='/' r=primary {$e = $l.e / $r.e;} 16 | | v=primary {$e = $v.e;} 17 | ; 18 | 19 | primary returns [int e] 20 | : n=NUMBER {$e = Integer.parseInt($n.getText());} 21 | | '(' x=expression ')' {$e = $x.e;} 22 | ; 23 | 24 | LP 25 | : '(' 26 | ; 27 | 28 | RP : ')' 29 | ; 30 | 31 | NUMBER 32 | : INT 33 | ; 34 | 35 | fragment INT : '0' | [1-9] [0-9]* ; // no leading zeros 36 | 37 | WS : [ \t\n\r]+ -> skip ; 38 | -------------------------------------------------------------------------------- /code/chapter3/src/main/java/parser/Pair.java: -------------------------------------------------------------------------------- 1 | package parser; 2 | 3 | import java.util.Objects; 4 | 5 | public class Pair { 6 | public final T fst; 7 | public final U snd; 8 | public Pair(T fst, U snd) { 9 | this.fst = fst; 10 | this.snd = snd; 11 | } 12 | 13 | public static Pair of(T fst, U snd) { 14 | return new Pair<>(fst, snd); 
15 | } 16 | 17 | @Override 18 | public boolean equals(Object o) { 19 | if (this == o) return true; 20 | if (o == null || getClass() != o.getClass()) return false; 21 | Pair pair = (Pair) o; 22 | return Objects.equals(fst, pair.fst) && 23 | Objects.equals(snd, pair.snd); 24 | } 25 | 26 | @Override 27 | public int hashCode() { 28 | return Objects.hash(fst, snd); 29 | } 30 | 31 | @Override 32 | public String toString() { 33 | return "(" + fst + ", " + snd + ")"; 34 | } 35 | } 36 | -------------------------------------------------------------------------------- /code/chapter5/src/main/java/parser/Rule.java: -------------------------------------------------------------------------------- 1 | package parser; 2 | import java.util.*; 3 | 4 | public record Rule(char lhs, List rhs) { 5 | private static List toElements(String string) { 6 | List elements = new ArrayList<>(); 7 | for(int i = 0; i < string.length(); i++) { 8 | elements.add(new Elements.Terminal(string.charAt(i))); 9 | } 10 | return elements; 11 | } 12 | public Rule(char lhs, String rhs) { 13 | this(lhs, toElements(rhs)); 14 | } 15 | public boolean matches(String sequence) { 16 | return matches(toElements(sequence)); 17 | } 18 | public boolean matches(List sequence) { 19 | for(int i = 1; i <= rhs.size(); i++) { 20 | var a = rhs.get(rhs.size() - i); 21 | if(sequence.size() - i < 0) return false; 22 | var b = sequence.get(sequence.size() - i); 23 | if(!a.equals(b)) return false; 24 | } 25 | return true; 26 | } 27 | } -------------------------------------------------------------------------------- /code/chapter5/src/main/java/parser/lr0/LR0Item.java: -------------------------------------------------------------------------------- 1 | package parser.lr0; 2 | 3 | import java.util.Objects; 4 | 5 | public record LR0Item(Rule rule, int dotPosition) { 6 | public LR0Item { 7 | Objects.requireNonNull(rule); 8 | if (dotPosition < 0 || dotPosition > rule.body().size()) { 9 | throw new IllegalArgumentException("Invalid dot 
position"); 10 | } 11 | } 12 | 13 | public Expression nextSymbol() { 14 | return dotPosition < rule.body().size() ? rule.body().get(dotPosition) : null; 15 | } 16 | 17 | public LR0Item advance() { 18 | return new LR0Item(rule, dotPosition + 1); 19 | } 20 | 21 | @Override 22 | public String toString() { 23 | StringBuilder sb = new StringBuilder(); 24 | sb.append(rule.name()).append(" -> "); 25 | for (int i = 0; i < rule.body().size(); i++) { 26 | if (i == dotPosition) sb.append("•"); 27 | sb.append(rule.body().get(i)).append(" "); 28 | } 29 | if (dotPosition == rule.body().size()) sb.append("•"); 30 | return sb.toString(); 31 | } 32 | } -------------------------------------------------------------------------------- /code/chapter5/src/main/java/parser/slr1/LR0Item.java: -------------------------------------------------------------------------------- 1 | package parser.slr1; 2 | 3 | import java.util.Objects; 4 | 5 | public record LR0Item(Rule rule, int dotPosition) { 6 | public LR0Item { 7 | Objects.requireNonNull(rule); 8 | if (dotPosition < 0 || dotPosition > rule.body().size()) { 9 | throw new IllegalArgumentException("Invalid dot position"); 10 | } 11 | } 12 | 13 | public Expression nextSymbol() { 14 | return dotPosition < rule.body().size() ? 
rule.body().get(dotPosition) : null; 15 | } 16 | 17 | public LR0Item advance() { 18 | return new LR0Item(rule, dotPosition + 1); 19 | } 20 | 21 | @Override 22 | public String toString() { 23 | StringBuilder sb = new StringBuilder(); 24 | sb.append(rule.name()).append(" -> "); 25 | for (int i = 0; i < rule.body().size(); i++) { 26 | if (i == dotPosition) sb.append("•"); 27 | sb.append(rule.body().get(i)).append(" "); 28 | } 29 | if (dotPosition == rule.body().size()) sb.append("•"); 30 | return sb.toString(); 31 | } 32 | } -------------------------------------------------------------------------------- /code/chapter3/src/main/java/parser/Token.java: -------------------------------------------------------------------------------- 1 | package parser; 2 | 3 | import java.util.Objects; 4 | 5 | public class Token { 6 | public final Type type; 7 | public final Object value; 8 | public enum Type { 9 | STRING, INTEGER, NULL, TRUE, FALSE, 10 | LPAREN, RPAREN, LBRACE, RBRACE, LBRACKET, RBRACKET, COMMA, COLON, SPACE, EOF 11 | } 12 | 13 | public Token(Type type, Object value) { 14 | this.type = type; 15 | this.value = value; 16 | } 17 | 18 | @Override 19 | public String toString() { 20 | return "Token{" + 21 | "type=" + type + 22 | ", value=" + value + 23 | '}'; 24 | } 25 | 26 | @Override 27 | public boolean equals(Object o) { 28 | if (this == o) return true; 29 | if (o == null || getClass() != o.getClass()) return false; 30 | Token token = (Token) o; 31 | return type == token.type && 32 | Objects.equals(value, token.value); 33 | } 34 | 35 | @Override 36 | public int hashCode() { 37 | return Objects.hash(type, value); 38 | } 39 | } 40 | -------------------------------------------------------------------------------- /code/chapter6/javacc/src/test/java/com/github/kmizu/calculator/CalculatorTest.java: -------------------------------------------------------------------------------- 1 | package com.github.kmizu.calculator; 2 | 3 | import jdk.jfr.Description; 4 | import 
org.junit.jupiter.api.Test; 5 | import com.github.kmizu.calculator.Calculator; 6 | import java.io.*; 7 | 8 | import static org.junit.jupiter.api.Assertions.assertEquals; 9 | 10 | public class CalculatorTest { 11 | @Test 12 | @Description("1 + 2 * 3 = 7") 13 | public void test1() throws Exception { 14 | Calculator calculator = new Calculator(new StringReader("1 + 2 * 3")); 15 | assertEquals(7, calculator.expression()); 16 | } 17 | 18 | @Test 19 | @Description("(1 + 2) * 4 = 12") 20 | public void test2() throws Exception { 21 | Calculator calculator = new Calculator(new StringReader("(1 + 2) * 4")); 22 | assertEquals(12, calculator.expression()); 23 | } 24 | 25 | @Test 26 | @Description("(5 * 6) - (3 + 4) = 23") 27 | public void test3() throws Exception { 28 | Calculator calculator = new Calculator(new StringReader("(5 * 6) - (3 + 4)")); 29 | assertEquals(23, calculator.expression()); 30 | } 31 | } 32 | -------------------------------------------------------------------------------- /code/chapter6/yacc/calculator.y: -------------------------------------------------------------------------------- 1 | %{ 2 | #include 3 | int yylex(void); 4 | void yyerror(char const *s); 5 | int yywrap(void) {return 1;} 6 | extern int yylval; 7 | %} 8 | 9 | %token NUM 10 | %token EOL 11 | %left '+' '-' 12 | %left '*' '/' 13 | %nonassoc UMINUS 14 | 15 | %% 16 | 17 | input : expr EOL 18 | { 19 | printf("Result: %d\n", $1); 20 | } 21 | ; 22 | 23 | expr : NUM 24 | { 25 | $$ = $1; 26 | } 27 | | expr '+' expr 28 | { 29 | $$ = $1 + $3; 30 | } 31 | | expr '-' expr 32 | { 33 | $$ = $1 - $3; 34 | } 35 | | expr '*' expr 36 | { 37 | $$ = $1 * $3; 38 | } 39 | | expr '/' expr 40 | { 41 | if ($3 == 0) 42 | { 43 | yyerror("Cannot divide by zero."); 44 | } 45 | else 46 | { 47 | $$ = $1 / $3; 48 | } 49 | } 50 | | '(' expr ')' 51 | { 52 | $$ = $2; 53 | } 54 | | '-' expr %prec UMINUS 55 | { 56 | $$ = -$2; 57 | } 58 | ; 59 | 60 | %% 61 | 62 | void yyerror(char const *s) 63 | { 64 | fprintf(stderr, "Parse 
error: %s\n", s); 65 | } 66 | 67 | int main() 68 | { 69 | yyparse(); 70 | } 71 | 72 | -------------------------------------------------------------------------------- /code/chapter6/jcomb/pom.xml: -------------------------------------------------------------------------------- 1 | 3 | 4.0.0 4 | com.github.kmizu 5 | jcomb 6 | jar 7 | 0.10-SNAPSHOT 8 | jcomb 9 | https://github.com/asciidwango/parser_book 10 | 11 | 17 12 | 17 13 | 14 | 15 | 16 | org.junit.jupiter 17 | junit-jupiter-api 18 | 5.8.2 19 | test 20 | 21 | 22 | org.junit.jupiter 23 | junit-jupiter-engine 24 | 5.8.2 25 | test 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | -------------------------------------------------------------------------------- /code/chapter5/src/test/java/parser/RuleTest.java: -------------------------------------------------------------------------------- 1 | package parser; 2 | 3 | import static org.junit.jupiter.api.Assertions.*; 4 | import org.junit.jupiter.api.Test; 5 | 6 | import java.util.List; 7 | 8 | public class RuleTest { 9 | @Test 10 | public void testEmptyRuleShouldMatchEmptySequence() { 11 | var r = new Rule('A', ""); 12 | assertTrue(r.matches("")); 13 | } 14 | 15 | @Test 16 | public void testRegularRuleShouldMatchCorrectSequence() { 17 | var r = new Rule('A', "abc"); 18 | assertTrue(r.matches("abc")); 19 | } 20 | 21 | @Test 22 | public void testRegularRuleShouldNotMatchIncorrectSequence() { 23 | var r = new Rule('A', "abc"); 24 | assertFalse(r.matches("abd")); 25 | } 26 | 27 | @Test 28 | public void testRegularRuleShouldNotMatchShorterSequence() { 29 | var r = new Rule('A', "abc"); 30 | assertFalse(r.matches("ab")); 31 | } 32 | 33 | @Test 34 | public void testRegularRuleShouldMatchLongerSuffix() { 35 | var r = new Rule('A', "abc"); 36 | assertTrue(r.matches("dabc")); 37 | } 38 | 39 | @Test 40 | public void testRegularRuleShouldNotMatchPrefix() { 41 | var r = new Rule('A', "abc"); 42 | assertFalse(r.matches("abcd")); 43 | } 44 | } 45 | 
options {
  STATIC = false;
  JDK_VERSION = "17";
}

PARSER_BEGIN(Calculator)
package com.github.kmizu.calculator;

/**
 * Integer calculator generated by JavaCC.
 *
 * Grammar (lowest to highest precedence):
 *   expression := add
 *   add        := mult    ( "+" mult    | "-" mult    )*
 *   mult       := primary ( "*" primary | "/" primary )*
 *   primary    := "(" expression ")" | INTEGER
 */
public class Calculator {
  /** Reads one expression from standard input and prints its value. */
  public static void main(String[] args) throws ParseException {
    Calculator parser = new Calculator(System.in);
    // The grammar defines no start() production; expression() is the entry point.
    System.out.println(parser.expression());
  }
}
PARSER_END(Calculator)

SKIP : { " " | "\t" | "\r" | "\n" }

// NOTE(review): the token definitions and references were lost in the copy this
// was reviewed from (everything between angle brackets was stripped); the names
// below are reconstructed - confirm them against the original grammar.
TOKEN : {
  <ADD: "+">
| <SUBTRACT: "-">
| <MULTIPLY: "*">
| <DIVIDE: "/">
| <LPAREN: "(">
| <RPAREN: ")">
| <INTEGER: (["0"-"9"])+>
}

/** Entry point: evaluates a whole expression and returns its value. */
public int expression() :
{int r = 0;}
{
  r=add() { return r; }
}

/** Left-associative chain of additions and subtractions. */
public int add() :
{int r = 0; int v = 0;}
{
  r=mult() ( <ADD> v=mult() { r += v; } | <SUBTRACT> v=mult() { r -= v; } )* {
    return r;
  }
}

/** Left-associative chain of multiplications and divisions. */
public int mult() :
{int r = 0; int v = 0;}
{
  // The right operand of "/" must go into v; assigning it to r discarded the
  // left operand and divided by whatever v held from an earlier iteration.
  r=primary() ( <MULTIPLY> v=primary() { r *= v; } | <DIVIDE> v=primary() { r /= v; } )* {
    return r;
  }
}

/** A parenthesised expression or an integer literal. */
public int primary() :
{int r = 0; Token t = null;}
{
  (
    <LPAREN> r=expression() <RPAREN>
  | t=<INTEGER> { r = Integer.parseInt(t.image); }
  ) { return r; }
}
Techniquesにしても構文解析を専門としている人向けの解説書であって、構文解析の初学者向けの本ではない。 9 | 10 | これは、構文解析が、1980年代以降、「解決した問題」と思われていたことが一因ではないかと思われる。しかし、Bryan Fordが発表したPackrat Parsing(2002) 11 | やその理論的基礎をなすParsing Expression Grammar(2004)、トップダウン型構文解析の新たな地平を切り開いたLL(*)(2010)、Adaptive LL(*)(2014)、GLL Parsing(2010)など、新しい構文解析アルゴリズムは近年も登場しており、構文解析は「解決した問題」とはいえない。 12 | 13 | さらに、IDEの発展などに伴い、ユーザーフレンドリーなエラーメッセージの提示という観点から構文解析という領域は再び注目を集めているとも言える。 14 | 15 | 今ここで、再び構文解析という世界に光をあて 16 | 17 | * 非自然言語の構文解析とはどのような作業なのか 18 | * 構文解析にはどのようなアルゴリズムがあるのか 19 | * 各構文解析アルゴリズムの特徴と制限 20 | * 決定的な下向き構文解析(LLファミリー) 21 | * 決定的な上向き構文解析(LRファミリー) 22 | * PEG 23 | * 構文解析器生成系について 24 | * 現実の構文解析において遭遇する問題 25 | 26 | といったトピックを扱う本を出版したいと考えている。本書を読むことを通じて、読者は 27 | 28 | * 自分が行いたい作業にあった構文解析アルゴリズムを適切に選ぶことができる 29 | * 必要に応じて、構文解析アルゴリズムを使い分けることができる 30 | * 手書きパーザー 31 | * パーザージェネレーター 32 | * パーザコンビネーター 33 | * より柔軟な文法を組み立てることができる 34 | 35 | ようになることが期待される。 36 | -------------------------------------------------------------------------------- /contents/references.md: -------------------------------------------------------------------------------- 1 | 2 | 3 | # 参考文献 4 | 5 | ## 日本語書籍 6 | 7 | - A.V.エイホ、R.セシィ、M.S.ラム、J.D.ウルマン,『コンパイラ - 原理・技法・ツール』(第2版) サイエンス社、2009年 8 | - 新屋良磨、鈴木勇介、高田謙、正規表現技術入門―⁠―最新エンジン実装と理論的背景、技術評論社、2015年 9 | 10 | ## 英語書籍 11 | 12 | - Andrew W. Appel『Modern Compiler Implementation in ML』Cambridge University Press, 2008年 13 | - Terence Parr『Language Implementation Patterns』Pragmatic Bookshelf, 2009年 14 | - Terence Parr『The Definitive ANTLR 4 Reference』Pragmatic Bookshelf, 2013年 15 | 16 | ## 重要論文 17 | 18 | - Donald E. Knuth「On the Translation of Languages from Left to Right」Information and Control, Vol. 8, No. 6, pp. 607-639, 1965年 19 | - Frank DeRemer「Simple LR(k) grammars」Communications of the ACM, Vol. 14, No. 7, pp. 453-460, 1971年 20 | - Bryan Ford「Parsing expression grammars: a recognition-based syntactic foundation」POPL '04, pp. 
111-122, 2004年 21 | - Terence Parr, Kathleen Fisher「LL(*): The Foundation of the ANTLR Parser Generator」PLDI '11, pp. 425-436, 2011年 22 | 23 | ## 仕様書・標準 24 | 25 | - [ECMA-404 The JSON data interchange syntax](https://ecma-international.org/publications-and-standards/standards/ecma-404/) 26 | - [ISO/IEC 14977:1996 EBNF](https://www.iso.org/standard/26153.html) 27 | 28 | ## Webリソース 29 | 30 | - [Crafting Interpreters by Robert Nystrom](https://craftinginterpreters.com/) 31 | - [Let's Build a Compiler by Jack Crenshaw](https://compilers.iecc.com/crenshaw/) 32 | - [Writing An Interpreter In Go by Thorsten Ball](https://interpreterbook.com/) -------------------------------------------------------------------------------- /metadata.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | title: "構文解析のしくみ" 3 | author: "水島宏太" 4 | date: "2025年6月" 5 | publisher: "ASCII Dwango" 6 | documentclass: ltjsbook 7 | geometry: 8 | - margin=2cm 9 | lang: ja 10 | mainfont: "Latin Modern Roman" 11 | sansfont: "Latin Modern Sans" 12 | monofont: "Latin Modern Mono" 13 | CJKmainfont: "Noto Serif CJK JP" 14 | CJKsansfont: "Noto Sans CJK JP" 15 | CJKmonofont: "Noto Sans Mono CJK JP" 16 | classoption: 17 | - oneside 18 | - openany 19 | fontsize: 10pt 20 | linestretch: 1.2 21 | toc: true 22 | toc-depth: 3 23 | numbersections: true 24 | secnumdepth: 3 25 | highlight-style: tango 26 | codeBlockCaptions: true 27 | linkReferences: true 28 | nameInLink: true 29 | pdf-engine: lualatex 30 | pdf-engine-opts: 31 | - --shell-escape 32 | 33 | # カスタム設定 34 | header-includes: | 35 | \usepackage{luatexja-fontspec} 36 | \usepackage{fancyhdr} 37 | \usepackage{graphicx} 38 | \usepackage{float} 39 | \usepackage{longtable} 40 | \usepackage{booktabs} 41 | \usepackage{array} 42 | \usepackage{multirow} 43 | \usepackage{wrapfig} 44 | \usepackage{colortbl} 45 | \usepackage{pdflscape} 46 | \usepackage{tabu} 47 | \usepackage{threeparttable} 48 | \usepackage{threeparttablex} 49 | 
\usepackage[normalem]{ulem} 50 | \usepackage{makecell} 51 | \usepackage{xcolor} 52 | \usepackage{listings} 53 | \usepackage{tikz} 54 | \usetikzlibrary{shapes,arrows,positioning,calc,automata,chains,fit,decorations.pathmorphing} 55 | \pagestyle{fancy} 56 | \fancyhf{} 57 | \fancyhead[C]{\thepage} 58 | \renewcommand{\headrulewidth}{0pt} 59 | --- -------------------------------------------------------------------------------- /code/chapter6/antlr/src/test/java/com/github/asciidwango/parser_book/ch5/ANTLRExpressionParserTest.java: -------------------------------------------------------------------------------- 1 | package com.github.asciidwango.parser_book.ch5; 2 | 3 | 4 | import org.antlr.v4.runtime.ANTLRInputStream; 5 | import org.antlr.v4.runtime.CommonTokenStream; 6 | import org.junit.jupiter.api.DisplayName; 7 | import org.junit.jupiter.api.Test; 8 | 9 | import java.io.StringReader; 10 | 11 | import static org.junit.jupiter.api.Assertions.assertEquals; 12 | 13 | public class ANTLRExpressionParserTest { 14 | private static CommonTokenStream streamFrom(String input) throws Exception { 15 | var antlrStream = new ANTLRInputStream(new StringReader(input)); 16 | var lexer = new ExpressionLexer(antlrStream); 17 | return new CommonTokenStream(lexer); 18 | } 19 | @DisplayName("ANTLRで1をパースした結果が1になることをテストする") 20 | @Test 21 | public void test1() throws Exception { 22 | var parser = new ExpressionParser(streamFrom("1")); 23 | assertEquals(1, parser.expression().e); 24 | } 25 | 26 | @DisplayName("ANTLRで(1 + 2) * 3をパースした結果が9になることをテストする") 27 | @Test 28 | public void test2() throws Exception { 29 | var parser = new ExpressionParser(streamFrom("(1 + 2) * 3")); 30 | assertEquals(9, parser.expression().e); 31 | } 32 | 33 | @DisplayName("ANTLRで(1 + 2) * 4 / 3をパースした結果が4になることをテストする") 34 | @Test 35 | public void test3() throws Exception { 36 | var parser = new ExpressionParser(streamFrom("(1 + 2) * 4 / 3")); 37 | assertEquals(4, parser.expression().e); 38 | } 39 | } 40 | 
-------------------------------------------------------------------------------- /code/chapter6/antlr/src/test/java/com/github/asciidwango/parser_book/ch5/ANTLRLRExpressionParserTest.java: -------------------------------------------------------------------------------- 1 | package com.github.asciidwango.parser_book.ch5; 2 | 3 | 4 | import org.antlr.v4.runtime.ANTLRInputStream; 5 | import org.antlr.v4.runtime.CommonTokenStream; 6 | import org.junit.jupiter.api.DisplayName; 7 | import org.junit.jupiter.api.Test; 8 | 9 | import java.io.StringReader; 10 | 11 | import static org.junit.jupiter.api.Assertions.assertEquals; 12 | 13 | public class ANTLRLRExpressionParserTest { 14 | private static CommonTokenStream streamFrom(String input) throws Exception { 15 | var antlrStream = new ANTLRInputStream(new StringReader(input)); 16 | var lexer = new ExpressionLexer(antlrStream); 17 | return new CommonTokenStream(lexer); 18 | } 19 | @DisplayName("ANTLRで1をパースした結果が1になることをテストする") 20 | @Test 21 | public void test1() throws Exception { 22 | var parser = new LRExpressionParser(streamFrom("1")); 23 | assertEquals(1, parser.expression().e); 24 | } 25 | 26 | @DisplayName("ANTLRで(1 + 2) * 3をパースした結果が9になることをテストする") 27 | @Test 28 | public void test2() throws Exception { 29 | var parser = new LRExpressionParser(streamFrom("(1 + 2) * 3")); 30 | assertEquals(9, parser.expression().e); 31 | } 32 | 33 | @DisplayName("ANTLRで(1 + 2) * 4 / 3をパースした結果が4になることをテストする") 34 | @Test 35 | public void test3() throws Exception { 36 | var parser = new LRExpressionParser(streamFrom("(1 + 2) * 4 / 3")); 37 | assertEquals(4, parser.expression().e); 38 | } 39 | } 40 | -------------------------------------------------------------------------------- /code/chapter6/antlr/src/test/java/com/github/asciidwango/parser_book/ch5/ANTLRPetitXMLParserTest.java: -------------------------------------------------------------------------------- 1 | package com.github.asciidwango.parser_book.ch5; 2 | 3 | import 
org.antlr.v4.runtime.CommonTokenStream; 4 | import org.junit.jupiter.api.DisplayName; 5 | import org.junit.jupiter.api.Test; 6 | 7 | import static org.junit.jupiter.api.Assertions.assertEquals; 8 | 9 | public class ANTLRPetitXMLParserTest { 10 | private static CommonTokenStream streamFrom(String input) throws Exception { 11 | var antlrStream = new org.antlr.v4.runtime.ANTLRInputStream(new java.io.StringReader(input)); 12 | var lexer = new PetitXMLLexer(antlrStream); 13 | return new CommonTokenStream(lexer); 14 | } 15 | 16 | @DisplayName("ANTLRでをパースした結果がElement(e)になることをテストする") 17 | @Test 18 | public void test1() throws Exception { 19 | var parser = new PetitXMLParser(streamFrom("")); 20 | assertEquals(new PetitXML.Element("e"), parser.root().e); 21 | } 22 | 23 | @DisplayName("ANTLRでをパースした結果がElement(e)になることをテストする") 24 | @Test 25 | public void test2() throws Exception { 26 | var parser = new PetitXMLParser(streamFrom("")); 27 | assertEquals(new PetitXML.Element("e"), parser.root().e); 28 | } 29 | 30 | @DisplayName("ANTLRでをパースした結果がElement(e, Element(f))になることをテストする") 31 | @Test 32 | public void test3() throws Exception { 33 | var parser = new PetitXMLParser(streamFrom("")); 34 | assertEquals(new PetitXML.Element("e", java.util.List.of(new PetitXML.Element("f"))), parser.root().e); 35 | } 36 | } 37 | -------------------------------------------------------------------------------- /code/chapter5/src/test/java/parser/slr1/SLR1ParserFollowTest.java: -------------------------------------------------------------------------------- 1 | package parser.slr1; 2 | 3 | import org.junit.jupiter.api.Test; 4 | 5 | import static java.util.List.of; 6 | import static org.junit.jupiter.api.Assertions.*; 7 | 8 | class SLR1ParserFollowTest { 9 | 10 | @Test 11 | void testSimpleFollowSets() { 12 | // E -> E + T | T 13 | // T -> T * id | id 14 | var grammar = new Grammar("E", of( 15 | new Rule("E", of( 16 | new Expression.NonTerminal("E"), 17 | new Expression.Terminal("+"), 18 | new 
Expression.NonTerminal("T") 19 | )), 20 | new Rule("E", of(new Expression.NonTerminal("T"))), 21 | new Rule("T", of( 22 | new Expression.NonTerminal("T"), 23 | new Expression.Terminal("*"), 24 | new Expression.Terminal("id") 25 | )), 26 | new Rule("T", of(new Expression.Terminal("id"))) 27 | )); 28 | 29 | var parser = new SLR1Parser(grammar); 30 | 31 | // FOLLOWセットをデバッグ出力 32 | System.out.println("\n=== FOLLOW Sets Debug ==="); 33 | var followSets = parser.getFollowSets(); // このメソッドを追加する必要がある 34 | for (var entry : followSets.entrySet()) { 35 | System.out.println("FOLLOW(" + entry.getKey() + ") = " + entry.getValue()); 36 | } 37 | 38 | // "id * id + id" を認識できるはず 39 | assertTrue(parser.parse(of( 40 | new Expression.Terminal("id"), 41 | new Expression.Terminal("*"), 42 | new Expression.Terminal("id"), 43 | new Expression.Terminal("+"), 44 | new Expression.Terminal("id") 45 | ))); 46 | } 47 | } -------------------------------------------------------------------------------- /code/chapter5/src/main/java/parser/Dyck.java: -------------------------------------------------------------------------------- 1 | package parser; 2 | 3 | import java.util.ArrayList; 4 | import java.util.List; 5 | import java.util.Optional; 6 | import parser.Elements.*; 7 | 8 | public class Dyck { 9 | private String input; 10 | private int position; 11 | private final List rules; 12 | 13 | private List symbols; 14 | 15 | public Dyck() { 16 | this.rules = List.of( 17 | new Rule('D', List.of(new Terminal('$'), new NonTerminal('P'), new Terminal('$'))), 18 | new Rule('P', "()"), 19 | new Rule('P', List.of(new Terminal('('), new NonTerminal('P'), new Terminal(')'))) 20 | ); 21 | } 22 | 23 | private Optional findRule() { 24 | return rules.stream().filter(r -> r.matches(symbols)).findFirst(); 25 | } 26 | 27 | private void shift() { 28 | symbols.add(new Elements.Terminal(input.charAt(position))); 29 | position++; 30 | } 31 | 32 | private void reduce(Rule rule) { 33 | for(int i = 1; i <= rule.rhs().size(); 
i++) { 34 | symbols.remove(symbols.size() - 1); 35 | } 36 | symbols.add(new Elements.NonTerminal(rule.lhs())); 37 | } 38 | 39 | public boolean parse(String source) { 40 | this.input = "$" + source + "$"; 41 | this.position = 0; 42 | this.symbols = new ArrayList<>(); 43 | 44 | while(true) { 45 | var optRule = findRule(); 46 | if(optRule.isPresent()) { 47 | reduce(optRule.get()); 48 | if(symbols.size() == 1) return true; 49 | } else { 50 | if(position >= input.length()) return false; 51 | shift(); 52 | } 53 | } 54 | } 55 | } 56 | -------------------------------------------------------------------------------- /code/chapter5/src/main/java/parser/lr0/LR0ItemSet.java: -------------------------------------------------------------------------------- 1 | package parser.lr0; 2 | 3 | import java.util.HashSet; 4 | import java.util.Objects; 5 | import java.util.Set; 6 | 7 | public class LR0ItemSet { 8 | private final Set items; 9 | 10 | public LR0ItemSet(Set items) { 11 | this.items = new HashSet<>(Objects.requireNonNull(items)); 12 | } 13 | 14 | public Set items() { 15 | return items; 16 | } 17 | 18 | public LR0ItemSet closure(Grammar grammar) { 19 | Set closure = new HashSet<>(items); 20 | boolean added; 21 | do { 22 | added = false; 23 | Set newItems = new HashSet<>(); // 新しいアイテムを一時的に保持 24 | for (LR0Item item : closure) { 25 | Expression next = item.nextSymbol(); 26 | if (next instanceof Expression.NonTerminal nt) { 27 | for (Rule rule : grammar.rules()) { 28 | if (rule.name().equals(nt.name())) { 29 | LR0Item newItem = new LR0Item(rule, 0); 30 | if (!closure.contains(newItem)) { // 追加前にチェック 31 | newItems.add(newItem); // 一時セットに追加 32 | added = true; 33 | } 34 | } 35 | } 36 | } 37 | } 38 | closure.addAll(newItems); // 一時セットのアイテムをまとめて追加 39 | } while (added); 40 | return new LR0ItemSet(closure); 41 | } 42 | 43 | @Override 44 | public boolean equals(Object o) { 45 | if (this == o) return true; 46 | if (o == null || getClass() != o.getClass()) return false; 47 | LR0ItemSet that 
= (LR0ItemSet) o; 48 | return items.equals(that.items); 49 | } 50 | 51 | @Override 52 | public int hashCode() { 53 | return Objects.hash(items); 54 | } 55 | 56 | @Override 57 | public String toString() { 58 | StringBuilder sb = new StringBuilder("{\n"); 59 | for (LR0Item item : items) { 60 | sb.append(" ").append(item).append("\n"); 61 | } 62 | sb.append("}"); 63 | return sb.toString(); 64 | } 65 | } -------------------------------------------------------------------------------- /code/chapter5/src/main/java/parser/slr1/LR0ItemSet.java: -------------------------------------------------------------------------------- 1 | package parser.slr1; 2 | 3 | import java.util.HashSet; 4 | import java.util.Objects; 5 | import java.util.Set; 6 | 7 | public class LR0ItemSet { 8 | private final Set items; 9 | 10 | public LR0ItemSet(Set items) { 11 | this.items = new HashSet<>(Objects.requireNonNull(items)); 12 | } 13 | 14 | public Set items() { 15 | return items; 16 | } 17 | 18 | public LR0ItemSet closure(Grammar grammar) { 19 | Set closure = new HashSet<>(items); 20 | boolean added; 21 | do { 22 | added = false; 23 | Set newItems = new HashSet<>(); // 新しいアイテムを一時的に保持 24 | for (LR0Item item : closure) { 25 | Expression next = item.nextSymbol(); 26 | if (next instanceof Expression.NonTerminal nt) { 27 | for (Rule rule : grammar.rules()) { 28 | if (rule.name().equals(nt.name())) { 29 | LR0Item newItem = new LR0Item(rule, 0); 30 | if (!closure.contains(newItem)) { // 追加前にチェック 31 | newItems.add(newItem); // 一時セットに追加 32 | added = true; 33 | } 34 | } 35 | } 36 | } 37 | } 38 | closure.addAll(newItems); // 一時セットのアイテムをまとめて追加 39 | } while (added); 40 | return new LR0ItemSet(closure); 41 | } 42 | 43 | @Override 44 | public boolean equals(Object o) { 45 | if (this == o) return true; 46 | if (o == null || getClass() != o.getClass()) return false; 47 | LR0ItemSet that = (LR0ItemSet) o; 48 | return items.equals(that.items); 49 | } 50 | 51 | @Override 52 | public int hashCode() { 53 | return 
Objects.hash(items); 54 | } 55 | 56 | @Override 57 | public String toString() { 58 | StringBuilder sb = new StringBuilder("{\n"); 59 | for (LR0Item item : items) { 60 | sb.append(" ").append(item).append("\n"); 61 | } 62 | sb.append("}"); 63 | return sb.toString(); 64 | } 65 | } -------------------------------------------------------------------------------- /code/chapter5/src/main/java/parser/slr1/Main.java: -------------------------------------------------------------------------------- 1 | package parser.slr1; 2 | 3 | import java.util.ArrayList; 4 | import java.util.List; 5 | import static java.util.List.of; 6 | import static parser.slr1.Expression.*; 7 | 8 | public class Main { 9 | private static List toExpressions(String input) { 10 | List result = new ArrayList<>(); 11 | for(int i = 0; i < input.length(); i++) { 12 | result.add(new Expression.Terminal(input.substring(i, i + 1))); 13 | } 14 | return result; 15 | } 16 | public static void main(String[] args) { 17 | Grammar grammar = new Grammar("E", List.of( 18 | new Rule("E", of(new NonTerminal("E"), new Terminal("+"), new NonTerminal("T"))), 19 | new Rule("E", of(new NonTerminal("T"))), 20 | new Rule("T", of(new NonTerminal("T"), new Terminal("*"), new NonTerminal("F"))), 21 | new Rule("T", of(new NonTerminal("F"))), 22 | new Rule("F", of(new Terminal("("), new NonTerminal("E"), new Terminal(")"))), 23 | new Rule("F", of(new Terminal("x"))) 24 | )); 25 | /* 26 | Grammar grammar = new Grammar( 27 | "E", 28 | List.of( 29 | // E := T E 30 | new Rule("E", of(new NonTerminal("T"), new NonTerminal("E'"))), 31 | new Rule("E'", of(new Terminal("+"), new NonTerminal("T"), new NonTerminal("E'"))), 32 | new Rule("E'", of()), // ε (空生成) 33 | new Rule("T", of(new NonTerminal("F"), new NonTerminal("T'"))), 34 | new Rule("T'", of(new Terminal("*"), new NonTerminal("F"), new NonTerminal("T'"))), 35 | new Rule("T'", of()), // ε (空生成) 36 | new Rule("F", of(new Terminal("("), new NonTerminal("E"), new Terminal(")"))), 37 | new 
Rule("F", of(new Terminal("x"))) 38 | ) 39 | ); 40 | */ 41 | 42 | SLR1Parser parser = new SLR1Parser(grammar); 43 | boolean result = parser.parse(toExpressions("((x+(x*x)))")); 44 | 45 | System.out.println("Parsing result: " + result); 46 | } 47 | } -------------------------------------------------------------------------------- /code/chapter6/jcomb/src/test/java/com/github/kmizu/jcomb/JCombTest.java: -------------------------------------------------------------------------------- 1 | package com.github.kmizu.jcomb; 2 | 3 | import jdk.jfr.Description; 4 | import org.junit.jupiter.api.Test; 5 | import java.io.*; 6 | 7 | import static org.junit.jupiter.api.Assertions.assertEquals; 8 | import static com.github.kmizu.jcomb.JComb.*; 9 | 10 | public class JCombTest { 11 | @Test 12 | @Description("f") 13 | public void testEmpty() throws Exception { 14 | JParser foo = string(""); 15 | assertEquals(new Result<>("", "f" + 16 | ""), foo.parse("f")); // Result("f", "") 17 | } 18 | @Test 19 | @Description("f") 20 | public void testF() throws Exception { 21 | JParser foo = string("f"); 22 | assertEquals(new Result<>("f", ""), foo.parse("f")); // Result("f", "") 23 | } 24 | @Test 25 | @Description("foo") 26 | public void testFoo() throws Exception { 27 | JParser foo = string("foo"); 28 | assertEquals(new Result<>("foo", "_bar"), foo.parse("foo_bar")); // Result("foo", "_bar") 29 | } 30 | 31 | @Test 32 | @Description("a / b") 33 | public void testAltAB() throws Exception { 34 | JParser a = string("a"); 35 | JParser b = string("b"); 36 | JParser ab = alt(a, b); 37 | assertEquals(new Result<>("a", ""), ab.parse("a")); // Result("a", "") 38 | assertEquals(new Result<>("b", ""), ab.parse("b")); // Result("b", "") 39 | } 40 | 41 | @Test 42 | @Description("a b") 43 | public void testSeqAB() throws Exception { 44 | JParser> ab = seq(string("a"), string("b")); 45 | assertEquals(new Result<>(new Pair<>("a", "b"), ""), ab.parse("ab")); // Result>(new Pair<>("a", "b"), "") 46 | } 47 | 48 | 
@Test 49 | @Description("1000") 50 | public void testRegex() throws Exception { 51 | JParser regex = regex("[0-9]+"); 52 | assertEquals(new Result<>("1000", ""), regex.parse("1000")); // Result("1000", "") 53 | } 54 | 55 | @Test 56 | @Description("1000") 57 | public void testMap() throws Exception { 58 | JParser regex = regex("[0-9]+").map(Integer::parseInt); 59 | assertEquals(new Result<>(1000, ""), regex.parse("1000")); // Result(1000, "") 60 | } 61 | } 62 | -------------------------------------------------------------------------------- /code/chapter6/javacc/pom.xml: -------------------------------------------------------------------------------- 1 | 3 | 4.0.0 4 | com.github.kmizu 5 | calculator 6 | jar 7 | 0.10-SNAPSHOT 8 | calculator 9 | https://github.com/asciidwango/parser_book 10 | 11 | 17 12 | 17 13 | 14 | 15 | 16 | org.junit.jupiter 17 | junit-jupiter-api 18 | 5.8.2 19 | test 20 | 21 | 22 | org.junit.jupiter 23 | junit-jupiter-engine 24 | 5.8.2 25 | test 26 | 27 | 28 | 29 | 30 | 31 | com.helger.maven 32 | ph-javacc-maven-plugin 33 | 4.1.4 34 | 35 | 36 | javacc 37 | generate-sources 38 | 39 | javacc 40 | 41 | 42 | 17 43 | src/main/java/com/github/kmizu/calculator 44 | ${project.build.directory}/generated-sources/javacc 45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # 構文解析の本 2 | 3 | アスキードワンゴで出版予定の構文解析の技術書です。Pandoc形式で執筆・管理しています。 4 | 5 | # リポジトリの構成 6 | 7 | ``` 8 | ./ 9 | README.md // このファイル 10 | PURPOSE.md // 本書の趣旨 11 | book.md // 統合された書籍全体(自動生成) 12 | metadata.yaml // 書籍メタデータ 13 | build_pdf.sh // PDF生成スクリプト 14 | contents/ // 個別章のMarkdownファイル 15 | chapter1.md // 第1章:構文解析の世界へようこそ 16 | chapter2.md // 第2章:構文解析の基礎 17 | chapter3.md // 第3章:JSONの構文解析 18 | chapter4.md // 第4章:文脈自由文法の世界 19 | chapter5.md // 第5章:構文解析アルゴリズム・処理系統 20 | chapter6.md // 第6章:構文解析器生成系の世界 21 | chapter7.md // 第7章:現実の構文解析 22 | 
chapter8.md // 第8章:おわりに 23 | references.md // 参考文献 24 | templates/ // LaTeX/PDFテンプレート 25 | build/ // ビルド出力ディレクトリ 26 | code/ // サンプルコード 27 | chapter3/ // JSON パーサー実装 28 | chapter5/ // SLR(1) パーサー実装 29 | chapter6/ // パーサージェネレータの例 30 | pandoc/ // 旧Pandoc環境(移行後削除予定) 31 | .gitignore // gitの管理対象から除外するパターン 32 | ``` 33 | 34 | ## 必要な環境 35 | 36 | - [Pandoc](https://pandoc.org/installing.html) 2.9以上 37 | - [LuaLaTeX](https://www.luatex.org/) (TeX Live 2022以上推奨) 38 | - 日本語フォント (Noto CJK フォント推奨) 39 | 40 | ### Ubuntu/Debianでのインストール 41 | 42 | [こちら](https://qiita.com/YuH25/items/76f056bf691855e420e0)を参考に、以下のコマンドで必要なパッケージをインストールできます。 43 | 44 | ```bash 45 | # Pandoc 46 | sudo apt update 47 | sudo apt install -y pandoc 48 | 49 | # TeX Live (LuaLaTeX含む) 50 | sudo apt install -y texlive-lang-japanese 51 | sudo apt install -y texlive-luatex 52 | sudo apt install -y texlive-pictures texlive-latex-extra 53 | 54 | # 日本語フォント 55 | sudo apt install -y fonts-noto-cjk 56 | 57 | # rsvg-convert # SVG画像の変換に必要 58 | sudo apt install -y librsvg2-dev 59 | ``` 60 | 61 | ### macOSでのインストール 62 | 63 | ```bash 64 | # Homebrew使用 65 | brew install pandoc 66 | brew install --cask mactex 67 | 68 | # 日本語フォント 69 | brew install --cask font-noto-sans-cjk-jp 70 | ``` 71 | 72 | ## 書籍のビルド方法 73 | 74 | ### PDFビルド 75 | 76 | ```bash 77 | ./build_pdf.sh 78 | # build/parser_book.pdf が生成されます 79 | ``` 80 | 81 | ### HTMLプレビュー 82 | 83 | ```bash 84 | ./build_pdf.sh preview 85 | # build/preview.html が生成されます 86 | # ブラウザでbuild/preview.htmlを開く 87 | ``` 88 | 89 | ## 執筆ワークフロー 90 | 91 | ```bash 92 | # 章を編集(contents/*.md) 93 | vim contents/chapter1.md 94 | 95 | # PDFを生成 96 | ./build_pdf.sh 97 | ``` 98 | 99 | ## 趣旨 100 | 101 | [こちら](./PURPOSE.md)参照 102 | -------------------------------------------------------------------------------- /code/chapter5/src/test/java/parser/slr1/SLR1ParserSimpleTest.java: -------------------------------------------------------------------------------- 1 | package parser.slr1; 2 | 3 | import 
org.junit.jupiter.api.Test; 4 | 5 | import static java.util.List.of; 6 | import static org.junit.jupiter.api.Assertions.*; 7 | 8 | class SLR1ParserSimpleTest { 9 | 10 | @Test 11 | void testVerySimpleGrammar() { 12 | // 非常に簡単な文法 13 | // S -> a 14 | var grammar = new Grammar("S", of( 15 | new Rule("S", of(new Expression.Terminal("a"))) 16 | )); 17 | 18 | var parser = new SLR1Parser(grammar); 19 | 20 | // "a" を認識 21 | assertTrue(parser.parse(of( 22 | new Expression.Terminal("a") 23 | ))); 24 | 25 | // "b" は認識しない 26 | assertFalse(parser.parse(of( 27 | new Expression.Terminal("b") 28 | ))); 29 | 30 | // 空は認識しない 31 | assertFalse(parser.parse(of())); 32 | } 33 | 34 | @Test 35 | void testSimpleSequence() { 36 | // S -> a b 37 | var grammar = new Grammar("S", of( 38 | new Rule("S", of( 39 | new Expression.Terminal("a"), 40 | new Expression.Terminal("b") 41 | )) 42 | )); 43 | 44 | var parser = new SLR1Parser(grammar); 45 | 46 | // "a b" を認識 47 | assertTrue(parser.parse(of( 48 | new Expression.Terminal("a"), 49 | new Expression.Terminal("b") 50 | ))); 51 | 52 | // "a" だけは認識しない 53 | assertFalse(parser.parse(of( 54 | new Expression.Terminal("a") 55 | ))); 56 | 57 | // "b" だけは認識しない 58 | assertFalse(parser.parse(of( 59 | new Expression.Terminal("b") 60 | ))); 61 | } 62 | 63 | @Test 64 | void testSimpleChoice() { 65 | // S -> a | b 66 | var grammar = new Grammar("S", of( 67 | new Rule("S", of(new Expression.Terminal("a"))), 68 | new Rule("S", of(new Expression.Terminal("b"))) 69 | )); 70 | 71 | var parser = new SLR1Parser(grammar); 72 | 73 | // "a" を認識 74 | assertTrue(parser.parse(of( 75 | new Expression.Terminal("a") 76 | ))); 77 | 78 | // "b" を認識 79 | assertTrue(parser.parse(of( 80 | new Expression.Terminal("b") 81 | ))); 82 | 83 | // "c" は認識しない 84 | assertFalse(parser.parse(of( 85 | new Expression.Terminal("c") 86 | ))); 87 | } 88 | } -------------------------------------------------------------------------------- /code/chapter5/src/main/java/parser/lr0/Main.java: 
-------------------------------------------------------------------------------- 1 | package parser.lr0; 2 | 3 | import static java.util.List.of; 4 | 5 | public class Main { 6 | public static void main(String[] args) { 7 | // 簡単な算術式の文法 8 | // E -> E + T | T 9 | // T -> T * F | F 10 | // F -> ( E ) | id 11 | var grammar = new Grammar("E", of( 12 | new Rule("E", of( 13 | new Expression.NonTerminal("E"), 14 | new Expression.Terminal("+"), 15 | new Expression.NonTerminal("T") 16 | )), 17 | new Rule("E", of(new Expression.NonTerminal("T"))), 18 | new Rule("T", of( 19 | new Expression.NonTerminal("T"), 20 | new Expression.Terminal("*"), 21 | new Expression.NonTerminal("F") 22 | )), 23 | new Rule("T", of(new Expression.NonTerminal("F"))), 24 | new Rule("F", of( 25 | new Expression.Terminal("("), 26 | new Expression.NonTerminal("E"), 27 | new Expression.Terminal(")") 28 | )), 29 | new Rule("F", of(new Expression.Terminal("id"))) 30 | )); 31 | 32 | var recognizer = new LR0Recognizer(grammar); 33 | 34 | // 状態とテーブルを表示 35 | recognizer.printStates(); 36 | recognizer.printTables(); 37 | 38 | // テスト入力 39 | System.out.println("\n=== Testing: id + id * id ==="); 40 | var input1 = of( 41 | new Expression.Terminal("id"), 42 | new Expression.Terminal("+"), 43 | new Expression.Terminal("id"), 44 | new Expression.Terminal("*"), 45 | new Expression.Terminal("id") 46 | ); 47 | boolean result1 = recognizer.recognize(input1); 48 | System.out.println("Result: " + result1); 49 | 50 | System.out.println("\n=== Testing: ( id + id ) ==="); 51 | var input2 = of( 52 | new Expression.Terminal("("), 53 | new Expression.Terminal("id"), 54 | new Expression.Terminal("+"), 55 | new Expression.Terminal("id"), 56 | new Expression.Terminal(")") 57 | ); 58 | boolean result2 = recognizer.recognize(input2); 59 | System.out.println("Result: " + result2); 60 | 61 | System.out.println("\n=== Testing: id + + id (invalid) ==="); 62 | var input3 = of( 63 | new Expression.Terminal("id"), 64 | new 
Expression.Terminal("+"), 65 | new Expression.Terminal("+"), 66 | new Expression.Terminal("id") 67 | ); 68 | boolean result3 = recognizer.recognize(input3); 69 | System.out.println("Result: " + result3); 70 | } 71 | } -------------------------------------------------------------------------------- /JAPANESE_FONT_SETUP.md: -------------------------------------------------------------------------------- 1 | # Japanese Font Support for PDF Generation 2 | 3 | This document explains how Japanese font support is configured for PDF generation in this project. 4 | 5 | ## Current Setup 6 | 7 | ### 1. PDF Engine 8 | We use **LuaLaTeX** as the PDF engine, which has excellent support for Unicode and Japanese text processing. 9 | 10 | ### 2. Document Class 11 | The `ltjsbook` document class is used, which is a Japanese-aware variant of the standard LaTeX book class provided by the LuaTeX-ja package. 12 | 13 | ### 3. Font Configuration 14 | The following fonts are configured in `src/metadata.yaml`: 15 | 16 | - **Main font (serif)**: Noto Serif CJK JP 17 | - **Sans-serif font**: Noto Sans CJK JP 18 | - **Monospace font**: Noto Sans Mono CJK JP 19 | 20 | These fonts are system fonts that come pre-installed on most Linux distributions. 21 | 22 | ### 4. Key Metadata Settings 23 | 24 | ```yaml 25 | documentclass: ltjsbook # Japanese-aware document class 26 | lang: ja # Document language 27 | mainfont: "Noto Serif CJK JP" # For Latin text 28 | sansfont: "Noto Sans CJK JP" 29 | monofont: "Noto Sans Mono CJK JP" 30 | CJKmainfont: "Noto Serif CJK JP" # For CJK text 31 | CJKsansfont: "Noto Sans CJK JP" 32 | CJKmonofont: "Noto Sans Mono CJK JP" 33 | pdf-engine: lualatex # Unicode-aware engine 34 | ``` 35 | 36 | ### 5. Additional Package 37 | The `luatexja-fontspec` package is loaded in the header-includes section to ensure proper font handling. 
38 | 39 | ## Troubleshooting 40 | 41 | ### Missing Character Warnings 42 | If you see warnings like "Missing character: There is no ス (U+30B9) in font", it means: 43 | - The PDF engine is not properly configured for Japanese 44 | - The CJK fonts are not being applied correctly 45 | - The document class doesn't support Japanese 46 | 47 | ### Font Not Found Errors 48 | If fonts are not found: 49 | 1. Check installed fonts: `fc-list | grep -i "noto.*cjk"` 50 | 2. Install missing fonts: `sudo apt-get install fonts-noto-cjk` 51 | 52 | ### Alternative Fonts 53 | If Noto fonts are not available, you can use: 54 | - IPAex fonts: `ipaexm` (mincho/serif) and `ipaexg` (gothic/sans) 55 | - Harano Aji fonts (included with TeX Live) 56 | 57 | ## Testing 58 | 59 | To test Japanese font support: 60 | ```bash 61 | ./build_pdf.sh test 62 | ``` 63 | 64 | This will generate a test PDF with Japanese text in `build/test_sample.pdf`. 65 | 66 | ## Custom Template (Optional) 67 | 68 | A custom LaTeX template is available at `templates/japanese.latex` for more advanced customization needs. To use it, add the `--template` option to pandoc commands. 
69 | 70 | ## Dependencies 71 | 72 | Required packages: 73 | - texlive-luatex 74 | - texlive-lang-japanese 75 | - fonts-noto-cjk (or alternative CJK fonts) 76 | - pandoc (2.0+) -------------------------------------------------------------------------------- /code/chapter6/jcomb/src/test/java/com/github/kmizu/jcomb/JCombMathematicalExpressionTest.java: -------------------------------------------------------------------------------- 1 | package com.github.kmizu.jcomb; 2 | 3 | import jdk.jfr.Description; 4 | import org.junit.jupiter.api.Test; 5 | 6 | import static com.github.kmizu.jcomb.JComb.*; 7 | import static org.junit.jupiter.api.Assertions.assertEquals; 8 | 9 | public class JCombMathematicalExpressionTest { 10 | public JParser expression() { 11 | /* 12 | * expression <- additive ( ("+" / "-") additive )* 13 | */ 14 | return seq( 15 | lazy(() -> additive()), 16 | rep0( 17 | seq( 18 | alt(string("+"), string("-")), 19 | lazy(() -> additive()) 20 | ) 21 | ) 22 | ).map(p -> { 23 | var left = p.a(); 24 | var rights = p.b(); 25 | for (var right : rights) { 26 | var op = right.a(); 27 | var rightValue = right.b(); 28 | if (op.equals("+")) { 29 | left += rightValue; 30 | } else { 31 | left -= rightValue; 32 | } 33 | } 34 | return left; 35 | }); 36 | } 37 | 38 | public JParser additive() { 39 | /* 40 | * additive <- primary ( ("*" / "/") primary )* 41 | */ 42 | return seq( 43 | lazy(() -> primary()), 44 | rep0( 45 | seq( 46 | alt(string("*"), string("/")), 47 | lazy(() -> primary()) 48 | ) 49 | ) 50 | ).map(p -> { 51 | var left = p.a(); 52 | var rights = p.b(); 53 | for (var right : rights) { 54 | var op = right.a(); 55 | var rightValue = right.b(); 56 | if (op.equals("*")) { 57 | left *= rightValue; 58 | } else { 59 | left /= rightValue; 60 | } 61 | } 62 | return left; 63 | }); 64 | } 65 | 66 | public JParser primary() { 67 | /* 68 | * primary <- number / "(" expression ")" 69 | */ 70 | return alt( 71 | number, 72 | seq( 73 | string("("), 74 | seq( 75 | lazy(() -> 
expression()), 76 | string(")") 77 | ) 78 | ).map(p -> p.b().a()) 79 | ); 80 | } 81 | 82 | // number <- [0-9]+ 83 | JParser number = regex("[0-9]+").map(Integer::parseInt); 84 | 85 | @Test 86 | @Description("1+2*3") 87 | public void testExpression() { 88 | assertEquals(new Result<>(7, ""), expression().parse("1+2*3")); 89 | } 90 | } -------------------------------------------------------------------------------- /code/chapter5/gradlew.bat: -------------------------------------------------------------------------------- 1 | @rem 2 | @rem Copyright 2015 the original author or authors. 3 | @rem 4 | @rem Licensed under the Apache License, Version 2.0 (the "License"); 5 | @rem you may not use this file except in compliance with the License. 6 | @rem You may obtain a copy of the License at 7 | @rem 8 | @rem https://www.apache.org/licenses/LICENSE-2.0 9 | @rem 10 | @rem Unless required by applicable law or agreed to in writing, software 11 | @rem distributed under the License is distributed on an "AS IS" BASIS, 12 | @rem WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | @rem See the License for the specific language governing permissions and 14 | @rem limitations under the License. 15 | @rem 16 | 17 | @if "%DEBUG%" == "" @echo off 18 | @rem ########################################################################## 19 | @rem 20 | @rem Gradle startup script for Windows 21 | @rem 22 | @rem ########################################################################## 23 | 24 | @rem Set local scope for the variables with windows NT shell 25 | if "%OS%"=="Windows_NT" setlocal 26 | 27 | set DIRNAME=%~dp0 28 | if "%DIRNAME%" == "" set DIRNAME=. 29 | set APP_BASE_NAME=%~n0 30 | set APP_HOME=%DIRNAME% 31 | 32 | @rem Resolve any "." and ".." in APP_HOME to make it shorter. 33 | for %%i in ("%APP_HOME%") do set APP_HOME=%%~fi 34 | 35 | @rem Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script. 
36 | set DEFAULT_JVM_OPTS="-Xmx64m" "-Xms64m" 37 | 38 | @rem Find java.exe 39 | if defined JAVA_HOME goto findJavaFromJavaHome 40 | 41 | set JAVA_EXE=java.exe 42 | %JAVA_EXE% -version >NUL 2>&1 43 | if "%ERRORLEVEL%" == "0" goto execute 44 | 45 | echo. 46 | echo ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH. 47 | echo. 48 | echo Please set the JAVA_HOME variable in your environment to match the 49 | echo location of your Java installation. 50 | 51 | goto fail 52 | 53 | :findJavaFromJavaHome 54 | set JAVA_HOME=%JAVA_HOME:"=% 55 | set JAVA_EXE=%JAVA_HOME%/bin/java.exe 56 | 57 | if exist "%JAVA_EXE%" goto execute 58 | 59 | echo. 60 | echo ERROR: JAVA_HOME is set to an invalid directory: %JAVA_HOME% 61 | echo. 62 | echo Please set the JAVA_HOME variable in your environment to match the 63 | echo location of your Java installation. 64 | 65 | goto fail 66 | 67 | :execute 68 | @rem Setup the command line 69 | 70 | set CLASSPATH=%APP_HOME%\gradle\wrapper\gradle-wrapper.jar 71 | 72 | 73 | @rem Execute Gradle 74 | "%JAVA_EXE%" %DEFAULT_JVM_OPTS% %JAVA_OPTS% %GRADLE_OPTS% "-Dorg.gradle.appname=%APP_BASE_NAME%" -classpath "%CLASSPATH%" org.gradle.wrapper.GradleWrapperMain %* 75 | 76 | :end 77 | @rem End local scope for the variables with windows NT shell 78 | if "%ERRORLEVEL%"=="0" goto mainEnd 79 | 80 | :fail 81 | rem Set variable GRADLE_EXIT_CONSOLE if you need the _script_ return code instead of 82 | rem the _cmd.exe /c_ return code! 83 | if not "" == "%GRADLE_EXIT_CONSOLE%" exit 1 84 | exit /b 1 85 | 86 | :mainEnd 87 | if "%OS%"=="Windows_NT" endlocal 88 | 89 | :omega 90 | -------------------------------------------------------------------------------- /code/chapter3/gradlew.bat: -------------------------------------------------------------------------------- 1 | @rem 2 | @rem Copyright 2015 the original author or authors. 
3 | @rem 4 | @rem Licensed under the Apache License, Version 2.0 (the "License"); 5 | @rem you may not use this file except in compliance with the License. 6 | @rem You may obtain a copy of the License at 7 | @rem 8 | @rem https://www.apache.org/licenses/LICENSE-2.0 9 | @rem 10 | @rem Unless required by applicable law or agreed to in writing, software 11 | @rem distributed under the License is distributed on an "AS IS" BASIS, 12 | @rem WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | @rem See the License for the specific language governing permissions and 14 | @rem limitations under the License. 15 | @rem 16 | 17 | @if "%DEBUG%" == "" @echo off 18 | @rem ########################################################################## 19 | @rem 20 | @rem Gradle startup script for Windows 21 | @rem 22 | @rem ########################################################################## 23 | 24 | @rem Set local scope for the variables with windows NT shell 25 | if "%OS%"=="Windows_NT" setlocal 26 | 27 | set DIRNAME=%~dp0 28 | if "%DIRNAME%" == "" set DIRNAME=. 29 | set APP_BASE_NAME=%~n0 30 | set APP_HOME=%DIRNAME% 31 | 32 | @rem Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script. 33 | set DEFAULT_JVM_OPTS="-Xmx64m" "-Xms64m" 34 | 35 | @rem Find java.exe 36 | if defined JAVA_HOME goto findJavaFromJavaHome 37 | 38 | set JAVA_EXE=java.exe 39 | %JAVA_EXE% -version >NUL 2>&1 40 | if "%ERRORLEVEL%" == "0" goto init 41 | 42 | echo. 43 | echo ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH. 44 | echo. 45 | echo Please set the JAVA_HOME variable in your environment to match the 46 | echo location of your Java installation. 47 | 48 | goto fail 49 | 50 | :findJavaFromJavaHome 51 | set JAVA_HOME=%JAVA_HOME:"=% 52 | set JAVA_EXE=%JAVA_HOME%/bin/java.exe 53 | 54 | if exist "%JAVA_EXE%" goto init 55 | 56 | echo. 
57 | echo ERROR: JAVA_HOME is set to an invalid directory: %JAVA_HOME% 58 | echo. 59 | echo Please set the JAVA_HOME variable in your environment to match the 60 | echo location of your Java installation. 61 | 62 | goto fail 63 | 64 | :init 65 | @rem Get command-line arguments, handling Windows variants 66 | 67 | if not "%OS%" == "Windows_NT" goto win9xME_args 68 | 69 | :win9xME_args 70 | @rem Slurp the command line arguments. 71 | set CMD_LINE_ARGS= 72 | set _SKIP=2 73 | 74 | :win9xME_args_slurp 75 | if "x%~1" == "x" goto execute 76 | 77 | set CMD_LINE_ARGS=%* 78 | 79 | :execute 80 | @rem Setup the command line 81 | 82 | set CLASSPATH=%APP_HOME%\gradle\wrapper\gradle-wrapper.jar 83 | 84 | @rem Execute Gradle 85 | "%JAVA_EXE%" %DEFAULT_JVM_OPTS% %JAVA_OPTS% %GRADLE_OPTS% "-Dorg.gradle.appname=%APP_BASE_NAME%" -classpath "%CLASSPATH%" org.gradle.wrapper.GradleWrapperMain %CMD_LINE_ARGS% 86 | 87 | :end 88 | @rem End local scope for the variables with windows NT shell 89 | if "%ERRORLEVEL%"=="0" goto mainEnd 90 | 91 | :fail 92 | rem Set variable GRADLE_EXIT_CONSOLE if you need the _script_ return code instead of 93 | rem the _cmd.exe /c_ return code! 
94 | if not "" == "%GRADLE_EXIT_CONSOLE%" exit 1 95 | exit /b 1 96 | 97 | :mainEnd 98 | if "%OS%"=="Windows_NT" endlocal 99 | 100 | :omega 101 | -------------------------------------------------------------------------------- /code/chapter6/jcomb/src/main/java/com/github/kmizu/jcomb/JComb.java: -------------------------------------------------------------------------------- 1 | package com.github.kmizu.jcomb; 2 | 3 | import java.util.ArrayList; 4 | import java.util.List; 5 | import java.util.regex.*; 6 | import java.util.function.*; 7 | 8 | public class JComb { 9 | public static class JLiteralParser implements JParser { 10 | private String literal; 11 | public JLiteralParser(String literal) { 12 | this.literal = literal; 13 | 14 | } 15 | public Result parse(String input) { 16 | if(input.startsWith(literal)) { 17 | return new Result(literal, input.substring(literal.length())); 18 | } else { 19 | return null; 20 | } 21 | } 22 | } 23 | 24 | public static JParser string(String literal) { 25 | return new JLiteralParser(literal); 26 | } 27 | 28 | public static JParser alt(JParser p1, JParser p2) { 29 | return (input) -> { 30 | var result = p1.parse(input); 31 | if(result != null) return result; 32 | return p2.parse(input); 33 | }; 34 | } 35 | 36 | public static JParser> rep0(JParser p) { 37 | return (input) -> { 38 | var result = p.parse(input); 39 | if(result == null) return new Result<>(List.of(), input); 40 | var value = result.value(); 41 | var rest = result.rest(); 42 | var result2 = rep0(p).parse(rest); 43 | if(result2 == null) return new Result<>(List.of(value), rest); 44 | List values = new ArrayList<>(); 45 | values.add(value); 46 | values.addAll(result2.value()); 47 | return new Result<>(values, result2.rest()); 48 | }; 49 | } 50 | 51 | public static JParser> rep1(JParser p) { 52 | return (input) -> { 53 | var result = p.parse(input); 54 | if(result == null) return null; 55 | var value = result.value(); 56 | var rest = result.rest(); 57 | var result2 = 
rep0(p).parse(rest); 58 | if(result2 == null) return new Result<>(List.of(value), rest); 59 | List values = new ArrayList<>(); 60 | values.add(value); 61 | values.addAll(result2.value()); 62 | return new Result<>(values, result2.rest()); 63 | }; 64 | } 65 | 66 | public static JParser> seq(JParser p1, JParser p2) { 67 | return (input) -> { 68 | var result1 = p1.parse(input); 69 | if(result1 == null) return null; 70 | var rest = result1.rest(); 71 | var result2 = p2.parse(rest); 72 | if(result2 == null) return null; 73 | return new Result<>(new Pair(result1.value(), result2.value()), result2.rest()); 74 | }; 75 | } 76 | 77 | public static JParser regex(String regex) { 78 | return (input) -> { 79 | var matcher = Pattern.compile(regex).matcher(input); 80 | if(matcher.lookingAt()) { 81 | return new Result<>(matcher.group(), input.substring(matcher.end())); 82 | } else { 83 | return null; 84 | } 85 | }; 86 | } 87 | 88 | public static JParser lazy(Supplier> supplier) { 89 | return (input) -> supplier.get().parse(input); 90 | } 91 | } 92 | -------------------------------------------------------------------------------- /code/chapter6/antlr/pom.xml: -------------------------------------------------------------------------------- 1 | 2 | 5 | 4.0.0 6 | 7 | com.github.asciidwango 8 | parser_book 9 | 0.1-SNAPSHOT 10 | expression 11 | 12 | 13 | 14 | The MIT License 15 | http://www.opensource.org/licenses/MIT 16 | repo 17 | 18 | 19 | 20 | 21 | git@github.com:kmizu/asciidwango/parser_book.git 22 | scm:git:git@github.com:kmizu/asciidwango/parser_book.git 23 | 24 | https://github.com/asciidwango/parser_book 25 | A Tiny Expression 26 | 27 | 28 | 29 | kmizu 30 | Kota Mizushima 31 | https://github.com/kmizu 32 | 33 | 34 | 35 | 36 | 37 | org.junit.jupiter 38 | junit-jupiter-engine 39 | 5.9.2 40 | test 41 | 42 | 43 | org.antlr 44 | antlr4-runtime 45 | 4.3 46 | 47 | 48 | 49 | 50 | 51 | 52 | org.apache.maven.plugins 53 | maven-compiler-plugin 54 | 3.11.0 55 | 56 | 17 57 | 17 58 | UTF-8 59 
| 60 | 61 | 62 | org.codehaus.mojo 63 | exec-maven-plugin 64 | 3.1.0 65 | 66 | com.github.asciidwango.parser_book.ch5.Main 67 | 68 | 69 | 70 | org.antlr 71 | antlr4-maven-plugin 72 | 4.3 73 | 74 | 75 | antlr 76 | generate-sources 77 | 78 | antlr4 79 | 80 | 81 | 82 | 83 | 84 | 85 | 86 | 87 | 17 88 | ${java.version} 89 | ${java.version} 90 | 91 | 92 | -------------------------------------------------------------------------------- /code/chapter5/src/main/java/parser/ll1/Main.java: -------------------------------------------------------------------------------- 1 | package parser.ll1; 2 | 3 | import static java.util.List.of; 4 | 5 | public class Main { 6 | public static void main(String[] args) { 7 | // LL(1)文法の例 8 | // S -> A B 9 | // A -> a A | ε 10 | // B -> b 11 | var grammar1 = new Grammar("S", of( 12 | new Rule("S", of( 13 | new Expression.NonTerminal("A"), 14 | new Expression.NonTerminal("B") 15 | )), 16 | new Rule("A", of( 17 | new Expression.Terminal("a"), 18 | new Expression.NonTerminal("A") 19 | )), 20 | new Rule("A", of()), // 空規則 21 | new Rule("B", of(new Expression.Terminal("b"))) 22 | )); 23 | 24 | System.out.println("=== Grammar 1: S -> AB, A -> aA | ε, B -> b ==="); 25 | var recognizer1 = new LL1Recognizer(grammar1); 26 | recognizer1.printFirstSets(); 27 | recognizer1.printFollowSets(); 28 | recognizer1.printParseTable(); 29 | 30 | System.out.println("\n=== Testing: aab ==="); 31 | var input1 = of( 32 | new Expression.Terminal("a"), 33 | new Expression.Terminal("a"), 34 | new Expression.Terminal("b") 35 | ); 36 | recognizer1.recognize(input1); 37 | 38 | System.out.println("\n=== Testing: b ==="); 39 | var input2 = of(new Expression.Terminal("b")); 40 | recognizer1.recognize(input2); 41 | 42 | System.out.println("\n=== Testing: aa (invalid - missing b) ==="); 43 | var input3 = of( 44 | new Expression.Terminal("a"), 45 | new Expression.Terminal("a") 46 | ); 47 | recognizer1.recognize(input3); 48 | 49 | // より複雑なLL(1)文法の例 50 | // E -> T E' 51 | // E' -> + 
T E' | ε 52 | // T -> F T' 53 | // T' -> * F T' | ε 54 | // F -> ( E ) | id 55 | var grammar2 = new Grammar("E", of( 56 | new Rule("E", of( 57 | new Expression.NonTerminal("T"), 58 | new Expression.NonTerminal("E'") 59 | )), 60 | new Rule("E'", of( 61 | new Expression.Terminal("+"), 62 | new Expression.NonTerminal("T"), 63 | new Expression.NonTerminal("E'") 64 | )), 65 | new Rule("E'", of()), // 空規則 66 | new Rule("T", of( 67 | new Expression.NonTerminal("F"), 68 | new Expression.NonTerminal("T'") 69 | )), 70 | new Rule("T'", of( 71 | new Expression.Terminal("*"), 72 | new Expression.NonTerminal("F"), 73 | new Expression.NonTerminal("T'") 74 | )), 75 | new Rule("T'", of()), // 空規則 76 | new Rule("F", of( 77 | new Expression.Terminal("("), 78 | new Expression.NonTerminal("E"), 79 | new Expression.Terminal(")") 80 | )), 81 | new Rule("F", of(new Expression.Terminal("id"))) 82 | )); 83 | 84 | System.out.println("\n\n=== Grammar 2: LL(1) version of arithmetic expressions ==="); 85 | var recognizer2 = new LL1Recognizer(grammar2); 86 | recognizer2.printFirstSets(); 87 | recognizer2.printFollowSets(); 88 | recognizer2.printParseTable(); 89 | 90 | System.out.println("\n=== Testing: id + id * id ==="); 91 | var input4 = of( 92 | new Expression.Terminal("id"), 93 | new Expression.Terminal("+"), 94 | new Expression.Terminal("id"), 95 | new Expression.Terminal("*"), 96 | new Expression.Terminal("id") 97 | ); 98 | recognizer2.recognize(input4); 99 | 100 | System.out.println("\n=== Testing: ( id + id ) ==="); 101 | var input5 = of( 102 | new Expression.Terminal("("), 103 | new Expression.Terminal("id"), 104 | new Expression.Terminal("+"), 105 | new Expression.Terminal("id"), 106 | new Expression.Terminal(")") 107 | ); 108 | recognizer2.recognize(input5); 109 | } 110 | } -------------------------------------------------------------------------------- /contents/chapter8.md: -------------------------------------------------------------------------------- 1 | 2 | # 第8章 おわりに 3 | 4 | 
ここまでで構文解析の世界を概観してみましたがいかがでしたか?構文解析、特に非自然言語の構文解析というのは地味なもので、パーサージェネレータの発展などもあり、20世紀末には「構文解析はもう終わった問題だ」という人もいました。ただ、その一方で2000年代以降になってもPEGの発明(再発見)があり、Pythonの構文解析器に採用されるまでにつながりましたし、`LL(*)`や`ALL(*)`のような革新的なアルゴリズムが生み出されています。それも、どちらかといえば主流であった上向き型の構文解析でなく下向き型の構文解析で、です。 5 | 6 | とはいえやはり地味なものは地味であり、プログラミング言語処理系を構成するコンポーネントという観点から言っても「脇役」という印象は否めません。ただ、わたしたちはプログラミング言語を書いているときは、コンパイラの内部表現や抽象構文木と対話しているわけではありません。プログラマーが直接対話する相手はプログラミング言語の具象構文であり、具象構文はプログラミング言語の「UI」を担当する部分といえるでしょう。通常のアプリケーションでUIが軽視されるべきでないのと同様にやはり具象構文も軽視されるべきでないと私は思いますし、よりよい具象構文の設計には構文解析の知識が助けになると信じています。 7 | 8 | ところで、ここまで、構文解析の基盤を支える「形式言語」の世界についてはあえてはしょった説明に留めました。何故なら構文解析を学ぶという点からすると本筋から外れ過ぎてしまいますし、何より形式言語理論を学ぶのは骨が折れる作業でもあるからです。 9 | 10 | とはいえ、せっかくなので、この章では形式言語理論のほんの導入だけでも紹介したいと思います。形式言語理論は、言語の構造を数学的に研究する分野であり、構文解析の理論的基盤となっています。「数学的」というと難しそうに聞こえますが、要は「プログラミング言語の文法を厳密に扱うための理論」と考えてください。 11 | 12 | 例えば、第4章で触れた「正規言語」や「文脈自由言語」といった言語クラスは、それぞれ異なる計算モデル(機械)によって認識できることが知られています。本書でも多少触れましたが、改めて簡単にまとめておきます。 13 | 14 | - **正規言語:** 有限オートマトンという、有限個の状態しか持たない単純な機械で認識できます。正規表現は正規言語を記述するための便利な記法です。括弧の対応のように無限のネスト構造を持つものは扱えません。 15 | - **文脈自由言語:** プッシュダウンオートマトンという、有限オートマトンにスタック(無限の深さを持つメモリ)を追加した機械で認識できます。括弧の対応のような再帰的な構造を扱えるようになります。本書で紹介した多くの構文解析アルゴリズムは、この文脈自由言語を対象としています。 16 | - **文脈依存言語:** 文脈自由言語よりも強力なクラスで、例えば `a^n b^n c^n`(n個のa、n個のb、n個のcがこの順で並ぶ文字列の集合)のような言語を記述できます。プッシュダウンオートマトンでは認識が不可能です。 17 | - **帰納的可算言語(チューリングマシンが認識する言語):** 最も強力な言語クラスで、私たちが普段使うJavaやPythonなどのプログラミング言語で書けるアルゴリズム(計算可能な問題)が認識できる言語の範囲に対応します。 18 | 19 | \begin{figure}[h] 20 | \centering 21 | \begin{tikzpicture}[scale=1.5] 22 | % 最外側: チューリングマシン(帰納的可算言語) 23 | \draw[thick, fill=blue!10] (0,0) ellipse (4.5cm and 3.5cm); 24 | \node[align=center] at (0,3) {\textbf{チューリングマシン}\\(帰納的可算言語)}; 25 | 26 | % 2番目: 線形拘束オートマトン(文脈依存言語) 27 | \draw[thick, fill=green!10] (0,-0.3) ellipse (3.5cm and 2.5cm); 28 | \node[align=center] at (0,1.7) {\textbf{線形拘束オートマトン}\\(文脈依存言語)}; 29 | 30 | % 3番目: プッシュダウンオートマトン(文脈自由言語) 31 | \draw[thick, fill=yellow!10] (0,-0.5) ellipse (2.5cm and 1.7cm); 32 | 
\node[align=center] at (0,0.7) {\textbf{プッシュダウン}\\オートマトン\\(文脈自由言語)}; 33 | 34 | % 最内側: 有限オートマトン(正規言語) 35 | \draw[thick, fill=red!10] (0,-0.6) ellipse (1.5cm and 0.9cm); 36 | \node[align=center] at (0,-0.6) {\textbf{有限オートマトン}\\(正規言語)}; 37 | \end{tikzpicture} 38 | \caption{オートマトンの階層と言語クラスの対応(各内側の集合は外側の集合に含まれる)} 39 | \label{fig:automaton-hierarchy} 40 | \end{figure} 41 | 42 | 形式言語理論を学ぶと、「なぜある種のパターンは正規表現で書けるのに、別のパターンは書けないのか?」や「なぜ `a^n b^n` は文脈自由言語なのに `a^n b^n c^n` はそうではないのか?」といった疑問に、より深いレベルで答えることができるようになります。これらの問いは、計算モデルの能力の限界と深く関わっています。 43 | 44 | 幸い、形式言語理論を学ぶための良質な教科書はいくつもあります。もしこの本を読み終えて、言語の理論的な側面にさらに興味を持った方は、ぜひ専門書を手に取ってみてください。以下でいくつかの参考文献を紹介します。 45 | 46 | ### 古典的名著・専門書 47 | 48 | - A.V.エイホ、R.セシィ、M.S.ラム、J.D.ウルマン、『コンパイラ - 原理・技法・ツール』(第2版)、サイエンス社、2009年(通称ドラゴンブック) 49 | - コメント: コンパイラ構築に関する標準的な教科書。字句解析、構文解析(LL、LR)、意味解析、コード生成など、コンパイラの全般的なトピックを網羅。理論的背景もしっかり解説されています。中級者以上向け。 50 | - J.ホップクロフト、R.モトワニ、J.ウルマン、『オートマトン 言語理論 計算論 Ⅰ』(第2版) 、サイエンス社、2003年 51 | - コメント: オートマトンと形式言語の理論に関する大学生レベル以上向け教科書。正規言語、文脈自由言語など、計算理論の基礎をしっかり学べます。数学的な厳密さを求める方向け。 52 | - J.ホップクロフト、R.モトワニ、J.ウルマン、『オートマトン 言語理論 計算論 Ⅱ』(第2版) 、サイエンス社、2003年 53 | - コメント: 同上。Ⅱではチューリングマシン、決定不能性、計算複雑性などを取り扱っています。Ⅱは計算理論のより深い部分に踏み込んでいます。 54 | - Dick Grune, Ceriel J.H. Jacobs. 
*Parsing Techniques: A Practical Guide (2nd Edition)* 55 | - コメント: 書名どおり、様々な構文解析技術に特化した書籍。LL、LRだけでなく、アーリー法、GLR、CYK法など、より高度なアルゴリズムや曖昧性のある文法の扱いについても詳しい。構文解析を専門的に深めたい方向け。 56 | 57 | ### 特定の技術に関する論文・資料 58 | 59 | - Bryan Ford, "Parsing Expression Grammars: A Recognition-Based Syntactic Foundation", 2004 60 | - コメント: PEGを提案したオリジナルの論文。PEGの形式的な定義、操作的意味論、Packrat Parsingについて解説。理論的な背景を深く理解したい方向け。 (オンラインで検索すれば見つかるはずです) 61 | - Terence Parr, *The Definitive ANTLR 4 Reference* 62 | - コメント: ANTLR v4の作者自身による解説書。ANTLRの文法定義、使い方、ALL(*)アルゴリズムの概要、実践的なパーサー構築のテクニックが豊富。ANTLRを使いこなしたいなら必読。 63 | - ANTLR公式サイト、https://www.antlr.org/ 64 | - コメント: ANTLRのドキュメント、チュートリアル、文法リポジトリなど。最新情報やコミュニティのサポートも得られます。 65 | - JavaCC公式サイト、https://javacc.github.io/javacc/ 66 | - コメント: JavaCCのドキュメント、チュートリアル、FAQなど。 67 | - GNU Bisonマニュアル、https://www.gnu.org/software/bison/manual/ 68 | - コメント: Bison (Yacc互換) の詳細なマニュアル。LALR(1)やGLRパーサーの生成方法、文法定義の書き方などが学べます。 69 | 70 | これらの資料を通じて、構文解析の世界への探求をさらに深めていただければ幸いです。 71 | 72 | ## まとめ 73 | 74 | 本書を通じて、構文解析の基本的な考え方や、様々なアルゴリズム、現実のプログラミング言語が抱える課題の一端に触れていただきました。 75 | 76 | よりよい具象構文の設計には構文解析の知識が助けになるという趣旨のことを冒頭で述べましたが、これは例えば、あなたが新しいドメイン固有言語(DSL)を設計する際に、利用者が直観的に理解しやすく、かつパーサーが効率的に解析できるような構文(例: 演算子の優先順位、予約語の選択、ブロック構造の表現方法など)を選ぶ上で、本書で学んだLL/LRの特性やPEGの柔軟性といった知識が役立つでしょう。 77 | 78 | たとえば、JSONのようなシンプルな設定ファイル形式を拡張したいとき、コメント機能を追加するにしても「行コメント」にするか「ブロックコメント」にするか、あるいはPythonのような文字列リテラル内のドキュメント方式にするかで、構文解析の難易度は変わってきます。こうした選択を適切に行えるようになることが、本書で得られる実践的な知識の一つです。 79 | 80 | また「構文解析は終わった問題ではない」という点も改めて強調しておきたいと思います。プログラミング言語は進化を続けており、async/awaitのような非同期処理の構文、パターンマッチングの高度化、型システムの進化に伴う構文の複雑化など、依然として構文解析技術に新たな課題を提示し続けています。特に、言語の進化の過程で機能追加をする場合、本書でも出てきた衝突(コンフリクト)が起こることは珍しくありません。これらの課題に取り組む上で、本書で得た知識が何らかの形で皆さんの力になることを願っています。 81 | 82 | 構文解析の世界は奥深く、そして面白いものです。この本が、その面白さを少しでも伝えることができたなら、著者としてこれ以上の喜びはありません。 83 | 84 | 2025年6月、自室にて。水島宏太 85 | -------------------------------------------------------------------------------- /code/chapter3/src/main/java/parser/Ast.java: 
-------------------------------------------------------------------------------- 1 | package parser; 2 | 3 | import java.util.Arrays; 4 | import java.util.List; 5 | import java.util.Objects; 6 | 7 | public class Ast { 8 | // value 9 | public interface JsonValue {} 10 | 11 | // NULL 12 | public static class JsonNull implements JsonValue { 13 | private JsonNull(){} 14 | private static final JsonNull INSTANCE = new JsonNull(); 15 | public static JsonNull getInstance() { 16 | return INSTANCE; 17 | } 18 | 19 | @Override 20 | public String toString() { 21 | return "null"; 22 | } 23 | } 24 | 25 | 26 | // TRUE 27 | public static class JsonTrue implements JsonValue { 28 | private JsonTrue(){} 29 | private static final JsonTrue INSTANCE = new JsonTrue(); 30 | public static JsonTrue getInstance() { 31 | return INSTANCE; 32 | } 33 | 34 | @Override 35 | public String toString() { 36 | return "true"; 37 | } 38 | } 39 | 40 | // FALSE 41 | public static class JsonFalse implements JsonValue { 42 | private JsonFalse(){} 43 | private static final JsonFalse INSTANCE = new JsonFalse(); 44 | public static JsonFalse getInstance() { 45 | return INSTANCE; 46 | } 47 | 48 | @Override 49 | public String toString() { 50 | return "false"; 51 | } 52 | } 53 | 54 | // NUMBER 55 | public static class JsonNumber implements JsonValue { 56 | public final double value; 57 | public JsonNumber(double value) { 58 | this.value = value; 59 | } 60 | 61 | @Override 62 | public boolean equals(Object o) { 63 | if (this == o) return true; 64 | if (o == null || getClass() != o.getClass()) return false; 65 | JsonNumber that = (JsonNumber) o; 66 | return Double.compare(that.value, value) == 0; 67 | } 68 | 69 | @Override 70 | public int hashCode() { 71 | return Objects.hash(value); 72 | } 73 | 74 | @Override 75 | public String toString() { 76 | return "JsonNumber{" + 77 | "value=" + value + 78 | '}'; 79 | } 80 | } 81 | 82 | // STRING 83 | public static class JsonString implements JsonValue { 84 | public final String 
value; 85 | public JsonString(String value) { 86 | this.value = value; 87 | } 88 | 89 | @Override 90 | public boolean equals(Object o) { 91 | if (this == o) return true; 92 | if (o == null || getClass() != o.getClass()) return false; 93 | JsonString that = (JsonString) o; 94 | return Objects.equals(value, that.value); 95 | } 96 | 97 | @Override 98 | public int hashCode() { 99 | return Objects.hash(value); 100 | } 101 | 102 | @Override 103 | public String toString() { 104 | return "JsonString{" + 105 | "value='" + value + '\'' + 106 | '}'; 107 | } 108 | } 109 | 110 | // object 111 | public static class JsonObject implements JsonValue { 112 | public final List> properties; 113 | public JsonObject(List> properties) { 114 | this.properties = properties; 115 | } 116 | 117 | @Override 118 | public boolean equals(Object o) { 119 | if (this == o) return true; 120 | if (o == null || getClass() != o.getClass()) return false; 121 | JsonObject object = (JsonObject) o; 122 | return Objects.equals(properties, object.properties); 123 | } 124 | 125 | @Override 126 | public int hashCode() { 127 | return Objects.hash(properties); 128 | } 129 | 130 | @Override 131 | public String toString() { 132 | return "JsonObject{" + 133 | "properties=" + properties + 134 | '}'; 135 | } 136 | } 137 | 138 | // array 139 | public static class JsonArray implements JsonValue { 140 | public final List elements; 141 | public JsonArray(List elements) { 142 | this.elements = elements; 143 | } 144 | 145 | @Override 146 | public boolean equals(Object o) { 147 | if (this == o) return true; 148 | if (o == null || getClass() != o.getClass()) return false; 149 | JsonArray jsonArray = (JsonArray) o; 150 | return Objects.equals(elements, jsonArray.elements); 151 | } 152 | 153 | @Override 154 | public int hashCode() { 155 | return Objects.hash(elements); 156 | } 157 | 158 | @Override 159 | public String toString() { 160 | return "JsonArray{" + 161 | "elements=" + elements + 162 | '}'; 163 | } 164 | } 165 | } 166 
| -------------------------------------------------------------------------------- /code/chapter3/src/main/java/parser/SimpleJsonParser.java: -------------------------------------------------------------------------------- 1 | package parser; 2 | 3 | import java.util.ArrayList; 4 | import java.util.List; 5 | 6 | public class SimpleJsonParser implements JsonParser { 7 | private List tokens; 8 | private int index; 9 | 10 | public ParseResult parse(String input) { 11 | SimpleJsonTokenizer tokenizer = new SimpleJsonTokenizer(input); 12 | this.tokens = tokenizer.tokenizeAll(); 13 | this.index = 0; 14 | var value = parseValue(); 15 | // 残りの入力を返すため、現在のトークン位置以降を再構築する必要がある 16 | // ここでは簡単のため空文字列を返す 17 | return new ParseResult<>(value, ""); 18 | } 19 | 20 | private Token current() { 21 | if (index < tokens.size()) { 22 | return tokens.get(index); 23 | } 24 | return new Token(Token.Type.EOF, null); 25 | } 26 | 27 | private boolean moveNext() { 28 | if (index < tokens.size() - 1) { 29 | index++; 30 | return true; 31 | } 32 | return false; 33 | } 34 | 35 | private Ast.JsonValue parseValue() { 36 | var token = current(); 37 | switch(token.type) { 38 | case INTEGER: 39 | return parseNumber(); 40 | case STRING: 41 | return parseString(); 42 | case TRUE: 43 | return parseTrue(); 44 | case FALSE: 45 | return parseFalse(); 46 | case NULL: 47 | return parseNull(); 48 | case LBRACKET: 49 | return parseArray(); 50 | case LBRACE: 51 | return parseObject(); 52 | } 53 | throw new RuntimeException("cannot reach here"); 54 | } 55 | 56 | private Ast.JsonTrue parseTrue() { 57 | if(current().type == Token.Type.TRUE) { 58 | return Ast.JsonTrue.getInstance(); 59 | } 60 | throw new parser.ParseException("expected: true, actual: " + current().value); 61 | } 62 | 63 | private Ast.JsonFalse parseFalse() { 64 | if(current().type == Token.Type.FALSE) { 65 | return Ast.JsonFalse.getInstance(); 66 | } 67 | throw new parser.ParseException("expected: false, actual: " + current().value); 68 | } 69 | 70 | 
private Ast.JsonNull parseNull() { 71 | if(current().type == Token.Type.NULL) { 72 | return Ast.JsonNull.getInstance(); 73 | } 74 | throw new parser.ParseException("expected: null, actual: " + current().value); 75 | } 76 | 77 | private Ast.JsonString parseString() { 78 | return new Ast.JsonString((String)current().value); 79 | } 80 | 81 | private Ast.JsonNumber parseNumber() { 82 | var value = (Integer)current().value; 83 | return new Ast.JsonNumber(value); 84 | } 85 | 86 | private Pair parsePair() { 87 | var key = parseString(); 88 | moveNext(); 89 | if(current().type != Token.Type.COLON) { 90 | throw new parser.ParseException("expected: `:`, actual: " + current().value); 91 | } 92 | moveNext(); 93 | var value = parseValue(); 94 | return new Pair<>(key, value); 95 | } 96 | 97 | private Ast.JsonObject parseObject() { 98 | if(current().type != Token.Type.LBRACE) { 99 | throw new parser.ParseException("expected `{`, actual: " + current().value); 100 | } 101 | 102 | moveNext(); 103 | if(current().type == Token.Type.RBRACE) { 104 | return new Ast.JsonObject(new ArrayList<>()); 105 | } 106 | 107 | List> members = new ArrayList<>(); 108 | var pair= parsePair(); 109 | members.add(pair); 110 | 111 | while(moveNext()) { 112 | if(current().type == Token.Type.RBRACE) { 113 | return new Ast.JsonObject(members); 114 | } 115 | if(current().type != Token.Type.COMMA) { 116 | throw new parser.ParseException("expected: `,`, actual: " + current().value); 117 | } 118 | moveNext(); 119 | pair = parsePair(); 120 | members.add(pair); 121 | } 122 | 123 | throw new parser.ParseException("unexpected EOF"); 124 | } 125 | 126 | private Ast.JsonArray parseArray() { 127 | if(current().type != Token.Type.LBRACKET) { 128 | throw new parser.ParseException("expected: `[`, actual: " + current().value); 129 | } 130 | 131 | moveNext(); 132 | if(current().type == Token.Type.RBRACKET) { 133 | return new Ast.JsonArray(new ArrayList<>()); 134 | } 135 | 136 | List values = new ArrayList<>(); 137 | var 
value = parseValue(); 138 | values.add(value); 139 | 140 | while(moveNext()) { 141 | if(current().type == Token.Type.RBRACKET) { 142 | return new Ast.JsonArray(values); 143 | } 144 | if(current().type != Token.Type.COMMA) { 145 | throw new parser.ParseException("expected: `,`, actual: " + current().value); 146 | } 147 | moveNext(); 148 | value = parseValue(); 149 | values.add(value); 150 | } 151 | 152 | throw new ParseException("unexpected EOF"); 153 | } 154 | } -------------------------------------------------------------------------------- /code/chapter6/antlr/.mvn/wrapper/MavenWrapperDownloader.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2007-present the original author or authors. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | import java.net.*; 17 | import java.io.*; 18 | import java.nio.channels.*; 19 | import java.util.Properties; 20 | 21 | public class MavenWrapperDownloader { 22 | 23 | private static final String WRAPPER_VERSION = "0.5.5"; 24 | /** 25 | * Default URL to download the maven-wrapper.jar from, if no 'downloadUrl' is provided. 
26 | */ 27 | private static final String DEFAULT_DOWNLOAD_URL = "https://repo.maven.apache.org/maven2/io/takari/maven-wrapper/" 28 | + WRAPPER_VERSION + "/maven-wrapper-" + WRAPPER_VERSION + ".jar"; 29 | 30 | /** 31 | * Path to the maven-wrapper.properties file, which might contain a downloadUrl property to 32 | * use instead of the default one. 33 | */ 34 | private static final String MAVEN_WRAPPER_PROPERTIES_PATH = 35 | ".mvn/wrapper/maven-wrapper.properties"; 36 | 37 | /** 38 | * Path where the maven-wrapper.jar will be saved to. 39 | */ 40 | private static final String MAVEN_WRAPPER_JAR_PATH = 41 | ".mvn/wrapper/maven-wrapper.jar"; 42 | 43 | /** 44 | * Name of the property which should be used to override the default download url for the wrapper. 45 | */ 46 | private static final String PROPERTY_NAME_WRAPPER_URL = "wrapperUrl"; 47 | 48 | public static void main(String args[]) { 49 | System.out.println("- Downloader started"); 50 | File baseDirectory = new File(args[0]); 51 | System.out.println("- Using base directory: " + baseDirectory.getAbsolutePath()); 52 | 53 | // If the maven-wrapper.properties exists, read it and check if it contains a custom 54 | // wrapperUrl parameter. 
55 | File mavenWrapperPropertyFile = new File(baseDirectory, MAVEN_WRAPPER_PROPERTIES_PATH); 56 | String url = DEFAULT_DOWNLOAD_URL; 57 | if(mavenWrapperPropertyFile.exists()) { 58 | FileInputStream mavenWrapperPropertyFileInputStream = null; 59 | try { 60 | mavenWrapperPropertyFileInputStream = new FileInputStream(mavenWrapperPropertyFile); 61 | Properties mavenWrapperProperties = new Properties(); 62 | mavenWrapperProperties.load(mavenWrapperPropertyFileInputStream); 63 | url = mavenWrapperProperties.getProperty(PROPERTY_NAME_WRAPPER_URL, url); 64 | } catch (IOException e) { 65 | System.out.println("- ERROR loading '" + MAVEN_WRAPPER_PROPERTIES_PATH + "'"); 66 | } finally { 67 | try { 68 | if(mavenWrapperPropertyFileInputStream != null) { 69 | mavenWrapperPropertyFileInputStream.close(); 70 | } 71 | } catch (IOException e) { 72 | // Ignore ... 73 | } 74 | } 75 | } 76 | System.out.println("- Downloading from: " + url); 77 | 78 | File outputFile = new File(baseDirectory.getAbsolutePath(), MAVEN_WRAPPER_JAR_PATH); 79 | if(!outputFile.getParentFile().exists()) { 80 | if(!outputFile.getParentFile().mkdirs()) { 81 | System.out.println( 82 | "- ERROR creating output directory '" + outputFile.getParentFile().getAbsolutePath() + "'"); 83 | } 84 | } 85 | System.out.println("- Downloading to: " + outputFile.getAbsolutePath()); 86 | try { 87 | downloadFileFromURL(url, outputFile); 88 | System.out.println("Done"); 89 | System.exit(0); 90 | } catch (Throwable e) { 91 | System.out.println("- Error downloading"); 92 | e.printStackTrace(); 93 | System.exit(1); 94 | } 95 | } 96 | 97 | private static void downloadFileFromURL(String urlString, File destination) throws Exception { 98 | if (System.getenv("MVNW_USERNAME") != null && System.getenv("MVNW_PASSWORD") != null) { 99 | String username = System.getenv("MVNW_USERNAME"); 100 | char[] password = System.getenv("MVNW_PASSWORD").toCharArray(); 101 | Authenticator.setDefault(new Authenticator() { 102 | @Override 103 | protected 
PasswordAuthentication getPasswordAuthentication() { 104 | return new PasswordAuthentication(username, password); 105 | } 106 | }); 107 | } 108 | URL website = new URL(urlString); 109 | ReadableByteChannel rbc; 110 | rbc = Channels.newChannel(website.openStream()); 111 | FileOutputStream fos = new FileOutputStream(destination); 112 | fos.getChannel().transferFrom(rbc, 0, Long.MAX_VALUE); 113 | fos.close(); 114 | rbc.close(); 115 | } 116 | 117 | } 118 | -------------------------------------------------------------------------------- /code/chapter5/src/test/java/parser/lr0/LR0RecognizerTest.java: -------------------------------------------------------------------------------- 1 | package parser.lr0; 2 | 3 | import org.junit.jupiter.api.BeforeEach; 4 | import org.junit.jupiter.api.Test; 5 | 6 | import static java.util.List.of; 7 | import static org.junit.jupiter.api.Assertions.*; 8 | 9 | class LR0RecognizerTest { 10 | private LR0Recognizer recognizer; 11 | 12 | @BeforeEach 13 | void setUp() { 14 | // 簡単な算術式の文法(LR(0)文法) 15 | // S -> A a | B b 16 | // A -> c 17 | // B -> c 18 | // この文法はSLR(1)では解析可能だが、LR(0)では競合が発生する 19 | var grammar = new Grammar("S", of( 20 | new Rule("S", of( 21 | new Expression.NonTerminal("A"), 22 | new Expression.Terminal("a") 23 | )), 24 | new Rule("S", of( 25 | new Expression.NonTerminal("B"), 26 | new Expression.Terminal("b") 27 | )), 28 | new Rule("A", of(new Expression.Terminal("c"))), 29 | new Rule("B", of(new Expression.Terminal("c"))) 30 | )); 31 | recognizer = new LR0Recognizer(grammar); 32 | } 33 | 34 | @Test 35 | void testSimpleLR0Grammar() { 36 | // より単純なLR(0)文法でテスト 37 | // S -> a S b | c 38 | var simpleLR0Grammar = new Grammar("S", of( 39 | new Rule("S", of( 40 | new Expression.Terminal("a"), 41 | new Expression.NonTerminal("S"), 42 | new Expression.Terminal("b") 43 | )), 44 | new Rule("S", of(new Expression.Terminal("c"))) 45 | )); 46 | 47 | var simpleRecognizer = new LR0Recognizer(simpleLR0Grammar); 48 | 49 | // "c" を認識 50 | 
assertTrue(simpleRecognizer.recognize(of( 51 | new Expression.Terminal("c") 52 | ))); 53 | 54 | // "acb" を認識 55 | assertTrue(simpleRecognizer.recognize(of( 56 | new Expression.Terminal("a"), 57 | new Expression.Terminal("c"), 58 | new Expression.Terminal("b") 59 | ))); 60 | 61 | // "aacbb" を認識 62 | assertTrue(simpleRecognizer.recognize(of( 63 | new Expression.Terminal("a"), 64 | new Expression.Terminal("a"), 65 | new Expression.Terminal("c"), 66 | new Expression.Terminal("b"), 67 | new Expression.Terminal("b") 68 | ))); 69 | 70 | // "ab" は認識しない(不正) 71 | assertFalse(simpleRecognizer.recognize(of( 72 | new Expression.Terminal("a"), 73 | new Expression.Terminal("b") 74 | ))); 75 | } 76 | 77 | @Test 78 | void testParenthesesGrammar() { 79 | // バランスの取れた括弧の文法 80 | // S -> ( S ) | ε 81 | var parenGrammar = new Grammar("S", of( 82 | new Rule("S", of( 83 | new Expression.Terminal("("), 84 | new Expression.NonTerminal("S"), 85 | new Expression.Terminal(")") 86 | )), 87 | new Rule("S", of()) // 空規則 88 | )); 89 | 90 | var parenRecognizer = new LR0Recognizer(parenGrammar); 91 | 92 | // 空文字列を認識 93 | assertTrue(parenRecognizer.recognize(of())); 94 | 95 | // "()" を認識 96 | assertTrue(parenRecognizer.recognize(of( 97 | new Expression.Terminal("("), 98 | new Expression.Terminal(")") 99 | ))); 100 | 101 | // "(())" を認識 102 | assertTrue(parenRecognizer.recognize(of( 103 | new Expression.Terminal("("), 104 | new Expression.Terminal("("), 105 | new Expression.Terminal(")"), 106 | new Expression.Terminal(")") 107 | ))); 108 | 109 | // "(()" は認識しない(不正) 110 | assertFalse(parenRecognizer.recognize(of( 111 | new Expression.Terminal("("), 112 | new Expression.Terminal("("), 113 | new Expression.Terminal(")") 114 | ))); 115 | } 116 | 117 | @Test 118 | void testListGrammar() { 119 | // リスト文法 120 | // L -> L , E | E 121 | // E -> id 122 | var listGrammar = new Grammar("L", of( 123 | new Rule("L", of( 124 | new Expression.NonTerminal("L"), 125 | new Expression.Terminal(","), 126 | new 
Expression.NonTerminal("E") 127 | )), 128 | new Rule("L", of(new Expression.NonTerminal("E"))), 129 | new Rule("E", of(new Expression.Terminal("id"))) 130 | )); 131 | 132 | var listRecognizer = new LR0Recognizer(listGrammar); 133 | 134 | // "id" を認識 135 | assertTrue(listRecognizer.recognize(of( 136 | new Expression.Terminal("id") 137 | ))); 138 | 139 | // "id, id" を認識 140 | assertTrue(listRecognizer.recognize(of( 141 | new Expression.Terminal("id"), 142 | new Expression.Terminal(","), 143 | new Expression.Terminal("id") 144 | ))); 145 | 146 | // "id, id, id" を認識 147 | assertTrue(listRecognizer.recognize(of( 148 | new Expression.Terminal("id"), 149 | new Expression.Terminal(","), 150 | new Expression.Terminal("id"), 151 | new Expression.Terminal(","), 152 | new Expression.Terminal("id") 153 | ))); 154 | 155 | // ", id" は認識しない(不正) 156 | assertFalse(listRecognizer.recognize(of( 157 | new Expression.Terminal(","), 158 | new Expression.Terminal("id") 159 | ))); 160 | } 161 | } -------------------------------------------------------------------------------- /code/chapter3/gradlew: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env sh 2 | 3 | # 4 | # Copyright 2015 the original author or authors. 5 | # 6 | # Licensed under the Apache License, Version 2.0 (the "License"); 7 | # you may not use this file except in compliance with the License. 8 | # You may obtain a copy of the License at 9 | # 10 | # https://www.apache.org/licenses/LICENSE-2.0 11 | # 12 | # Unless required by applicable law or agreed to in writing, software 13 | # distributed under the License is distributed on an "AS IS" BASIS, 14 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | # See the License for the specific language governing permissions and 16 | # limitations under the License. 
17 | # 18 | 19 | ############################################################################## 20 | ## 21 | ## Gradle start up script for UN*X 22 | ## 23 | ############################################################################## 24 | 25 | # Attempt to set APP_HOME 26 | # Resolve links: $0 may be a link 27 | PRG="$0" 28 | # Need this for relative symlinks. 29 | while [ -h "$PRG" ] ; do 30 | ls=`ls -ld "$PRG"` 31 | link=`expr "$ls" : '.*-> \(.*\)$'` 32 | if expr "$link" : '/.*' > /dev/null; then 33 | PRG="$link" 34 | else 35 | PRG=`dirname "$PRG"`"/$link" 36 | fi 37 | done 38 | SAVED="`pwd`" 39 | cd "`dirname \"$PRG\"`/" >/dev/null 40 | APP_HOME="`pwd -P`" 41 | cd "$SAVED" >/dev/null 42 | 43 | APP_NAME="Gradle" 44 | APP_BASE_NAME=`basename "$0"` 45 | 46 | # Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script. 47 | DEFAULT_JVM_OPTS='"-Xmx64m" "-Xms64m"' 48 | 49 | # Use the maximum available, or set MAX_FD != -1 to use that value. 50 | MAX_FD="maximum" 51 | 52 | warn () { 53 | echo "$*" 54 | } 55 | 56 | die () { 57 | echo 58 | echo "$*" 59 | echo 60 | exit 1 61 | } 62 | 63 | # OS specific support (must be 'true' or 'false'). 64 | cygwin=false 65 | msys=false 66 | darwin=false 67 | nonstop=false 68 | case "`uname`" in 69 | CYGWIN* ) 70 | cygwin=true 71 | ;; 72 | Darwin* ) 73 | darwin=true 74 | ;; 75 | MINGW* ) 76 | msys=true 77 | ;; 78 | NONSTOP* ) 79 | nonstop=true 80 | ;; 81 | esac 82 | 83 | CLASSPATH=$APP_HOME/gradle/wrapper/gradle-wrapper.jar 84 | 85 | # Determine the Java command to use to start the JVM. 86 | if [ -n "$JAVA_HOME" ] ; then 87 | if [ -x "$JAVA_HOME/jre/sh/java" ] ; then 88 | # IBM's JDK on AIX uses strange locations for the executables 89 | JAVACMD="$JAVA_HOME/jre/sh/java" 90 | else 91 | JAVACMD="$JAVA_HOME/bin/java" 92 | fi 93 | if [ ! 
-x "$JAVACMD" ] ; then 94 | die "ERROR: JAVA_HOME is set to an invalid directory: $JAVA_HOME 95 | 96 | Please set the JAVA_HOME variable in your environment to match the 97 | location of your Java installation." 98 | fi 99 | else 100 | JAVACMD="java" 101 | which java >/dev/null 2>&1 || die "ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH. 102 | 103 | Please set the JAVA_HOME variable in your environment to match the 104 | location of your Java installation." 105 | fi 106 | 107 | # Increase the maximum file descriptors if we can. 108 | if [ "$cygwin" = "false" -a "$darwin" = "false" -a "$nonstop" = "false" ] ; then 109 | MAX_FD_LIMIT=`ulimit -H -n` 110 | if [ $? -eq 0 ] ; then 111 | if [ "$MAX_FD" = "maximum" -o "$MAX_FD" = "max" ] ; then 112 | MAX_FD="$MAX_FD_LIMIT" 113 | fi 114 | ulimit -n $MAX_FD 115 | if [ $? -ne 0 ] ; then 116 | warn "Could not set maximum file descriptor limit: $MAX_FD" 117 | fi 118 | else 119 | warn "Could not query maximum file descriptor limit: $MAX_FD_LIMIT" 120 | fi 121 | fi 122 | 123 | # For Darwin, add options to specify how the application appears in the dock 124 | if $darwin; then 125 | GRADLE_OPTS="$GRADLE_OPTS \"-Xdock:name=$APP_NAME\" \"-Xdock:icon=$APP_HOME/media/gradle.icns\"" 126 | fi 127 | 128 | # For Cygwin or MSYS, switch paths to Windows format before running java 129 | if [ "$cygwin" = "true" -o "$msys" = "true" ] ; then 130 | APP_HOME=`cygpath --path --mixed "$APP_HOME"` 131 | CLASSPATH=`cygpath --path --mixed "$CLASSPATH"` 132 | JAVACMD=`cygpath --unix "$JAVACMD"` 133 | 134 | # We build the pattern for arguments to be converted via cygpath 135 | ROOTDIRSRAW=`find -L / -maxdepth 1 -mindepth 1 -type d 2>/dev/null` 136 | SEP="" 137 | for dir in $ROOTDIRSRAW ; do 138 | ROOTDIRS="$ROOTDIRS$SEP$dir" 139 | SEP="|" 140 | done 141 | OURCYGPATTERN="(^($ROOTDIRS))" 142 | # Add a user-defined pattern to the cygpath arguments 143 | if [ "$GRADLE_CYGPATTERN" != "" ] ; then 144 | 
OURCYGPATTERN="$OURCYGPATTERN|($GRADLE_CYGPATTERN)" 145 | fi 146 | # Now convert the arguments - kludge to limit ourselves to /bin/sh 147 | i=0 148 | for arg in "$@" ; do 149 | CHECK=`echo "$arg"|egrep -c "$OURCYGPATTERN" -` 150 | CHECK2=`echo "$arg"|egrep -c "^-"` ### Determine if an option 151 | 152 | if [ $CHECK -ne 0 ] && [ $CHECK2 -eq 0 ] ; then ### Added a condition 153 | eval `echo args$i`=`cygpath --path --ignore --mixed "$arg"` 154 | else 155 | eval `echo args$i`="\"$arg\"" 156 | fi 157 | i=`expr $i + 1` 158 | done 159 | case $i in 160 | 0) set -- ;; 161 | 1) set -- "$args0" ;; 162 | 2) set -- "$args0" "$args1" ;; 163 | 3) set -- "$args0" "$args1" "$args2" ;; 164 | 4) set -- "$args0" "$args1" "$args2" "$args3" ;; 165 | 5) set -- "$args0" "$args1" "$args2" "$args3" "$args4" ;; 166 | 6) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" ;; 167 | 7) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" "$args6" ;; 168 | 8) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" "$args6" "$args7" ;; 169 | 9) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" "$args6" "$args7" "$args8" ;; 170 | esac 171 | fi 172 | 173 | # Escape application args 174 | save () { 175 | for i do printf %s\\n "$i" | sed "s/'/'\\\\''/g;1s/^/'/;\$s/\$/' \\\\/" ; done 176 | echo " " 177 | } 178 | APP_ARGS=`save "$@"` 179 | 180 | # Collect all arguments for the java command, following the shell quoting and substitution rules 181 | eval set -- $DEFAULT_JVM_OPTS $JAVA_OPTS $GRADLE_OPTS "\"-Dorg.gradle.appname=$APP_BASE_NAME\"" -classpath "\"$CLASSPATH\"" org.gradle.wrapper.GradleWrapperMain "$APP_ARGS" 182 | 183 | exec "$JAVACMD" "$@" 184 | -------------------------------------------------------------------------------- /code/chapter3/src/test/java/parser/PegJsonParserTest.java: -------------------------------------------------------------------------------- 1 | package parser; 2 | 3 | import static org.junit.jupiter.api.Assertions.*; 4 | import 
org.junit.jupiter.api.Test; 5 | 6 | import java.util.List; 7 | 8 | public class PegJsonParserTest { 9 | @Test 10 | public void parseSimpleNumber() { 11 | var parser = new PegJsonParser(); 12 | var result = parser.parse("100"); 13 | assertEquals(100.0, ((Ast.JsonNumber) result.value).value); 14 | } 15 | 16 | @Test 17 | public void parseSimpleString() { 18 | var parser = new PegJsonParser(); 19 | var result = parser.parse("\"" + "hoge" + "\""); 20 | assertEquals("hoge", ((Ast.JsonString) result.value).value); 21 | } 22 | 23 | @Test 24 | public void parseBackEscapedString() { 25 | var parser = new PegJsonParser(); 26 | var result = parser.parse("\"" + "\\b" + "\""); 27 | assertEquals("\b", ((Ast.JsonString) result.value).value); 28 | } 29 | 30 | @Test 31 | public void parseFormFeedEscapedString() { 32 | var parser = new PegJsonParser(); 33 | var result = parser.parse("\"" + "\\f" + "\""); 34 | assertEquals("\f", ((Ast.JsonString) result.value).value); 35 | } 36 | 37 | @Test 38 | public void parseTabEscapedString() { 39 | var parser = new PegJsonParser(); 40 | var result = parser.parse("\"" + "\\t" + "\""); 41 | assertEquals("\t", ((Ast.JsonString) result.value).value); 42 | } 43 | 44 | @Test 45 | public void parseNlEscapedString() { 46 | var parser = new PegJsonParser(); 47 | var result = parser.parse("\"" + "\\n" + "\""); 48 | assertEquals("\n", ((Ast.JsonString) result.value).value); 49 | } 50 | 51 | @Test 52 | public void testLfEscapedString() { 53 | var parser = new PegJsonParser(); 54 | var result = parser.parse("\"" + "\\r" + "\""); 55 | assertEquals("\r", ((Ast.JsonString) result.value).value); 56 | } 57 | 58 | @Test 59 | public void testBachSlashEscapedString() { 60 | var parser = new PegJsonParser(); 61 | var result = parser.parse("\"" + "\\\\" + "\""); 62 | assertEquals("\\", ((Ast.JsonString) result.value).value); 63 | } 64 | 65 | @Test 66 | public void testSlashEscapedString() { 67 | var parser = new PegJsonParser(); 68 | var result = parser.parse("\"" + 
"\\/" + "\""); 69 | assertEquals("/", ((Ast.JsonString) result.value).value); 70 | } 71 | 72 | @Test 73 | public void testDoubleQuoteEscapedString() { 74 | var parser = new PegJsonParser(); 75 | var result = parser.parse("\"" + "\\\"" + "\""); 76 | assertEquals("\"", ((Ast.JsonString) result.value).value); 77 | } 78 | 79 | @Test 80 | public void testUnicodeEscapedString() { 81 | var parser = new PegJsonParser(); 82 | var result = parser.parse("\"" + "\\u0041\\u0042\\u0043\\u006A" + "\""); 83 | assertEquals("ABCj", ((Ast.JsonString) result.value).value); 84 | } 85 | 86 | @Test 87 | public void parseTrue() { 88 | var parser = new PegJsonParser(); 89 | var result = parser.parse("true"); 90 | assertTrue(result.value instanceof Ast.JsonTrue); 91 | } 92 | 93 | @Test 94 | public void parseFalse() { 95 | var parser = new PegJsonParser(); 96 | var result = parser.parse("false"); 97 | assertTrue(result.value instanceof Ast.JsonFalse); 98 | } 99 | 100 | @Test 101 | public void parseNull() { 102 | var parser = new PegJsonParser(); 103 | ParseResult nullValue = parser.parse("null"); 104 | } 105 | 106 | @Test 107 | public void parseEmptyArray() { 108 | var parser = new PegJsonParser(); 109 | var result = parser.parse("[]"); 110 | assertEquals(true, result.value instanceof Ast.JsonArray); 111 | var array = (Ast.JsonArray)result.value; 112 | assertEquals(List.of(), array.elements); 113 | } 114 | 115 | @Test 116 | public void parseOneElementArray() { 117 | var parser = new PegJsonParser(); 118 | var result = parser.parse("[1]"); 119 | assertEquals(true, result.value instanceof Ast.JsonArray); 120 | var array = (Ast.JsonArray)result.value; 121 | assertEquals(List.of(new Ast.JsonNumber(1)), array.elements); 122 | } 123 | 124 | @Test 125 | public void parseTwoElementArray() { 126 | var parser = new PegJsonParser(); 127 | var result = parser.parse("[1, 2]"); 128 | assertEquals(true, result.value instanceof Ast.JsonArray); 129 | var array = (Ast.JsonArray)result.value; 130 | 
assertEquals(List.of(new Ast.JsonNumber(1), new Ast.JsonNumber(2)), array.elements); 131 | } 132 | 133 | @Test 134 | public void parseEmptyObject() { 135 | var parser = new PegJsonParser(); 136 | var result = parser.parse("{}"); 137 | assertEquals(true, result.value instanceof Ast.JsonObject); 138 | var object = (Ast.JsonObject)result.value; 139 | assertEquals(List.of(), object.properties); 140 | } 141 | 142 | @Test 143 | public void parseOnePropertyObject() { 144 | var parser = new PegJsonParser(); 145 | var result = parser.parse("{\"foo\":1}"); 146 | assertEquals(true, result.value instanceof Ast.JsonObject); 147 | var object = (Ast.JsonObject)result.value; 148 | assertEquals(List.of(Pair.of(new Ast.JsonString("foo"), new Ast.JsonNumber(1))), object.properties); 149 | } 150 | 151 | @Test 152 | public void parseMultiPropertyObject() { 153 | var parser = new PegJsonParser(); 154 | var result = parser.parse("{\"foo\":1,\"bar\":2}"); 155 | assertEquals(true, result.value instanceof Ast.JsonObject); 156 | var object = (Ast.JsonObject)result.value; 157 | assertEquals( 158 | List.of( 159 | Pair.of(new Ast.JsonString("foo"), new Ast.JsonNumber(1)), 160 | Pair.of(new Ast.JsonString("bar"), new Ast.JsonNumber(2)) 161 | ), 162 | object.properties 163 | ); 164 | } 165 | 166 | @Test 167 | public void parseComplexObject() { 168 | var parser = new PegJsonParser(); 169 | var result = parser.parse("{\"foo\" : 1, \"bar\" : { \"baz\" : 2}}"); 170 | assertEquals(true, result.value instanceof Ast.JsonObject); 171 | var object = (Ast.JsonObject)result.value; 172 | assertEquals( 173 | new Ast.JsonObject( 174 | List.of( 175 | Pair.of(new Ast.JsonString("foo"), new Ast.JsonNumber(1)), 176 | Pair.of( 177 | new Ast.JsonString("bar"), 178 | new Ast.JsonObject( 179 | List.of(Pair.of(new Ast.JsonString("baz"), new Ast.JsonNumber(2))) 180 | ) 181 | ) 182 | ) 183 | ), 184 | object 185 | ); 186 | } 187 | } 188 | -------------------------------------------------------------------------------- 
/code/chapter3/src/test/java/parser/SimpleJsonParserTest.java: -------------------------------------------------------------------------------- 1 | package parser; 2 | 3 | import org.junit.jupiter.api.Test; 4 | 5 | import java.util.List; 6 | 7 | import static org.junit.jupiter.api.Assertions.assertEquals; 8 | import static org.junit.jupiter.api.Assertions.assertTrue; 9 | 10 | public class SimpleJsonParserTest { 11 | @Test 12 | public void parseSimpleNumber() { 13 | var parser = new SimpleJsonParser(); 14 | var result = parser.parse("100"); 15 | assertEquals(100.0, ((Ast.JsonNumber) result.value).value); 16 | } 17 | 18 | @Test 19 | public void parseSimpleString() { 20 | var parser = new SimpleJsonParser(); 21 | var result = parser.parse("\"" + "hoge" + "\""); 22 | assertEquals("hoge", ((Ast.JsonString) result.value).value); 23 | } 24 | 25 | @Test 26 | public void parseBackEscapedString() { 27 | var parser = new SimpleJsonParser(); 28 | var result = parser.parse("\"" + "\\b" + "\""); 29 | assertEquals("\b", ((Ast.JsonString) result.value).value); 30 | } 31 | 32 | @Test 33 | public void parseFormFeedEscapedString() { 34 | var parser = new SimpleJsonParser(); 35 | var result = parser.parse("\"" + "\\f" + "\""); 36 | assertEquals("\f", ((Ast.JsonString) result.value).value); 37 | } 38 | 39 | @Test 40 | public void parseTabEscapedString() { 41 | var parser = new SimpleJsonParser(); 42 | var result = parser.parse("\"" + "\\t" + "\""); 43 | assertEquals("\t", ((Ast.JsonString) result.value).value); 44 | } 45 | 46 | @Test 47 | public void parseNlEscapedString() { 48 | var parser = new SimpleJsonParser(); 49 | var result = parser.parse("\"" + "\\n" + "\""); 50 | assertEquals("\n", ((Ast.JsonString) result.value).value); 51 | } 52 | 53 | @Test 54 | public void testLfEscapedString() { 55 | var parser = new SimpleJsonParser(); 56 | var result = parser.parse("\"" + "\\r" + "\""); 57 | assertEquals("\r", ((Ast.JsonString) result.value).value); 58 | } 59 | 60 | @Test 61 | public 
void testBachSlashEscapedString() { 62 | var parser = new SimpleJsonParser(); 63 | var result = parser.parse("\"" + "\\\\" + "\""); 64 | assertEquals("\\", ((Ast.JsonString) result.value).value); 65 | } 66 | 67 | @Test 68 | public void testSlashEscapedString() { 69 | var parser = new SimpleJsonParser(); 70 | var result = parser.parse("\"" + "\\/" + "\""); 71 | assertEquals("/", ((Ast.JsonString) result.value).value); 72 | } 73 | 74 | @Test 75 | public void testDoubleQuoteEscapedString() { 76 | var parser = new SimpleJsonParser(); 77 | var result = parser.parse("\"" + "\\\"" + "\""); 78 | assertEquals("\"", ((Ast.JsonString) result.value).value); 79 | } 80 | 81 | @Test 82 | public void testUnicodeEscapedString() { 83 | var parser = new SimpleJsonParser(); 84 | var result = parser.parse("\"" + "\\u0041\\u0042\\u0043\\u006A" + "\""); 85 | assertEquals("ABCj", ((Ast.JsonString) result.value).value); 86 | } 87 | 88 | @Test 89 | public void parseTrue() { 90 | var parser = new SimpleJsonParser(); 91 | var result = parser.parse("true"); 92 | assertTrue(result.value instanceof Ast.JsonTrue); 93 | } 94 | 95 | @Test 96 | public void parseFalse() { 97 | var parser = new SimpleJsonParser(); 98 | var result = parser.parse("false"); 99 | assertTrue(result.value instanceof Ast.JsonFalse); 100 | } 101 | 102 | @Test 103 | public void parseNull() { 104 | var parser = new SimpleJsonParser(); 105 | ParseResult nullValue = parser.parse("null"); 106 | } 107 | 108 | @Test 109 | public void parseEmptyArray() { 110 | var parser = new SimpleJsonParser(); 111 | var result = parser.parse("[]"); 112 | assertEquals(true, result.value instanceof Ast.JsonArray); 113 | var array = (Ast.JsonArray)result.value; 114 | assertEquals(List.of(), array.elements); 115 | } 116 | 117 | @Test 118 | public void parseOneElementArray() { 119 | var parser = new SimpleJsonParser(); 120 | var result = parser.parse("[1]"); 121 | assertEquals(true, result.value instanceof Ast.JsonArray); 122 | var array = 
(Ast.JsonArray)result.value; 123 | assertEquals(List.of(new Ast.JsonNumber(1)), array.elements); 124 | } 125 | 126 | @Test 127 | public void parseTwoElementArray() { 128 | var parser = new SimpleJsonParser(); 129 | var result = parser.parse("[1, 2]"); 130 | assertEquals(true, result.value instanceof Ast.JsonArray); 131 | var array = (Ast.JsonArray)result.value; 132 | assertEquals(List.of(new Ast.JsonNumber(1), new Ast.JsonNumber(2)), array.elements); 133 | } 134 | 135 | @Test 136 | public void parseEmptyObject() { 137 | var parser = new SimpleJsonParser(); 138 | var result = parser.parse("{}"); 139 | assertEquals(true, result.value instanceof Ast.JsonObject); 140 | var object = (Ast.JsonObject)result.value; 141 | assertEquals(List.of(), object.properties); 142 | } 143 | 144 | @Test 145 | public void parseOnePropertyObject() { 146 | var parser = new SimpleJsonParser(); 147 | var result = parser.parse("{\"foo\":1}"); 148 | assertEquals(true, result.value instanceof Ast.JsonObject); 149 | var object = (Ast.JsonObject)result.value; 150 | assertEquals(List.of(Pair.of(new Ast.JsonString("foo"), new Ast.JsonNumber(1))), object.properties); 151 | } 152 | 153 | @Test 154 | public void parseMultiPropertyObject() { 155 | var parser = new SimpleJsonParser(); 156 | var result = parser.parse("{\"foo\":1,\"bar\":2}"); 157 | assertEquals(true, result.value instanceof Ast.JsonObject); 158 | var object = (Ast.JsonObject)result.value; 159 | assertEquals( 160 | List.of( 161 | Pair.of(new Ast.JsonString("foo"), new Ast.JsonNumber(1)), 162 | Pair.of(new Ast.JsonString("bar"), new Ast.JsonNumber(2)) 163 | ), 164 | object.properties 165 | ); 166 | } 167 | 168 | @Test 169 | public void parseComplexObject() { 170 | var parser = new SimpleJsonParser(); 171 | var result = parser.parse("{\"foo\" : 1, \"bar\" : { \"baz\" : 2}}"); 172 | assertEquals(true, result.value instanceof Ast.JsonObject); 173 | var object = (Ast.JsonObject)result.value; 174 | assertEquals( 175 | new Ast.JsonObject( 176 | 
@REM ----------------------------------------------------------------------------
@REM Licensed to the Apache Software Foundation (ASF) under one
@REM or more contributor license agreements.  See the NOTICE file
@REM distributed with this work for additional information
@REM regarding copyright ownership.  The ASF licenses this file
@REM to you under the Apache License, Version 2.0 (the
@REM "License"); you may not use this file except in compliance
@REM with the License.  You may obtain a copy of the License at
@REM
@REM    http://www.apache.org/licenses/LICENSE-2.0
@REM
@REM Unless required by applicable law or agreed to in writing,
@REM software distributed under the License is distributed on an
@REM "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
@REM KIND, either express or implied.  See the License for the
@REM specific language governing permissions and limitations
@REM under the License.
@REM ----------------------------------------------------------------------------

@REM ----------------------------------------------------------------------------
@REM Maven2 Start Up Batch script
@REM
@REM Required ENV vars:
@REM JAVA_HOME - location of a JDK home dir
@REM
@REM Optional ENV vars
@REM M2_HOME - location of maven2's installed home dir
@REM MAVEN_BATCH_ECHO - set to 'on' to enable the echoing of the batch commands
@REM MAVEN_BATCH_PAUSE - set to 'on' to wait for a key stroke before ending
@REM MAVEN_OPTS - parameters passed to the Java VM when running Maven
@REM     e.g. to debug Maven itself, use
@REM set MAVEN_OPTS=-Xdebug -Xrunjdwp:transport=dt_socket,server=y,suspend=y,address=8000
@REM MAVEN_SKIP_RC - flag to disable loading of mavenrc files
@REM ----------------------------------------------------------------------------

@REM Begin all REM lines with '@' in case MAVEN_BATCH_ECHO is 'on'
@echo off
@REM set title of command window
title %0
@REM enable echoing by setting MAVEN_BATCH_ECHO to 'on'
@if "%MAVEN_BATCH_ECHO%" == "on" echo %MAVEN_BATCH_ECHO%

@REM set %HOME% to equivalent of $HOME
if "%HOME%" == "" (set "HOME=%HOMEDRIVE%%HOMEPATH%")

@REM Execute a user defined script before this one
if not "%MAVEN_SKIP_RC%" == "" goto skipRcPre
@REM check for pre script, once with legacy .bat ending and once with .cmd ending
if exist "%HOME%\mavenrc_pre.bat" call "%HOME%\mavenrc_pre.bat"
if exist "%HOME%\mavenrc_pre.cmd" call "%HOME%\mavenrc_pre.cmd"
:skipRcPre

@setlocal

set ERROR_CODE=0

@REM To isolate internal variables from possible post scripts, we use another setlocal
@setlocal

@REM ==== START VALIDATION ====
if not "%JAVA_HOME%" == "" goto OkJHome

echo.
echo Error: JAVA_HOME not found in your environment. >&2
echo Please set the JAVA_HOME variable in your environment to match the >&2
echo location of your Java installation. >&2
echo.
goto error

:OkJHome
if exist "%JAVA_HOME%\bin\java.exe" goto init

echo.
echo Error: JAVA_HOME is set to an invalid directory. >&2
echo JAVA_HOME = "%JAVA_HOME%" >&2
echo Please set the JAVA_HOME variable in your environment to match the >&2
echo location of your Java installation. >&2
echo.
goto error

@REM ==== END VALIDATION ====

:init

@REM Find the project base dir, i.e. the directory that contains the folder ".mvn".
@REM Fallback to current working directory if not found.

set MAVEN_PROJECTBASEDIR=%MAVEN_BASEDIR%
IF NOT "%MAVEN_PROJECTBASEDIR%"=="" goto endDetectBaseDir

set EXEC_DIR=%CD%
set WDIR=%EXEC_DIR%
@REM Walk up one directory at a time; stop when ".mvn" is found or when "cd .."
@REM no longer changes the current directory (i.e. the drive root was reached).
:findBaseDir
IF EXIST "%WDIR%"\.mvn goto baseDirFound
cd ..
IF "%WDIR%"=="%CD%" goto baseDirNotFound
set WDIR=%CD%
goto findBaseDir

:baseDirFound
set MAVEN_PROJECTBASEDIR=%WDIR%
cd "%EXEC_DIR%"
goto endDetectBaseDir

:baseDirNotFound
set MAVEN_PROJECTBASEDIR=%EXEC_DIR%
cd "%EXEC_DIR%"

:endDetectBaseDir

IF NOT EXIST "%MAVEN_PROJECTBASEDIR%\.mvn\jvm.config" goto endReadAdditionalConfig

@REM Concatenate every line of jvm.config into one space-separated option string.
@setlocal EnableExtensions EnableDelayedExpansion
for /F "usebackq delims=" %%a in ("%MAVEN_PROJECTBASEDIR%\.mvn\jvm.config") do set JVM_CONFIG_MAVEN_PROPS=!JVM_CONFIG_MAVEN_PROPS! %%a
@endlocal & set JVM_CONFIG_MAVEN_PROPS=%JVM_CONFIG_MAVEN_PROPS%

:endReadAdditionalConfig

SET MAVEN_JAVA_EXE="%JAVA_HOME%\bin\java.exe"
set WRAPPER_JAR="%MAVEN_PROJECTBASEDIR%\.mvn\wrapper\maven-wrapper.jar"
set WRAPPER_LAUNCHER=org.apache.maven.wrapper.MavenWrapperMain

set DOWNLOAD_URL="https://repo.maven.apache.org/maven2/io/takari/maven-wrapper/0.5.5/maven-wrapper-0.5.5.jar"

@REM Override the default download URL with "wrapperUrl" from maven-wrapper.properties.
@REM "usebackq" is required here: without it FOR /F treats the double-quoted set as a
@REM literal string (the path text itself) instead of a file to read, so the property
@REM would never be found.
FOR /F "usebackq tokens=1,2 delims==" %%A IN ("%MAVEN_PROJECTBASEDIR%\.mvn\wrapper\maven-wrapper.properties") DO (
    IF "%%A"=="wrapperUrl" SET DOWNLOAD_URL=%%B
)

@REM Extension to allow automatically downloading the maven-wrapper.jar from Maven-central
@REM This allows using the maven wrapper in projects that prohibit checking in binary data.
if exist %WRAPPER_JAR% (
    if "%MVNW_VERBOSE%" == "true" (
        echo Found %WRAPPER_JAR%
    )
) else (
    if not "%MVNW_REPOURL%" == "" (
        SET DOWNLOAD_URL="%MVNW_REPOURL%/io/takari/maven-wrapper/0.5.5/maven-wrapper-0.5.5.jar"
    )
    if "%MVNW_VERBOSE%" == "true" (
        echo Couldn't find %WRAPPER_JAR%, downloading it ...
        echo Downloading from: %DOWNLOAD_URL%
    )

    powershell -Command "&{"^
        "$webclient = new-object System.Net.WebClient;"^
        "if (-not ([string]::IsNullOrEmpty('%MVNW_USERNAME%') -and [string]::IsNullOrEmpty('%MVNW_PASSWORD%'))) {"^
        "$webclient.Credentials = new-object System.Net.NetworkCredential('%MVNW_USERNAME%', '%MVNW_PASSWORD%');"^
        "}"^
        "[Net.ServicePointManager]::SecurityProtocol = [Net.SecurityProtocolType]::Tls12; $webclient.DownloadFile('%DOWNLOAD_URL%', '%WRAPPER_JAR%')"^
        "}"
    if "%MVNW_VERBOSE%" == "true" (
        echo Finished downloading %WRAPPER_JAR%
    )
)
@REM End of extension

@REM Provide a "standardized" way to retrieve the CLI args that will
@REM work with both Windows and non-Windows executions.
set MAVEN_CMD_LINE_ARGS=%*

%MAVEN_JAVA_EXE% %JVM_CONFIG_MAVEN_PROPS% %MAVEN_OPTS% %MAVEN_DEBUG_OPTS% -classpath %WRAPPER_JAR% "-Dmaven.multiModuleProjectDirectory=%MAVEN_PROJECTBASEDIR%" %WRAPPER_LAUNCHER% %MAVEN_CONFIG% %*
if ERRORLEVEL 1 goto error
goto end

:error
set ERROR_CODE=1

:end
@REM Leave the inner setlocal scope but carry ERROR_CODE out to the outer one.
@endlocal & set ERROR_CODE=%ERROR_CODE%

if not "%MAVEN_SKIP_RC%" == "" goto skipRcPost
@REM check for post script, once with legacy .bat ending and once with .cmd ending
if exist "%HOME%\mavenrc_post.bat" call "%HOME%\mavenrc_post.bat"
if exist "%HOME%\mavenrc_post.cmd" call "%HOME%\mavenrc_post.cmd"
:skipRcPost

@REM pause the script if MAVEN_BATCH_PAUSE is set to 'on'
if "%MAVEN_BATCH_PAUSE%" == "on" pause

if "%MAVEN_TERMINATE_CMD%" == "on" exit %ERROR_CODE%

exit /B %ERROR_CODE%
t.current().type); 33 | } 34 | 35 | @Test 36 | public void tokenizeLBracket() { 37 | var t = new SimpleJsonTokenizer("["); 38 | t.moveNext(); 39 | assertEquals(Token.Type.LBRACKET, t.current().type); 40 | } 41 | 42 | @Test 43 | public void tokenizeRBracket() { 44 | var t = new SimpleJsonTokenizer("]"); 45 | t.moveNext(); 46 | assertEquals(Token.Type.RBRACKET, t.current().type); 47 | } 48 | 49 | @Test 50 | public void tokenizeComma() { 51 | var t = new SimpleJsonTokenizer(","); 52 | t.moveNext(); 53 | assertEquals(Token.Type.COMMA, t.current().type); 54 | } 55 | 56 | @Test 57 | public void tokenizeColon() { 58 | var t = new SimpleJsonTokenizer(":"); 59 | t.moveNext(); 60 | assertEquals(Token.Type.COLON, t.current().type); 61 | } 62 | 63 | @Test 64 | public void tokenizeTrue() { 65 | var t = new SimpleJsonTokenizer("true"); 66 | t.moveNext(); 67 | assertEquals(Token.Type.TRUE, t.current().type); 68 | } 69 | 70 | @Test 71 | public void tokenizeFalse() { 72 | var t = new SimpleJsonTokenizer("false"); 73 | t.moveNext(); 74 | assertEquals(Token.Type.FALSE, t.current().type); 75 | } 76 | 77 | @Test 78 | public void tokenizeNull() { 79 | var t = new SimpleJsonTokenizer("null"); 80 | t.moveNext(); 81 | assertEquals(Token.Type.NULL, t.current().type); 82 | } 83 | 84 | @Test void tokenizeMultpleToken() { 85 | var t = new SimpleJsonTokenizer("[true]"); 86 | t.moveNext(); 87 | assertEquals(Token.Type.LBRACKET, t.current().type); 88 | t.moveNext(); 89 | assertEquals(Token.Type.TRUE, t.current().type); 90 | t.moveNext(); 91 | assertEquals(Token.Type.RBRACKET, t.current().type); 92 | } 93 | 94 | @Test void tokenizeStringLiterals() { 95 | JsonTokenizer t; 96 | 97 | t = new SimpleJsonTokenizer("\"\""); 98 | t.moveNext(); 99 | assertEquals(Token.Type.STRING, t.current().type); 100 | assertEquals("", t.current().value); 101 | 102 | t = new SimpleJsonTokenizer("\"a\""); 103 | t.moveNext(); 104 | assertEquals(Token.Type.STRING, t.current().type); 105 | assertEquals("a", 
t.current().value); 106 | 107 | t = new SimpleJsonTokenizer("\"ab\""); 108 | t.moveNext(); 109 | assertEquals(Token.Type.STRING, t.current().type); 110 | assertEquals("ab", t.current().value); 111 | 112 | t = new SimpleJsonTokenizer("\"\\r\\n\\f\\b\""); 113 | t.moveNext(); 114 | assertEquals(Token.Type.STRING, t.current().type); 115 | assertEquals("\r\n\f\b", t.current().value); 116 | 117 | } 118 | 119 | @Test void tokenizePositiveIntegerLiterals() { 120 | JsonTokenizer t; 121 | 122 | t = new SimpleJsonTokenizer("100"); 123 | t.moveNext(); 124 | assertEquals(Token.Type.INTEGER, t.current().type); 125 | assertEquals(100, t.current().value); 126 | 127 | t = new SimpleJsonTokenizer("50"); 128 | t.moveNext(); 129 | assertEquals(Token.Type.INTEGER, t.current().type); 130 | assertEquals(50, t.current().value); 131 | 132 | t = new SimpleJsonTokenizer("1"); 133 | t.moveNext(); 134 | assertEquals(Token.Type.INTEGER, t.current().type); 135 | assertEquals(1, t.current().value); 136 | 137 | t = new SimpleJsonTokenizer("0"); 138 | t.moveNext(); 139 | assertEquals(Token.Type.INTEGER, t.current().type); 140 | assertEquals(0, t.current().value); 141 | 142 | t = new SimpleJsonTokenizer(Integer.toString(Integer.MAX_VALUE)); 143 | t.moveNext(); 144 | assertEquals(Token.Type.INTEGER, t.current().type); 145 | assertEquals(Integer.MAX_VALUE, t.current().value); 146 | } 147 | 148 | @Test void tokenizeNegativeIntegerLiterals() { 149 | JsonTokenizer t; 150 | 151 | t = new SimpleJsonTokenizer("-100"); 152 | t.moveNext(); 153 | assertEquals(Token.Type.INTEGER, t.current().type); 154 | assertEquals(-100, t.current().value); 155 | 156 | t = new SimpleJsonTokenizer("-50"); 157 | t.moveNext(); 158 | assertEquals(Token.Type.INTEGER, t.current().type); 159 | assertEquals(-50, t.current().value); 160 | 161 | t = new SimpleJsonTokenizer("-1"); 162 | t.moveNext(); 163 | assertEquals(Token.Type.INTEGER, t.current().type); 164 | assertEquals(-1, t.current().value); 165 | 166 | t = new 
SimpleJsonTokenizer("-0"); 167 | t.moveNext(); 168 | assertEquals(Token.Type.INTEGER, t.current().type); 169 | assertEquals(0, t.current().value); 170 | 171 | t = new SimpleJsonTokenizer(Integer.toString(Integer.MIN_VALUE)); 172 | t.moveNext(); 173 | assertEquals(Token.Type.INTEGER, t.current().type); 174 | assertEquals(Integer.MIN_VALUE, t.current().value); 175 | } 176 | 177 | @Test void tokenizeMultpleTokenWithWhitespace1() { 178 | var t = new SimpleJsonTokenizer("[ true ]"); 179 | t.moveNext(); 180 | assertEquals(Token.Type.LBRACKET, t.current().type); 181 | t.moveNext(); 182 | assertEquals(Token.Type.TRUE, t.current().type); 183 | t.moveNext(); 184 | assertEquals(Token.Type.RBRACKET, t.current().type); 185 | } 186 | 187 | @Test void tokenizeMultpleTokenWithWhitespace2() { 188 | var t = new SimpleJsonTokenizer("[ 1, true, \"foo\" ]"); 189 | t.moveNext(); 190 | assertEquals(Token.Type.LBRACKET, t.current().type); 191 | 192 | t.moveNext(); 193 | assertEquals(Token.Type.INTEGER, t.current().type); 194 | assertEquals(1, t.current().value); 195 | 196 | t.moveNext(); 197 | assertEquals(Token.Type.TRUE, t.current().type); 198 | 199 | t.moveNext(); 200 | assertEquals(Token.Type.COMMA, t.current().type); 201 | 202 | t.moveNext(); 203 | assertEquals(Token.Type.STRING, t.current().type); 204 | assertEquals("foo", t.current().value); 205 | 206 | t.moveNext(); 207 | assertEquals(Token.Type.RBRACKET, t.current().type); 208 | } 209 | 210 | @Test void tokenizeTokenizeArrayLiteral() { 211 | var t = new SimpleJsonTokenizer("[ true, false ]"); 212 | t.moveNext(); 213 | assertEquals(Token.Type.LBRACKET, t.current().type); 214 | t.moveNext(); 215 | assertEquals(Token.Type.TRUE, t.current().type); 216 | t.moveNext(); 217 | assertEquals(Token.Type.COMMA, t.current().type); 218 | t.moveNext(); 219 | assertEquals(Token.Type.FALSE, t.current().type); 220 | t.moveNext(); 221 | assertEquals(Token.Type.RBRACKET, t.current().type); 222 | } 223 | 224 | @Test 225 | public void 
failToTokenizeTru() { 226 | assertThrows(TokenizerException.class, () -> { 227 | var t = new SimpleJsonTokenizer("tru"); 228 | t.moveNext(); 229 | }); 230 | } 231 | } 232 | -------------------------------------------------------------------------------- /code/chapter3/src/main/java/parser/SimpleJsonTokenizer.java: -------------------------------------------------------------------------------- 1 | package parser; 2 | 3 | import java.util.ArrayList; 4 | import java.util.List; 5 | 6 | public class SimpleJsonTokenizer implements JsonTokenizer { 7 | private final String input; 8 | private int index; 9 | private Token fetched; 10 | 11 | public SimpleJsonTokenizer(String input) { 12 | this.input = input; 13 | this.index = 0; 14 | } 15 | 16 | public String rest() { 17 | return input.substring(index); 18 | } 19 | 20 | public List tokenizeAll() { 21 | List tokens = new ArrayList<>(); 22 | while (moveNext()) { 23 | tokens.add(current()); 24 | } 25 | tokens.add(new Token(Token.Type.EOF, null)); 26 | return tokens; 27 | } 28 | 29 | private static boolean isDigit(char ch) { 30 | return '0' <= ch && ch <= '9'; 31 | } 32 | 33 | private boolean tokenizeNumber(boolean positive) { 34 | char firstChar = input.charAt(index); 35 | if(!isDigit(firstChar)) return false; 36 | int result = 0; 37 | while(index < input.length()) { 38 | char ch = input.charAt(index); 39 | if(!isDigit(ch)) { 40 | fetched = new Token(Token.Type.INTEGER, positive ? result : -result); 41 | return true; 42 | } 43 | result = result * 10 + (ch - '0'); 44 | index++; 45 | } 46 | fetched = new Token(Token.Type.INTEGER, positive ? 
result : -result); 47 | return true; 48 | } 49 | 50 | private boolean tokenizeStringLiteral() { 51 | char firstChar = input.charAt(index); 52 | int beginIndex = index; 53 | if(firstChar != '"') return false; 54 | index++; 55 | var builder = new StringBuffer(); 56 | while(index < input.length()) { 57 | char ch = input.charAt(index); 58 | if(ch == '"') { 59 | fetched = new Token(Token.Type.STRING, builder.toString()); 60 | index++; 61 | return true; 62 | } 63 | if(ch == '\\') { 64 | index++; 65 | if(index >= input.length()) return false; 66 | char nextCh = input.charAt(index); 67 | switch(nextCh) { 68 | case '\\': 69 | builder.append('\\'); 70 | break; 71 | case '"': 72 | builder.append('"'); 73 | break; 74 | case '/': 75 | builder.append('/'); 76 | break; 77 | case 't': 78 | builder.append('\t'); 79 | break; 80 | case 'f': 81 | builder.append('\f'); 82 | break; 83 | case 'b': 84 | builder.append('\b'); 85 | break; 86 | case 'r': 87 | builder.append('\r'); 88 | break; 89 | case 'n': 90 | builder.append('\n'); 91 | break; 92 | case 'u': 93 | if((index + 1) + 4 >= input.length()) { 94 | throw new TokenizerException("unicode escape ends with EOF: " + input.substring(index)); 95 | } 96 | var unicodeEscape= input.substring(index + 1, index + 1 + 4); 97 | if(!unicodeEscape.matches("[0-9a-fA-F]{4}")) { 98 | throw new TokenizerException("illegal unicode escape: \\u" + unicodeEscape); 99 | } 100 | builder.append((char)Integer.parseInt(unicodeEscape, 16)); 101 | index += 4; 102 | break; 103 | } 104 | } else { 105 | builder.append(ch); 106 | } 107 | index++; 108 | } 109 | return false; 110 | } 111 | 112 | private void accept(String literal, Token.Type type, Object value) { 113 | String head = input.substring(index); 114 | if(head.indexOf(literal) == 0) { 115 | fetched = new Token(type, value); 116 | index += literal.length(); 117 | } else { 118 | throw new TokenizerException("expected: " + literal + ", actual: " + head); 119 | } 120 | } 121 | 122 | @Override 123 | public Token 
current() { 124 | return fetched; 125 | } 126 | 127 | @Override 128 | public boolean moveNext() { 129 | LOOP: 130 | while(index < input.length()) { 131 | char ch = input.charAt(index); 132 | switch (ch) { 133 | case '[': 134 | accept("[", Token.Type.LBRACKET, "["); 135 | return true; 136 | case ']': 137 | accept("]", Token.Type.RBRACKET, "]"); 138 | return true; 139 | case '{': 140 | accept("{", Token.Type.LBRACE, "{"); 141 | return true; 142 | case '}': 143 | accept("}", Token.Type.RBRACE, "}"); 144 | return true; 145 | case '(': 146 | accept("(", Token.Type.LPAREN, "("); 147 | return true; 148 | case ')': 149 | accept(")", Token.Type.RPAREN, ")"); 150 | return true; 151 | case ',': 152 | accept(",", Token.Type.COMMA, ","); 153 | return true; 154 | case ':': 155 | accept(":", Token.Type.COLON, ":"); 156 | return true; 157 | // true 158 | case 't': 159 | accept("true", Token.Type.TRUE, true); 160 | return true; 161 | // false 162 | case 'f': 163 | accept("false", Token.Type.FALSE, false); 164 | return true; 165 | case 'n': { 166 | String actual; 167 | if (index + 4 <= input.length()) { 168 | actual = input.substring(index, index + 4); 169 | if (actual.equals("null")) { 170 | fetched = new Token(Token.Type.NULL, null); 171 | index += 4; 172 | return true; 173 | } else { 174 | throw new TokenizerException("expected: null, actual: " + actual); 175 | } 176 | } else { 177 | actual = input.substring(index); 178 | throw new TokenizerException("expected: null, actual: " + actual); 179 | } 180 | } 181 | case '"': 182 | return tokenizeStringLiteral(); 183 | // whitespace 184 | case ' ': 185 | case '\t': 186 | case '\n': 187 | case '\r': 188 | case '\b': 189 | case '\f': 190 | char next = 0; 191 | do { 192 | index++; 193 | next = input.charAt(index); 194 | } while (index < input.length() && Character.isWhitespace(next)); 195 | continue LOOP; 196 | default: 197 | if('0' <= ch && ch <= '9') { 198 | return tokenizeNumber(true); 199 | } else if (ch == '+') { 200 | index++; 201 | 
return tokenizeNumber(true); 202 | } else if (ch == '-') { 203 | index++; 204 | return tokenizeNumber(false); 205 | } else { 206 | throw new TokenizerException("unexpected character: " + ch); 207 | } 208 | } 209 | } 210 | return false; 211 | } 212 | } 213 | -------------------------------------------------------------------------------- /code/chapter5/gradlew: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | # 4 | # Copyright © 2015-2021 the original authors. 5 | # 6 | # Licensed under the Apache License, Version 2.0 (the "License"); 7 | # you may not use this file except in compliance with the License. 8 | # You may obtain a copy of the License at 9 | # 10 | # https://www.apache.org/licenses/LICENSE-2.0 11 | # 12 | # Unless required by applicable law or agreed to in writing, software 13 | # distributed under the License is distributed on an "AS IS" BASIS, 14 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | # See the License for the specific language governing permissions and 16 | # limitations under the License. 17 | # 18 | 19 | ############################################################################## 20 | # 21 | # Gradle start up script for POSIX generated by Gradle. 22 | # 23 | # Important for running: 24 | # 25 | # (1) You need a POSIX-compliant shell to run this script. 
If your /bin/sh is 26 | # noncompliant, but you have some other compliant shell such as ksh or 27 | # bash, then to run this script, type that shell name before the whole 28 | # command line, like: 29 | # 30 | # ksh Gradle 31 | # 32 | # Busybox and similar reduced shells will NOT work, because this script 33 | # requires all of these POSIX shell features: 34 | # * functions; 35 | # * expansions «$var», «${var}», «${var:-default}», «${var+SET}», 36 | # «${var#prefix}», «${var%suffix}», and «$( cmd )»; 37 | # * compound commands having a testable exit status, especially «case»; 38 | # * various built-in commands including «command», «set», and «ulimit». 39 | # 40 | # Important for patching: 41 | # 42 | # (2) This script targets any POSIX shell, so it avoids extensions provided 43 | # by Bash, Ksh, etc; in particular arrays are avoided. 44 | # 45 | # The "traditional" practice of packing multiple parameters into a 46 | # space-separated string is a well documented source of bugs and security 47 | # problems, so this is (mostly) avoided, by progressively accumulating 48 | # options in "$@", and eventually passing that to Java. 49 | # 50 | # Where the inherited environment variables (DEFAULT_JVM_OPTS, JAVA_OPTS, 51 | # and GRADLE_OPTS) rely on word-splitting, this is performed explicitly; 52 | # see the in-line comments for details. 53 | # 54 | # There are tweaks for specific operating systems such as AIX, CygWin, 55 | # Darwin, MinGW, and NonStop. 56 | # 57 | # (3) This script is generated from the Groovy template 58 | # https://github.com/gradle/gradle/blob/master/subprojects/plugins/src/main/resources/org/gradle/api/internal/plugins/unixStartScript.txt 59 | # within the Gradle project. 60 | # 61 | # You can find Gradle at https://github.com/gradle/gradle/. 
62 | # 63 | ############################################################################## 64 | 65 | # Attempt to set APP_HOME 66 | 67 | # Resolve links: $0 may be a link 68 | app_path=$0 69 | 70 | # Need this for daisy-chained symlinks. 71 | while 72 | APP_HOME=${app_path%"${app_path##*/}"} # leaves a trailing /; empty if no leading path 73 | [ -h "$app_path" ] 74 | do 75 | ls=$( ls -ld "$app_path" ) 76 | link=${ls#*' -> '} 77 | case $link in #( 78 | /*) app_path=$link ;; #( 79 | *) app_path=$APP_HOME$link ;; 80 | esac 81 | done 82 | 83 | APP_HOME=$( cd "${APP_HOME:-./}" && pwd -P ) || exit 84 | 85 | APP_NAME="Gradle" 86 | APP_BASE_NAME=${0##*/} 87 | 88 | # Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script. 89 | DEFAULT_JVM_OPTS='"-Xmx64m" "-Xms64m"' 90 | 91 | # Use the maximum available, or set MAX_FD != -1 to use that value. 92 | MAX_FD=maximum 93 | 94 | warn () { 95 | echo "$*" 96 | } >&2 97 | 98 | die () { 99 | echo 100 | echo "$*" 101 | echo 102 | exit 1 103 | } >&2 104 | 105 | # OS specific support (must be 'true' or 'false'). 106 | cygwin=false 107 | msys=false 108 | darwin=false 109 | nonstop=false 110 | case "$( uname )" in #( 111 | CYGWIN* ) cygwin=true ;; #( 112 | Darwin* ) darwin=true ;; #( 113 | MSYS* | MINGW* ) msys=true ;; #( 114 | NONSTOP* ) nonstop=true ;; 115 | esac 116 | 117 | CLASSPATH=$APP_HOME/gradle/wrapper/gradle-wrapper.jar 118 | 119 | 120 | # Determine the Java command to use to start the JVM. 121 | if [ -n "$JAVA_HOME" ] ; then 122 | if [ -x "$JAVA_HOME/jre/sh/java" ] ; then 123 | # IBM's JDK on AIX uses strange locations for the executables 124 | JAVACMD=$JAVA_HOME/jre/sh/java 125 | else 126 | JAVACMD=$JAVA_HOME/bin/java 127 | fi 128 | if [ ! -x "$JAVACMD" ] ; then 129 | die "ERROR: JAVA_HOME is set to an invalid directory: $JAVA_HOME 130 | 131 | Please set the JAVA_HOME variable in your environment to match the 132 | location of your Java installation." 
133 | fi 134 | else 135 | JAVACMD=java 136 | which java >/dev/null 2>&1 || die "ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH. 137 | 138 | Please set the JAVA_HOME variable in your environment to match the 139 | location of your Java installation." 140 | fi 141 | 142 | # Increase the maximum file descriptors if we can. 143 | if ! "$cygwin" && ! "$darwin" && ! "$nonstop" ; then 144 | case $MAX_FD in #( 145 | max*) 146 | MAX_FD=$( ulimit -H -n ) || 147 | warn "Could not query maximum file descriptor limit" 148 | esac 149 | case $MAX_FD in #( 150 | '' | soft) :;; #( 151 | *) 152 | ulimit -n "$MAX_FD" || 153 | warn "Could not set maximum file descriptor limit to $MAX_FD" 154 | esac 155 | fi 156 | 157 | # Collect all arguments for the java command, stacking in reverse order: 158 | # * args from the command line 159 | # * the main class name 160 | # * -classpath 161 | # * -D...appname settings 162 | # * --module-path (only if needed) 163 | # * DEFAULT_JVM_OPTS, JAVA_OPTS, and GRADLE_OPTS environment variables. 164 | 165 | # For Cygwin or MSYS, switch paths to Windows format before running java 166 | if "$cygwin" || "$msys" ; then 167 | APP_HOME=$( cygpath --path --mixed "$APP_HOME" ) 168 | CLASSPATH=$( cygpath --path --mixed "$CLASSPATH" ) 169 | 170 | JAVACMD=$( cygpath --unix "$JAVACMD" ) 171 | 172 | # Now convert the arguments - kludge to limit ourselves to /bin/sh 173 | for arg do 174 | if 175 | case $arg in #( 176 | -*) false ;; # don't mess with options #( 177 | /?*) t=${arg#/} t=/${t%%/*} # looks like a POSIX filepath 178 | [ -e "$t" ] ;; #( 179 | *) false ;; 180 | esac 181 | then 182 | arg=$( cygpath --path --ignore --mixed "$arg" ) 183 | fi 184 | # Roll the args list around exactly as many times as the number of 185 | # args, so each arg winds up back in the position where it started, but 186 | # possibly modified. 
187 | # 188 | # NB: a `for` loop captures its iteration list before it begins, so 189 | # changing the positional parameters here affects neither the number of 190 | # iterations, nor the values presented in `arg`. 191 | shift # remove old arg 192 | set -- "$@" "$arg" # push replacement arg 193 | done 194 | fi 195 | 196 | # Collect all arguments for the java command; 197 | # * $DEFAULT_JVM_OPTS, $JAVA_OPTS, and $GRADLE_OPTS can contain fragments of 198 | # shell script including quotes and variable substitutions, so put them in 199 | # double quotes to make sure that they get re-expanded; and 200 | # * put everything else in single quotes, so that it's not re-expanded. 201 | 202 | set -- \ 203 | "-Dorg.gradle.appname=$APP_BASE_NAME" \ 204 | -classpath "$CLASSPATH" \ 205 | org.gradle.wrapper.GradleWrapperMain \ 206 | "$@" 207 | 208 | # Use "xargs" to parse quoted args. 209 | # 210 | # With -n1 it outputs one arg per line, with the quotes and backslashes removed. 211 | # 212 | # In Bash we could simply go: 213 | # 214 | # readarray ARGS < <( xargs -n1 <<<"$var" ) && 215 | # set -- "${ARGS[@]}" "$@" 216 | # 217 | # but POSIX shell has neither arrays nor command substitution, so instead we 218 | # post-process each arg (as a line of input to sed) to backslash-escape any 219 | # character that might be a shell metacharacter, then use eval to reverse 220 | # that process (while maintaining the separation between arguments), and wrap 221 | # the whole thing up as a single "set" statement. 222 | # 223 | # This will of course break if any of these variables contains a newline or 224 | # an unmatched quote. 
225 | # 226 | 227 | eval "set -- $( 228 | printf '%s\n' "$DEFAULT_JVM_OPTS $JAVA_OPTS $GRADLE_OPTS" | 229 | xargs -n1 | 230 | sed ' s~[^-[:alnum:]+,./:=@_]~\\&~g; ' | 231 | tr '\n' ' ' 232 | )" '"$@"' 233 | 234 | exec "$JAVACMD" "$@" 235 | -------------------------------------------------------------------------------- /code/chapter5/src/test/java/parser/ll1/LL1RecognizerTest.java: -------------------------------------------------------------------------------- 1 | package parser.ll1; 2 | 3 | import org.junit.jupiter.api.Test; 4 | 5 | import static java.util.List.of; 6 | import static org.junit.jupiter.api.Assertions.*; 7 | 8 | class LL1RecognizerTest { 9 | 10 | @Test 11 | void testSimpleGrammar() { 12 | // S -> A B 13 | // A -> a A | ε 14 | // B -> b 15 | var grammar = new Grammar("S", of( 16 | new Rule("S", of( 17 | new Expression.NonTerminal("A"), 18 | new Expression.NonTerminal("B") 19 | )), 20 | new Rule("A", of( 21 | new Expression.Terminal("a"), 22 | new Expression.NonTerminal("A") 23 | )), 24 | new Rule("A", of()), // 空規則 25 | new Rule("B", of(new Expression.Terminal("b"))) 26 | )); 27 | 28 | var recognizer = new LL1Recognizer(grammar); 29 | 30 | // "aab" を認識 31 | assertTrue(recognizer.recognize(of( 32 | new Expression.Terminal("a"), 33 | new Expression.Terminal("a"), 34 | new Expression.Terminal("b") 35 | ))); 36 | 37 | // "b" を認識 38 | assertTrue(recognizer.recognize(of( 39 | new Expression.Terminal("b") 40 | ))); 41 | 42 | // "aaab" を認識 43 | assertTrue(recognizer.recognize(of( 44 | new Expression.Terminal("a"), 45 | new Expression.Terminal("a"), 46 | new Expression.Terminal("a"), 47 | new Expression.Terminal("b") 48 | ))); 49 | 50 | // "aa" は認識しない(bが必要) 51 | assertFalse(recognizer.recognize(of( 52 | new Expression.Terminal("a"), 53 | new Expression.Terminal("a") 54 | ))); 55 | 56 | // "ba" は認識しない(順序が違う) 57 | assertFalse(recognizer.recognize(of( 58 | new Expression.Terminal("b"), 59 | new Expression.Terminal("a") 60 | ))); 61 | } 62 | 63 | @Test 64 | void 
testArithmeticGrammar() { 65 | // LL(1)版の算術式文法 66 | // E -> T E' 67 | // E' -> + T E' | ε 68 | // T -> F T' 69 | // T' -> * F T' | ε 70 | // F -> ( E ) | id 71 | var grammar = new Grammar("E", of( 72 | new Rule("E", of( 73 | new Expression.NonTerminal("T"), 74 | new Expression.NonTerminal("E'") 75 | )), 76 | new Rule("E'", of( 77 | new Expression.Terminal("+"), 78 | new Expression.NonTerminal("T"), 79 | new Expression.NonTerminal("E'") 80 | )), 81 | new Rule("E'", of()), 82 | new Rule("T", of( 83 | new Expression.NonTerminal("F"), 84 | new Expression.NonTerminal("T'") 85 | )), 86 | new Rule("T'", of( 87 | new Expression.Terminal("*"), 88 | new Expression.NonTerminal("F"), 89 | new Expression.NonTerminal("T'") 90 | )), 91 | new Rule("T'", of()), 92 | new Rule("F", of( 93 | new Expression.Terminal("("), 94 | new Expression.NonTerminal("E"), 95 | new Expression.Terminal(")") 96 | )), 97 | new Rule("F", of(new Expression.Terminal("id"))) 98 | )); 99 | 100 | var recognizer = new LL1Recognizer(grammar); 101 | 102 | // "id" を認識 103 | assertTrue(recognizer.recognize(of( 104 | new Expression.Terminal("id") 105 | ))); 106 | 107 | // "id + id" を認識 108 | assertTrue(recognizer.recognize(of( 109 | new Expression.Terminal("id"), 110 | new Expression.Terminal("+"), 111 | new Expression.Terminal("id") 112 | ))); 113 | 114 | // "id * id" を認識 115 | assertTrue(recognizer.recognize(of( 116 | new Expression.Terminal("id"), 117 | new Expression.Terminal("*"), 118 | new Expression.Terminal("id") 119 | ))); 120 | 121 | // "id + id * id" を認識 122 | assertTrue(recognizer.recognize(of( 123 | new Expression.Terminal("id"), 124 | new Expression.Terminal("+"), 125 | new Expression.Terminal("id"), 126 | new Expression.Terminal("*"), 127 | new Expression.Terminal("id") 128 | ))); 129 | 130 | // "(id + id)" を認識 131 | assertTrue(recognizer.recognize(of( 132 | new Expression.Terminal("("), 133 | new Expression.Terminal("id"), 134 | new Expression.Terminal("+"), 135 | new Expression.Terminal("id"), 136 
| new Expression.Terminal(")") 137 | ))); 138 | 139 | // "((id))" を認識 140 | assertTrue(recognizer.recognize(of( 141 | new Expression.Terminal("("), 142 | new Expression.Terminal("("), 143 | new Expression.Terminal("id"), 144 | new Expression.Terminal(")"), 145 | new Expression.Terminal(")") 146 | ))); 147 | 148 | // "+ id" は認識しない(不正) 149 | assertFalse(recognizer.recognize(of( 150 | new Expression.Terminal("+"), 151 | new Expression.Terminal("id") 152 | ))); 153 | 154 | // "id +" は認識しない(不完全) 155 | assertFalse(recognizer.recognize(of( 156 | new Expression.Terminal("id"), 157 | new Expression.Terminal("+") 158 | ))); 159 | 160 | // "(id" は認識しない(括弧が閉じていない) 161 | assertFalse(recognizer.recognize(of( 162 | new Expression.Terminal("("), 163 | new Expression.Terminal("id") 164 | ))); 165 | } 166 | 167 | @Test 168 | void testListGrammar() { 169 | // リスト文法(LL(1)版) 170 | // L -> E L' 171 | // L' -> , E L' | ε 172 | // E -> id 173 | var grammar = new Grammar("L", of( 174 | new Rule("L", of( 175 | new Expression.NonTerminal("E"), 176 | new Expression.NonTerminal("L'") 177 | )), 178 | new Rule("L'", of( 179 | new Expression.Terminal(","), 180 | new Expression.NonTerminal("E"), 181 | new Expression.NonTerminal("L'") 182 | )), 183 | new Rule("L'", of()), // 空規則 184 | new Rule("E", of(new Expression.Terminal("id"))) 185 | )); 186 | 187 | var recognizer = new LL1Recognizer(grammar); 188 | 189 | // "id" を認識 190 | assertTrue(recognizer.recognize(of( 191 | new Expression.Terminal("id") 192 | ))); 193 | 194 | // "id, id" を認識 195 | assertTrue(recognizer.recognize(of( 196 | new Expression.Terminal("id"), 197 | new Expression.Terminal(","), 198 | new Expression.Terminal("id") 199 | ))); 200 | 201 | // "id, id, id" を認識 202 | assertTrue(recognizer.recognize(of( 203 | new Expression.Terminal("id"), 204 | new Expression.Terminal(","), 205 | new Expression.Terminal("id"), 206 | new Expression.Terminal(","), 207 | new Expression.Terminal("id") 208 | ))); 209 | 210 | // ", id" は認識しない(不正) 211 | 
assertFalse(recognizer.recognize(of( 212 | new Expression.Terminal(","), 213 | new Expression.Terminal("id") 214 | ))); 215 | 216 | // "id," は認識しない(不完全) 217 | assertFalse(recognizer.recognize(of( 218 | new Expression.Terminal("id"), 219 | new Expression.Terminal(",") 220 | ))); 221 | } 222 | 223 | @Test 224 | void testBalancedParentheses() { 225 | // バランスの取れた括弧の文法 226 | // S -> ( S ) S | ε 227 | var grammar = new Grammar("S", of( 228 | new Rule("S", of( 229 | new Expression.Terminal("("), 230 | new Expression.NonTerminal("S"), 231 | new Expression.Terminal(")"), 232 | new Expression.NonTerminal("S") 233 | )), 234 | new Rule("S", of()) // 空規則 235 | )); 236 | 237 | var recognizer = new LL1Recognizer(grammar); 238 | 239 | // 空文字列を認識 240 | assertTrue(recognizer.recognize(of())); 241 | 242 | // "()" を認識 243 | assertTrue(recognizer.recognize(of( 244 | new Expression.Terminal("("), 245 | new Expression.Terminal(")") 246 | ))); 247 | 248 | // "()()" を認識 249 | assertTrue(recognizer.recognize(of( 250 | new Expression.Terminal("("), 251 | new Expression.Terminal(")"), 252 | new Expression.Terminal("("), 253 | new Expression.Terminal(")") 254 | ))); 255 | 256 | // "(())" を認識 257 | assertTrue(recognizer.recognize(of( 258 | new Expression.Terminal("("), 259 | new Expression.Terminal("("), 260 | new Expression.Terminal(")"), 261 | new Expression.Terminal(")") 262 | ))); 263 | 264 | // "((()))" を認識 265 | assertTrue(recognizer.recognize(of( 266 | new Expression.Terminal("("), 267 | new Expression.Terminal("("), 268 | new Expression.Terminal("("), 269 | new Expression.Terminal(")"), 270 | new Expression.Terminal(")"), 271 | new Expression.Terminal(")") 272 | ))); 273 | 274 | // "(()" は認識しない(不正) 275 | assertFalse(recognizer.recognize(of( 276 | new Expression.Terminal("("), 277 | new Expression.Terminal("("), 278 | new Expression.Terminal(")") 279 | ))); 280 | 281 | // "())" は認識しない(不正) 282 | assertFalse(recognizer.recognize(of( 283 | new Expression.Terminal("("), 284 | new 
Expression.Terminal(")"), 285 | new Expression.Terminal(")") 286 | ))); 287 | } 288 | } -------------------------------------------------------------------------------- /code/chapter5/src/test/java/parser/slr1/SLR1ParserTest.java: -------------------------------------------------------------------------------- 1 | package parser.slr1; 2 | 3 | import org.junit.jupiter.api.Test; 4 | 5 | import static java.util.List.of; 6 | import static org.junit.jupiter.api.Assertions.*; 7 | 8 | class SLR1ParserTest { 9 | 10 | @Test 11 | void testArithmeticGrammar() { 12 | // 算術式の文法(3レベルの優先順位:括弧、*、+) 13 | // E -> E + T | T 14 | // T -> T * F | F 15 | // F -> ( E ) | id 16 | var grammar = new Grammar("E", of( 17 | new Rule("E", of( 18 | new Expression.NonTerminal("E"), 19 | new Expression.Terminal("+"), 20 | new Expression.NonTerminal("T") 21 | )), 22 | new Rule("E", of(new Expression.NonTerminal("T"))), 23 | new Rule("T", of( 24 | new Expression.NonTerminal("T"), 25 | new Expression.Terminal("*"), 26 | new Expression.NonTerminal("F") 27 | )), 28 | new Rule("T", of(new Expression.NonTerminal("F"))), 29 | new Rule("F", of( 30 | new Expression.Terminal("("), 31 | new Expression.NonTerminal("E"), 32 | new Expression.Terminal(")") 33 | )), 34 | new Rule("F", of(new Expression.Terminal("id"))) 35 | )); 36 | 37 | var parser = new SLR1Parser(grammar); 38 | 39 | // "id" を認識 40 | assertTrue(parser.parse(of( 41 | new Expression.Terminal("id") 42 | ))); 43 | 44 | // "id + id" を認識 45 | assertTrue(parser.parse(of( 46 | new Expression.Terminal("id"), 47 | new Expression.Terminal("+"), 48 | new Expression.Terminal("id") 49 | ))); 50 | 51 | // "id * id" を認識 52 | assertTrue(parser.parse(of( 53 | new Expression.Terminal("id"), 54 | new Expression.Terminal("*"), 55 | new Expression.Terminal("id") 56 | ))); 57 | 58 | // "id + id * id" を認識(優先順位: id + (id * id)) 59 | assertTrue(parser.parse(of( 60 | new Expression.Terminal("id"), 61 | new Expression.Terminal("+"), 62 | new Expression.Terminal("id"), 63 | 
new Expression.Terminal("*"), 64 | new Expression.Terminal("id") 65 | ))); 66 | 67 | // "id * id + id" を認識(*が優先される: (id * id) + id) 68 | assertTrue(parser.parse(of( 69 | new Expression.Terminal("id"), 70 | new Expression.Terminal("*"), 71 | new Expression.Terminal("id"), 72 | new Expression.Terminal("+"), 73 | new Expression.Terminal("id") 74 | ))); 75 | 76 | // "(id + id) * id" を認識(括弧が優先される) 77 | assertTrue(parser.parse(of( 78 | new Expression.Terminal("("), 79 | new Expression.Terminal("id"), 80 | new Expression.Terminal("+"), 81 | new Expression.Terminal("id"), 82 | new Expression.Terminal(")"), 83 | new Expression.Terminal("*"), 84 | new Expression.Terminal("id") 85 | ))); 86 | 87 | // "((id))" を認識(ネストした括弧) 88 | assertTrue(parser.parse(of( 89 | new Expression.Terminal("("), 90 | new Expression.Terminal("("), 91 | new Expression.Terminal("id"), 92 | new Expression.Terminal(")"), 93 | new Expression.Terminal(")") 94 | ))); 95 | 96 | // "+ id" は認識しない(不正) 97 | assertFalse(parser.parse(of( 98 | new Expression.Terminal("+"), 99 | new Expression.Terminal("id") 100 | ))); 101 | 102 | // "id +" は認識しない(不完全) 103 | assertFalse(parser.parse(of( 104 | new Expression.Terminal("id"), 105 | new Expression.Terminal("+") 106 | ))); 107 | 108 | // "(id" は認識しない(括弧が閉じていない) 109 | assertFalse(parser.parse(of( 110 | new Expression.Terminal("("), 111 | new Expression.Terminal("id") 112 | ))); 113 | 114 | // "id)" は認識しない(括弧が対応していない) 115 | assertFalse(parser.parse(of( 116 | new Expression.Terminal("id"), 117 | new Expression.Terminal(")") 118 | ))); 119 | } 120 | 121 | @Test 122 | void testListGrammar() { 123 | // リスト文法(左再帰) 124 | // L -> L , E | E 125 | // E -> id 126 | var grammar = new Grammar("L", of( 127 | new Rule("L", of( 128 | new Expression.NonTerminal("L"), 129 | new Expression.Terminal(","), 130 | new Expression.NonTerminal("E") 131 | )), 132 | new Rule("L", of(new Expression.NonTerminal("E"))), 133 | new Rule("E", of(new Expression.Terminal("id"))) 134 | )); 135 | 136 | var 
parser = new SLR1Parser(grammar); 137 | 138 | // "id" を認識 139 | assertTrue(parser.parse(of( 140 | new Expression.Terminal("id") 141 | ))); 142 | 143 | // "id, id" を認識 144 | assertTrue(parser.parse(of( 145 | new Expression.Terminal("id"), 146 | new Expression.Terminal(","), 147 | new Expression.Terminal("id") 148 | ))); 149 | 150 | // "id, id, id" を認識 151 | assertTrue(parser.parse(of( 152 | new Expression.Terminal("id"), 153 | new Expression.Terminal(","), 154 | new Expression.Terminal("id"), 155 | new Expression.Terminal(","), 156 | new Expression.Terminal("id") 157 | ))); 158 | 159 | // ", id" は認識しない(不正) 160 | assertFalse(parser.parse(of( 161 | new Expression.Terminal(","), 162 | new Expression.Terminal("id") 163 | ))); 164 | 165 | // "id," は認識しない(不完全) 166 | assertFalse(parser.parse(of( 167 | new Expression.Terminal("id"), 168 | new Expression.Terminal(",") 169 | ))); 170 | 171 | // "id id" は認識しない(カンマがない) 172 | assertFalse(parser.parse(of( 173 | new Expression.Terminal("id"), 174 | new Expression.Terminal("id") 175 | ))); 176 | } 177 | 178 | @Test 179 | void testSimpleAssignmentGrammar() { 180 | // より単純な代入文の文法 181 | // S -> id = E | E 182 | // E -> E + T | T 183 | // T -> id 184 | var grammar = new Grammar("S", of( 185 | new Rule("S", of( 186 | new Expression.Terminal("id"), 187 | new Expression.Terminal("="), 188 | new Expression.NonTerminal("E") 189 | )), 190 | new Rule("S", of(new Expression.NonTerminal("E"))), 191 | new Rule("E", of( 192 | new Expression.NonTerminal("E"), 193 | new Expression.Terminal("+"), 194 | new Expression.NonTerminal("T") 195 | )), 196 | new Rule("E", of(new Expression.NonTerminal("T"))), 197 | new Rule("T", of(new Expression.Terminal("id"))) 198 | )); 199 | 200 | var parser = new SLR1Parser(grammar); 201 | 202 | // "id" を認識 203 | assertTrue(parser.parse(of( 204 | new Expression.Terminal("id") 205 | ))); 206 | 207 | // "id = id" を認識 208 | assertTrue(parser.parse(of( 209 | new Expression.Terminal("id"), 210 | new Expression.Terminal("="), 
211 | new Expression.Terminal("id") 212 | ))); 213 | 214 | // "id = id + id" を認識 215 | assertTrue(parser.parse(of( 216 | new Expression.Terminal("id"), 217 | new Expression.Terminal("="), 218 | new Expression.Terminal("id"), 219 | new Expression.Terminal("+"), 220 | new Expression.Terminal("id") 221 | ))); 222 | 223 | // "id + id" を認識 224 | assertTrue(parser.parse(of( 225 | new Expression.Terminal("id"), 226 | new Expression.Terminal("+"), 227 | new Expression.Terminal("id") 228 | ))); 229 | 230 | // "= id" は認識しない(不正) 231 | assertFalse(parser.parse(of( 232 | new Expression.Terminal("="), 233 | new Expression.Terminal("id") 234 | ))); 235 | 236 | // "id =" は認識しない(不完全) 237 | assertFalse(parser.parse(of( 238 | new Expression.Terminal("id"), 239 | new Expression.Terminal("=") 240 | ))); 241 | } 242 | 243 | @Test 244 | void testBalancedParentheses() { 245 | // バランスの取れた括弧の文法(SLR(1)版) 246 | // S -> S S | ( S ) | ε 247 | // 注:この文法は実際にはSLR(1)では扱えないため、競合が発生する 248 | var grammar = new Grammar("S", of( 249 | new Rule("S", of( 250 | new Expression.NonTerminal("S"), 251 | new Expression.NonTerminal("S") 252 | )), 253 | new Rule("S", of( 254 | new Expression.Terminal("("), 255 | new Expression.NonTerminal("S"), 256 | new Expression.Terminal(")") 257 | )), 258 | new Rule("S", of()) // 空規則 259 | )); 260 | 261 | // SLR(1)パーサーの構築時に競合が検出されることを確認 262 | assertThrows(IllegalStateException.class, () -> { 263 | new SLR1Parser(grammar); 264 | }); 265 | } 266 | 267 | @Test 268 | void testConflictDetection() { 269 | // シフト/リデュース競合を含む文法 270 | // S -> if E then S | if E then S else S | a 271 | // E -> b 272 | var grammar = new Grammar("S", of( 273 | new Rule("S", of( 274 | new Expression.Terminal("if"), 275 | new Expression.NonTerminal("E"), 276 | new Expression.Terminal("then"), 277 | new Expression.NonTerminal("S") 278 | )), 279 | new Rule("S", of( 280 | new Expression.Terminal("if"), 281 | new Expression.NonTerminal("E"), 282 | new Expression.Terminal("then"), 283 | new 
Expression.NonTerminal("S"), 284 | new Expression.Terminal("else"), 285 | new Expression.NonTerminal("S") 286 | )), 287 | new Rule("S", of(new Expression.Terminal("a"))), 288 | new Rule("E", of(new Expression.Terminal("b"))) 289 | )); 290 | 291 | // Dangling else問題は典型的なshift/reduce競合を引き起こす 292 | assertThrows(IllegalStateException.class, () -> { 293 | new SLR1Parser(grammar); 294 | }); 295 | } 296 | } -------------------------------------------------------------------------------- /code/chapter3/src/main/java/parser/PegJsonParser.java: -------------------------------------------------------------------------------- 1 | package parser; 2 | 3 | import java.util.ArrayList; 4 | import java.util.List; 5 | 6 | public class PegJsonParser implements JsonParser { 7 | private int cursor; 8 | private String input; 9 | 10 | private int progressiveCursor; 11 | private ParseException progressiveException; 12 | 13 | private static class ParseException extends RuntimeException { 14 | public ParseException(String message) { 15 | super(message); 16 | } 17 | } 18 | 19 | public ParseResult parse(String input) { 20 | this.input = input; 21 | this.cursor = 0; 22 | try { 23 | var value = parseValue(); 24 | return new ParseResult<>(value, input.substring(this.cursor)); 25 | } catch (ParseException e) { 26 | throw progressiveException; 27 | } 28 | } 29 | 30 | private void recognize(String literal) { 31 | if(input.substring(cursor).startsWith(literal)) { 32 | cursor += literal.length(); 33 | } else { 34 | String substring = input.substring(cursor); 35 | int endIndex = cursor + (literal.length() > substring.length() ? 
substring.length() : literal.length()); 36 | throwParseException("expected: " + literal + ", actual: " + input.substring(cursor, endIndex)); 37 | } 38 | } 39 | 40 | private boolean isHexChar(char ch) { 41 | return ('0' <= ch && ch <= '9') || ('a' <= ch && ch <= 'z') || ('A' <= ch && ch <= 'Z'); 42 | } 43 | 44 | private void skipWhitespace() { 45 | OUTER: 46 | while(cursor < input.length()) { 47 | char currentCharacter = input.charAt(cursor); 48 | switch (currentCharacter) { 49 | case '\f': 50 | case '\t': 51 | case '\r': 52 | case '\n': 53 | case '\b': 54 | case ' ': 55 | cursor++; 56 | continue OUTER; 57 | default: 58 | break OUTER; 59 | } 60 | } 61 | } 62 | 63 | private Ast.JsonValue parseValue() { 64 | int backup = cursor; 65 | try { 66 | return parseString(); 67 | } catch (ParseException e) { 68 | cursor = backup; 69 | } 70 | 71 | try { 72 | return parseNumber(); 73 | } catch (ParseException e) { 74 | cursor = backup; 75 | } 76 | 77 | try { 78 | return parseObject(); 79 | } catch (ParseException e) { 80 | cursor = backup; 81 | } 82 | 83 | try { 84 | return parseArray(); 85 | } catch (ParseException e) { 86 | cursor = backup; 87 | } 88 | 89 | try { 90 | return parseTrue(); 91 | } catch (ParseException e) { 92 | cursor = backup; 93 | } 94 | 95 | try { 96 | return parseFalse(); 97 | } catch (ParseException e) { 98 | cursor = backup; 99 | } 100 | 101 | return parseNull(); 102 | } 103 | 104 | private Ast.JsonTrue parseTrue() { 105 | recognize("true"); 106 | skipWhitespace(); 107 | return Ast.JsonTrue.getInstance(); 108 | } 109 | 110 | private Ast.JsonFalse parseFalse() { 111 | recognize("false"); 112 | skipWhitespace(); 113 | return Ast.JsonFalse.getInstance(); 114 | } 115 | 116 | private Ast.JsonNull parseNull() { 117 | recognize("null"); 118 | skipWhitespace(); 119 | return Ast.JsonNull.getInstance(); 120 | } 121 | 122 | private void parseLBrace() { 123 | recognize("{"); 124 | skipWhitespace(); 125 | } 126 | 127 | private void parseRBrace() { 128 | recognize("}"); 
129 | skipWhitespace(); 130 | } 131 | 132 | private void parseLBracket() { 133 | recognize("["); 134 | skipWhitespace(); 135 | } 136 | 137 | private void parseRBracket() { 138 | recognize("]"); 139 | skipWhitespace(); 140 | } 141 | 142 | private void parseComma() { 143 | recognize(","); 144 | skipWhitespace(); 145 | } 146 | 147 | private void parseColon() { 148 | recognize(":"); 149 | skipWhitespace(); 150 | } 151 | 152 | 153 | private Ast.JsonString parseString() { 154 | if(cursor >= input.length()) { 155 | throwParseException("expected: \"" + " actual: EOF"); 156 | } 157 | char ch = input.charAt(cursor); 158 | if(ch != '"') { 159 | throwParseException("expected: \"" + "actual: " + ch); 160 | } 161 | cursor++; 162 | var builder = new StringBuilder(); 163 | OUTER: 164 | while(cursor < input.length()) { 165 | ch = input.charAt(cursor); 166 | switch(ch) { 167 | case '\\': 168 | cursor++; 169 | if(cursor >= input.length()) break OUTER; 170 | char nextCh = input.charAt(cursor); 171 | cursor++; 172 | switch (nextCh) { 173 | case 'b': 174 | builder.append('\b'); 175 | break; 176 | case 'f': 177 | builder.append('\f'); 178 | break; 179 | case 'n': 180 | builder.append('\n'); 181 | break; 182 | case 'r': 183 | builder.append('\r'); 184 | break; 185 | case 't': 186 | builder.append('\t'); 187 | break; 188 | case '\\': 189 | builder.append('\\'); 190 | break; 191 | case '"': 192 | builder.append('"'); 193 | break; 194 | case '/': 195 | builder.append('/'); 196 | break; 197 | case 'u': 198 | if(cursor + 4 <= input.length()) { 199 | char[] characters = input.substring(cursor, cursor + 4).toCharArray(); 200 | for(char character:characters) { 201 | if(!isHexChar(character)) { 202 | throwParseException("invalid unicode escape: " + character); 203 | } 204 | } 205 | char result = (char)Integer.parseInt(new String(characters), 16); 206 | builder.append(result); 207 | cursor += 4; 208 | } else { 209 | throwParseException("invalid unicode escape: " + input.substring(cursor)); 210 | } 
211 | break; 212 | default: 213 | throwParseException("expected: b|f|n|r|t|\"|\\|/ actual: " + nextCh); 214 | } 215 | break; 216 | case '"': 217 | cursor++; 218 | break OUTER; 219 | default: 220 | builder.append(ch); 221 | cursor++; 222 | break; 223 | } 224 | } 225 | 226 | if(ch != '"') { 227 | throwParseException("expected: " + "\"" + " actual: " + ch); 228 | } else { 229 | skipWhitespace(); 230 | return new Ast.JsonString(builder.toString()); 231 | } 232 | throw new RuntimeException("never reach here"); 233 | } 234 | 235 | private void throwParseException(String message) throws ParseException { 236 | var exception = new ParseException(message); 237 | if(progressiveCursor < cursor) { 238 | progressiveCursor = cursor; 239 | progressiveException = exception; 240 | } 241 | throw exception; 242 | } 243 | 244 | private Ast.JsonNumber parseNumber() { 245 | int start = cursor; 246 | char ch = 0; 247 | while(cursor < input.length()) { 248 | ch = input.charAt(cursor); 249 | if(!('0' <= ch && ch <= '9')) break; 250 | cursor++; 251 | } 252 | if(start == cursor) { 253 | throwParseException("expected: [0-9] actual: " + (ch != 0 ? 
ch : "EOF")); 254 | } 255 | return new Ast.JsonNumber(Integer.parseInt(input.substring(start, cursor))); 256 | } 257 | 258 | private Pair parsePair() { 259 | var key = parseString(); 260 | parseColon(); 261 | var value = parseValue(); 262 | return new Pair<>(key, value); 263 | } 264 | 265 | private Ast.JsonObject parseObject() { 266 | int backup = cursor; 267 | try { 268 | parseLBrace(); 269 | parseRBrace(); 270 | return new Ast.JsonObject(new ArrayList<>()); 271 | } catch (ParseException e) { 272 | cursor = backup; 273 | } 274 | 275 | parseLBrace(); 276 | List> members = new ArrayList<>(); 277 | var member = parsePair(); 278 | members.add(member); 279 | try { 280 | while (true) { 281 | parseComma(); 282 | member = parsePair(); 283 | members.add(member); 284 | } 285 | } catch (ParseException e) { 286 | parseRBrace(); 287 | return new Ast.JsonObject(members); 288 | } 289 | } 290 | 291 | public Ast.JsonArray parseArray() { 292 | int backup = cursor; 293 | try { 294 | parseLBracket(); 295 | parseRBracket(); 296 | return new Ast.JsonArray(new ArrayList<>()); 297 | } catch (ParseException e) { 298 | cursor = backup; 299 | } 300 | 301 | parseLBracket(); 302 | List values = new ArrayList<>(); 303 | var value = parseValue(); 304 | values.add(value); 305 | try { 306 | while (true) { 307 | parseComma(); 308 | value = parseValue(); 309 | values.add(value); 310 | } 311 | } catch (ParseException e) { 312 | parseRBracket(); 313 | return new Ast.JsonArray(values); 314 | } 315 | } 316 | } -------------------------------------------------------------------------------- /code/chapter5/src/main/java/parser/lr0/LR0Recognizer.java: -------------------------------------------------------------------------------- 1 | package parser.lr0; 2 | 3 | import java.util.*; 4 | 5 | import static java.util.List.of; 6 | 7 | public class LR0Recognizer { 8 | private final Grammar augmentedGrammar; 9 | private final List states; 10 | private final Map> actionTable; 11 | private final Map> gotoTable; 12 
| 13 | public LR0Recognizer(Grammar grammar) { 14 | // 拡大文法を作成 15 | this.augmentedGrammar = createAugmentedGrammar(grammar); 16 | 17 | // 状態を構築 18 | this.states = constructStates(); 19 | 20 | // アクションテーブルとGOTOテーブルを構築 21 | this.actionTable = new HashMap<>(); 22 | this.gotoTable = new HashMap<>(); 23 | constructTables(); 24 | } 25 | 26 | private Grammar createAugmentedGrammar(Grammar grammar) { 27 | // S' -> S のルールを追加 28 | var newStart = "S'"; 29 | var newRule = new Rule(newStart, of(new Expression.NonTerminal(grammar.start()))); 30 | var augmentedRules = new ArrayList(); 31 | augmentedRules.add(newRule); 32 | augmentedRules.addAll(grammar.rules()); 33 | return new Grammar(newStart, augmentedRules); 34 | } 35 | 36 | private List constructStates() { 37 | var states = new ArrayList(); 38 | var stateMap = new HashMap(); 39 | 40 | // 初期状態を作成 41 | var initialItem = new LR0Item(augmentedGrammar.rules().get(0), 0); 42 | var initialState = new LR0ItemSet(Set.of(initialItem)).closure(augmentedGrammar); 43 | 44 | var worklist = new LinkedList(); 45 | worklist.add(initialState); 46 | states.add(initialState); 47 | stateMap.put(initialState, 0); 48 | 49 | while (!worklist.isEmpty()) { 50 | var state = worklist.poll(); 51 | var transitions = computeTransitions(state); 52 | 53 | for (var nextState : transitions.values()) { 54 | if (!stateMap.containsKey(nextState)) { 55 | states.add(nextState); 56 | stateMap.put(nextState, states.size() - 1); 57 | worklist.add(nextState); 58 | } 59 | } 60 | } 61 | 62 | return states; 63 | } 64 | 65 | private Map computeTransitions(LR0ItemSet state) { 66 | var transitions = new HashMap(); 67 | var itemsBySymbol = new HashMap>(); 68 | 69 | // シンボルごとにアイテムをグループ化 70 | for (var item : state.items()) { 71 | var symbol = item.nextSymbol(); 72 | if (symbol != null) { 73 | itemsBySymbol.computeIfAbsent(symbol, k -> new HashSet<>()).add(item); 74 | } 75 | } 76 | 77 | // 各シンボルに対して遷移先の状態を計算 78 | for (var entry : itemsBySymbol.entrySet()) { 79 | var symbol = 
entry.getKey(); 80 | var items = entry.getValue(); 81 | var nextItems = new HashSet(); 82 | 83 | for (var item : items) { 84 | nextItems.add(item.advance()); 85 | } 86 | 87 | var nextState = new LR0ItemSet(nextItems).closure(augmentedGrammar); 88 | transitions.put(symbol, nextState); 89 | } 90 | 91 | return transitions; 92 | } 93 | 94 | private void constructTables() { 95 | for (int i = 0; i < states.size(); i++) { 96 | var state = states.get(i); 97 | actionTable.put(i, new HashMap<>()); 98 | gotoTable.put(i, new HashMap<>()); 99 | 100 | var transitions = computeTransitions(state); 101 | 102 | // シフトアクションとGOTOを設定 103 | for (var entry : transitions.entrySet()) { 104 | var symbol = entry.getKey(); 105 | var nextState = entry.getValue(); 106 | var nextStateIndex = states.indexOf(nextState); 107 | 108 | if (symbol instanceof Expression.Terminal) { 109 | actionTable.get(i).put(symbol, new Action.Shift(nextStateIndex)); 110 | } else if (symbol instanceof Expression.NonTerminal nt) { 111 | gotoTable.get(i).put(nt.name(), nextStateIndex); 112 | } 113 | } 114 | 115 | // リデュースアクションと受理アクションを設定 116 | for (var item : state.items()) { 117 | if (item.nextSymbol() == null) { // ドットが最後にある 118 | var rule = item.rule(); 119 | if (rule.name().equals("S'")) { 120 | // 受理アクション(終端記号$に対して) 121 | actionTable.get(i).put(null, new Action.Accept()); 122 | } else { 123 | // リデュースアクション 124 | // LR(0)では、すべての入力記号に対してリデュースを設定 125 | var ruleIndex = augmentedGrammar.rules().indexOf(rule); 126 | var reduceAction = new Action.Reduce(ruleIndex, rule); 127 | 128 | // すべての終端記号を収集 129 | var terminals = new HashSet(); 130 | for (var r : augmentedGrammar.rules()) { 131 | for (var expr : r.body()) { 132 | if (expr instanceof Expression.Terminal t) { 133 | terminals.add(t); 134 | } 135 | } 136 | } 137 | 138 | // すべての終端記号に対してリデュースアクションを設定 139 | for (var terminal : terminals) { 140 | var existing = actionTable.get(i).get(terminal); 141 | if (existing != null && !existing.equals(reduceAction)) { 142 | // 
シフト/リデュース競合またはリデュース/リデュース競合 143 | System.err.println("Conflict at state " + i + " for symbol " + terminal); 144 | } else { 145 | actionTable.get(i).put(terminal, reduceAction); 146 | } 147 | } 148 | 149 | // 入力の終わり(null)に対してもリデュースアクションを設定 150 | var existingEof = actionTable.get(i).get(null); 151 | if (existingEof != null && !existingEof.equals(reduceAction)) { 152 | System.err.println("Conflict at state " + i + " for EOF"); 153 | } else { 154 | actionTable.get(i).put(null, reduceAction); 155 | } 156 | } 157 | } 158 | } 159 | } 160 | } 161 | 162 | public boolean recognize(List input) { 163 | var inputQueue = new LinkedList<>(input); 164 | inputQueue.add(null); // 入力の終わりを表す 165 | 166 | var stack = new LinkedList(); 167 | stack.push(0); // 初期状態 168 | 169 | System.out.println("=== LR(0) Recognition Process ==="); 170 | 171 | while (!inputQueue.isEmpty()) { 172 | var currentState = stack.peek(); 173 | var currentSymbol = inputQueue.peek(); 174 | var action = actionTable.get(currentState).get(currentSymbol); 175 | 176 | System.out.printf("State: %d, Symbol: %s, Action: %s%n", 177 | currentState, currentSymbol, action); 178 | 179 | if (action == null) { 180 | System.out.println("No action found - recognition failed"); 181 | return false; 182 | } 183 | 184 | switch (action) { 185 | case Action.Shift shift -> { 186 | inputQueue.poll(); 187 | stack.push(shift.state()); 188 | System.out.println("Shifted to state " + shift.state()); 189 | } 190 | case Action.Reduce reduce -> { 191 | var rule = reduce.rule(); 192 | // スタックからルールの本体の長さ分ポップ 193 | for (int j = 0; j < rule.body().size(); j++) { 194 | stack.pop(); 195 | } 196 | 197 | var gotoState = gotoTable.get(stack.peek()).get(rule.name()); 198 | if (gotoState == null) { 199 | System.out.println("No goto found for " + rule.name() + " - recognition failed"); 200 | return false; 201 | } 202 | 203 | stack.push(gotoState); 204 | System.out.println("Reduced using rule: " + rule); 205 | } 206 | case Action.Accept accept -> { 207 | 
System.out.println("Input accepted!"); 208 | return true; 209 | } 210 | } 211 | } 212 | 213 | return false; 214 | } 215 | 216 | // アクションの種類を表す型 217 | public sealed interface Action { 218 | record Shift(int state) implements Action {} 219 | record Reduce(int ruleIndex, Rule rule) implements Action {} 220 | record Accept() implements Action {} 221 | } 222 | 223 | // デバッグ用メソッド 224 | public void printStates() { 225 | System.out.println("=== LR(0) States ==="); 226 | for (int i = 0; i < states.size(); i++) { 227 | System.out.println("State " + i + ":"); 228 | System.out.println(states.get(i)); 229 | } 230 | } 231 | 232 | public void printTables() { 233 | System.out.println("\n=== Action Table ==="); 234 | for (int i = 0; i < states.size(); i++) { 235 | System.out.print("State " + i + ": "); 236 | var actions = actionTable.get(i); 237 | for (var entry : actions.entrySet()) { 238 | System.out.print(entry.getKey() + "=" + entry.getValue() + " "); 239 | } 240 | System.out.println(); 241 | } 242 | 243 | System.out.println("\n=== Goto Table ==="); 244 | for (int i = 0; i < states.size(); i++) { 245 | var gotos = gotoTable.get(i); 246 | if (!gotos.isEmpty()) { 247 | System.out.print("State " + i + ": "); 248 | for (var entry : gotos.entrySet()) { 249 | System.out.print(entry.getKey() + "=" + entry.getValue() + " "); 250 | } 251 | System.out.println(); 252 | } 253 | } 254 | } 255 | } -------------------------------------------------------------------------------- /code/chapter5/src/main/java/parser/ll1/LL1Recognizer.java: -------------------------------------------------------------------------------- 1 | package parser.ll1; 2 | 3 | import java.util.*; 4 | 5 | public class LL1Recognizer { 6 | private final Grammar grammar; 7 | private final Map> firstSets; 8 | private final Map> followSets; 9 | private final Map> parseTable; 10 | private static final Expression.Terminal EPSILON = new Expression.Terminal("ε"); 11 | private static final Expression.Terminal EOF = new 
Expression.Terminal("$"); 12 | 13 | public LL1Recognizer(Grammar grammar) { 14 | this.grammar = grammar; 15 | this.firstSets = new HashMap<>(); 16 | this.followSets = new HashMap<>(); 17 | this.parseTable = new HashMap<>(); 18 | 19 | computeFirstSets(); 20 | computeFollowSets(); 21 | constructParseTable(); 22 | } 23 | 24 | // FIRST集合の計算 25 | private void computeFirstSets() { 26 | initializeFirstSets(); 27 | 28 | // 固定点に達するまで繰り返す 29 | boolean changed; 30 | do { 31 | changed = false; 32 | for (var rule : grammar.rules()) { 33 | if (updateFirstSet(rule)) { 34 | changed = true; 35 | } 36 | } 37 | } while (changed); 38 | } 39 | 40 | // FIRST集合の初期化 41 | private void initializeFirstSets() { 42 | // 終端記号のFIRST集合は自分自身 43 | for (var rule : grammar.rules()) { 44 | for (var expr : rule.body()) { 45 | if (expr instanceof Expression.Terminal t) { 46 | firstSets.computeIfAbsent(t.value(), k -> new HashSet<>()).add(t); 47 | } 48 | } 49 | } 50 | 51 | // 非終端記号のFIRST集合を初期化 52 | for (var rule : grammar.rules()) { 53 | firstSets.computeIfAbsent(rule.name(), k -> new HashSet<>()); 54 | } 55 | } 56 | 57 | // 一つのルールのFIRST集合を更新 58 | private boolean updateFirstSet(Rule rule) { 59 | var first = firstSets.get(rule.name()); 60 | int originalSize = first.size(); 61 | 62 | if (rule.body().isEmpty()) { 63 | // 空規則の場合 64 | first.add(EPSILON); 65 | } else { 66 | // ルール本体のFIRST集合を計算 67 | var bodyFirst = computeFirstOfSequence(rule.body()); 68 | first.addAll(bodyFirst); 69 | } 70 | 71 | return first.size() > originalSize; 72 | } 73 | 74 | // FOLLOW集合の計算 75 | private void computeFollowSets() { 76 | initializeFollowSets(); 77 | 78 | // 固定点に達するまで繰り返す 79 | boolean changed; 80 | do { 81 | changed = false; 82 | for (var rule : grammar.rules()) { 83 | if (updateFollowSets(rule)) { 84 | changed = true; 85 | } 86 | } 87 | } while (changed); 88 | } 89 | 90 | // FOLLOW集合の初期化 91 | private void initializeFollowSets() { 92 | // すべての非終端記号のFOLLOW集合を初期化 93 | for (var rule : grammar.rules()) { 94 | 
followSets.computeIfAbsent(rule.name(), k -> new HashSet<>()); 95 | } 96 | 97 | // 開始記号のFOLLOW集合に$を追加 98 | followSets.get(grammar.start()).add(EOF); 99 | } 100 | 101 | // 一つのルールに対してFOLLOW集合を更新 102 | private boolean updateFollowSets(Rule rule) { 103 | boolean changed = false; 104 | 105 | for (int i = 0; i < rule.body().size(); i++) { 106 | var expr = rule.body().get(i); 107 | if (expr instanceof Expression.NonTerminal nt) { 108 | // A -> αBβ の形で、Bに対する処理 109 | var beta = rule.body().subList(i + 1, rule.body().size()); 110 | if (updateFollowSetForNonTerminal(nt.name(), beta, rule.name())) { 111 | changed = true; 112 | } 113 | } 114 | } 115 | 116 | return changed; 117 | } 118 | 119 | // 非終端記号のFOLLOW集合を更新 120 | private boolean updateFollowSetForNonTerminal(String nonTerminal, List beta, String ruleName) { 121 | var follow = followSets.get(nonTerminal); 122 | int originalSize = follow.size(); 123 | 124 | // βのFIRST集合を計算 125 | var betaFirst = computeFirstOfSequence(beta); 126 | 127 | // FIRST(β) - {ε} をFOLLOW(B)に追加 128 | for (var terminal : betaFirst) { 129 | if (!terminal.equals(EPSILON)) { 130 | follow.add(terminal); 131 | } 132 | } 133 | 134 | // βがεを導出する場合、FOLLOW(A)をFOLLOW(B)に追加 135 | if (betaFirst.contains(EPSILON)) { 136 | var lhsFollow = followSets.get(ruleName); 137 | follow.addAll(lhsFollow); 138 | } 139 | 140 | return follow.size() > originalSize; 141 | } 142 | 143 | // パーステーブルの構築 144 | private void constructParseTable() { 145 | for (var rule : grammar.rules()) { 146 | parseTable.computeIfAbsent(rule.name(), k -> new HashMap<>()); 147 | addRuleToParseTable(rule); 148 | } 149 | } 150 | 151 | // ルールをパーステーブルに追加 152 | private void addRuleToParseTable(Rule rule) { 153 | var ruleFirst = computeFirstOfSequence(rule.body()); 154 | 155 | // FIRST集合の各終端記号に対してエントリを追加 156 | for (var terminal : ruleFirst) { 157 | if (!terminal.equals(EPSILON)) { 158 | addParseTableEntry(rule.name(), terminal, rule); 159 | } 160 | } 161 | 162 | // εがFIRST集合に含まれる場合、FOLLOW集合の各終端記号に対してエントリを追加 
163 | if (ruleFirst.contains(EPSILON)) { 164 | var follow = followSets.get(rule.name()); 165 | for (var terminal : follow) { 166 | addParseTableEntry(rule.name(), terminal, rule); 167 | } 168 | } 169 | } 170 | 171 | // パーステーブルにエントリを追加(競合チェック付き) 172 | private void addParseTableEntry(String nonTerminal, Expression.Terminal terminal, Rule rule) { 173 | var existing = parseTable.get(nonTerminal).get(terminal); 174 | if (existing != null && !existing.equals(rule)) { 175 | System.err.println("LL(1) conflict at [" + nonTerminal + ", " + terminal + "]"); 176 | } 177 | parseTable.get(nonTerminal).put(terminal, rule); 178 | } 179 | 180 | // 記号列のFIRST集合を計算 181 | private Set computeFirstOfSequence(List sequence) { 182 | Set result = new HashSet<>(); 183 | 184 | if (sequence.isEmpty()) { 185 | result.add(EPSILON); 186 | return result; 187 | } 188 | 189 | for (var expr : sequence) { 190 | var symbolFirst = getFirstOfSymbol(expr); 191 | 192 | // εを除いて追加 193 | result.addAll(symbolFirst.stream() 194 | .filter(t -> !t.equals(EPSILON)) 195 | .toList()); 196 | 197 | // この記号がεを導出しない場合は終了 198 | if (!symbolFirst.contains(EPSILON)) { 199 | return result; 200 | } 201 | } 202 | 203 | // すべての記号がεを導出する場合 204 | result.add(EPSILON); 205 | return result; 206 | } 207 | 208 | // 単一の記号のFIRST集合を取得 209 | private Set getFirstOfSymbol(Expression symbol) { 210 | if (symbol instanceof Expression.Terminal t) { 211 | return Set.of(t); 212 | } else if (symbol instanceof Expression.NonTerminal nt) { 213 | var ntFirst = firstSets.get(nt.name()); 214 | return ntFirst != null ? 
new HashSet<>(ntFirst) : new HashSet<>(); 215 | } 216 | return new HashSet<>(); 217 | } 218 | 219 | // 入力を認識 220 | public boolean recognize(List input) { 221 | var inputQueue = new LinkedList<>(input); 222 | inputQueue.add(EOF); // 入力の終わりを表す 223 | 224 | var stack = new LinkedList(); 225 | stack.push(EOF); // スタックの底 226 | stack.push(new Expression.NonTerminal(grammar.start())); // 開始記号 227 | 228 | System.out.println("=== LL(1) Recognition Process ==="); 229 | 230 | while (!stack.peek().equals(EOF)) { 231 | var top = stack.peek(); 232 | var currentInput = inputQueue.peek(); 233 | 234 | System.out.printf("Stack top: %s, Input: %s%n", top, currentInput); 235 | 236 | if (top instanceof Expression.Terminal t) { 237 | if (t.equals(currentInput)) { 238 | stack.pop(); 239 | inputQueue.poll(); 240 | System.out.println("Matched terminal: " + t); 241 | } else { 242 | System.out.println("Mismatch - expected " + t + " but found " + currentInput); 243 | return false; 244 | } 245 | } else if (top instanceof Expression.NonTerminal nt) { 246 | var rule = parseTable.get(nt.name()).get(currentInput); 247 | if (rule == null) { 248 | System.out.println("No rule found for [" + nt.name() + ", " + currentInput + "]"); 249 | return false; 250 | } 251 | 252 | System.out.println("Applying rule: " + rule); 253 | stack.pop(); 254 | 255 | // ルールの本体を逆順でスタックにプッシュ(空規則でない場合) 256 | if (!rule.body().isEmpty()) { 257 | for (int i = rule.body().size() - 1; i >= 0; i--) { 258 | stack.push(rule.body().get(i)); 259 | } 260 | } 261 | } 262 | } 263 | 264 | // スタックが空で、入力も終わりに達していれば受理 265 | boolean accepted = stack.peek().equals(EOF) && inputQueue.peek().equals(EOF); 266 | if (accepted) { 267 | System.out.println("Input accepted!"); 268 | } else { 269 | System.out.println("Input rejected - remaining input: " + inputQueue); 270 | } 271 | 272 | return accepted; 273 | } 274 | 275 | // デバッグ用メソッド 276 | public void printFirstSets() { 277 | System.out.println("=== FIRST Sets ==="); 278 | for (var entry : 
firstSets.entrySet()) { 279 | System.out.println("FIRST(" + entry.getKey() + ") = " + entry.getValue()); 280 | } 281 | } 282 | 283 | public void printFollowSets() { 284 | System.out.println("\n=== FOLLOW Sets ==="); 285 | for (var entry : followSets.entrySet()) { 286 | System.out.println("FOLLOW(" + entry.getKey() + ") = " + entry.getValue()); 287 | } 288 | } 289 | 290 | public void printParseTable() { 291 | System.out.println("\n=== LL(1) Parse Table ==="); 292 | for (var ntEntry : parseTable.entrySet()) { 293 | for (var tEntry : ntEntry.getValue().entrySet()) { 294 | System.out.println("[" + ntEntry.getKey() + ", " + tEntry.getKey() + "] = " + tEntry.getValue()); 295 | } 296 | } 297 | } 298 | } --------------------------------------------------------------------------------