Since this is the domain object which will have custom business logic in future, we use {@link
9 | * Visitable#accept(Visitor)} to implement the same.
10 | *
11 | * <p>You might be wondering: the {@link
12 | * com.arjunsk.compiler.ck.domain.tree.nodes.grammer.ParserRuleContext} already has the
13 | * parent-child relationship, so how is this class helpful? We use this class for generic
14 | * traversal of parent-child paths. It is mainly used to propagate accept() and to generate the
15 | * toString() of the tree.
16 | */
17 | public interface ParseTree extends Visitable {
18 |
19 |   /** @return the node previously handed to {@link #setParent(ParseTree)}. */
20 |   ParseTree getParent();
21 |
22 |   /** Records {@code parent} as the node this node hangs under. */
23 |   void setParent(ParseTree parent);
24 |
25 |   /**
26 |    * @return the Token value for a terminal node; for any other node, the getText() of every child
27 |    *     joined in order.
28 |    */
29 |   String getText();
30 |
31 |   /** @return the Token for a terminal node, otherwise the node object itself. */
32 |   Object getPayload();
33 |
34 |   /** Adds {@code child} to this node's children. */
35 |   void addChild(ParseTree child);
36 |
37 |   /** @return the child stored at position {@code i}. */
38 |   ParseTree getChild(int i);
39 |
40 |   /** @return how many children this node has. */
41 |   int getChildCount();
42 |
43 |   /** @return the whole subtree flattened into one string with matched brackets. */
44 |   String toStringTree();
45 | }
44 |
--------------------------------------------------------------------------------
/compiler-examples/ck-compiler/src/main/java/com/arjunsk/compiler/ck/visitor/interpreter/InterpreterVisitor.java:
--------------------------------------------------------------------------------
1 | package com.arjunsk.compiler.ck.visitor.interpreter;
2 |
3 | import com.arjunsk.compiler.ck.domain.tree.nodes.grammer.impl.LetContext;
4 | import com.arjunsk.compiler.ck.domain.tree.nodes.grammer.impl.ShowContext;
5 | import com.arjunsk.compiler.ck.visitor.SimplerLangBaseVisitor;
6 | import java.util.HashMap;
7 | import java.util.Map;
8 |
9 | /**
10 | * SimplerLang Interpreter Implementation. Interpreter executes code line by line.
11 | *
12 | *
48 | *
49 | * The accept(visitor) `visitor` is propagated to the children of that tree node.
50 | *
51 | * @param node Visitable implementation
52 | */
53 | public T visitChildren(ParseTree node) {
54 | T result = defaultResult();
55 | for (int i = 0; i < node.getChildCount(); i++) {
56 | ParseTree c = node.getChild(i);
57 | result = c.accept(this);
58 | }
59 |
60 | return result; // return the last accept result of the children list.
61 | }
62 |
63 | protected T defaultResult() {
64 | return null; // starting value in visitChildren; it is what a node without children yields
65 | }
66 | }
67 |
--------------------------------------------------------------------------------
/compiler-examples/ck-compiler/src/main/java/com/arjunsk/compiler/ck/domain/tree/nodes/common/TerminalNode.java:
--------------------------------------------------------------------------------
1 | package com.arjunsk.compiler.ck.domain.tree.nodes.common;
2 |
3 | import com.arjunsk.compiler.ck.domain.token.Token;
4 | import com.arjunsk.compiler.ck.domain.tree.ParseTree;
5 | import com.arjunsk.compiler.ck.visitor.Visitor;
6 |
7 | /**
8 |  * Terminal node or Leaf node.
9 |  *
10 |  * <p>Eg:- VAR or INT
11 |  */
12 | public class TerminalNode implements ParseTree {
13 |
14 |   public ParseTree parent;
15 |
16 |   public Token symbol;
17 |
18 |   @Override
19 |   public ParseTree getParent() {
20 |     return this.parent;
21 |   }
22 |
23 |   @Override
24 |   public void setParent(ParseTree parent) {
25 |     this.parent = parent;
26 |   }
27 |
28 |   /**
29 |    * Terminal nodes will have Token as the payload. (ie VAR or INT)
30 |    *
31 |    * @param symbol Token value for the terminal node
32 |    */
33 |   public void setSymbol(Token symbol) {
34 |     this.symbol = symbol;
35 |   }
36 |
37 |   /**
38 |    * Terminal node will have Token as the payload. (ie VAR or INT). We have it as Object return type
39 |    * because for ContextNodes, it can be that node itself.
40 |    *
41 |    * @return Token value.
42 |    */
43 |   @Override
44 |   public Object getPayload() {
45 |     return this.symbol;
46 |   }
47 |
48 |   /**
49 |    * Terminal node will have the Text as Token value.
50 |    *
51 |    * @return Token Value
52 |    */
53 |   @Override
54 |   public String getText() {
55 |     return this.symbol.getValue();
56 |   }
57 |
58 |   /** A terminal node is a leaf, so the given child is ignored. */
59 |   @Override
60 |   public void addChild(ParseTree child) {}
61 |
62 |   /** A terminal node is a leaf, so this always returns null. */
63 |   @Override
64 |   public ParseTree getChild(int i) {
65 |     return null;
66 |   }
67 |
68 |   /** A terminal node is a leaf, so this always returns 0. */
69 |   @Override
70 |   public int getChildCount() {
71 |     return 0;
72 |   }
73 |
74 |   /**
75 |    * Implementation of toStringTree for terminal node return only the Token Value.
76 |    *
77 |    * @return Token Value
78 |    */
79 |   @Override
80 |   public String toStringTree() {
81 |     return getText();
82 |   }
83 |
84 |   /**
85 |    * {@inheritDoc}
86 |    *
87 |    * <p>Invoke visitTerminal function in the Visitor Implementation.
88 |    */
89 |   @Override
90 |   public <T> T accept(Visitor<? extends T> visitor) {
91 |     return visitor.visitTerminal(this);
92 |   }
93 | }
94 |
--------------------------------------------------------------------------------
/compiler-examples/lisp-compiler/src/main/java/com/arjunsk/compiler/lisp/core/lexer/Lexer.java:
--------------------------------------------------------------------------------
1 | package com.arjunsk.compiler.lisp.core.lexer;
2 |
3 | import com.arjunsk.compiler.lisp.domain.lexer.Token;
4 | import com.arjunsk.compiler.lisp.domain.lexer.support.TokenType;
5 | import com.arjunsk.compiler.lisp.exceptions.LexerException;
6 | import java.util.ArrayList;
7 | import java.util.List;
8 |
9 | public class Lexer {
10 |
11 |   /**
12 |    * Convert code to token stream.
13 |    *
14 |    * <p>Parentheses become PAREN tokens, double-quoted text becomes a STRING token (quotes
15 |    * excluded), a run of digits becomes a NUMBER token and a run of letters becomes a NAME token.
16 |    * Spaces, line feeds and carriage returns are skipped.
17 |    *
18 |    * @param input High Level Code
19 |    * @return List of tokens
20 |    * @throws LexerException if an unsupported character is met or a string literal is never closed
21 |    */
22 |   public List<Token> tokenize(String input) {
23 |     List<Token> result = new ArrayList<>();
24 |
25 |     int currentIndex = 0;
26 |
27 |     while (currentIndex < input.length()) {
28 |
29 |       char currentChar = input.charAt(currentIndex);
30 |
31 |       if (currentChar == '(' || currentChar == ')') {
32 |         result.add(new Token(TokenType.PAREN, currentChar + ""));
33 |         currentIndex++;
34 |       } else if (currentChar == '\n' || currentChar == '\r' || currentChar == ' ') {
35 |         currentIndex++;
36 |       } else if (currentChar == '"') {
37 |
38 |         // The literal ends at the next double quote; without one the input is malformed.
39 |         int closingQuote = input.indexOf('"', currentIndex + 1);
40 |         if (closingQuote < 0) {
41 |           throw new LexerException();
42 |         }
43 |         result.add(new Token(TokenType.STRING, input.substring(currentIndex + 1, closingQuote)));
44 |
45 |         // Resume after the closing quote so it is not read as the start of another literal.
46 |         currentIndex = closingQuote + 1;
47 |       } else if (Character.isDigit(currentChar)) {
48 |
49 |         // Bounded scan: the number may be the very last thing in the input.
50 |         int end = currentIndex;
51 |         while (end < input.length() && Character.isDigit(input.charAt(end))) {
52 |           end++;
53 |         }
54 |         result.add(new Token(TokenType.NUMBER, input.substring(currentIndex, end)));
55 |         currentIndex = end;
56 |       } else if (Character.isLetter(currentChar)) {
57 |
58 |         // Bounded scan: the word may be the very last thing in the input.
59 |         int end = currentIndex;
60 |         while (end < input.length() && Character.isLetter(input.charAt(end))) {
61 |           end++;
62 |         }
63 |         result.add(new Token(TokenType.NAME, input.substring(currentIndex, end)));
64 |         currentIndex = end;
65 |       } else {
66 |         throw new LexerException();
67 |       }
68 |     }
69 |
70 |     return result;
71 |   }
72 | }
68 |
--------------------------------------------------------------------------------
/compiler-examples/ck-compiler/src/main/java/com/arjunsk/compiler/ck/domain/tree/nodes/grammer/ParserRuleContext.java:
--------------------------------------------------------------------------------
1 | package com.arjunsk.compiler.ck.domain.tree.nodes.grammer;
2 |
3 | import com.arjunsk.compiler.ck.domain.tree.ParseTree;
4 | import com.arjunsk.compiler.ck.visitor.Visitor;
5 | import java.util.ArrayList;
6 | import java.util.List;
7 |
8 | /** Base implementation for our Grammar Elements. */
9 | public class ParserRuleContext implements ParseTree {
10 |
11 |   public ParseTree parent;
12 |
13 |   /** Child nodes in insertion order; stays null until the first addChild call. */
14 |   public List<ParseTree> children;
15 |
16 |   /** {@inheritDoc} */
17 |   @Override
18 |   public ParseTree getParent() {
19 |     return this.parent;
20 |   }
21 |
22 |   /** {@inheritDoc} */
23 |   @Override
24 |   public void setParent(ParseTree parent) {
25 |     this.parent = parent;
26 |   }
27 |
28 |   /**
29 |    * {@inheritDoc}
30 |    *
31 |    * @return the text of every child joined in order; empty when there are no children
32 |    */
33 |   @Override
34 |   public String getText() {
35 |     StringBuilder builder = new StringBuilder();
36 |     for (int i = 0; i < getChildCount(); i++) {
37 |       builder.append(getChild(i).getText());
38 |     }
39 |
40 |     return builder.toString();
41 |   }
42 |
43 |   /** {@inheritDoc} */
44 |   @Override
45 |   public Object getPayload() {
46 |     return this;
47 |   }
48 |
49 |   /**
50 |    * {@inheritDoc}
51 |    *
52 |    * <p>The child's parent is set to this node and the child list is created on first use.
53 |    */
54 |   @Override
55 |   public void addChild(ParseTree child) {
56 |     child.setParent(this);
57 |     if (children == null) {
58 |       children = new ArrayList<>();
59 |     }
60 |     children.add(child);
61 |   }
62 |
63 |   /**
64 |    * {@inheritDoc}
65 |    *
66 |    * @return the child at index {@code i}, or null when this node has no child at that index
67 |    */
68 |   @Override
69 |   public ParseTree getChild(int i) {
70 |     // The list is created lazily, so a childless node must not dereference it.
71 |     if (children == null || i < 0 || i >= children.size()) {
72 |       return null;
73 |     }
74 |     return children.get(i);
75 |   }
76 |
77 |   /** {@inheritDoc} */
78 |   @Override
79 |   public int getChildCount() {
80 |     return children != null ? children.size() : 0;
81 |   }
82 |
83 |   /**
84 |    * {@inheritDoc}
85 |    *
86 |    * @return empty string for a childless node, otherwise the simple class name followed by the
87 |    *     toStringTree() of each child, wrapped in brackets
88 |    */
89 |   @Override
90 |   public String toStringTree() {
91 |     if (getChildCount() == 0) {
92 |       return "";
93 |     }
94 |
95 |     StringBuilder sb = new StringBuilder();
96 |
97 |     sb.append("( ");
98 |     sb.append(this.getClass().getSimpleName());
99 |     sb.append("(");
100 |     for (int i = 0; i < getChildCount(); i++) {
101 |       sb.append(" ").append(getChild(i).toStringTree()).append(" ");
102 |     }
103 |     sb.append(")");
104 |     sb.append(" )");
105 |
106 |     return sb.toString();
107 |   }
108 |
109 |   /** To be overridden in child implementations. This base version returns null. */
110 |   @Override
111 |   public <T> T accept(Visitor<? extends T> visitor) {
112 |     return null;
113 |   }
114 | }
94 |
--------------------------------------------------------------------------------
/compiler-examples/ck-compiler/README.md:
--------------------------------------------------------------------------------
1 | ## CK Compiler
2 | This is a project I created for learning internals of compilers. The `SimplerLang` grammar is taken from [this](https://shalithasuranga.medium.com/build-your-own-programming-language-with-antlr-5201955537a5) post.
3 |
4 | ### To compile `.ck` to `.class`
5 | 1. Open [CkCompilerTest.java](src/test/java/com/arjunsk/compiler/ck/CkCompilerTest.java)
6 | 2. Run `compile()`
7 | 3. Look for the `output/CgSample.class` generated under root.
8 | 4. To see the source code of `CgSample.class`, use IntelliJ decompiler.
9 |
10 | ### Grammar
11 | ```antlrv4
12 | grammar simplerlang;
13 |
14 | program : statement+;
15 | statement : let | show ;
16 |
17 | let : VAR '=' INT ;
18 | show : 'show' (INT | VAR) ;
19 |
20 | VAR : [a-z]+ ;
21 | INT : [0-9]+ ;
22 | WS : [ \n\t]+ -> skip;
23 | ```
24 |
25 | ### Features
26 | 1. Used a simple grammar to focus more on `compiler phases` rather than `language support`.
27 | 2. Took `ANTLR` generated code as a reference.
28 | 3. Implemented [Parse Tree](src/main/java/com/arjunsk/compiler/ck/domain/tree/ParseTree.java) & created [Visitable](src/main/java/com/arjunsk/compiler/ck/domain/tree/Visitable.java) Grammar Nodes.
29 | 4. Implemented [Visitor](src/main/java/com/arjunsk/compiler/ck/visitor/Visitor.java) for writing business logic on Tree nodes.
30 | 5. Implemented [CodeGeneration](src/main/java/com/arjunsk/compiler/ck/visitor/codegenerator/CodeGeneratorVisitor.java) using `Java ASM`.
31 | 6. Implemented [Interpreter](src/main/java/com/arjunsk/compiler/ck/visitor/interpreter/InterpreterVisitor.java) & [Semantic Analyzer](src/main/java/com/arjunsk/compiler/ck/visitor/semantic/SemanticAnalyzer.java).
32 |
33 | ### TODO
34 | * ~~Implement Semantic Analyser.~~
35 | * ~~Implement custom filename.(Unsupported)~~
36 | * ~~Implement Parse Tree.~~
37 | * ~~Implement Visitor Pattern.~~
38 | * ~~Implement byte code generation~~
39 |
40 | ### Compiler Phases
41 | 1. Lexical Analysis [Done]
42 | 2. Syntactic Analysis (ie Parsing) [Done]
43 | 3. Semantic analysis [Done] & Intermediate Code Generation [NA]
44 | 4. Optimization (optional)
45 | 5. Code Generation [Done]
46 |
47 | Intermediate Code Generation: code gets converted to independent intermediate code. We are not doing this in `ck-compiler`.
48 | We could use `LLVM` as Backend for implementing this feature.
49 |
50 | ### Reference:
51 | 1. [Java ASM](https://github.com/arjunsk/java-bytecode/tree/master/java-asm/ow2-asm-example/src/main/java/com/arjunsk/asm/asmifier)
52 | 2. [Ops Code](https://docs.oracle.com/javase/specs/jvms/se7/html/jvms-6.html)
53 | 3. [Lecture Note](https://www.radford.edu/~nokie/classes/380/phases.html)
54 | 4. [LLVM Backend](https://llvm.org/docs/WritingAnLLVMBackend.html)
55 |
56 | > Do check out the other modules in this project for a better understanding.
--------------------------------------------------------------------------------
/compiler-examples/svg-compiler/src/main/java/com/arjunsk/compiler/svg/core/parsing/Parser.java:
--------------------------------------------------------------------------------
1 | package com.arjunsk.compiler.svg.core.parsing;
2 |
3 | import com.arjunsk.compiler.svg.domain.lexer.ast.AstNode;
4 | import com.arjunsk.compiler.svg.domain.lexer.ast.support.AstNodeClass;
5 | import com.arjunsk.compiler.svg.domain.lexer.token.Token;
6 | import com.arjunsk.compiler.svg.domain.lexer.token.support.TokenType;
7 | import com.arjunsk.compiler.svg.exceptions.ParserException;
8 | import java.util.ArrayList;
9 | import java.util.List;
10 |
11 | public class Parser {
12 |
13 |   private final List<Token> tokens;
14 |
15 |   private int currentTokenIndex;
16 |
17 |   public Parser(List<Token> tokens) {
18 |     this.tokens = tokens;
19 |     this.currentTokenIndex = 0;
20 |   }
21 |
22 |   /**
23 |    * Builds a BLOCK node holding one CALL_EXPRESSION per command in the token list.
24 |    *
25 |    * <p>"Paper" and "Pen" take one argument and may each appear once; "Line" takes four arguments
26 |    * and is only accepted after both Paper and Pen were defined.
27 |    *
28 |    * @return the BLOCK node with the parsed call expressions appended in order
29 |    * @throws ParserException if a command is unknown, repeated, used before Paper and Pen are
30 |    *     defined, is not a WORD token, or is missing arguments
31 |    */
32 |   public AstNode parse() {
33 |
34 |     AstNode ast = new AstNode(AstNodeClass.BLOCK);
35 |
36 |     boolean isPaperDefined = false;
37 |     boolean isPenDefined = false;
38 |
39 |     while (currentTokenIndex < tokens.size()) {
40 |       Token currentToken = tokens.get(currentTokenIndex);
41 |
42 |       if (currentToken.getType() != TokenType.WORD) {
43 |         throw new ParserException("Unexpected Token Type" + currentToken.getType());
44 |       }
45 |
46 |       switch (currentToken.getValue()) {
47 |         case "Paper":
48 |           if (isPaperDefined) {
49 |             throw new ParserException("Paper already defined");
50 |           }
51 |           ast.appendNode(parseCallExpression("Paper", 1));
52 |           isPaperDefined = true;
53 |           break;
54 |         case "Pen":
55 |           if (isPenDefined) {
56 |             throw new ParserException("Pen already defined");
57 |           }
58 |           ast.appendNode(parseCallExpression("Pen", 1));
59 |           isPenDefined = true;
60 |           break;
61 |         case "Line":
62 |           if (!(isPaperDefined && isPenDefined)) {
63 |             throw new ParserException("No Paper & Pen defined");
64 |           }
65 |           ast.appendNode(parseCallExpression("Line", 4));
66 |           break;
67 |         default:
68 |           throw new ParserException("Invalid Token");
69 |       }
70 |
71 |       // The argument tokens were consumed above; step past the last one.
72 |       currentTokenIndex++;
73 |     }
74 |     return ast;
75 |   }
76 |
77 |   /** Creates the CALL_EXPRESSION node for {@code name} and attaches its arguments as children. */
78 |   private AstNode parseCallExpression(String name, int argCount) {
79 |     AstNode expression = new AstNode(AstNodeClass.CALL_EXPRESSION, name);
80 |     findNumericalArguments(argCount).forEach(expression::appendNode);
81 |     return expression;
82 |   }
83 |
84 |   /**
85 |    * Consumes the next {@code argCount} tokens and wraps each value in a NUMBER node.
86 |    *
87 |    * <p>NOTE(review): the token type of an argument is not checked here; confirm whether the lexer
88 |    * guarantees that only numbers can follow a command.
89 |    *
90 |    * @throws ParserException if the token list ends before all arguments were read
91 |    */
92 |   private List<AstNode> findNumericalArguments(int argCount) {
93 |     List<AstNode> result = new ArrayList<>();
94 |     while (argCount-- > 0) {
95 |       if (currentTokenIndex + 1 >= tokens.size()) {
96 |         throw new ParserException("Missing numerical argument");
97 |       }
98 |       result.add(new AstNode(AstNodeClass.NUMBER, tokens.get(++currentTokenIndex).getValue()));
99 |     }
100 |     return result;
101 |   }
102 | }
79 |
--------------------------------------------------------------------------------
/compiler-examples/svg-compiler/src/main/java/com/arjunsk/compiler/svg/core/transforming/Transformer.java:
--------------------------------------------------------------------------------
1 | package com.arjunsk.compiler.svg.core.transforming;
2 |
3 | import com.arjunsk.compiler.svg.domain.lexer.ast.AstNode;
4 | import com.arjunsk.compiler.svg.domain.lexer.ast.support.AstNodeClass;
5 | import com.arjunsk.compiler.svg.domain.transformer.ast.SvgAstNode;
6 | import com.arjunsk.compiler.svg.exceptions.TransformerException;
7 | import java.util.Iterator;
8 |
9 | /** Transform the AST to SVG AST. */
10 | public class Transformer {
11 |
12 |   /**
13 |    * Builds the SVG tree for the given AST.
14 |    *
15 |    * <p>The root is a 100 x 100 {@code svg} element. Each "Paper" call adds a full-size {@code rect}
16 |    * filled with the requested shade, each "Pen" call sets the shade used by the lines that follow,
17 |    * and each "Line" call adds a {@code line} between its two points. Children that are not call
18 |    * expressions are skipped.
19 |    *
20 |    * @param oldAst AST whose children are the drawing calls
21 |    * @return root node of the SVG AST
22 |    * @throws TransformerException if a call expression is not Paper, Pen or Line
23 |    */
24 |   public SvgAstNode transform(AstNode oldAst) {
25 |
26 |     SvgAstNode newAst = new SvgAstNode("svg");
27 |     newAst.appendAttributes("width", 100);
28 |     newAst.appendAttributes("height", 100);
29 |     newAst.appendAttributes("viewBox", "0 0 100 100");
30 |     newAst.appendAttributes("xmlns", "http://www.w3.org/2000/svg");
31 |     newAst.appendAttributes("version", "1.1");
32 |
33 |     // Shade used by Line until a Pen call replaces it.
34 |     String penColor = "89";
35 |     SvgAstNode expression;
36 |
37 |     final Iterator<AstNode> children = oldAst.getChildren();
38 |     while (children.hasNext()) {
39 |       AstNode currentAstNode = children.next();
40 |       if (currentAstNode.getNodeClass() != AstNodeClass.CALL_EXPRESSION) {
41 |         continue;
42 |       }
43 |
44 |       switch (currentAstNode.getValue()) {
45 |         case "Paper":
46 |           expression = new SvgAstNode("rect");
47 |           expression.appendAttributes("x", 0);
48 |           expression.appendAttributes("y", 0);
49 |           expression.appendAttributes("width", 100);
50 |           expression.appendAttributes("height", 100);
51 |           expression.appendAttributes(
52 |               "fill", makeColor(currentAstNode.getChildren().next().getValue()));
53 |           newAst.appendBody(expression);
54 |           break;
55 |         case "Pen":
56 |           penColor = currentAstNode.getChildren().next().getValue();
57 |           break;
58 |         case "Line":
59 |           expression = new SvgAstNode("line");
60 |           // The four arguments are read in order: x1, y1, x2, y2.
61 |           final Iterator<AstNode> lineArgs = currentAstNode.getChildren();
62 |           expression.appendAttributes("x1", Integer.parseInt(lineArgs.next().getValue()));
63 |           expression.appendAttributes("y1", Integer.parseInt(lineArgs.next().getValue()));
64 |           expression.appendAttributes("x2", Integer.parseInt(lineArgs.next().getValue()));
65 |           expression.appendAttributes("y2", Integer.parseInt(lineArgs.next().getValue()));
66 |           expression.appendAttributes("stroke", makeColor(penColor));
67 |           expression.appendAttributes("stroke-linecap", "round");
68 |           newAst.appendBody(expression);
69 |           break;
70 |         default:
71 |           throw new TransformerException("Invalid Token");
72 |       }
73 |     }
74 |
75 |     return newAst;
76 |   }
77 |
78 |   /**
79 |    * Converts a shade level into a grey {@code rgb(...)} value whose three channels are all
80 |    * {@code 100 - level} percent.
81 |    *
82 |    * @param level shade level as a decimal integer string
83 |    * @return the CSS colour string, e.g. {@code rgb(11%, 11%, 11%)} for level "89"
84 |    */
85 |   private String makeColor(String level) {
86 |     int output = 100 - Integer.parseInt(level);
87 |     return "rgb(" + output + "%, " + output + "%, " + output + "%)";
88 |   }
89 | }
68 |
--------------------------------------------------------------------------------
/compiler-examples/ck-compiler/src/main/java/com/arjunsk/compiler/ck/parser/Parser.java:
--------------------------------------------------------------------------------
1 | package com.arjunsk.compiler.ck.parser;
2 |
3 | import com.arjunsk.compiler.ck.domain.token.Token;
4 | import com.arjunsk.compiler.ck.domain.token.support.TokenType;
5 | import com.arjunsk.compiler.ck.domain.tree.nodes.common.TerminalNode;
6 | import com.arjunsk.compiler.ck.domain.tree.nodes.grammer.impl.LetContext;
7 | import com.arjunsk.compiler.ck.domain.tree.nodes.grammer.impl.ProgramContext;
8 | import com.arjunsk.compiler.ck.domain.tree.nodes.grammer.impl.ShowContext;
9 | import com.arjunsk.compiler.ck.domain.tree.nodes.grammer.impl.StatementContext;
10 | import com.arjunsk.compiler.ck.exceptions.ParserException;
11 | import com.arjunsk.compiler.ck.lexer.Lexer;
12 | import java.util.ArrayList;
13 | import java.util.List;
14 |
15 | public class Parser {
16 |
17 | private final Lexer lexer;
18 |
19 | public Parser(Lexer lexer) {
20 | this.lexer = lexer;
21 | }
22 |
23 | /**
24 | * Parse Logic for Program.
25 | *
26 | *
NOTE: We will use if-else to create a Deterministic Finite Automata (DFA).
27 | */
28 | public ProgramContext parseProgram() {
29 | List statements = new ArrayList<>();
30 | do {
31 | statements.add(parseStatement());
32 | } while (lexer.nextToken());
33 | return new ProgramContext(statements);
34 | }
35 |
36 | /** Parse Logic for Statement. Creates a LET or SHOW statement based on the Tokens passed. */
37 | public StatementContext parseStatement() {
38 |
39 | if (lexer.getCurrentToken() == null) {
40 | lexer.nextToken(); // Current Token = LET | SHOW
41 | }
42 |
43 | Token token = lexer.getCurrentToken(); // LET | SHOW
44 |
45 | if (token.getType() == TokenType.VARIABLE) { // LET
46 | return new StatementContext(parseLet(), null);
47 | } else if (token.getType() == TokenType.SHOW) { // SHOW
48 | return new StatementContext(null, parseShow());
49 | } else {
50 | throw new ParserException("Not of type LET or SHOW.");
51 | }
52 | }
53 |
54 | /** Parse Logic for Let. */
55 | public LetContext parseLet() {
56 | if (lexer.getCurrentToken() == null) {
57 | lexer.nextToken(); // Current Token = VAR
58 | }
59 | TerminalNode variableNameToken = parseTerminalNode(); // VAR
60 |
61 | lexer.nextToken(); // move to : =
62 | lexer.nextToken(); // move to : INT
63 |
64 | TerminalNode valueToken = parseTerminalNode(); // INT
65 |
66 | return new LetContext(variableNameToken, valueToken);
67 | }
68 |
69 | /** Parse Logic for Show. */
70 | public ShowContext parseShow() {
71 |
72 | if (lexer.getCurrentToken() == null) {
73 | lexer.nextToken(); // Current Token = SHOW
74 | }
75 |
76 | lexer.nextToken(); // Current Token = VAR | INT
77 |
78 | TerminalNode terminal = parseTerminalNode(); // VAR | INT
79 | final Token token = (Token) terminal.getPayload();
80 |
81 | if (token.getType() == TokenType.NUMBER) {
82 | return new ShowContext(terminal, null);
83 | } else if (token.getType() == TokenType.VARIABLE) {
84 | return new ShowContext(null, terminal);
85 | } else {
86 | throw new ParserException("Show not preceded with var or int");
87 | }
88 | }
89 |
90 | /** Parse Logic for Terminal Node. */
91 | public TerminalNode parseTerminalNode() {
92 |
93 | if (lexer.getCurrentToken() == null) {
94 | lexer.nextToken(); // Current Token = VAR | INT
95 | }
96 |
97 | TerminalNode token = new TerminalNode();
98 | token.setSymbol(lexer.getCurrentToken());
99 | return token;
100 | }
101 | }
102 |
--------------------------------------------------------------------------------
/compiler-examples/ck-compiler/src/main/java/com/arjunsk/compiler/ck/lexer/Lexer.java:
--------------------------------------------------------------------------------
1 | package com.arjunsk.compiler.ck.lexer;
2 |
3 | import com.arjunsk.compiler.ck.domain.token.Token;
4 | import com.arjunsk.compiler.ck.domain.token.support.TokenType;
5 | import com.arjunsk.compiler.ck.exceptions.LexerException;
6 | import java.util.Arrays;
7 |
8 | public class Lexer {
9 |
10 | private final String code;
11 | private final int codeLength;
12 |
13 | private int currentIndex;
14 |
15 | private Token currentToken;
16 | private Token previousToken;
17 |
18 | public Lexer(String code) {
19 | this.code = code;
20 | this.currentIndex = 0;
21 | this.codeLength = code.length();
22 | }
23 |
24 | /**
25 | * Updates currentToken to the next valid Token if it is available.
26 | *
27 | * @return true, if a valid token is available next.
28 | */
29 | public boolean nextToken() {
30 |
31 | while (!isEndOfCode()) { // while loop is to fetch nextToken, if a skipWS occurs.
32 |
33 | previousToken = currentToken; // in case you need the previous token
34 |
35 | final char currentChar = code.charAt(currentIndex);
36 |
37 | if (Arrays.asList(' ', '\r', '\t', '\n').contains(currentChar)) { // 1. WS
38 | skipWhiteSpace();
39 | continue;
40 | } else if (currentChar == '=') { // 2. LET
41 | currentToken = new Token(TokenType.EQUALS_OPERATOR);
42 | currentIndex++;
43 | } else if (Character.isDigit(currentChar)) { // 3. INT
44 | currentToken = new Token(TokenType.NUMBER, readNumber());
45 | } else if (Character.isLetter(currentChar)) {
46 | String variableName = readVariable();
47 | if (variableName.equalsIgnoreCase("show")) { // 4. SHOW
48 | currentToken = new Token(TokenType.SHOW);
49 | } else { // 5. VAR
50 | currentToken = new Token(TokenType.VARIABLE, variableName);
51 | }
52 | } else {
53 | throw new LexerException("Token not defined.");
54 | }
55 | return true;
56 | }
57 | return false;
58 | }
59 |
60 | /**
61 | * Read Integer as String
62 | *
63 | * @return String value of Integer Number.
64 | */
65 | private String readNumber() {
66 | StringBuilder sb = new StringBuilder();
67 | char currentChar = code.charAt(currentIndex);
68 | while (!isEndOfCode() && Character.isDigit(currentChar)) {
69 | sb.append(currentChar);
70 | currentIndex++;
71 | if (isEndOfCode()) break;
72 | currentChar = code.charAt(currentIndex);
73 | }
74 | return sb.toString();
75 | }
76 |
77 | /** @return String read from current index. */
78 | private String readVariable() {
79 | StringBuilder sb = new StringBuilder();
80 | char currentChar = code.charAt(currentIndex);
81 | while (!isEndOfCode() && Character.isLetter(currentChar)) {
82 | sb.append(currentChar);
83 | currentIndex++;
84 | if (isEndOfCode()) break;
85 | currentChar = code.charAt(currentIndex);
86 | }
87 | return sb.toString();
88 | }
89 |
90 | /** Skip WhiteSpace(WS) */
91 | private void skipWhiteSpace() {
92 | while (!isEndOfCode()) {
93 | if (Arrays.asList(' ', '\r', '\t', '\n').contains(code.charAt(currentIndex))) {
94 | currentIndex++;
95 | } else {
96 | break;
97 | }
98 | }
99 | }
100 |
101 | /** Check if End of Code is reached. */
102 | private boolean isEndOfCode() {
103 | return currentIndex >= codeLength;
104 | }
105 |
106 | /**
107 | * Get previous Token.
108 | *
109 | *
NOTE: for SimplerLang grammar we don't have much use of previous token. But it will be
110 | * useful when implementing complex Grammar.
111 | */
112 | public Token getPreviousToken() {
113 | return previousToken;
114 | }
115 |
116 | /** Get current Token. */
117 | public Token getCurrentToken() {
118 | return currentToken;
119 | }
120 | }
121 |
--------------------------------------------------------------------------------
/compiler-examples/ck-compiler/src/main/java/com/arjunsk/compiler/ck/visitor/semantic/SemanticAnalyzer.java:
--------------------------------------------------------------------------------
1 | package com.arjunsk.compiler.ck.visitor.semantic;
2 |
3 | import com.arjunsk.compiler.ck.domain.tree.nodes.common.TerminalNode;
4 | import com.arjunsk.compiler.ck.domain.tree.nodes.grammer.impl.LetContext;
5 | import com.arjunsk.compiler.ck.domain.tree.nodes.grammer.impl.ShowContext;
6 | import com.arjunsk.compiler.ck.domain.tree.nodes.grammer.impl.StatementContext;
7 | import com.arjunsk.compiler.ck.exceptions.SemanticException;
8 | import com.arjunsk.compiler.ck.visitor.SimplerLangBaseVisitor;
9 | import java.util.HashMap;
10 | import java.util.Map;
11 |
12 | /**
13 |  * `Syntax` is the concept that concerns itself only whether or not the sentence is valid for the
14 |  * grammar of the language. `Semantics` is about whether or not the sentence has a valid meaning.
15 |  *
16 |  * <p>NOTE: checking whether the variable is declared before "SHOW VAR" is an example of `Semantic`
17 |  * check.
18 |  */
19 | public class SemanticAnalyzer extends SimplerLangBaseVisitor<Void> {
20 |
21 |   /** Variables declared by LET so far, name to value text. */
22 |   private final Map<String, String> variableMap;
23 |
24 |   public SemanticAnalyzer() {
25 |     super();
26 |     this.variableMap = new HashMap<>();
27 |   }
28 |
29 |   /**
30 |    * Validate Statement Semantics.
31 |    *
32 |    * @throws SemanticException unless exactly one of the LET and SHOW contexts is present
33 |    */
34 |   @Override
35 |   public Void visitStatement(StatementContext context) {
36 |     if (context.getLetContext() == null && context.getShowContext() == null) {
37 |       throw new SemanticException("Statement should of type LET or SHOW.");
38 |     } else if (context.getLetContext() != null && context.getShowContext() != null) {
39 |       throw new SemanticException("Statement should be either of type LET or SHOW & not both.");
40 |     }
41 |
42 |     return super.visitStatement(context);
43 |   }
44 |
45 |   /**
46 |    * Validate LET Semantics.
47 |    *
48 |    * @throws SemanticException if the name or value is empty, or the value is not an integer
49 |    */
50 |   @Override
51 |   public Void visitLet(LetContext context) {
52 |
53 |     String variableName = context.getVariableName().getText();
54 |     String variableValue = context.getVariableValue().getText();
55 |
56 |     if (variableName == null || variableName.isEmpty()) {
57 |       throw new SemanticException("Variable name cannot be empty.");
58 |     } else if (variableValue == null || variableValue.isEmpty()) {
59 |       throw new SemanticException("Variable value cannot be empty.");
60 |     }
61 |
62 |     // Check if variable value is Integer. In our case, this will be already handled in the
63 |     // Tokenizer. The value was null-checked above, so only a format error can occur here.
64 |     try {
65 |       Integer.parseInt(variableValue);
66 |     } catch (NumberFormatException ex) {
67 |       throw new SemanticException("Variable value should be integer.", ex);
68 |     }
69 |
70 |     // This will be used to check whether variable is declared using LET before invoking SHOW for
71 |     // the variable.
72 |     variableMap.put(variableName, variableValue);
73 |
74 |     return super.visitLet(context);
75 |   }
76 |
77 |   /**
78 |    * Validate SHOW Semantics.
79 |    *
80 |    * <p>NOTE: We validate if the variable is previously declared using LET.
81 |    *
82 |    * @throws SemanticException unless SHOW has exactly one argument that is either a valid integer
83 |    *     or a variable already declared with LET
84 |    */
85 |   @Override
86 |   public Void visitShow(ShowContext context) {
87 |
88 |     TerminalNode variableNameTN = context.getVariableName();
89 |     TerminalNode integerValueTN = context.getIntegerValue();
90 |
91 |     /* 1. Checking whether either of VAR | INT is present.*/
92 |     boolean isVarPresent = variableNameTN != null && !variableNameTN.getText().isEmpty();
93 |     boolean isIntPresent = integerValueTN != null && !integerValueTN.getText().isEmpty();
94 |
95 |     if (!isVarPresent && !isIntPresent) {
96 |       throw new SemanticException("SHOW should have integer or variable as argument");
97 |     } else if (isVarPresent && isIntPresent) {
98 |       throw new SemanticException("SHOW should have either integer or variable as argument");
99 |     }
100 |
101 |     /* 2. If SHOW Argument is Number, check if it is an integer. In our case, this will be
102 |     already handled in the Tokenizer. getText() was already evaluated in step 1, so only a
103 |     format error can occur here.*/
104 |     if (integerValueTN != null) {
105 |       try {
106 |         Integer.parseInt(integerValueTN.getText());
107 |       } catch (NumberFormatException ex) {
108 |         throw new SemanticException("SHOW argument is not a valid integer.", ex);
109 |       }
110 |     }
111 |
112 |     /* 3. if SHOW Argument is Variable, check if the variable is declared previously.*/
113 |     if (variableNameTN != null && !variableMap.containsKey(variableNameTN.getText())) {
114 |       throw new SemanticException("SHOW argument variable is not declared.");
115 |     }
116 |
117 |     return super.visitShow(context);
118 |   }
119 | }
107 |
--------------------------------------------------------------------------------
/compiler-examples/ck-compiler/src/main/java/com/arjunsk/compiler/ck/visitor/codegenerator/CodeGeneratorVisitor.java:
--------------------------------------------------------------------------------
1 | package com.arjunsk.compiler.ck.visitor.codegenerator;
2 |
3 | import static org.objectweb.asm.Opcodes.ACC_PUBLIC;
4 | import static org.objectweb.asm.Opcodes.ACC_STATIC;
5 | import static org.objectweb.asm.Opcodes.ACC_SUPER;
6 | import static org.objectweb.asm.Opcodes.ALOAD;
7 | import static org.objectweb.asm.Opcodes.ASTORE;
8 | import static org.objectweb.asm.Opcodes.BIPUSH;
9 | import static org.objectweb.asm.Opcodes.GETSTATIC;
10 | import static org.objectweb.asm.Opcodes.INVOKESTATIC;
11 | import static org.objectweb.asm.Opcodes.INVOKEVIRTUAL;
12 | import static org.objectweb.asm.Opcodes.V1_8;
13 |
14 | import com.arjunsk.compiler.ck.domain.tree.nodes.grammer.impl.LetContext;
15 | import com.arjunsk.compiler.ck.domain.tree.nodes.grammer.impl.ProgramContext;
16 | import com.arjunsk.compiler.ck.domain.tree.nodes.grammer.impl.ShowContext;
17 | import com.arjunsk.compiler.ck.visitor.SimplerLangBaseVisitor;
18 | import java.io.File;
19 | import java.io.FileOutputStream;
20 | import java.util.HashMap;
21 | import java.util.Map;
22 | import org.objectweb.asm.ClassWriter;
23 | import org.objectweb.asm.MethodVisitor;
24 | import org.objectweb.asm.Type;
25 |
26 | /**
27 | * Visitor that converts AST to .class java byte code.
28 | *
29 | *
NOTE 1: To generate ASM code from Java Class you can use ASMifier. This will help you write
30 | * complex ASM codes. Ref:- @see Java
32 | * ASMifier
33 | *
34 | *
NOTE 2: Ops Code reference: @see Java Ops Code
36 | */
37 | public class CodeGeneratorVisitor extends SimplerLangBaseVisitor {
38 |
39 | // Class Writer
40 | private final ClassWriter classWriter;
41 |
42 | // For assigning correct variable index from LET to SHOW.
43 | private final Map variableIndexMap;
44 | private int variableIndex;
45 |
46 | // Main Method visitor used across LET and SHOW.
47 | private MethodVisitor mainMethodVisitor;
48 |
49 | public CodeGeneratorVisitor() {
50 | this.classWriter = new ClassWriter(0);
51 | variableIndexMap = new HashMap<>();
52 |
53 | // Variable0 is reserved for args[] in : `main(String[] var0)`
54 | variableIndex = 1;
55 | }
56 |
57 | /** Called when the program node is visited. The main entry point. */
58 | @Override
59 | public Void visitProgram(ProgramContext context) {
60 |
61 | /** ASM = CODE : public class CgSample. */
62 | // BEGIN 1: creates a ClassWriter for the `CgSample.class` public class,
63 | classWriter.visit(
64 | V1_8, // Java 1.8
65 | ACC_PUBLIC + ACC_SUPER, // public static
66 | "CgSample", // Class Name
67 | null, // Generics
68 | "java/lang/Object", // Interface extends Object (Super Class),
69 | null // interface names
70 | );
71 |
72 | /** ASM = CODE : public static void main(String args[]). */
73 | // BEGIN 2: creates a MethodVisitor for the 'main' method
74 | mainMethodVisitor =
75 | classWriter.visitMethod(
76 | ACC_PUBLIC + ACC_STATIC, "main", "([Ljava/lang/String;)V", null, null);
77 |
78 | super.visitProgram(context);
79 |
80 | // END 2: Close main()
81 | mainMethodVisitor.visitEnd();
82 |
83 | // END 1: Close class()
84 | classWriter.visitEnd();
85 |
86 | byte[] code = classWriter.toByteArray();
87 | writeToFile(code, "output/CgSample.class");
88 |
89 | return null;
90 | }
91 |
92 | @Override
93 | public Void visitLet(LetContext context) {
94 |
95 | /** ASM = BIPUSH : Push bytes. */
96 | int variableIntegerVal = Integer.parseInt(context.getVariableValue().getText());
97 | mainMethodVisitor.visitIntInsn(BIPUSH, variableIntegerVal);
98 |
99 | /** ASM = CODE : Integer.valueOf( ) . */
100 | mainMethodVisitor.visitMethodInsn(
101 | INVOKESTATIC,
102 | Type.getType(Integer.class).getInternalName(),
103 | "valueOf",
104 | "(I)Ljava/lang/Integer;",
105 | false);
106 |
107 | /**
108 | * ASM = ASTORE : Store reference into local variable.
109 | *
110 | *
This stores the above valueOf(INT) as Integer Variable.
111 | */
112 | // STORE to Variable Pool at variableIndex
113 | mainMethodVisitor.visitVarInsn(ASTORE, variableIndex);
114 |
115 | // Saving the variableIndex for reference in SHOW()
116 | variableIndexMap.put(context.getVariableName().getText(), variableIndex);
117 | variableIndex++;
118 |
119 | return null;
120 | }
121 |
122 | @Override
123 | public Void visitShow(ShowContext context) {
124 |
125 | /** ASM = CODE : System.out */
126 | mainMethodVisitor.visitFieldInsn(GETSTATIC, "java/lang/System", "out", "Ljava/io/PrintStream;");
127 |
128 | if (context.getVariableName() != null) {
129 |
130 | /**
131 | * ASM = LOAD Variable: ALOAD variable
132 | *
133 | *
ALOAD: Load reference from local variable
134 | */
135 | // Fetch index from VariablePool
136 | int index = variableIndexMap.get(context.getVariableName().getText());
137 | // LOAD from variable pool
138 | mainMethodVisitor.visitVarInsn(ALOAD, index);
139 |
140 | /** ASM = INVOKE: println(Object) with variable loaded via ALOAD. */
141 | mainMethodVisitor.visitMethodInsn(
142 | INVOKEVIRTUAL, "java/io/PrintStream", "println", "(Ljava/lang/Object;)V", false);
143 | } else if (context.getIntegerValue() != null) {
144 |
145 | /** ASM = BIPUSH: Push byte. */
146 |
147 | // Get integer value of the constant.
148 | int integerVal = Integer.parseInt(context.getIntegerValue().getText());
149 | // PUSH integerValue
150 | mainMethodVisitor.visitIntInsn(BIPUSH, integerVal);
151 |
152 | /** ASM = INVOKE: println(I) with variable loaded via ALOAD. */
153 | mainMethodVisitor.visitMethodInsn(
154 | INVOKEVIRTUAL, "java/io/PrintStream", "println", "(I)V", false);
155 | }
156 | return null;
157 | }
158 |
159 | /**
160 | * Writes Byte Array to a file
161 | *
162 | * @param code Byte Array of Source code.
163 | * @param filePath File Path to write. Eg:- "Example.class"
164 | */
165 | private void writeToFile(byte[] code, String filePath) {
166 |
167 | File file = new File(filePath);
168 | file.getParentFile().mkdirs();
169 |
170 | try (FileOutputStream fos = new FileOutputStream(filePath)) {
171 | fos.write(code);
172 | } catch (Exception ex) {
173 | ex.printStackTrace();
174 | }
175 | }
176 | }
177 |
--------------------------------------------------------------------------------