├── .classpath ├── .project ├── README.md ├── info └── grammar.txt ├── resources └── Test.java └── src ├── exceptions └── AnalyzerException.java ├── lexer └── Lexer.java ├── parser ├── NonTerminal.java ├── Parser.java ├── Rule.java ├── Symbol.java └── Terminal.java ├── runner ├── GUI.java ├── JavaFilter.java └── Main.java └── token ├── Token.java └── TokenType.java /.classpath: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | -------------------------------------------------------------------------------- /.project: -------------------------------------------------------------------------------- 1 | 2 | 3 | JavaCompiler 4 | 5 | 6 | 7 | 8 | 9 | org.eclipse.jdt.core.javabuilder 10 | 11 | 12 | 13 | 14 | 15 | org.eclipse.jdt.core.javanature 16 | 17 | 18 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | This compiler is not complete: it contains only a lexer and a parser. 2 | The supported Java syntax is a reduced subset of the language. 
3 | -------------------------------------------------------------------------------- /info/grammar.txt: -------------------------------------------------------------------------------- 1 | ClassDecl -> Modifiers class id { ClassBody } | EPSILON 2 | Modifiers -> AccessModifier StaticModifier 3 | AccessModifier -> public | private | EPSILON 4 | StaticModifier -> static | EPSILON 5 | ClassBody -> ClassMember ClassBody | EPSILON 6 | ClassMember -> Modifiers RestOfClassMember 7 | RestOfClassMember -> Type RestOfClassMember2 | void RestOfClassMember2 8 | Type -> id | BasicType 9 | BasicType -> int | double 10 | RestOfClassMember2 -> id MethodOrAttr 11 | MethodOrAttr -> MethodContinue | AttrContinue 12 | MethodContinue -> ( MethodParams ) { MethodBody } 13 | MethodParams -> OneParam MethodParamContinue | EPSILON 14 | OneParam -> Type id 15 | MethodParamContinue -> , OneParam MethodParamContinue | EPSILON 16 | AttrContinue -> AssignAttr NextAttr 17 | AssignAttr -> = Expression | EPSILON 18 | NextAttr -> , id AttrContinue | ; 19 | MethodBody -> Statement MethodBody | EPSILON 20 | Statement -> BlockDecl | Expression ; 21 | BlockDecl -> { MethodBody } 22 | Expression -> Term ExprOp | null 23 | Term -> intConst | doubleConst | Variable | ( Expression ) | new id ( MethodValues ) Link 24 | ExprOp -> ArithmOp Term ExprOp | EPSILON 25 | Variable -> id FunctionExt Link 26 | Link -> . 
id FunctionExt Link | EPSILON 27 | FunctionExt -> ( MethodValues ) | id | EPSILON 28 | MethodValues -> Expression ValueContinue | EPSILON 29 | ValueContinue -> , Expression ValueContinue | EPSILON 30 | ArithmOp -> + | - | * | / | = 31 | Statement -> IfElseBlock | WhileBlock | ReturnBlock | VarDecl 32 | IfElseBlock -> if ( BoolExpr ) Statement else Statement 33 | WhileBlock -> while ( BoolExpr ) Statement 34 | ReturnBlock -> return Expression ; 35 | VarDecl -> BasicType id = Expression ; 36 | BoolExpr -> true | false | Expression CompareOp Expression 37 | CompareOp -> > | < | == | != 38 | 39 | -------------------------------------------------------------------------------- /resources/Test.java: -------------------------------------------------------------------------------- 1 | public class Test { 2 | static int a = 10; // intConst 3 | private static double d = 0.1; 4 | public double value = (((0.943 - 0.3239))); 5 | 6 | /* 7 | * Ira is a monkey 8 | */ 9 | public static int myFunc1(String args) { 10 | 11 | while ((a + d)*d < 200) { 12 | if (d == 0.1) { 13 | a = a + 5; 14 | } else { 15 | a = a - 5; 16 | } 17 | } 18 | 19 | // create objects 20 | Integer t1 = new Integer(1); 21 | Integer t2 = new Integer(2).compareTo(new Integer(5)); 22 | Integer t3 = new Integer(3).intValue(); 23 | 24 | // call methods 25 | Test.myFunc2(5, 10); 26 | double b = myFunc2(3, 4); 27 | 28 | return a; 29 | } 30 | 31 | private static double myFunc2(double b, int c) { 32 | if (true) { 33 | b = b + (d - a); 34 | } else{ 35 | 36 | } 37 | if (c < 2) { 38 | d = d * 2; 39 | } else { 40 | c = 0; 41 | } 42 | return d * b; 43 | } 44 | } 45 | -------------------------------------------------------------------------------- /src/exceptions/AnalyzerException.java: -------------------------------------------------------------------------------- 1 | package exceptions; 2 | 3 | /** 4 | * The {@code AnalyzerException} class represents exceptions which may be caused 5 | * by lexical or syntax errors 6 | * 7 | 
* @author Ira Korshunova 8 | * 9 | */ 10 | @SuppressWarnings("serial") 11 | public class AnalyzerException extends Exception { 12 | /** 13 | * Offset in the input source (lexer) or the count of tokens matched so far (parser), where 14 | * the error occurred 15 | */ 16 | private int errorPosition; 17 | 18 | /** The detail message; remains {@code null} when the position-only constructor is used */ 19 | private String message; 20 | 21 | /** 22 | * Creates {@code AnalyzerException} object with specified error position 23 | * 24 | * @param errorPosition 25 | * position of the error 26 | */ 27 | public AnalyzerException(int errorPosition) { 28 | this.errorPosition = errorPosition; 29 | } 30 | 31 | /** 32 | * Creates {@code AnalyzerException} object with specified error position 33 | * and message 34 | * 35 | * @param message 36 | * detailed message 37 | * @param errorPosition 38 | * position of the error 39 | */ 40 | public AnalyzerException(String message, int errorPosition) { 41 | this.errorPosition = errorPosition; 42 | this.message = message; 43 | } 44 | 45 | /** 46 | * Returns error's position in the input 47 | * 48 | * @return error's position 49 | */ 50 | public int getErrorPosition() { 51 | return errorPosition; 52 | } 53 | /** Returns the detail message, or {@code null} if none was supplied at construction. */ 54 | @Override 55 | public String getMessage() { 56 | return message; 57 | } 58 | } -------------------------------------------------------------------------------- /src/lexer/Lexer.java: -------------------------------------------------------------------------------- 1 | package lexer; 2 | 3 | import java.util.ArrayList; 4 | import java.util.List; 5 | import java.util.Map; 6 | import java.util.TreeMap; 7 | import java.util.regex.Matcher; 8 | import java.util.regex.Pattern; 9 | 10 | import exceptions.AnalyzerException; 11 | 12 | import token.Token; 13 | import token.TokenType; 14 | 15 | /** 16 | * The {@code Lexer} class represents lexical analyzer for subset of Java 17 | * language. 
18 | * 19 | * @author Ira Korshunova 20 | * 21 | */ 22 | public class Lexer { 23 | 24 | /** Mapping from type of token to its regular expression */ 25 | private Map regEx; 26 | 27 | /** List of tokens as they appear in the input source */ 28 | private List result; 29 | 30 | /** 31 | * Initializes a newly created {@code Lexer} object 32 | */ 33 | public Lexer() { 34 | regEx = new TreeMap(); 35 | launchRegEx(); 36 | result = new ArrayList(); 37 | } 38 | 39 | /** 40 | * Performs the tokenization of the input source code. 41 | * 42 | * @param source 43 | * string to be analyzed 44 | * @throws AnalyzerException 45 | * if lexical error exists in the source 46 | * 47 | */ 48 | public void tokenize(String source) throws AnalyzerException { 49 | int position = 0; 50 | Token token = null; 51 | do { 52 | token = separateToken(source, position); 53 | if (token != null) { 54 | position = token.getEnd(); 55 | result.add(token); 56 | } 57 | } while (token != null && position != source.length()); 58 | if (position != source.length()) { 59 | throw new AnalyzerException("Lexical error at position # "+ position, position); 60 | 61 | } 62 | } 63 | 64 | /** 65 | * Returns a sequence of tokens 66 | * 67 | * @return list of tokens 68 | */ 69 | public List getTokens() { 70 | return result; 71 | } 72 | 73 | /** 74 | * Returns a sequence of tokens without types {@code BlockComment}, 75 | * {@code LineComment} , {@code NewLine}, {@code Tab}, {@code WhiteSpace} 76 | * 77 | * @return list of tokens 78 | */ 79 | public List getFilteredTokens() { 80 | List filteredResult = new ArrayList(); 81 | for (Token t : this.result) { 82 | if (!t.getTokenType().isAuxiliary()) { 83 | filteredResult.add(t); 84 | } 85 | } 86 | return filteredResult; 87 | } 88 | 89 | /** 90 | * Scans the source from the specific index and returns the first separated 91 | * token 92 | * 93 | * @param source 94 | * source code to be scanned 95 | * @param fromIndex 96 | * the index from which to start the scanning 97 | * @return 
first separated token or {@code null} if no token was found 98 | * 99 | */ 100 | private Token separateToken(String source, int fromIndex) { 101 | if (fromIndex < 0 || fromIndex >= source.length()) { 102 | throw new IllegalArgumentException("Illegal index in the input stream!"); 103 | } 104 | for (TokenType tokenType : TokenType.values()) { 105 | Pattern p = Pattern.compile(".{" + fromIndex + "}" + regEx.get(tokenType), 106 | Pattern.DOTALL); 107 | Matcher m = p.matcher(source); 108 | if (m.matches()) { 109 | String lexema = m.group(1); 110 | return new Token(fromIndex, fromIndex + lexema.length(), lexema, tokenType); 111 | } 112 | } 113 | 114 | return null; 115 | } 116 | 117 | /** 118 | * Creates map from token types to its regular expressions 119 | * 120 | */ 121 | private void launchRegEx() { 122 | regEx.put(TokenType.BlockComment, "(/\\*.*?\\*/).*"); 123 | regEx.put(TokenType.LineComment, "(//(.*?)[\r$]?\n).*"); 124 | regEx.put(TokenType.WhiteSpace, "( ).*"); 125 | regEx.put(TokenType.OpenBrace, "(\$).*"); 126 | regEx.put(TokenType.CloseBrace, "(\$).*"); 127 | regEx.put(TokenType.Semicolon, "(;).*"); 128 | regEx.put(TokenType.Comma, "(,).*"); 129 | regEx.put(TokenType.OpeningCurlyBrace, "(\\{).*"); 130 | regEx.put(TokenType.ClosingCurlyBrace, "(\\}).*"); 131 | regEx.put(TokenType.DoubleConstant, "\\b(\\d{1,9}\\.\\d{1,32})\\b.*"); 132 | regEx.put(TokenType.IntConstant, "\\b(\\d{1,9})\\b.*"); 133 | regEx.put(TokenType.Void, "\\b(void)\\b.*"); 134 | regEx.put(TokenType.Int, "\\b(int)\\b.*"); 135 | regEx.put(TokenType.Double, "\\b(int|double)\\b.*"); 136 | regEx.put(TokenType.Tab, "(\\t).*"); 137 | regEx.put(TokenType.NewLine, "(\\n).*"); 138 | regEx.put(TokenType.Public, "\\b(public)\\b.*"); 139 | regEx.put(TokenType.Private, "\\b(private)\\b.*"); 140 | regEx.put(TokenType.False, "\\b(false)\\b.*"); 141 | regEx.put(TokenType.True, "\\b(true)\\b.*"); 142 | regEx.put(TokenType.Null, "\\b(null)\\b.*"); 143 | regEx.put(TokenType.Return, "\\b(return)\\b.*"); 144 | 
regEx.put(TokenType.New, "\\b(new)\\b.*"); 145 | regEx.put(TokenType.Class, "\\b(class)\\b.*"); 146 | regEx.put(TokenType.If, "\\b(if)\\b.*"); 147 | regEx.put(TokenType.Else, "\\b(else)\\b.*"); 148 | regEx.put(TokenType.While, "\\b(while)\\b.*"); 149 | regEx.put(TokenType.Static, "\\b(static)\\b.*"); 150 | regEx.put(TokenType.Point, "(\\.).*"); 151 | regEx.put(TokenType.Plus, "(\\+{1}).*"); 152 | regEx.put(TokenType.Minus, "(\\-{1}).*"); 153 | regEx.put(TokenType.Multiply, "(\\*).*"); 154 | regEx.put(TokenType.Divide, "(/).*"); 155 | regEx.put(TokenType.EqualEqual, "(==).*"); 156 | regEx.put(TokenType.Equal, "(=).*"); 157 | regEx.put(TokenType.ExclameEqual, "(\\!=).*"); 158 | regEx.put(TokenType.Greater, "(>).*"); 159 | regEx.put(TokenType.Less, "(<).*"); 160 | regEx.put(TokenType.Identifier, "\\b([a-zA-Z]{1}[0-9a-zA-Z_]{0,31})\\b.*"); 161 | } 162 | } -------------------------------------------------------------------------------- /src/parser/NonTerminal.java: -------------------------------------------------------------------------------- 1 | package parser; 2 | 3 | /** 4 | * The {@code NonTerminal} class represents nonterminal symbol of grammar 5 | * 6 | * @author Ira Korshunova 7 | * 8 | */ 9 | public class NonTerminal extends Symbol { 10 | 11 | /** 12 | * Creates new {@code NonTerminal} object with specified code and 13 | * designation 14 | * 15 | * @param code code of nonterminal symbol 16 | * @param name designation of the nonterminal in the grammar 17 | */ 18 | public NonTerminal(int code, String name) { 19 | super(code, name); 20 | } 21 | 22 | @Override 23 | public boolean equals(Object obj) { 24 | if (obj == this) 25 | return true; 26 | if (obj == null) 27 | return false; 28 | if (obj.getClass() != NonTerminal.class) 29 | return false; 30 | NonTerminal nts = (NonTerminal) obj; 31 | return this.getCode() == nts.getCode(); 32 | } 33 | 34 | @Override 35 | public boolean isTerminal() { 36 | return false; 37 | } 38 | 39 | @Override 40 | public boolean 
isNonTerminal() { 41 | return true; 42 | } 43 | } -------------------------------------------------------------------------------- /src/parser/Parser.java: -------------------------------------------------------------------------------- 1 | package parser; 2 | 3 | import java.io.File; 4 | import java.io.FileNotFoundException; 5 | import java.util.AbstractMap.SimpleEntry; 6 | import java.util.ArrayList; 7 | import java.util.Arrays; 8 | import java.util.Collections; 9 | import java.util.HashMap; 10 | import java.util.HashSet; 11 | import java.util.List; 12 | import java.util.Map; 13 | import java.util.Scanner; 14 | import java.util.Set; 15 | import java.util.Stack; 16 | import java.util.StringTokenizer; 17 | 18 | import token.Token; 19 | import exceptions.AnalyzerException; 20 | 21 | /** 22 | * The {@code Parser} class represents predictive parser. It accepts only LL(1) 23 | * grammar. If the grammar is not LL(1) most likely you will get 24 | * {@code StackOverflowError}. Productions in the grammar use the following 25 | * format, for example: 26 | * 27 | *

28 | * 29 | *

 30 |  *  Goal -> A 
 31 |  *  A -> ( A ) | Two 
 32 |  *  Two -> a
 33 |  *

34 | * 35 | *

36 | * 37 | * Symbol is inferred as nonterminal by first uppercase char. "->" designates 38 | * definition, "|" designates alternation, and newlines designate termination. 39 | * Use "EPSILON" to represent an empty string. Place spaces between things you 40 | * don't want to read as one symbol: ( A ) != (A). 41 | * 42 | * @author Ira Korshunova 43 | * 44 | */ 45 | 46 | public class Parser { 47 | 48 | /** Terminal symbol of grammar which represents empty string */ 49 | public static Terminal epsilon = new Terminal(0, "EPSILON"); 50 | 51 | /** Terminal symbol which represents end of program */ 52 | public static Terminal endOfProgram = new Terminal(-1, "ENDOFPROGRAM"); 53 | 54 | /** Start symbol of the grammar */ 55 | private NonTerminal startSymbol; 56 | 57 | /** List of rules in the grammar without alternations */ 58 | private List rules; 59 | 60 | /** Grammar's alphabet. Contains terminal and nonterminal symbols */ 61 | private Set alphabet; 62 | 63 | /** Mapping from string representation of symbol to its object */ 64 | private Map nameToSymbol; 65 | 66 | /** Mapping from symbol to it's first set */ 67 | private Map> firstSet; 68 | 69 | /** Mapping from symbol to it's follow set */ 70 | private Map> followSet; 71 | 72 | /** Representation of parsing table for LL(1) parser */ 73 | private Map, Symbol[]> parsingTable; 74 | 75 | /** Stack of terminals, which were constructed from input tokens */ 76 | private Stack input; 77 | 78 | /** Sequence of applied rules during the derivations */ 79 | private List sequenceOfAppliedRules; 80 | 81 | /** 82 | * Initializes a newly created {@code Parser} object 83 | * 84 | */ 85 | 86 | public Parser() { 87 | rules = new ArrayList(); 88 | alphabet = new HashSet(); 89 | nameToSymbol = new HashMap(); 90 | alphabet.add(epsilon); 91 | firstSet = new HashMap>(); 92 | followSet = new HashMap>(); 93 | parsingTable = new HashMap, Symbol[]>(); 94 | sequenceOfAppliedRules = new ArrayList(); 95 | } 96 | 97 | /** 98 | * Parses the source, 
represented by the list of tokens, using the specified 99 | * LL(1) grammar rules 100 | * 101 | * @param grammarFile 102 | * file with grammar rules 103 | * @param list 104 | * list of tokens from the input 105 | * @throws FileNotFoundException 106 | * if file doesn't exist 107 | * @throws AnalyzerException 108 | * if the input contains syntax error 109 | */ 110 | public void parse(File grammarFile, List list) throws FileNotFoundException, 111 | AnalyzerException { 112 | parseRules(grammarFile); 113 | calculateFirst(); 114 | calculateFollow(); 115 | buildParsingTable(); 116 | input = convertTokensToStack(list); 117 | performParsingAlgorithm(); 118 | } 119 | 120 | /** 121 | * Returns the sequence of grammar rules, which were applied during the 122 | * parsing 123 | * 124 | * @return list of applied rules 125 | * 126 | */ 127 | public List getSequenceOfAppliedRules() { 128 | return sequenceOfAppliedRules; 129 | } 130 | 131 | /** 132 | * Implements LL(1) predictive parsing algorithm 133 | * 134 | * @throws AnalyzerException 135 | * if syntax error was found 136 | */ 137 | private void performParsingAlgorithm() throws AnalyzerException { 138 | Stack stack = new Stack(); 139 | stack.push(endOfProgram); 140 | stack.push(startSymbol); 141 | int parsedTokensCount = 0; 142 | do { 143 | 144 | Symbol stackTop = stack.peek(); 145 | Terminal inputTop = input.peek(); 146 | if (stackTop.isTerminal()) { 147 | if (stackTop.equals(inputTop)) { 148 | stack.pop(); 149 | input.pop(); 150 | parsedTokensCount++; 151 | } else { 152 | throw new AnalyzerException("Syntax error after token #" + parsedTokensCount, 153 | parsedTokensCount); 154 | } 155 | } else { 156 | SimpleEntry tableKey = new SimpleEntry( 157 | (NonTerminal) stackTop, inputTop); 158 | if (parsingTable.containsKey(tableKey)) { 159 | stack.pop(); 160 | Symbol[] tableEntry = parsingTable.get(tableKey); 161 | for (int j = tableEntry.length - 1; j > -1; j--) { 162 | if (!tableEntry[j].equals(epsilon)) 163 | 
stack.push(tableEntry[j]); 164 | } 165 | sequenceOfAppliedRules.add(getRule((NonTerminal) stackTop, tableEntry)); 166 | } else { 167 | throw new AnalyzerException("Syntax error after token #" + parsedTokensCount, 168 | parsedTokensCount); 169 | } 170 | } 171 | } while (!stack.isEmpty() && !input.isEmpty()); 172 | 173 | if (!input.isEmpty()) { 174 | throw new AnalyzerException("Syntax error after token #" + parsedTokensCount, 175 | parsedTokensCount); 176 | } 177 | } 178 | 179 | /** 180 | * Converts a list of tokens from lexer to stack of terminals for parser. 181 | * First token in the input will be at the top of the stack. The list {@code inputTokens} is reversed in place by this call. 182 | * 183 | * @param inputTokens 184 | * list of input tokens (mutated: reversed in place) 185 | * @return stack of terminal symbols with {@code endOfProgram} at the bottom * @throws RuntimeException if a token's text is not a known grammar symbol and its type is not Identifier, IntConstant or DoubleConstant 186 | */ 187 | private Stack convertTokensToStack(List inputTokens) { 188 | Stack input = new Stack(); 189 | Collections.reverse(inputTokens); 190 | input.push(endOfProgram); 191 | for (Token token : inputTokens) { 192 | Terminal s = (Terminal) nameToSymbol.get(token.getTokenString()); 193 | if (s == null) { 194 | switch (token.getTokenType()) { 195 | case Identifier: 196 | s = (Terminal) nameToSymbol.get("id"); 197 | break; 198 | case IntConstant: 199 | s = (Terminal) nameToSymbol.get("intConst"); 200 | break; 201 | case DoubleConstant: 202 | s = (Terminal) nameToSymbol.get("doubleConst"); 203 | break; 204 | default: 205 | throw new RuntimeException("Somethig is wrong!"); 206 | } 207 | } 208 | input.push(s); 209 | } 210 | return input; 211 | } 212 | 213 | /** 214 | * Builds the LL(1) parsing table from the first and follow sets. An entry written for a (nonterminal, terminal) pair replaces any earlier entry for that pair, so grammar conflicts are not reported 215 | */ 216 | private void buildParsingTable() { 217 | for (Rule r : rules) { 218 | Symbol[] rightSide = r.getRightSide(); 219 | NonTerminal leftSide = r.getLeftSide(); 220 | Set firstSetForRightSide = first(rightSide); 221 | Set followSetForLeftSide = followSet.get(leftSide); 222 | 223 | for (Terminal s : firstSetForRightSide) { 224 | parsingTable.put(new SimpleEntry(leftSide, s), rightSide); 225 | } 226 | 
227 | if (firstSetForRightSide.contains(epsilon)) { 228 | for (Terminal s : followSetForLeftSide) { 229 | parsingTable 230 | .put(new SimpleEntry(leftSide, s), rightSide); 231 | } 232 | } 233 | } 234 | } 235 | 236 | private void calculateFirst() { 237 | for (Symbol s : alphabet) { 238 | firstSet.put(s, new HashSet()); 239 | } 240 | for (Symbol s : alphabet) { 241 | first(s); 242 | } 243 | } 244 | 245 | /** 246 | * Calculates first set for specified symbol. By using the next rules: 247 | *

248 | * 249 | *

250 | 	 * 1. If X is terminal, then FIRST(X) is {X}.
251 | 	 * 2. If X -> EPSILON is a production, then add EPSILON to FIRST(X).
252 | 	 * 3. If X is nonterminal and X -> Y1 Y2 ... Yk is a production, 
253 | 	 * then place terminal a in FIRST(X) if, for some i, a is in FIRST(Yi) and EPSILON is in each of FIRST(Y1), ... , FIRST(Yi-1). 
254 | 	 * If EPSILON is in FIRST(Yj) for all j = 1, 2, ... , k, then add EPSILON to FIRST(X).
255 | 	 *

256 | * 257 | *

258 | * 259 | * 260 | * @param s 261 | * terminal or nonterminal symbol of grammar 262 | */ 263 | private void first(Symbol s) { 264 | Set first = firstSet.get(s); 265 | Set auxiliarySet; 266 | if (s.isTerminal()) { 267 | first.add((Terminal) s); 268 | return; 269 | } 270 | 271 | for (Rule r : getRulesWithLeftSide((NonTerminal) s)) { 272 | Symbol[] rightSide = r.getRightSide(); 273 | first(rightSide[0]); 274 | auxiliarySet = new HashSet(firstSet.get(rightSide[0])); 275 | auxiliarySet.remove(epsilon); 276 | first.addAll(auxiliarySet); 277 | 278 | for (int i = 1; i < rightSide.length 279 | && firstSet.get(rightSide[i - 1]).contains(epsilon); i++) { 280 | first(rightSide[i]); 281 | auxiliarySet = new HashSet(firstSet.get(rightSide[i])); 282 | auxiliarySet.remove(epsilon); 283 | first.addAll(auxiliarySet); 284 | } 285 | 286 | boolean allContainEpsilon = true; 287 | for (Symbol rightS : rightSide) { 288 | if (!firstSet.get(rightS).contains(epsilon)) { 289 | allContainEpsilon = false; 290 | break; 291 | } 292 | } 293 | if (allContainEpsilon) 294 | first.add(epsilon); 295 | } 296 | } 297 | 298 | /** 299 | * Calculates first set for chain of symbols 300 | * 301 | * @param chain 302 | * string of symbols 303 | * @return first set for the specified string 304 | */ 305 | private Set first(Symbol[] chain) { 306 | Set firstSetForChain = new HashSet(); 307 | Set auxiliarySet; 308 | auxiliarySet = new HashSet(firstSet.get(chain[0])); 309 | auxiliarySet.remove(epsilon); 310 | firstSetForChain.addAll(auxiliarySet); 311 | 312 | for (int i = 1; i < chain.length && firstSet.get(chain[i - 1]).contains(epsilon); i++) { 313 | auxiliarySet = new HashSet(firstSet.get(chain[i])); 314 | auxiliarySet.remove(epsilon); 315 | firstSetForChain.addAll(auxiliarySet); 316 | } 317 | 318 | boolean allContainEpsilon = true; 319 | for (Symbol s : chain) { 320 | if (!firstSet.get(s).contains(epsilon)) { 321 | allContainEpsilon = false; 322 | break; 323 | } 324 | } 325 | if (allContainEpsilon) 326 | 
firstSetForChain.add(epsilon); 327 | 328 | return firstSetForChain; 329 | } 330 | 331 | private void calculateFollow() { 332 | for (Symbol s : alphabet) { 333 | if (s.isNonTerminal()) 334 | followSet.put(s, new HashSet()); 335 | } 336 | 337 | Map, Boolean> callTable = new HashMap, Boolean>(); 338 | for (Symbol firstS : alphabet) { 339 | for (Symbol secondS : alphabet) { 340 | callTable.put(new SimpleEntry(firstS, secondS), false); 341 | } 342 | } 343 | 344 | NonTerminal firstSymbol = rules.get(0).getLeftSide(); 345 | followSet.get(firstSymbol).add(endOfProgram); 346 | for (Symbol s : alphabet) { 347 | if (s.isNonTerminal()) { 348 | follow((NonTerminal) s, null, callTable); 349 | } 350 | } 351 | } 352 | 353 | /** 354 | * Calculates follow set for nonterminal symbols 355 | */ 356 | private void follow(NonTerminal s, Symbol caller, 357 | Map, Boolean> callTable) { 358 | Boolean called = callTable.get(new SimpleEntry(caller, s)); 359 | if (called != null) { 360 | if (called == true) 361 | return; 362 | else 363 | callTable.put(new SimpleEntry(caller, s), true); 364 | } 365 | 366 | Set follow = followSet.get(s); 367 | Set auxiliarySet; 368 | 369 | List> list = getLeftSideRightChain(s); 370 | for (SimpleEntry pair : list) { 371 | Symbol[] rightChain = pair.getValue(); 372 | NonTerminal leftSide = pair.getKey(); 373 | if (rightChain.length != 0) { 374 | auxiliarySet = first(rightChain); 375 | auxiliarySet.remove(epsilon); 376 | follow.addAll(auxiliarySet); 377 | if (first(rightChain).contains(epsilon)) { 378 | follow(leftSide, s, callTable); 379 | follow.addAll(followSet.get(leftSide)); 380 | } 381 | } else { 382 | follow(leftSide, s, callTable); 383 | follow.addAll(followSet.get(leftSide)); 384 | } 385 | } 386 | } 387 | 388 | /** 389 | * Constructs grammar rules from file 390 | * 391 | * @param grammarFile 392 | * file with grammar rules 393 | * @throws FileNotFoundException 394 | * if file with the specified pathname does not exist 395 | */ 396 | private void 
parseRules(File grammarFile) throws FileNotFoundException { 397 | nameToSymbol.put("EPSILON", epsilon); 398 | 399 | Scanner data = new Scanner(grammarFile); 400 | int code = 1; 401 | int ruleNumber = 0; 402 | while (data.hasNext()) { 403 | StringTokenizer t = new StringTokenizer(data.nextLine()); 404 | String symbolName = t.nextToken(); 405 | if (!nameToSymbol.containsKey(symbolName)) { 406 | Symbol s = new NonTerminal(code, symbolName); 407 | if (code == 1) 408 | startSymbol = (NonTerminal) s; 409 | nameToSymbol.put(symbolName, s); 410 | alphabet.add(s); 411 | code++; 412 | } 413 | t.nextToken();// -> 414 | 415 | NonTerminal leftSide = (NonTerminal) nameToSymbol.get(symbolName); 416 | while (t.hasMoreTokens()) { 417 | List rightSide = new ArrayList(); 418 | do { 419 | symbolName = t.nextToken(); 420 | if (!symbolName.equals("|")) { 421 | if (!nameToSymbol.containsKey(symbolName)) { 422 | Symbol s; 423 | if (Character.isUpperCase(symbolName.charAt(0))) 424 | s = new NonTerminal(code++, symbolName); 425 | else 426 | s = new Terminal(code++, symbolName); 427 | nameToSymbol.put(symbolName, s); 428 | alphabet.add(s); 429 | } 430 | rightSide.add(nameToSymbol.get(symbolName)); 431 | } 432 | } while (!symbolName.equals("|") && t.hasMoreTokens()); 433 | rules.add(new Rule(ruleNumber++, leftSide, rightSide.toArray(new Symbol[] {}))); 434 | } 435 | } 436 | } 437 | 438 | /** 439 | * Returns rules with specified left side 440 | * 441 | * @param nonTerminalSymbol 442 | * symbol in the left side of the production 443 | * @return set of rules which contain the specified symbol in the left side 444 | */ 445 | private Set getRulesWithLeftSide(NonTerminal nonTerminalSymbol) { 446 | Set set = new HashSet(); 447 | for (Rule r : rules) { 448 | if (r.getLeftSide().equals(nonTerminalSymbol)) 449 | set.add(r); 450 | } 451 | return set; 452 | } 453 | 454 | /** 455 | * Returns list of pairs. 
First element of the pair is the left side of the 456 | * rule if this rule contains specified symbol {@code s} in the right side. 457 | * The second element contains symbols after {@code s} in the right side of 458 | * the rule. 459 | * 460 | * @param s 461 | * @return 462 | */ 463 | private List> getLeftSideRightChain(Symbol s) { 464 | List> list = new ArrayList>(); 465 | for (Rule r : rules) { 466 | Symbol[] rightChain = r.getRightSide(); 467 | int index = Arrays.asList(rightChain).indexOf(s); 468 | if (index != -1) { 469 | rightChain = Arrays.copyOfRange(rightChain, index + 1, rightChain.length); 470 | list.add(new SimpleEntry(r.getLeftSide(), rightChain)); 471 | } 472 | } 473 | return list; 474 | } 475 | 476 | /** 477 | * Returns the rule with specified left and right side 478 | * 479 | * @param leftSide 480 | * symbol in the left side of the production 481 | * @param rightSide 482 | * symbols in the right side 483 | * @return rule with specified left and right side or {@code null} if such 484 | * rule doesn't exist in grammar 485 | */ 486 | private Rule getRule(NonTerminal leftSide, Symbol[] rightSide) { 487 | Set setOfRules = getRulesWithLeftSide(leftSide); 488 | for (Rule r : setOfRules) { 489 | if (rightSide.length != r.getRightSide().length) 490 | continue; 491 | for (int i = 0; i < rightSide.length; i++) { 492 | if (r.getRightSide()[i] != rightSide[i]) 493 | break; 494 | else { 495 | if (i == rightSide.length - 1) { 496 | return r; 497 | } 498 | } 499 | } 500 | } 501 | 502 | return null; 503 | } 504 | } 505 | -------------------------------------------------------------------------------- /src/parser/Rule.java: -------------------------------------------------------------------------------- 1 | package parser; 2 | 3 | import java.util.Arrays; 4 | 5 | /** 6 | * Represents productions in context-free grammar.In this type of grammars left 7 | * side of productions contains only one nonterminal symbol. 
8 | * 9 | * @author Ira Korshunova 10 | * 11 | */ 12 | public class Rule { 13 | 14 | /** Number of the rule */ 15 | private int ruleNumber; 16 | 17 | /** Left side of production */ 18 | private NonTerminal leftSide; 19 | 20 | /** Right side of production */ 21 | private Symbol[] rightSide; 22 | 23 | /** 24 | * Creates a rule 25 | * 26 | * @param ruleNumber 27 | * number of rule as it is in grammar description 28 | * @param leftSide 29 | * nonterminal symbol in the left side of rule 30 | * @param rightSide 31 | * terminals and nonterminals in the right side 32 | */ 33 | public Rule(int ruleNumber, NonTerminal leftSide, Symbol[] rightSide) { 34 | this.ruleNumber = ruleNumber; 35 | this.leftSide = leftSide; 36 | this.rightSide = rightSide; 37 | } 38 | 39 | public int getRuleNumber() { 40 | return ruleNumber; 41 | } 42 | 43 | public NonTerminal getLeftSide() { 44 | return leftSide; 45 | } 46 | 47 | public Symbol[] getRightSide() { 48 | return rightSide; 49 | } 50 | 51 | @Override 52 | public String toString() { 53 | return "Rule number: " + ruleNumber + "| " + leftSide + " -> " + Arrays.toString(rightSide); 54 | } 55 | 56 | } -------------------------------------------------------------------------------- /src/parser/Symbol.java: -------------------------------------------------------------------------------- 1 | package parser; 2 | 3 | public abstract class Symbol { 4 | private int code; 5 | private String name; 6 | 7 | public Symbol(int code, String name) { 8 | this.code = code; 9 | this.name = name; 10 | } 11 | 12 | public int getCode() { 13 | return code; 14 | } 15 | 16 | public String getName() { 17 | return name; 18 | } 19 | 20 | public abstract boolean isTerminal(); 21 | 22 | public abstract boolean isNonTerminal(); 23 | 24 | @Override 25 | public String toString() { 26 | return name; 27 | } 28 | 29 | @Override 30 | public int hashCode() { 31 | return code; 32 | } 33 | } -------------------------------------------------------------------------------- 
/src/parser/Terminal.java: -------------------------------------------------------------------------------- 1 | package parser; 2 | 3 | /** 4 | * The {@code Terminal} class represents a terminal symbol of grammar 5 | * 6 | * @author Ira Korshunova 7 | * 8 | */ 9 | public class Terminal extends Symbol{ 10 | 11 | public Terminal(int code, String name) { 12 | super(code,name); 13 | } 14 | 15 | @Override 16 | public boolean equals(Object obj) { 17 | if (obj == this) 18 | return true; 19 | if(obj == null) 20 | return false; 21 | if (obj.getClass() != Terminal.class) 22 | return false; 23 | Terminal ts = (Terminal) obj; 24 | return this.getCode() == ts.getCode(); 25 | } 26 | 27 | @Override 28 | public boolean isTerminal() { 29 | return true; 30 | } 31 | 32 | @Override 33 | public boolean isNonTerminal() { 34 | return false; 35 | } 36 | } -------------------------------------------------------------------------------- /src/runner/GUI.java: -------------------------------------------------------------------------------- 1 | package runner; 2 | 3 | import java.awt.Color; 4 | import java.awt.event.ActionEvent; 5 | import java.awt.event.ActionListener; 6 | import java.io.BufferedReader; 7 | import java.io.File; 8 | import java.io.FileNotFoundException; 9 | import java.io.FileReader; 10 | import java.io.IOException; 11 | 12 | import javax.swing.BorderFactory; 13 | import javax.swing.BoxLayout; 14 | import javax.swing.JButton; 15 | import javax.swing.JFileChooser; 16 | import javax.swing.JFrame; 17 | import javax.swing.JOptionPane; 18 | import javax.swing.JPanel; 19 | import javax.swing.JScrollPane; 20 | import javax.swing.JTabbedPane; 21 | import javax.swing.JTextArea; 22 | 23 | import lexer.Lexer; 24 | import parser.Parser; 25 | import parser.Rule; 26 | import token.Token; 27 | import exceptions.AnalyzerException; 28 | 29 | @SuppressWarnings("serial") 30 | public class GUI extends JPanel { 31 | private JFrame frame; 32 | 33 | private JPanel codePanel; 34 | private JPanel 
buttonPanel; 35 | 36 | private JTabbedPane tabbedPane; 37 | 38 | private JTextArea codeArea; 39 | private JTextArea lexArea; 40 | private JTextArea syntaxArea; 41 | 42 | private JButton openBtn; 43 | private JButton runBtn; 44 | 45 | private String sourceCode; 46 | 47 | public GUI(JFrame frame) { 48 | this.frame = frame; 49 | this.setLayout(new BoxLayout(this, BoxLayout.X_AXIS)); 50 | codePanel = new JPanel(); 51 | codePanel.setLayout(new BoxLayout(codePanel, BoxLayout.Y_AXIS)); 52 | codeArea = new JTextArea(30, 50); 53 | codeArea.setBorder(BorderFactory.createLineBorder(Color.black)); 54 | JScrollPane codeScrollPane = new JScrollPane(codeArea); 55 | codeScrollPane.setAutoscrolls(true); 56 | codePanel.add(codeScrollPane); 57 | 58 | buttonPanel = new JPanel(); 59 | buttonPanel.setBorder(BorderFactory.createLineBorder(Color.black)); 60 | 61 | openBtn = new JButton("Open"); 62 | openBtn.addActionListener(new OpenActionListener()); 63 | buttonPanel.add(openBtn); 64 | 65 | runBtn = new JButton("Run"); 66 | runBtn.addActionListener(new RunActionListener()); 67 | buttonPanel.add(runBtn); 68 | 69 | codePanel.add(buttonPanel); 70 | this.add(codePanel); 71 | 72 | tabbedPane = new JTabbedPane(); 73 | 74 | lexArea = new JTextArea(32, 40); 75 | lexArea.setEditable(false); 76 | JScrollPane lexScrollPane = new JScrollPane(lexArea); 77 | lexScrollPane.setAutoscrolls(true); 78 | tabbedPane.add("Lexical analysis", lexScrollPane); 79 | 80 | syntaxArea = new JTextArea(32, 40); 81 | syntaxArea.setEditable(false); 82 | JScrollPane syntaxScrollPane = new JScrollPane(syntaxArea); 83 | syntaxScrollPane.setAutoscrolls(true); 84 | tabbedPane.add("Syntax analysis", syntaxScrollPane); 85 | 86 | this.add(tabbedPane); 87 | 88 | } 89 | 90 | private String readCodeToString(String filePath) throws IOException { 91 | StringBuffer fileData = new StringBuffer(); 92 | BufferedReader reader = new BufferedReader(new FileReader(filePath)); 93 | char[] buf = new char[1024]; 94 | int numRead = 0; 95 | while 
((numRead = reader.read(buf)) != -1) { 96 | String readData = String.valueOf(buf, 0, numRead); 97 | fileData.append(readData); 98 | buf = new char[1024]; 99 | } 100 | reader.close(); 101 | return fileData.toString(); 102 | } 103 | 104 | private class OpenActionListener implements ActionListener { 105 | @Override 106 | public void actionPerformed(ActionEvent e) { 107 | JavaFilter javaFilter = new JavaFilter(); 108 | JFileChooser fileChooser = new JFileChooser(new File(System.getProperty("user.dir") 109 | + "/resources")); 110 | fileChooser.setFileFilter(javaFilter); 111 | fileChooser.setAcceptAllFileFilterUsed(false); 112 | int returnValue = fileChooser.showOpenDialog(GUI.this); 113 | if (returnValue == JFileChooser.APPROVE_OPTION) { 114 | if (javaFilter.getExtension(fileChooser.getSelectedFile()).equals("java")) { 115 | try { 116 | sourceCode = readCodeToString(fileChooser.getSelectedFile().getPath()); 117 | sourceCode += "\n"; 118 | codeArea.setText(sourceCode); 119 | } catch (IOException e1) { 120 | 121 | } 122 | 123 | } 124 | } 125 | } 126 | } 127 | 128 | private class RunActionListener implements ActionListener { 129 | 130 | @Override 131 | public void actionPerformed(ActionEvent e) { 132 | Lexer lexer = new Lexer(); 133 | Parser parser = new Parser(); 134 | try { 135 | lexArea.setText(""); 136 | syntaxArea.setText(""); 137 | 138 | // lexer 139 | sourceCode = codeArea.getText(); 140 | lexer.tokenize(sourceCode); 141 | JOptionPane.showMessageDialog(frame, "Lexical Analysis is completed", 142 | "Information", JOptionPane.INFORMATION_MESSAGE); 143 | 144 | // parser 145 | File grammarFile = new File(System.getProperty("user.dir") + "/info/grammar.txt"); 146 | parser.parse(grammarFile, lexer.getFilteredTokens()); 147 | JOptionPane.showMessageDialog(frame, "Parsing is completed!", "Information", 148 | JOptionPane.INFORMATION_MESSAGE); 149 | 150 | } catch (AnalyzerException exception) { 151 | JOptionPane.showMessageDialog(frame, exception.getMessage(), "Error", 152 | 
JOptionPane.ERROR_MESSAGE); 153 | } catch (FileNotFoundException fileNotFoundException) { 154 | JOptionPane.showMessageDialog(frame, "File with grammar is not found!", "Error", 155 | JOptionPane.ERROR_MESSAGE); 156 | } finally { 157 | int i = 0; 158 | for (Token token : lexer.getTokens()) { 159 | if (token.getTokenType().isAuxiliary()) 160 | lexArea.append(" " + token.toString() + "\n"); 161 | else { 162 | i++; 163 | lexArea.append(i + " " + token.toString() + "\n"); 164 | } 165 | } 166 | 167 | for (Rule r : parser.getSequenceOfAppliedRules()) { 168 | syntaxArea.append(r.toString() + "\n"); 169 | } 170 | } 171 | } 172 | } 173 | } 174 | -------------------------------------------------------------------------------- /src/runner/JavaFilter.java: -------------------------------------------------------------------------------- 1 | package runner; 2 | 3 | import java.io.File; 4 | 5 | import javax.swing.filechooser.FileFilter; 6 | 7 | public class JavaFilter extends FileFilter { 8 | 9 | @Override 10 | public boolean accept(File f) { 11 | if (f.isDirectory()) { 12 | return true; 13 | } 14 | 15 | String extension = getExtension(f); 16 | if (extension != null) { 17 | if (extension.equals("java")) { 18 | return true; 19 | } else { 20 | return false; 21 | } 22 | } 23 | return false; 24 | } 25 | 26 | public String getExtension(File f) { 27 | String ext = null; 28 | String s = f.getName(); 29 | int i = s.lastIndexOf('.'); 30 | 31 | if (i > 0 && i < s.length() - 1) { 32 | ext = s.substring(i + 1).toLowerCase(); 33 | } 34 | return ext; 35 | } 36 | 37 | @Override 38 | public String getDescription() { 39 | return "*.java"; 40 | } 41 | 42 | } 43 | -------------------------------------------------------------------------------- /src/runner/Main.java: -------------------------------------------------------------------------------- 1 | package runner; 2 | 3 | import javax.swing.JFrame; 4 | import javax.swing.SwingUtilities; 5 | 6 | public class Main { 7 | 8 | public static void 
main(String[] args) throws Exception { 9 | SwingUtilities.invokeLater(new Runnable() { 10 | @Override 11 | public void run() { 12 | JFrame frame = new JFrame("Lady Java"); 13 | frame.setDefaultCloseOperation(JFrame.EXIT_ON_CLOSE); 14 | frame.add(new GUI(frame)); 15 | frame.pack(); 16 | frame.setResizable(false); 17 | frame.setVisible(true); 18 | } 19 | }); 20 | } 21 | } 22 | -------------------------------------------------------------------------------- /src/token/Token.java: -------------------------------------------------------------------------------- 1 | package token; 2 | 3 | /** 4 | * The {@code Token} class represents token (lexeme). A token is a string of 5 | * characters, categorized according to the rules as a symbol. For example: 6 | * Identifier, Comma, DoubleConstant. 7 | * 8 | * @author Ira Korshunova 9 | * 10 | */ 11 | 12 | public class Token { 13 | 14 | /** The beginning index of this token in the input */ 15 | private int beginIndex; 16 | 17 | /** The ending index of token in the input */ 18 | private int endIndex; 19 | 20 | /** Type(category) of token */ 21 | private TokenType tokenType; 22 | 23 | /** String of characters for this token */ 24 | private String tokenString; 25 | 26 | /** 27 | * Constructs new {@code Token} object with specified parameters. 
/**
 * The {@code TokenType} enumeration represents types of tokens in subset of
 * Java language.
 *
 * @author Ira Korshunova
 *
 */
public enum TokenType {
    // Auxiliary types, see isAuxiliary()
    BlockComment, LineComment, WhiteSpace, Tab, NewLine,

    CloseBrace, OpenBrace, OpeningCurlyBrace, ClosingCurlyBrace,

    DoubleConstant, IntConstant,

    Plus, Minus, Multiply, Divide, Point,

    EqualEqual, Equal, ExclameEqual, Greater, Less,

    Static, Public, Private,

    Int, Double, Void,

    False, True, Null,

    Return, New, Class, If, While, Else,

    Semicolon, Comma,

    Identifier;

    /**
     * Tells whether this type is auxiliary, i.e. one of the comment, new
     * line, tab or white space types.
     *
     * @return {@code true} if token is auxiliary, {@code false} otherwise
     */
    public boolean isAuxiliary() {
        switch (this) {
        case BlockComment:
        case LineComment:
        case NewLine:
        case Tab:
        case WhiteSpace:
            return true;
        default:
            return false;
        }
    }
}