├── asmb ├── .gitignore ├── asm-language.pdf ├── specification.pdf ├── code-generation.pdf ├── static-semantics.pdf ├── backend-specification.pdf ├── src ├── compiler │ ├── frontend │ │ ├── frontend.h │ │ ├── scanner │ │ │ ├── state_transition_table │ │ │ │ ├── table_errors.h │ │ │ │ ├── table_errors.cpp │ │ │ │ ├── state_transition_table.h │ │ │ │ └── state_transition_table.cpp │ │ │ ├── scanner.h │ │ │ └── scanner.cpp │ │ └── parser │ │ │ ├── parser.h │ │ │ └── parser.cpp │ ├── token │ │ ├── identifier │ │ │ ├── identifier_token.cpp │ │ │ └── identifier_token.h │ │ ├── integer │ │ │ ├── integer_token.cpp │ │ │ └── integer_token.h │ │ ├── end_of_file │ │ │ ├── end_of_file_token.cpp │ │ │ └── end_of_file_token.h │ │ ├── token.h │ │ ├── token_type_id │ │ │ ├── token_type_id.h │ │ │ └── token_type_id.cpp │ │ ├── index.h │ │ ├── operator │ │ │ ├── operator_token.h │ │ │ └── operator_token.cpp │ │ ├── delimiter │ │ │ ├── delimiter_token.h │ │ │ └── delimiter_token.cpp │ │ ├── keyword │ │ │ ├── keyword_token.h │ │ │ └── keyword_token.cpp │ │ └── index.cpp │ ├── compiler.h │ ├── backend │ │ ├── optimizer │ │ │ ├── optimizer.h │ │ │ └── optimizer.cpp │ │ ├── var_stack │ │ │ ├── var_stack.h │ │ │ └── var_stack.cpp │ │ ├── code_generator │ │ │ ├── code_generator.h │ │ │ └── code_generator.cpp │ │ ├── target_keywords.h │ │ ├── backend.h │ │ └── backend.cpp │ ├── node │ │ ├── node_labels.h │ │ ├── node.h │ │ └── node.cpp │ └── compiler.cpp ├── main.cpp └── timer.h ├── assets ├── deterministic-finite-automaton.png ├── state-transition-table.csv └── deterministic-finite-automaton.json ├── LICENSE ├── Makefile └── README.md /asmb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gbroques/compiler/HEAD/asmb -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .vscode 2 | *.o 3 | *.gch 4 | 
test*.txt 5 | build 6 | comp 7 | *.asm 8 | -------------------------------------------------------------------------------- /asm-language.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gbroques/compiler/HEAD/asm-language.pdf -------------------------------------------------------------------------------- /specification.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gbroques/compiler/HEAD/specification.pdf -------------------------------------------------------------------------------- /code-generation.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gbroques/compiler/HEAD/code-generation.pdf -------------------------------------------------------------------------------- /static-semantics.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gbroques/compiler/HEAD/static-semantics.pdf -------------------------------------------------------------------------------- /backend-specification.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gbroques/compiler/HEAD/backend-specification.pdf -------------------------------------------------------------------------------- /src/compiler/frontend/frontend.h: -------------------------------------------------------------------------------- 1 | #ifndef FRONTEND_H 2 | #define FRONTEND_H 3 | 4 | #include "parser/parser.h" 5 | 6 | #endif -------------------------------------------------------------------------------- /assets/deterministic-finite-automaton.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gbroques/compiler/HEAD/assets/deterministic-finite-automaton.png 
-------------------------------------------------------------------------------- /src/compiler/token/identifier/identifier_token.cpp: -------------------------------------------------------------------------------- 1 | #include "identifier_token.h" 2 | 3 | IdentifierToken::IdentifierToken(std::string value, int line_number) 4 | : Token(IdentifierTokenId, value, line_number) {} 5 | -------------------------------------------------------------------------------- /src/compiler/compiler.h: -------------------------------------------------------------------------------- 1 | #ifndef COMPILER_H 2 | #define COMPILER_H 3 | 4 | #include 5 | 6 | class Compiler 7 | { 8 | public: 9 | void compile(std::string filename); 10 | 11 | private: 12 | std::string get_basename(std::string filename); 13 | }; 14 | 15 | #endif -------------------------------------------------------------------------------- /src/compiler/token/integer/integer_token.cpp: -------------------------------------------------------------------------------- 1 | #include "integer_token.h" 2 | 3 | bool IntegerToken::is_integer(char character) 4 | { 5 | return isdigit(character); 6 | } 7 | 8 | IntegerToken::IntegerToken(std::string value, int line_number) 9 | : Token(IntegerTokenId, value, line_number) {} 10 | -------------------------------------------------------------------------------- /src/compiler/token/end_of_file/end_of_file_token.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include "end_of_file_token.h" 3 | 4 | bool EndOfFileToken::is_eof(char character) 5 | { 6 | return character == EOF; 7 | } 8 | 9 | EndOfFileToken::EndOfFileToken(int line_number) 10 | : Token(EndOfFileTokenId, "EOF", line_number) {} 11 | -------------------------------------------------------------------------------- /src/compiler/token/end_of_file/end_of_file_token.h: -------------------------------------------------------------------------------- 1 | #ifndef EOF_TOKEN_H 2 | 
#define EOF_TOKEN_H 3 | 4 | #include 5 | #include "../index.h" 6 | 7 | class EndOfFileToken : public Token 8 | { 9 | public: 10 | static bool is_eof(char character); 11 | EndOfFileToken(int line_number); 12 | }; 13 | 14 | #endif -------------------------------------------------------------------------------- /src/compiler/token/integer/integer_token.h: -------------------------------------------------------------------------------- 1 | #ifndef INTEGER_TOKEN_H 2 | #define INTEGER_TOKEN_H 3 | 4 | #include 5 | #include "../index.h" 6 | 7 | class IntegerToken : public Token 8 | { 9 | public: 10 | static bool is_integer(char character); 11 | IntegerToken(std::string value, int line_number); 12 | }; 13 | 14 | #endif -------------------------------------------------------------------------------- /src/compiler/frontend/scanner/state_transition_table/table_errors.h: -------------------------------------------------------------------------------- 1 | #ifndef TABLE_ERRORS_H 2 | #define TABLE_ERRORS_H 3 | 4 | typedef enum { 5 | InvalidCharacter = -10, 6 | InvalidTransition = -20, 7 | MaxIntegerLength = -30, 8 | MaxIdentifierLength = -40 9 | } StateTransitionTableErrors; 10 | 11 | bool is_table_error(int num); 12 | 13 | #endif -------------------------------------------------------------------------------- /src/compiler/token/token.h: -------------------------------------------------------------------------------- 1 | #ifndef TOKEN_H 2 | #define TOKEN_H 3 | 4 | #include "index.h" 5 | #include "delimiter/delimiter_token.h" 6 | #include "end_of_file/end_of_file_token.h" 7 | #include "identifier/identifier_token.h" 8 | #include "integer/integer_token.h" 9 | #include "keyword/keyword_token.h" 10 | #include "operator/operator_token.h" 11 | 12 | #endif -------------------------------------------------------------------------------- /src/compiler/token/identifier/identifier_token.h: -------------------------------------------------------------------------------- 1 | #ifndef 
IDENTIFIER_TOKEN_H 2 | #define IDENTIFIER_TOKEN_H 3 | 4 | #include 5 | #include "../index.h" 6 | 7 | class IdentifierToken : public Token 8 | { 9 | public: 10 | static bool is_identifier(Token token); 11 | IdentifierToken(std::string value, int line_number); 12 | }; 13 | 14 | #endif -------------------------------------------------------------------------------- /src/compiler/frontend/scanner/state_transition_table/table_errors.cpp: -------------------------------------------------------------------------------- 1 | #include "table_errors.h" 2 | 3 | bool is_table_error(int num) 4 | { 5 | switch (num) { 6 | case InvalidCharacter: 7 | case InvalidTransition: 8 | case MaxIntegerLength: 9 | case MaxIdentifierLength: 10 | return true; 11 | default: 12 | return false; 13 | } 14 | } -------------------------------------------------------------------------------- /src/compiler/backend/optimizer/optimizer.h: -------------------------------------------------------------------------------- 1 | #ifndef OPTIMIZER_H 2 | #define OPTIMIZER_H 3 | 4 | #include 5 | #include 6 | 7 | class Optimizer 8 | { 9 | public: 10 | void optimize(std::string target_filename); 11 | bool is_line(std::string line, std::string target_keyword); 12 | std::vector split_line(std::string line); 13 | int to_int(std::string str); 14 | 15 | private: 16 | std::ifstream target; 17 | }; 18 | 19 | #endif -------------------------------------------------------------------------------- /src/compiler/token/token_type_id/token_type_id.h: -------------------------------------------------------------------------------- 1 | #ifndef TOKEN_TYPE_ID_H 2 | #define TOKEN_TYPE_ID_H 3 | 4 | #include 5 | 6 | typedef enum { 7 | DelimiterTokenId = 1000, 8 | EndOfFileTokenId = 1001, 9 | IdentifierTokenId = 1002, 10 | IntegerTokenId = 1003, 11 | KeywordTokenId = 1004, 12 | OperatorTokenId = 1005, 13 | DefaultTokenTypeId = 1006 14 | } TokenTypeId; 15 | 16 | bool is_token_type_id(int num); 17 | 18 | std::string 
// Reads tokens from a source file.
// Only the declaration is visible here; behavioural notes below are
// assumptions to confirm against scanner.cpp.
class Scanner
{
    public:
        // Constructs a scanner for the named file.
        Scanner(std::string filename);
        ~Scanner();
        // Returns a Token; presumably the next one in the file — TODO confirm.
        Token read();

    private:
        std::ifstream file;   // input stream for the source file
        int line_number;      // presumably the line currently being scanned
        char next_char;       // presumably the look-ahead character
        bool is_eof_reached;  // presumably set once the end of file has been seen
        void check_file(std::ifstream& file, std::string filename);
        void close_file();
        void check_for_invalid_character(int state);
        void check_for_table_error(int state);
        void check_for_eof();
};
| const std::string STAT = "stat"; 15 | const std::string IN = "in"; 16 | const std::string OUT = "out"; 17 | const std::string IFSTAT = "ifstat"; 18 | const std::string LOOP = "loop"; 19 | const std::string ASSIGN = "assign"; 20 | const std::string OPERATOR = "O"; 21 | 22 | #endif -------------------------------------------------------------------------------- /src/compiler/compiler.cpp: -------------------------------------------------------------------------------- 1 | #include "compiler.h" 2 | 3 | #include "frontend/frontend.h" 4 | #include "backend/backend.h" 5 | 6 | 7 | void Compiler::compile(std::string filename) 8 | { 9 | Parser parser(filename); 10 | Node* parse_tree = parser.parse(); 11 | 12 | std::string basename = get_basename(filename); 13 | Backend backend(basename); 14 | backend.traverse(parse_tree); 15 | 16 | Optimizer optimizer; 17 | optimizer.optimize(basename + ASM_EXT); 18 | 19 | Node::destroy(parse_tree); 20 | } 21 | 22 | std::string Compiler::get_basename(std::string filename) 23 | { 24 | size_t last_index = filename.find_last_of('.'); 25 | return filename.substr(0, last_index); 26 | } -------------------------------------------------------------------------------- /src/compiler/node/node.h: -------------------------------------------------------------------------------- 1 | #ifndef NODE_H 2 | #define NODE_H 3 | 4 | #include 5 | #include 6 | 7 | #include "../token/token.h" 8 | #include "node_labels.h" 9 | 10 | class Node 11 | { 12 | public: 13 | static Node* of(std::string label, int level); 14 | static void destroy(Node* node); 15 | void append_child(Node* child); 16 | void append_token(Token token); 17 | static void print(Node* node); 18 | std::vector get_identifier_tokens(); 19 | std::vector children; 20 | std::string label; 21 | std::vector tokens; 22 | int level; 23 | 24 | private: 25 | static void print_node(Node* node); 26 | static void print_children(Node* node); 27 | friend std::ostream& operator<<(std::ostream&, const Node&); 28 | }; 
/**
 * Extracts the source filename from the command-line arguments.
 *
 * Exactly one argument (besides the program name) is expected. Otherwise a
 * usage message is written to stderr and the process exits with status 1.
 *
 * @param argc Argument count as received by main().
 * @param argv Argument vector as received by main().
 * @return The first command-line argument.
 */
std::string get_filename(int argc, char** argv)
{
    const bool has_single_argument = (argc == 2);
    if (has_single_argument) {
        return std::string(argv[1]);
    }

    fprintf(stderr, "Usage: %s filename\n", argv[0]);
    exit(1);
}
// A lexical token: a type id, its text, and the line it was found on.
class Token
{
    public:
        // Default token: DefaultTokenTypeId, empty value, line 0.
        Token();
        // Token with an explicit type, text and source line.
        Token(TokenTypeId typeId, std::string value, int line_number);
        // Token without position information; the line number is set to 0.
        Token(TokenTypeId typeId, std::string value);
        // Same as above, with the value being the one-character string made from `value`.
        Token(TokenTypeId typeId, char value);
        // True when the type id is EndOfFileTokenId.
        bool is_eof();
        // True when the type id is IdentifierTokenId.
        bool is_identifier();
        // True when the type id is IntegerTokenId.
        bool is_integer();
        // Returns a copy of the token's text.
        std::string get_value();

    private:
        TokenTypeId typeId;
        std::string value;
        int line_number;
        // Prints "<type name>(<value>) on line <line_number>".
        friend std::ostream& operator<<(std::ostream&, const Token&);
        // Equality considers the type id and the value; the line number is ignored.
        friend bool operator==(const Token&, const Token&);
        friend bool operator!=(const Token&, const Token&);
        // Ordering, so tokens can be stored in ordered containers.
        friend bool operator<(const Token&, const Token&);
};
// Mnemonics of the target assembly language, as written to the generated file.
// NOTE(review): instruction semantics are defined by the target language
// specification, which is not visible here; the names below are the only guide.
const std::string PUSH = "PUSH";
const std::string POP = "POP";
const std::string STOP = "STOP";
const std::string STACK_READ = "STACKR";
const std::string STACK_WRITE = "STACKW";
const std::string LOAD = "LOAD";
const std::string STORE = "STORE";
const std::string WRITE = "WRITE";
const std::string READ = "READ";
const std::string ADD = "ADD";
const std::string SUB = "SUB";
const std::string MULT = "MULT";
const std::string DIV = "DIV";
// The "BR*" mnemonics: presumably conditional/unconditional branches — TODO confirm.
const std::string BREAK_ZERO_OR_POSITIVE = "BRZPOS";
const std::string BREAK_ZERO_OR_NEGATIVE = "BRZNEG";
const std::string BREAK = "BR";
const std::string BREAK_ZERO = "BRZERO";
const std::string BREAK_POSITIVE = "BRPOS";
const std::string BREAK_NEGATIVE = "BRNEG";

// Prefixes for generated names: the code generator appends a running counter
// to form temporary variable names and label names.
const std::string TEMP_VAR_PREFIX = "T";
const std::string LABEL_PREFIX = "L";
token); 18 | static bool is_semi_colon_token(Token token); 19 | static bool is_left_square_bracket_token(Token token); 20 | static bool is_right_square_bracket_token(Token token); 21 | DelimiterToken(std::string value, int line_number); 22 | DelimiterToken(char value); 23 | 24 | private: 25 | static std::set delimiters; 26 | }; 27 | 28 | #endif -------------------------------------------------------------------------------- /src/compiler/token/token_type_id/token_type_id.cpp: -------------------------------------------------------------------------------- 1 | #include "token_type_id.h" 2 | 3 | bool is_token_type_id(int num) 4 | { 5 | switch (num) { 6 | case DelimiterTokenId: 7 | case EndOfFileTokenId: 8 | case IdentifierTokenId: 9 | case IntegerTokenId: 10 | case KeywordTokenId: 11 | case OperatorTokenId: 12 | case DefaultTokenTypeId: 13 | return true; 14 | default: 15 | return false; 16 | } 17 | } 18 | 19 | std::string get_token_type_name(int id) 20 | { 21 | switch (id) { 22 | case DelimiterTokenId: 23 | return "DelimeterToken"; 24 | case EndOfFileTokenId: 25 | return "EndOfFileToken"; 26 | case IdentifierTokenId: 27 | return "IdentifierToken"; 28 | case IntegerTokenId: 29 | return "IntegerToken"; 30 | case KeywordTokenId: 31 | return "KeywordToken"; 32 | case OperatorTokenId: 33 | return "OperatorToken"; 34 | default: 35 | return "DefaultToken"; 36 | } 37 | } -------------------------------------------------------------------------------- /src/compiler/token/keyword/keyword_token.h: -------------------------------------------------------------------------------- 1 | #ifndef KEYWORD_TOKEN_H 2 | #define KEYWORD_TOKEN_H 3 | 4 | #include 5 | #include 6 | #include "../index.h" 7 | 8 | class KeywordToken : public Token 9 | { 10 | public: 11 | static bool is_keyword(std::string string); 12 | static bool is_start_token(Token token); 13 | static bool is_end_token(Token token); 14 | static bool is_iter_token(Token token); 15 | static bool is_void_token(Token token); 16 | 
static bool is_var_token(Token token); 17 | static bool is_return_token(Token token); 18 | static bool is_read_token(Token token); 19 | static bool is_print_token(Token token); 20 | static bool is_program_token(Token token); 21 | static bool is_if_token(Token token); 22 | static bool is_then_token(Token token); 23 | static bool is_let_token(Token token); 24 | KeywordToken(std::string value, int line_number); 25 | KeywordToken(std::string value); 26 | 27 | private: 28 | static std::set keywords; 29 | }; 30 | 31 | #endif -------------------------------------------------------------------------------- /src/compiler/frontend/parser/parser.h: -------------------------------------------------------------------------------- 1 | #ifndef PARSER_H 2 | #define PARSER_H 3 | 4 | #include 5 | 6 | #include "../scanner/scanner.h" 7 | #include "../../token/token.h" 8 | #include "../../node/node.h" 9 | 10 | class Parser 11 | { 12 | public: 13 | Parser(std::string filename); 14 | ~Parser(); 15 | Node* parse(); 16 | 17 | private: 18 | Scanner* scanner; 19 | Token token; 20 | 21 | Node* S(); 22 | Node* block(int level); 23 | Node* vars(int level); 24 | Node* expr(int level); 25 | Node* H(int level); 26 | Node* R(int level); 27 | Node* stats(int level); 28 | Node* m_stat(int level); 29 | bool is_first_of_stats(Token token); 30 | Node* stat(int level); 31 | Node* in(int level); 32 | Node* out(int level); 33 | Node* ifstat(int level); 34 | Node* loop(int level); 35 | Node* assign(int level); 36 | Node* O(int level); 37 | bool is_O_token(Token token); 38 | void print_error_and_exit(); 39 | void check_for_comma_token(); 40 | }; 41 | 42 | #endif -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | 2 | The MIT License (MIT) 3 | 4 | Copyright (c) 2018 G Roques 5 | 6 | Permission is hereby granted, free of charge, to any person obtaining a copy 7 | of this software and 
associated documentation files (the "Software"), to deal 8 | in the Software without restriction, including without limitation the rights 9 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 10 | copies of the Software, and to permit persons to whom the Software is 11 | furnished to do so, subject to the following conditions: 12 | 13 | The above copyright notice and this permission notice shall be included in 14 | all copies or substantial portions of the Software. 15 | 16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 19 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 22 | THE SOFTWARE. 23 | 24 | -------------------------------------------------------------------------------- /src/compiler/frontend/scanner/state_transition_table/state_transition_table.h: -------------------------------------------------------------------------------- 1 | #ifndef STATE_TRANSITION_TABLE_H 2 | #define STATE_TRANSITION_TABLE_H 3 | 4 | #include 5 | #include 6 | 7 | #include "table_errors.h" 8 | 9 | #define COMMENT_CHAR '!' 
10 | #define COMMENT_STATE 9 11 | 12 | class StateTransitionTable 13 | { 14 | public: 15 | static int get_next_state(int current_state, char character); 16 | static bool is_intermediate_state(int state); 17 | static bool is_final_state(int state); 18 | static bool is_error_state(int state); 19 | static bool is_invalid_char(char character); 20 | static bool is_valid_char(char character); 21 | static void print_error(int error, int line_number); 22 | static void print_invalid_char_error(char character, int line_number); 23 | static Token get_token(int state, std::string string, int line_number); 24 | 25 | private: 26 | static int get_column_index(char character); 27 | static const std::vector> table; 28 | static std::string get_error_message(int error); 29 | static std::string get_base_error_message(); 30 | }; 31 | 32 | #endif -------------------------------------------------------------------------------- /src/compiler/backend/var_stack/var_stack.cpp: -------------------------------------------------------------------------------- 1 | #include "var_stack.h" 2 | #include 3 | 4 | VarStack::VarStack() 5 | { 6 | var_stack.emplace_back(); 7 | } 8 | 9 | void VarStack::push() 10 | { 11 | var_stack.emplace_back(); 12 | } 13 | 14 | void VarStack::pop() 15 | { 16 | var_stack.pop_back(); 17 | } 18 | 19 | void VarStack::insert(Token var) 20 | { 21 | var_stack.back().insert(var); 22 | } 23 | 24 | void VarStack::erase(Token var) 25 | { 26 | var_stack.back().erase(var); 27 | } 28 | 29 | /** 30 | * Finds a variable and returns the distance from the top of the stack. 31 | * 32 | * Returns -1 if not found. 
33 | */ 34 | int VarStack::find(Token var) 35 | { 36 | int position_from_top_of_stack = 0; 37 | for (int i = var_stack.size() - 1; i >= 0; i--) { 38 | std::set tokens = var_stack[i]; 39 | for (auto token = tokens.rbegin(); token != tokens.rend(); ++token) { 40 | if (var == *token) { 41 | return position_from_top_of_stack; 42 | } 43 | position_from_top_of_stack++; 44 | } 45 | position_from_top_of_stack++; 46 | } 47 | return -1; 48 | } 49 | 50 | int VarStack::num_vars_in_current_scope() 51 | { 52 | std::set current_scope = var_stack[var_stack.size() - 1]; 53 | return current_scope.size(); 54 | } -------------------------------------------------------------------------------- /src/compiler/backend/code_generator/code_generator.cpp: -------------------------------------------------------------------------------- 1 | #include "code_generator.h" 2 | #include 3 | 4 | CodeGenerator::CodeGenerator(std::string base_filename) 5 | { 6 | target.open(base_filename + ASM_EXT); 7 | temp_var_count = 0; 8 | label_count = 0; 9 | } 10 | 11 | CodeGenerator::~CodeGenerator() 12 | { 13 | target.close(); 14 | } 15 | 16 | void CodeGenerator::print_to_target(std::string str) 17 | { 18 | target << str << std::endl; 19 | } 20 | 21 | void CodeGenerator::print_label(std::string label) 22 | { 23 | target << label << ": NOOP" << std::endl; 24 | } 25 | 26 | std::string CodeGenerator::get_temp_var() 27 | { 28 | std::string temp_var = TEMP_VAR_PREFIX + std::to_string(temp_var_count); 29 | temp_var_count++; 30 | return temp_var; 31 | } 32 | 33 | std::string CodeGenerator::get_and_store_temp_var() 34 | { 35 | std::string temp_var = get_temp_var(); 36 | print_to_target(STORE + " " + temp_var); 37 | return temp_var; 38 | } 39 | 40 | void CodeGenerator::set_temp_vars_to_zero() 41 | { 42 | for (int i = 0; i < temp_var_count; i++) { 43 | target << TEMP_VAR_PREFIX << i << " 0\n"; 44 | } 45 | } 46 | 47 | std::string CodeGenerator::get_label() 48 | { 49 | std::string label = LABEL_PREFIX + 
/**
 * Closes the target output stream.
 *
 * The destructor closes the same stream too.
 */
void CodeGenerator::close_target()
{
    target.close();
}
# A simple Make file with the following functionality:
#
# * Out-of-source builds (object files get dumped in a separate directory from the source)
# * Automatic (and accurate!) header dependencies
# * Automatic determination of list of object/source files
# * Automatic generation of include directory flags
#
# Source: https://spin.atomicobject.com/2016/08/26/makefile-c-projects/

TARGET_EXEC ?= comp

BUILD_DIR ?= ./build

SRC_DIRS ?= ./src

# The patterns are quoted so the shell passes them to find verbatim instead of
# expanding them against files in the current directory.
SRCS := $(shell find $(SRC_DIRS) -name '*.cpp' -or -name '*.c' -or -name '*.s')
OBJS := $(SRCS:%=$(BUILD_DIR)/%.o)
DEPS := $(OBJS:.o=.d)

INC_DIRS := $(shell find $(SRC_DIRS) -type d)
INC_FLAGS := $(addprefix -I,$(INC_DIRS))
# -I  Search for header files in include directories

CPPFLAGS ?= $(INC_FLAGS) -MMD -MP

CXXFLAGS=-g -std=c++11 -Wall
# -g          Enable extra debugging information for GDB
# -std=c++11  Enable C++11
# -Wall       Enable all warning flags

# The executable is linked into the current directory, so the rule is named
# after the file it really produces. Naming it $(BUILD_DIR)/$(TARGET_EXEC)
# while writing $(TARGET_EXEC) made every invocation relink, because the
# named target never came into existence.
$(TARGET_EXEC): $(OBJS)
	$(CXX) $(OBJS) -o $@ $(LDFLAGS)

# Assembly
$(BUILD_DIR)/%.s.o: %.s
	$(MKDIR_P) $(dir $@)
	$(AS) $(ASFLAGS) -c $< -o $@

# C source
$(BUILD_DIR)/%.c.o: %.c
	$(MKDIR_P) $(dir $@)
	$(CC) $(CPPFLAGS) $(CFLAGS) -c $< -o $@

# C++ source
$(BUILD_DIR)/%.cpp.o: %.cpp
	$(MKDIR_P) $(dir $@)
	$(CXX) $(CPPFLAGS) $(CXXFLAGS) -c $< -o $@


.PHONY: clean

clean:
	$(RM) -r $(BUILD_DIR) $(TARGET_EXEC)

-include $(DEPS)

# Recipes expand this when they run, so defining it last is fine.
MKDIR_P ?= mkdir -p
#include "index.h" 2 | 3 | Token::Token() 4 | { 5 | this->typeId = DefaultTokenTypeId; 6 | this->value = ""; 7 | this->line_number = 0; 8 | } 9 | 10 | Token::Token(TokenTypeId typeId, std::string value, int line_number) 11 | { 12 | this->typeId = typeId; 13 | this->value = value; 14 | this->line_number = line_number; 15 | } 16 | 17 | Token::Token(TokenTypeId typeId, std::string value) 18 | { 19 | this->typeId = typeId; 20 | this->value = value; 21 | this->line_number = 0; 22 | } 23 | 24 | Token::Token(TokenTypeId typeId, char value) 25 | { 26 | std::string string_value(1, value); 27 | 28 | this->typeId = typeId; 29 | this->value = string_value; 30 | this->line_number = 0; 31 | } 32 | 33 | bool Token::is_eof() 34 | { 35 | return this->typeId == EndOfFileTokenId; 36 | } 37 | 38 | bool Token::is_identifier() 39 | { 40 | return this->typeId == IdentifierTokenId; 41 | } 42 | 43 | bool Token::is_integer() 44 | { 45 | return this->typeId == IntegerTokenId; 46 | } 47 | 48 | std::string Token::get_value() 49 | { 50 | return value; 51 | } 52 | 53 | std::ostream& operator<<(std::ostream &stream, const Token &token) 54 | { 55 | std::string token_type = get_token_type_name(token.typeId); 56 | return stream << token_type << "(" << token.value << ") on line " << token.line_number; 57 | } 58 | 59 | bool operator==(const Token &t1, const Token &t2) 60 | { 61 | return t1.typeId == t2.typeId && t1.value == t2.value; 62 | } 63 | 64 | bool operator!=(const Token &t1, const Token &t2) 65 | { 66 | return t1.typeId != t2.typeId || t1.value != t2.value; 67 | } 68 | 69 | bool operator<(const Token &t1, const Token &t2) 70 | { 71 | return t1.value < t2.value; 72 | } -------------------------------------------------------------------------------- /src/compiler/node/node.cpp: -------------------------------------------------------------------------------- 1 | #include "node.h" 2 | #include 3 | #include 4 | 5 | Node* Node::of(std::string label, int level) 6 | { 7 | Node* node = new Node(); 8 | 
node->label = label; 9 | node->level = level; 10 | return node; 11 | } 12 | 13 | void Node::append_child(Node* child) 14 | { 15 | this->children.push_back(child); 16 | } 17 | 18 | void Node::append_token(Token token) 19 | { 20 | this->tokens.push_back(token); 21 | } 22 | 23 | void Node::destroy(Node* node) 24 | { 25 | if (node == NULL) { 26 | return; 27 | } 28 | std::vector children = node->children; 29 | std::for_each(children.begin(), children.end(), &destroy); 30 | delete node; 31 | } 32 | 33 | void Node::print(Node* node) 34 | { 35 | if (node == NULL) { 36 | return; 37 | } 38 | 39 | print_node(node); 40 | 41 | print_children(node); 42 | } 43 | 44 | void Node::print_node(Node* node) 45 | { 46 | printf("%*c%d:%-9s ", node->level, ' ', node->level, node->label.c_str()); 47 | for (auto i = node->tokens.begin(); i != node->tokens.end(); ++i) 48 | std::cout << *i << ' '; 49 | printf("\n"); 50 | } 51 | 52 | void Node::print_children(Node* node) 53 | { 54 | std::vector children = node->children; 55 | for (unsigned int i = 0; i < children.size(); i++) { 56 | print(children.at(i)); 57 | } 58 | } 59 | 60 | std::vector Node::get_identifier_tokens() 61 | { 62 | std::vector identifier_tokens; 63 | for (auto token : tokens) { 64 | if (token.is_identifier()) { 65 | identifier_tokens.push_back(token); 66 | } 67 | } 68 | return identifier_tokens; 69 | } 70 | 71 | 72 | std::ostream& operator<<(std::ostream &stream, const Node &node) 73 | { 74 | return stream << node.label; 75 | } 76 | -------------------------------------------------------------------------------- /src/compiler/backend/backend.h: -------------------------------------------------------------------------------- 1 | #ifndef BACKEND_H 2 | #define BACKEND_H 3 | 4 | #include 5 | 6 | #include "../node/node.h" 7 | #include "var_stack/var_stack.h" 8 | #include "code_generator/code_generator.h" 9 | #include "optimizer/optimizer.h" 10 | 11 | /** 12 | * Backend part of compiler responsible for: 13 | * - static semantics 14 | 
/**
 * Backend part of compiler responsible for:
 *   - static semantics
 *   - storage allocation
 *   - and code generation
 *
 * NOTE(review): only the declaration is visible here; the behaviour of each
 * member is defined in backend.cpp and should be confirmed there.
 */

class Backend
{
    public:
        // base_filename: presumably the stem used to name the generated
        // target file -- TODO confirm against backend.cpp.
        Backend(std::string base_filename);
        // Entry point taking the root (or any subtree) of the parse tree.
        void traverse(Node* node);

    private:
        // State owned by the backend: the variable stack (held by value) and
        // a pointer to the code generator.
        VarStack var_stack;
        CodeGenerator* code_generator;
        // Class-wide constant label sets.
        // NOTE(review): the element type is not visible in this view.
        const static std::set new_scope_labels;
        const static std::set labels_containing_expr;
        void check_for_variables(Node* node);
        void check_location(int location, std::string err_msg, Token token);
        void print_error_and_exit(std::string msg, Token token);
        void traverse_children(Node* node);
        void pop_vars_in_current_scope();
        void traverse_child(Node* node, int child_index);
        // Stateless predicates over a node (static: no Backend instance needed).
        static bool introduces_new_scope(Node* node);
        static bool contains_variable_declarations(Node* node);
        static bool contains_expression(Node* node);
        static bool does_not_contain_expression(Node* node);
        // Per-construct handlers; each receives the node being inspected.
        void check_for_assignments(Node* node, int location);
        void check_for_input_statements(Node* node, int location);
        void check_for_negation(Node* node);
        void check_for_r_letter(Node* node);
        void check_for_expr(Node* node);
        std::string get_operation(Token token);
        void check_for_print_statements(Node* node);
        void check_for_ifstat(Node* node);
        void check_for_loop(Node* node);
        // NOTE(review): the vector's element type is not visible in this view.
        std::vector get_break_conditions(Token token);
        void evaluate_condition(Node* node, std::string out_label);
};
tmp.open(tmp_filename); 16 | 17 | std::string prev_line; 18 | std::string line; 19 | while (std::getline(target, line)) { 20 | bool print_to_tmp = true; 21 | if (is_line(prev_line, STACK_WRITE) && is_line(line, STACK_READ)) { 22 | std::vector prev_words = split_line(prev_line); 23 | std::vector words = split_line(line); 24 | int prev_location = to_int(words[1]); 25 | int location = to_int(words[1]); 26 | if (prev_location == location) { 27 | print_to_tmp = false; 28 | } 29 | } 30 | if (print_to_tmp) { 31 | tmp << line << std::endl; 32 | } 33 | prev_line = line; 34 | } 35 | std::getline(target, line); 36 | 37 | tmp.close(); 38 | target.close(); 39 | 40 | if (remove(target_filename.c_str()) != 0) { 41 | perror("Error deleting file"); 42 | } 43 | 44 | if (rename(tmp_filename.c_str(), target_filename.c_str()) != 0) { 45 | perror("Error renaming file"); 46 | } 47 | } 48 | 49 | bool Optimizer::is_line(std::string line, std::string target_keyword) 50 | { 51 | return line.find(target_keyword) != std::string::npos; 52 | } 53 | 54 | std::vector Optimizer::split_line(std::string line) 55 | { 56 | std::istringstream iss(line); 57 | std::vector words((std::istream_iterator(iss)), 58 | std::istream_iterator()); 59 | return words; 60 | } 61 | 62 | int Optimizer::to_int(std::string str) 63 | { 64 | int num; 65 | std::istringstream iss(str); 66 | iss >> num; 67 | return num; 68 | } -------------------------------------------------------------------------------- /src/compiler/token/operator/operator_token.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include "operator_token.h" 3 | 4 | const char ADDITION = '+'; 5 | const char SUBTRACTION = '-'; 6 | const char MULTIPLICATION = '*'; 7 | const char DIVISION = '/'; 8 | const char NEGATION = '#'; 9 | const char LESS_THAN = '<'; 10 | const char GREATER_THAN = '>'; 11 | const char ASSIGNMENT = '='; 12 | const char EQUALS = ':'; 13 | 14 | bool OperatorToken::is_operator(char 
character) 15 | { 16 | return operators.find(character) != operators.end(); 17 | } 18 | 19 | bool OperatorToken::is_addition_token(Token token) 20 | { 21 | Token addition_token = OperatorToken(ADDITION); 22 | return token == addition_token; 23 | } 24 | 25 | bool OperatorToken::is_subtraction_token(Token token) 26 | { 27 | Token subtraction_token = OperatorToken(SUBTRACTION); 28 | return token == subtraction_token; 29 | } 30 | 31 | bool OperatorToken::is_multiplication_token(Token token) 32 | { 33 | Token multiplication_token = OperatorToken(MULTIPLICATION); 34 | return token == multiplication_token; 35 | } 36 | 37 | bool OperatorToken::is_division_token(Token token) 38 | { 39 | Token division_token = OperatorToken(DIVISION); 40 | return token == division_token; 41 | } 42 | 43 | bool OperatorToken::is_negation_token(Token token) 44 | { 45 | Token negation_token = OperatorToken(NEGATION); 46 | return token == negation_token; 47 | } 48 | 49 | bool OperatorToken::is_less_than_token(Token token) 50 | { 51 | Token less_than_token = OperatorToken(LESS_THAN); 52 | return token == less_than_token; 53 | } 54 | 55 | bool OperatorToken::is_greater_than_token(Token token) 56 | { 57 | Token greater_than_token = OperatorToken(GREATER_THAN); 58 | return token == greater_than_token; 59 | } 60 | 61 | bool OperatorToken::is_equals_token(Token token) 62 | { 63 | Token equals_token = OperatorToken(EQUALS); 64 | return token == equals_token; 65 | } 66 | 67 | bool OperatorToken::is_assignment_token(Token token) 68 | { 69 | Token assignment_token = OperatorToken(ASSIGNMENT); 70 | return token == assignment_token; 71 | } 72 | 73 | OperatorToken::OperatorToken(std::string value, int line_number) 74 | : Token(OperatorTokenId, value, line_number) {} 75 | 76 | OperatorToken::OperatorToken(char value) 77 | : Token(OperatorTokenId, value) {} 78 | 79 | std::set OperatorToken::operators = { 80 | ADDITION, 81 | SUBTRACTION, 82 | MULTIPLICATION, 83 | DIVISION, 84 | NEGATION, 85 | LESS_THAN, 86 | 
GREATER_THAN, 87 | EQUALS, 88 | ASSIGNMENT 89 | }; 90 | -------------------------------------------------------------------------------- /src/compiler/frontend/scanner/scanner.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | 5 | #include "scanner.h" 6 | #include "state_transition_table/state_transition_table.h" 7 | 8 | Scanner::Scanner(std::string filename) 9 | { 10 | file.open(filename); 11 | check_file(file, filename); 12 | line_number = 1; 13 | next_char = ' '; 14 | is_eof_reached = false; 15 | } 16 | 17 | Scanner::~Scanner() 18 | { 19 | close_file(); 20 | } 21 | 22 | void Scanner::check_file(std::ifstream& file, std::string filename) 23 | { 24 | if (!file) { 25 | std::cerr << "Error: Cannot open file '" << filename << "'.\n"; 26 | exit(1); 27 | } 28 | } 29 | 30 | void Scanner::close_file() 31 | { 32 | file.close(); 33 | } 34 | 35 | Token Scanner::read() 36 | { 37 | int state = 0; 38 | int next_state = StateTransitionTable::get_next_state(state, next_char); 39 | std::string string = ""; 40 | 41 | do { 42 | check_for_eof(); 43 | 44 | check_for_invalid_character(state); 45 | 46 | next_state = StateTransitionTable::get_next_state(state, next_char); 47 | 48 | check_for_table_error(next_state); 49 | 50 | if (StateTransitionTable::is_final_state(next_state)) { 51 | return StateTransitionTable::get_token(next_state, string, line_number); 52 | } else { 53 | state = next_state; 54 | if (!isspace(next_char) && next_char != COMMENT_CHAR && state != COMMENT_STATE) { 55 | string += next_char; 56 | } 57 | } 58 | if (next_char == '\n') { 59 | line_number++; 60 | } 61 | } while (file >> std::noskipws >> next_char || !is_eof_reached); 62 | 63 | return EndOfFileToken(line_number); 64 | } 65 | 66 | void Scanner::check_for_invalid_character(int state) 67 | { 68 | if (StateTransitionTable::is_invalid_char(next_char) && state != COMMENT_STATE) { 69 | 
StateTransitionTable::print_invalid_char_error(next_char, line_number); 70 | exit(1); 71 | } 72 | } 73 | 74 | void Scanner::check_for_table_error(int state) 75 | { 76 | if (is_table_error(state)) { 77 | StateTransitionTable::print_error(state, line_number); 78 | exit(1); 79 | } 80 | } 81 | 82 | /** 83 | * Since we can't read in an EOF character, 84 | * we need to read in some last character to 85 | * properly recognize tokens at the end of a file. 86 | * The space ' ' character is arbitrary. 87 | */ 88 | void Scanner::check_for_eof() 89 | { 90 | if (file.eof()) { 91 | next_char = ' '; 92 | is_eof_reached = true; 93 | } 94 | } -------------------------------------------------------------------------------- /src/compiler/token/delimiter/delimiter_token.cpp: -------------------------------------------------------------------------------- 1 | #include "delimiter_token.h" 2 | 3 | const char DOT = '.'; 4 | const char LEFT_PARENTHESES = '('; 5 | const char RIGHT_PARENTHESES = ')'; 6 | const char COMMA = ','; 7 | const char LEFT_CURLY_BRACE = '{'; 8 | const char RIGHT_CURLY_BRACE = '}'; 9 | const char SEMI_COLON = ';'; 10 | const char LEFT_SQUARE_BRACKET = '['; 11 | const char RIGHT_SQUARE_BRACKET = ']'; 12 | 13 | bool DelimiterToken::is_delimiter(char character) 14 | { 15 | return delimiters.find(character) != delimiters.end(); 16 | } 17 | 18 | bool DelimiterToken::is_dot_token(Token token) 19 | { 20 | Token dot_token = DelimiterToken(DOT); 21 | return token == dot_token; 22 | } 23 | 24 | bool DelimiterToken::is_left_parentheses_token(Token token) 25 | { 26 | Token left_parentheses_token = DelimiterToken(LEFT_PARENTHESES); 27 | return token == left_parentheses_token; 28 | } 29 | 30 | bool DelimiterToken::is_right_parentheses_token(Token token) 31 | { 32 | Token right_parentheses_token = DelimiterToken(RIGHT_PARENTHESES); 33 | return token == right_parentheses_token; 34 | } 35 | 36 | bool DelimiterToken::is_comma_token(Token token) 37 | { 38 | Token comma_token = 
DelimiterToken(COMMA); 39 | return token == comma_token; 40 | } 41 | 42 | bool DelimiterToken::is_left_curly_brace_token(Token token) 43 | { 44 | Token left_curly_brace_token = DelimiterToken(LEFT_CURLY_BRACE); 45 | return token == left_curly_brace_token; 46 | } 47 | 48 | bool DelimiterToken::is_right_curly_brace_token(Token token) 49 | { 50 | Token right_curly_brace_token = DelimiterToken(RIGHT_CURLY_BRACE); 51 | return token == right_curly_brace_token; 52 | } 53 | 54 | bool DelimiterToken::is_semi_colon_token(Token token) 55 | { 56 | Token semi_colon_token = DelimiterToken(SEMI_COLON); 57 | return token == semi_colon_token; 58 | } 59 | 60 | bool DelimiterToken::is_left_square_bracket_token(Token token) 61 | { 62 | Token left_square_bracket_token = DelimiterToken(LEFT_SQUARE_BRACKET); 63 | return token == left_square_bracket_token; 64 | } 65 | 66 | bool DelimiterToken::is_right_square_bracket_token(Token token) 67 | { 68 | Token right_square_bracket_token = DelimiterToken(RIGHT_SQUARE_BRACKET); 69 | return token == right_square_bracket_token; 70 | } 71 | 72 | DelimiterToken::DelimiterToken(std::string value, int line_number) 73 | : Token(DelimiterTokenId, value, line_number) {} 74 | 75 | DelimiterToken::DelimiterToken(char value) 76 | : Token(DelimiterTokenId, value) {} 77 | 78 | std::set DelimiterToken::delimiters = { 79 | DOT, 80 | LEFT_PARENTHESES, 81 | RIGHT_PARENTHESES, 82 | COMMA, 83 | LEFT_CURLY_BRACE, 84 | RIGHT_CURLY_BRACE, 85 | SEMI_COLON, 86 | LEFT_SQUARE_BRACKET, 87 | RIGHT_SQUARE_BRACKET 88 | }; -------------------------------------------------------------------------------- /src/compiler/token/keyword/keyword_token.cpp: -------------------------------------------------------------------------------- 1 | #include "keyword_token.h" 2 | 3 | const std::string START = "start"; 4 | const std::string END = "end"; 5 | const std::string ITER = "iter"; 6 | const std::string VOID = "void"; 7 | const std::string VAR = "var"; 8 | const std::string RETURN = 
"return"; 9 | const std::string READ = "read"; 10 | const std::string PRINT = "print"; 11 | const std::string PROGRAM = "program"; 12 | const std::string IF = "if"; 13 | const std::string THEN = "then"; 14 | const std::string LET = "let"; 15 | 16 | bool KeywordToken::is_keyword(std::string string) 17 | { 18 | return keywords.find(string) != keywords.end(); 19 | } 20 | 21 | bool KeywordToken::is_start_token(Token token) 22 | { 23 | KeywordToken start_token = KeywordToken(START); 24 | return token == start_token; 25 | } 26 | 27 | bool KeywordToken::is_end_token(Token token) 28 | { 29 | KeywordToken end_token = KeywordToken(END); 30 | return token == end_token; 31 | } 32 | 33 | bool KeywordToken::is_iter_token(Token token) 34 | { 35 | KeywordToken iter_token = KeywordToken(ITER); 36 | return token == iter_token; 37 | } 38 | 39 | bool KeywordToken::is_void_token(Token token) 40 | { 41 | KeywordToken void_token = KeywordToken(VOID); 42 | return token == void_token; 43 | } 44 | 45 | bool KeywordToken::is_var_token(Token token) 46 | { 47 | KeywordToken var_token = KeywordToken(VAR); 48 | return token == var_token; 49 | } 50 | 51 | bool KeywordToken::is_return_token(Token token) 52 | { 53 | KeywordToken return_token = KeywordToken(RETURN); 54 | return token == return_token; 55 | } 56 | 57 | bool KeywordToken::is_read_token(Token token) 58 | { 59 | KeywordToken read_token = KeywordToken(READ); 60 | return token == read_token; 61 | } 62 | 63 | bool KeywordToken::is_print_token(Token token) 64 | { 65 | KeywordToken print_token = KeywordToken(PRINT); 66 | return token == print_token; 67 | } 68 | 69 | bool KeywordToken::is_program_token(Token token) 70 | { 71 | KeywordToken program_token = KeywordToken(PROGRAM); 72 | return token == program_token; 73 | } 74 | 75 | bool KeywordToken::is_if_token(Token token) 76 | { 77 | KeywordToken if_token = KeywordToken(IF); 78 | return token == if_token; 79 | } 80 | 81 | bool KeywordToken::is_then_token(Token token) 82 | { 83 | KeywordToken 
then_token = KeywordToken(THEN); 84 | return token == then_token; 85 | } 86 | 87 | bool KeywordToken::is_let_token(Token token) 88 | { 89 | KeywordToken let_token = KeywordToken(LET); 90 | return token == let_token; 91 | } 92 | 93 | KeywordToken::KeywordToken(std::string value, int line_number) 94 | : Token(KeywordTokenId, value, line_number) {} 95 | 96 | KeywordToken::KeywordToken(std::string value) 97 | : Token(KeywordTokenId, value) {} 98 | 99 | std::set KeywordToken::keywords = { 100 | START, 101 | END, 102 | ITER, 103 | VOID, 104 | VAR, 105 | RETURN, 106 | READ, 107 | PRINT, 108 | PROGRAM, 109 | IF, 110 | THEN, 111 | LET 112 | }; -------------------------------------------------------------------------------- /src/timer.h: -------------------------------------------------------------------------------- 1 | #ifndef TIMER_H 2 | #define TIMER_H 3 | 4 | #include 5 | #include 6 | #include 7 | 8 | class timer 9 | { 10 | friend std::ostream& operator<<(std::ostream& os, timer& t); 11 | 12 | private: 13 | bool running; 14 | clock_t start_clock; 15 | time_t start_time; 16 | double acc_time; 17 | 18 | double elapsed_time(); 19 | 20 | public: 21 | 22 | // 'running' is initially false. 
/**
 * Simple stopwatch. Accumulates running time across start/stop pairs and can
 * be printed to an ostream.
 */
class timer
{
    friend std::ostream& operator<<(std::ostream& os, timer& t);

public:
    // A new timer is idle; it must be started explicitly with 'start' or
    // 'restart'.
    timer() : running(false), start_clock(0), start_time(0), acc_time(0) { }

    void start(const char* msg = 0);
    void restart(const char* msg = 0);
    void stop(const char* msg = 0);
    void check(const char* msg = 0);
    double timeVal(); // added 12/12/07

private:
    bool running;        // true between start/restart and stop
    clock_t start_clock; // clock() reading at the last start
    time_t start_time;   // time(0) reading at the last start
    double acc_time;     // seconds accumulated by completed runs

    double elapsed_time();
}; // class timer

//===========================================================================
// added 12/12/07
// Seconds since the timer was last started; same measurement as
// elapsed_time, exposed publicly.

inline double timer::timeVal()
{
    return elapsed_time();
}

//===========================================================================
// Seconds since the timer was last started. For periods under an hour the
// value comes from clock(); from an hour on, whole seconds from time() are
// reported instead.

inline double timer::elapsed_time()
{
    const time_t wall_seconds = time(0) - start_time;
    if (wall_seconds >= 3600) {
        return 1.0 * wall_seconds;
    }
    return (clock() - start_clock) / (1.0 * CLOCKS_PER_SEC);
} // timer::elapsed_time

//===========================================================================
// Start the timer; a timer that is already running keeps running untouched.
// The optional message is printed either way.

inline void timer::start(const char* msg)
{
    if (msg) {
        std::cout << msg << std::endl;
    }

    if (!running) {
        running = true;
        start_clock = clock();
        start_time = time(0);
    }
} // timer::start

//===========================================================================
// Reset the accumulated time to zero and start running from now.
// Prints the optional message first.

inline void timer::restart(const char* msg)
{
    if (msg) {
        std::cout << msg << std::endl;
    }

    acc_time = 0;
    running = true;
    start_clock = clock();
    start_time = time(0);
} // timer::restart

//===========================================================================
// Stop the timer, folding the current run into the accumulated time.
// Prints the optional message first.

inline void timer::stop(const char* msg)
{
    if (msg) {
        std::cout << msg << std::endl;
    }

    if (running) {
        acc_time += elapsed_time();
    }
    running = false;
} // timer::stop

//===========================================================================
// Print the optional message followed by the total running time so far.

inline void timer::check(const char* msg)
{
    if (msg) {
        std::cout << msg << " : ";
    }

    double total = acc_time;
    if (running) {
        total += elapsed_time();
    }

    std::cout << "Elapsed time [" << std::setiosflags(std::ios::fixed)
              << std::setprecision(2)
              << total << "] seconds\n";
} // timer::check

//===========================================================================
// Stream the total running time of 't' (two decimals, fixed notation),
// e.g. "cout << t".

inline std::ostream& operator<<(std::ostream& os, timer& t)
{
    double total = t.acc_time;
    if (t.running) {
        total += t.elapsed_time();
    }
    os << std::setprecision(2) << std::setiosflags(std::ios::fixed) << total;
    return os;
}
elapsed_time() : 0) << "] seconds\n"; 123 | 124 | } // timer::check 125 | 126 | //=========================================================================== 127 | // Allow timers to be printed to ostreams using the syntax 'os << t' 128 | // for an ostream 'os' and a timer 't'. For example, "cout << t" will 129 | // print out the total amount of time 't' has been "running". 130 | 131 | inline std::ostream& operator<<(std::ostream& os, timer& t) 132 | { 133 | os << std::setprecision(2) << std::setiosflags(std::ios::fixed) 134 | << t.acc_time + (t.running ? t.elapsed_time() : 0); 135 | return os; 136 | } 137 | 138 | //=========================================================================== 139 | 140 | #endif // TIMER_H 141 | 142 | -------------------------------------------------------------------------------- /src/compiler/frontend/scanner/state_transition_table/state_transition_table.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include "../../token/token.h" 3 | #include "state_transition_table.h" 4 | 5 | int StateTransitionTable::get_next_state(int current_state, char character) 6 | { 7 | int column_index = get_column_index(character); 8 | return table[current_state][column_index]; 9 | } 10 | 11 | int StateTransitionTable::get_column_index(char character) 12 | { 13 | if (IntegerToken::is_integer(character)) { 14 | return 0; 15 | } else if (COMMENT_CHAR == character) { 16 | return 1; 17 | } else if (OperatorToken::is_operator(character)) { 18 | return 2; 19 | } else if (DelimiterToken::is_delimiter(character)) { 20 | return 3; 21 | } else if (islower(character) && isalpha(character)) { 22 | return 4; 23 | } else if (isupper(character) && isalpha(character)) { 24 | return 5; 25 | } else if (EndOfFileToken::is_eof(character)) { 26 | return 6; 27 | } else if (isspace(character)) { 28 | return 7; 29 | } 30 | return -1; 31 | } 32 | 33 | bool StateTransitionTable::is_invalid_char(char character) 34 | { 35 | 
return !is_valid_char(character); 36 | } 37 | 38 | bool StateTransitionTable::is_valid_char(char character) 39 | { 40 | return (IntegerToken::is_integer(character) || 41 | COMMENT_CHAR == character || 42 | OperatorToken::is_operator(character) || 43 | DelimiterToken::is_delimiter(character) || 44 | isalpha(character) || 45 | EndOfFileToken::is_eof(character) || 46 | isspace(character)); 47 | } 48 | 49 | bool StateTransitionTable::is_intermediate_state(int state) 50 | { 51 | return !is_final_state(state); 52 | } 53 | 54 | bool StateTransitionTable::is_final_state(int state) 55 | { 56 | return is_token_type_id(state); 57 | } 58 | 59 | void StateTransitionTable::print_error(int error, int line_number) 60 | { 61 | std::string message = get_base_error_message(); 62 | message.append(get_error_message(error)); 63 | message.append(" on line "); 64 | std::cerr << message << line_number << std::endl; 65 | } 66 | 67 | 68 | void StateTransitionTable::print_invalid_char_error(char character, int line_number) 69 | { 70 | std::string message = get_base_error_message(); 71 | message.append(get_error_message(InvalidCharacter)); 72 | std::cerr << message << " " << character << " on line " << line_number << std::endl; 73 | } 74 | 75 | std::string StateTransitionTable::get_base_error_message() 76 | { 77 | return "Scanner Error: "; 78 | } 79 | 80 | std::string StateTransitionTable::get_error_message(int error) 81 | { 82 | std::string message; 83 | switch (error) { 84 | case InvalidCharacter: 85 | message = "Invalid Character"; 86 | break; 87 | case InvalidTransition: 88 | message = "Invalid transition"; 89 | break; 90 | case MaxIntegerLength: 91 | message = "Maximum integer length exceeded"; 92 | break; 93 | case MaxIdentifierLength: 94 | message = "Maximum identifier length exceeded"; 95 | break; 96 | } 97 | return message; 98 | } 99 | 100 | Token StateTransitionTable::get_token(int state, std::string string, int line_number) 101 | { 102 | switch (state) { 103 | case DelimiterTokenId: 
104 | return DelimiterToken(string, line_number); 105 | case EndOfFileTokenId: 106 | return EndOfFileToken(line_number); 107 | case IdentifierTokenId: 108 | if (KeywordToken::is_keyword(string)) { 109 | return KeywordToken(string, line_number); 110 | } else { 111 | return IdentifierToken(string, line_number); 112 | } 113 | case IntegerTokenId: 114 | return IntegerToken(string, line_number); 115 | case OperatorTokenId: 116 | return OperatorToken(string, line_number); 117 | default: 118 | std::string default_token_name = get_token_type_name(DefaultTokenTypeId); 119 | return Token(DefaultTokenTypeId, default_token_name, line_number); 120 | } 121 | } 122 | 123 | const std::vector> StateTransitionTable::table = { 124 | {1, COMMENT_STATE, 10, 11, 12, InvalidTransition, EndOfFileTokenId, 0}, 125 | {2, IntegerTokenId, IntegerTokenId, IntegerTokenId, IntegerTokenId, IntegerTokenId, IntegerTokenId, IntegerTokenId}, 126 | {3, IntegerTokenId, IntegerTokenId, IntegerTokenId, IntegerTokenId, IntegerTokenId, IntegerTokenId, IntegerTokenId}, 127 | {4, IntegerTokenId, IntegerTokenId, IntegerTokenId, IntegerTokenId, IntegerTokenId, IntegerTokenId, IntegerTokenId}, 128 | {5, IntegerTokenId, IntegerTokenId, IntegerTokenId, IntegerTokenId, IntegerTokenId, IntegerTokenId, IntegerTokenId}, 129 | {6, IntegerTokenId, IntegerTokenId, IntegerTokenId, IntegerTokenId, IntegerTokenId, IntegerTokenId, IntegerTokenId}, 130 | {7, IntegerTokenId, IntegerTokenId, IntegerTokenId, IntegerTokenId, IntegerTokenId, IntegerTokenId, IntegerTokenId}, 131 | {8, IntegerTokenId, IntegerTokenId, IntegerTokenId, IntegerTokenId, IntegerTokenId, IntegerTokenId, IntegerTokenId}, 132 | {MaxIntegerLength, IntegerTokenId, IntegerTokenId, IntegerTokenId, IntegerTokenId, IntegerTokenId, IntegerTokenId, IntegerTokenId}, 133 | {COMMENT_STATE, 0, COMMENT_STATE, COMMENT_STATE, COMMENT_STATE, COMMENT_STATE, COMMENT_STATE, COMMENT_STATE}, 134 | {OperatorTokenId, OperatorTokenId, OperatorTokenId, OperatorTokenId, 
OperatorTokenId, OperatorTokenId, OperatorTokenId, OperatorTokenId}, 135 | {DelimiterTokenId, DelimiterTokenId, DelimiterTokenId, DelimiterTokenId, DelimiterTokenId, DelimiterTokenId, DelimiterTokenId, DelimiterTokenId}, 136 | {13, IdentifierTokenId, IdentifierTokenId, IdentifierTokenId, 13, 13, IdentifierTokenId, IdentifierTokenId}, 137 | {14, IdentifierTokenId, IdentifierTokenId, IdentifierTokenId, 14, 14, IdentifierTokenId, IdentifierTokenId}, 138 | {15, IdentifierTokenId, IdentifierTokenId, IdentifierTokenId, 15, 15, IdentifierTokenId, IdentifierTokenId}, 139 | {16, IdentifierTokenId, IdentifierTokenId, IdentifierTokenId, 16, 16, IdentifierTokenId, IdentifierTokenId}, 140 | {17, IdentifierTokenId, IdentifierTokenId, IdentifierTokenId, 17, 17, IdentifierTokenId, IdentifierTokenId}, 141 | {18, IdentifierTokenId, IdentifierTokenId, IdentifierTokenId, 18, 18, IdentifierTokenId, IdentifierTokenId}, 142 | {19, IdentifierTokenId, IdentifierTokenId, IdentifierTokenId, 19, 19, IdentifierTokenId, IdentifierTokenId}, 143 | {MaxIdentifierLength, IdentifierTokenId, IdentifierTokenId, IdentifierTokenId, MaxIdentifierLength, MaxIdentifierLength, IdentifierTokenId, IdentifierTokenId} 144 | }; -------------------------------------------------------------------------------- /assets/deterministic-finite-automaton.json: -------------------------------------------------------------------------------- 1 | 
{"nodes":[{"x":157,"y":484,"text":"0","isAcceptState":false},{"x":78,"y":62,"text":"1","isAcceptState":false},{"x":243,"y":62,"text":"2","isAcceptState":false},{"x":408,"y":62,"text":"3","isAcceptState":false},{"x":571,"y":62,"text":"4","isAcceptState":false},{"x":731,"y":62,"text":"5","isAcceptState":false},{"x":889,"y":62,"text":"6","isAcceptState":false},{"x":1039,"y":62,"text":"7","isAcceptState":false},{"x":1196,"y":62,"text":"8","isAcceptState":false},{"x":914,"y":243,"text":"Integer","isAcceptState":true},{"x":296,"y":304,"text":"9","isAcceptState":false},{"x":52,"y":905,"text":"EoF","isAcceptState":true},{"x":131,"y":766,"text":"10","isAcceptState":false},{"x":277,"y":766,"text":"11","isAcceptState":false},{"x":347,"y":905,"text":"Delimeter","isAcceptState":true},{"x":206,"y":905,"text":"Operator","isAcceptState":true},{"x":261,"y":484,"text":"12","isAcceptState":false},{"x":447,"y":484,"text":"13","isAcceptState":false},{"x":628,"y":484,"text":"14","isAcceptState":false},{"x":804,"y":484,"text":"15","isAcceptState":false},{"x":988,"y":484,"text":"16","isAcceptState":false},{"x":1163,"y":484,"text":"17","isAcceptState":false},{"x":1163,"y":621,"text":"18","isAcceptState":false},{"x":1163,"y":754,"text":"19","isAcceptState":false},{"x":854,"y":796,"text":"Identifier","isAcceptState":true},{"x":1163,"y":913,"text":"Error","isAcceptState":true},{"x":1196,"y":215,"text":"Error","isAcceptState":true}],"links":[{"type":"SelfLink","node":0,"text":"White 
Space","anchorAngle":-2.3241857218886333},{"type":"Link","nodeA":0,"nodeB":1,"text":"0-9","lineAngleAdjust":0,"parallelPart":0.5851483941739288,"perpendicularPart":8.050256927692732},{"type":"Link","nodeA":1,"nodeB":2,"text":"0-9","lineAngleAdjust":0,"parallelPart":0.5,"perpendicularPart":0},{"type":"Link","nodeA":2,"nodeB":3,"text":"0-9","lineAngleAdjust":0,"parallelPart":0.5,"perpendicularPart":0},{"type":"Link","nodeA":3,"nodeB":4,"text":"0-9","lineAngleAdjust":0,"parallelPart":0.5,"perpendicularPart":0},{"type":"Link","nodeA":4,"nodeB":5,"text":"0-9","lineAngleAdjust":0,"parallelPart":0.5,"perpendicularPart":0},{"type":"Link","nodeA":5,"nodeB":6,"text":"0-9","lineAngleAdjust":0,"parallelPart":0.5,"perpendicularPart":0},{"type":"Link","nodeA":6,"nodeB":7,"text":"0-9","lineAngleAdjust":0,"parallelPart":0.5,"perpendicularPart":0},{"type":"Link","nodeA":7,"nodeB":8,"text":"0-9","lineAngleAdjust":0,"parallelPart":0.5,"perpendicularPart":0},{"type":"Link","nodeA":8,"nodeB":9,"text":"Other","lineAngleAdjust":3.141592653589793,"parallelPart":0.7436810161591846,"perpendicularPart":-6.285164336537177},{"type":"Link","nodeA":1,"nodeB":9,"text":"Other","lineAngleAdjust":0,"parallelPart":0.6373683296954721,"perpendicularPart":93.75947936855259},{"type":"Link","nodeA":2,"nodeB":9,"text":"Other","lineAngleAdjust":0,"parallelPart":0.5504200810762689,"perpendicularPart":78.06804573927559},{"type":"Link","nodeA":3,"nodeB":9,"text":"Other","lineAngleAdjust":0,"parallelPart":0.4953548686447575,"perpendicularPart":52.40246708206535},{"type":"Link","nodeA":4,"nodeB":9,"text":"Other","lineAngleAdjust":0,"parallelPart":0.5378432285087427,"perpendicularPart":26.741285371653998},{"type":"Link","nodeA":5,"nodeB":9,"text":"Other","lineAngleAdjust":0,"parallelPart":0.4914566037735849,"perpendicularPart":14.13026672557348},{"type":"Link","nodeA":6,"nodeB":9,"text":"Other","lineAngleAdjust":0,"parallelPart":0.4424681456568139,"perpendicularPart":24.30388322996191},{"type":"Link","nodeA":7,"no
deB":9,"text":"Other","lineAngleAdjust":0,"parallelPart":0.5868674596671657,"perpendicularPart":12.583798731170065},{"type":"SelfLink","node":10,"text":"Other","anchorAngle":0.23071680105796785},{"type":"Link","nodeA":0,"nodeB":10,"text":"!","lineAngleAdjust":3.141592653589793,"parallelPart":0.7151149675925228,"perpendicularPart":-17.780268447713468},{"type":"Link","nodeA":10,"nodeB":0,"text":"!","lineAngleAdjust":3.141592653589793,"parallelPart":0.31223964990901837,"perpendicularPart":-26.943796112260234},{"type":"Link","nodeA":0,"nodeB":11,"text":"EoF","lineAngleAdjust":0,"parallelPart":0.5046901724156246,"perpendicularPart":47.41685963617797},{"type":"Link","nodeA":0,"nodeB":12,"text":"+ - * / < > = : #","lineAngleAdjust":3.141592653589793,"parallelPart":0.5248877805486285,"perpendicularPart":-10.692239710413233},{"type":"Link","nodeA":0,"nodeB":13,"text":". ( ) , { } ; [ ]","lineAngleAdjust":3.141592653589793,"parallelPart":0.6494186789319024,"perpendicularPart":-64.195469153747},{"type":"Link","nodeA":12,"nodeB":15,"text":"Any","lineAngleAdjust":0,"parallelPart":0.7104952395010701,"perpendicularPart":11.037896261783567},{"type":"Link","nodeA":13,"nodeB":14,"text":"Any","lineAngleAdjust":0,"parallelPart":0.6230234383761355,"perpendicularPart":17.39501584370531},{"type":"Link","nodeA":0,"nodeB":16,"text":"a-z","lineAngleAdjust":0,"parallelPart":0.5,"perpendicularPart":0},{"type":"Link","nodeA":16,"nodeB":17,"text":"A-Z a-z 0-9","lineAngleAdjust":0,"parallelPart":0.5,"perpendicularPart":0},{"type":"Link","nodeA":17,"nodeB":18,"text":"A-Z a-z 0-9","lineAngleAdjust":0,"parallelPart":0.5,"perpendicularPart":0},{"type":"Link","nodeA":18,"nodeB":19,"text":"A-Z a-z 0-9","lineAngleAdjust":0,"parallelPart":0.5,"perpendicularPart":0},{"type":"Link","nodeA":19,"nodeB":20,"text":"A-Z a-z 0-9","lineAngleAdjust":0,"parallelPart":0.5,"perpendicularPart":0},{"type":"Link","nodeA":20,"nodeB":21,"text":"A-Z a-z 
0-9","lineAngleAdjust":0,"parallelPart":0.5,"perpendicularPart":0},{"type":"Link","nodeA":21,"nodeB":22,"text":"A-Z a-z 0-9","lineAngleAdjust":3.141592653589793,"parallelPart":0.48936170212765956,"perpendicularPart":0},{"type":"Link","nodeA":22,"nodeB":23,"text":"A-Z a-z 0-9","lineAngleAdjust":3.141592653589793,"parallelPart":0.5813953488372093,"perpendicularPart":0},{"type":"Link","nodeA":23,"nodeB":24,"text":"Other","lineAngleAdjust":0,"parallelPart":0.3502390868425112,"perpendicularPart":0},{"type":"Link","nodeA":16,"nodeB":24,"text":"Other","lineAngleAdjust":0,"parallelPart":0.7806720140070113,"perpendicularPart":14.359217821106121},{"type":"Link","nodeA":17,"nodeB":24,"text":"Other","lineAngleAdjust":0,"parallelPart":0.6233580183078378,"perpendicularPart":14.506205510316766},{"type":"Link","nodeA":18,"nodeB":24,"text":"Other","lineAngleAdjust":0,"parallelPart":0.6889426461556899,"perpendicularPart":13.168891036721396},{"type":"Link","nodeA":19,"nodeB":24,"text":"Other","lineAngleAdjust":0,"parallelPart":0.5989243108445105,"perpendicularPart":17.842948187644318},{"type":"Link","nodeA":20,"nodeB":24,"text":"Other","lineAngleAdjust":0,"parallelPart":0.6740876150682248,"perpendicularPart":24.119937088430255},{"type":"Link","nodeA":21,"nodeB":24,"text":"Other","lineAngleAdjust":0,"parallelPart":0.5109379468115528,"perpendicularPart":28.685830901003786},{"type":"Link","nodeA":22,"nodeB":24,"text":"Other","lineAngleAdjust":0,"parallelPart":0.32666962713907344,"perpendicularPart":8.236790030458957},{"type":"Link","nodeA":23,"nodeB":25,"text":"A-Z a-z 0-9","lineAngleAdjust":3.141592653589793,"parallelPart":0.5413533834586466,"perpendicularPart":0},{"type":"Link","nodeA":8,"nodeB":26,"text":"0-9","lineAngleAdjust":3.141592653589793,"parallelPart":0.5490196078431373,"perpendicularPart":0}],"canvasWidth":1280,"canvasHeight":960} -------------------------------------------------------------------------------- /README.md: 
-------------------------------------------------------------------------------- 1 | # Compiler 2 | 3 | This is a simple compiler written for an undergraduate course in Program Translation. 4 | 5 | ## Usage 6 | 7 | 1. Run `make`. 8 | 9 | 2. Create a program file. For example, `myprogram.txt`: 10 | 11 | ``` 12 | ! myprogram.txt ! 13 | program 14 | var num 15 | start 16 | let num = 42 , 17 | print num , 18 | end 19 | ``` 20 | 21 | 3. Compile the program into assembly code. 22 | 23 | ``` 24 | $ comp myprogram.txt 25 | ``` 26 | 27 | 4. Run the interpreter on the corresponding assembly code 28 | 29 | ``` 30 | $ asmb myprogram.asm 31 | ``` 32 | 33 | ## Sample Programs and Language Features 34 | 35 | ### Variables 36 | 37 | ``` 38 | program 39 | var num 40 | start 41 | let num = 42 , 42 | print num , 43 | end 44 | ``` 45 | 46 | Output: 47 | 48 | ``` 49 | 42 50 | ``` 51 | 52 | 53 | ### Loops 54 | 55 | ``` 56 | program 57 | var i 58 | start 59 | let i = 0 , 60 | iter (i < 3) 61 | start 62 | print i , 63 | let i = (i + 1) , 64 | end , 65 | , 66 | end 67 | ``` 68 | 69 | Output: 70 | 71 | ``` 72 | 0 73 | 1 74 | 2 75 | ``` 76 | 77 | 78 | ### Conditionals 79 | 80 | ``` 81 | program 82 | start 83 | if (10 > 5) 84 | print 1 , 85 | , 86 | end 87 | ``` 88 | 89 | Output: 90 | 91 | ``` 92 | 1 93 | ``` 94 | 95 | #### Supported Operators 96 | 97 | * **>** - Greater than 98 | * **<** - Less than 99 | * **:** - Equals 100 | 101 | 102 | ### Arithmetic and Expressions 103 | 104 | ``` 105 | program 106 | start 107 | print #(((2 + 2) * 3) / 4) , 108 | end 109 | ``` 110 | 111 | Output: 112 | 113 | ``` 114 | -3 115 | ``` 116 | 117 | **NOTE:** All operators have standard meaning except **#** means *negation*. 118 | 119 | 120 | ### Input 121 | 122 | ``` 123 | program 124 | start 125 | var num 126 | read num , 127 | print num , 128 | end 129 | ``` 130 | 131 | The program would print whatever the user input. 132 | 133 | 134 | ### Comments 135 | 136 | ``` 137 | program 138 | start 139 | ! 
This is a comment ! 140 | print 1 , 141 | end 142 | ``` 143 | 144 | Comments are surrounded in exclamation points `!`. 145 | 146 | ## Frontend 147 | 148 | The frontend of our compiler is composed of two parts: 149 | 150 | 1. Scanner - Converts a stream of characters into tokens 151 | 2. Parser - Converts the tokens into a parse tree 152 | 153 | The scanner uses a driver and state transition table. 154 | 155 | ### Deterministic Finite Automaton 156 | ![Deterministic Finite Automaton](assets/deterministic-finite-automaton.png) 157 | 158 | To edit import `assets/deterministic-finite-automaton.json` at https://merfoo.github.io/fsm/ 159 | 160 | 161 | ### State Transition Table 162 | 163 | The following table is located at `src/compiler/frontend/scanner/state_transition_table/state_transition_table.cpp`. 164 | 165 | The function corresponding to the finite automaton driver is `Scanner::read()` in `src/compiler/frontend/scanner/scanner.cpp`. 166 | 167 | To edit import `assets/state-transition-table.csv` into your favorite spreadsheet program. 168 | 169 | | 0-9 | ! | + - * / < > = : # | . 
( ) , { } ; [ ] | a-z | A-Z | EoF | White Space | 170 | |-------------|--------------|-------------------|-------------------|-------------|-------------|--------------|--------------| 171 | | 1 | 9 | 10 | 11 | 12 | Error | EoF | 0 | 172 | | 3 | Integer | Integer | Integer | Integer | Integer | Integer | Integer | 173 | | 2 | Integer | Integer | Integer | Integer | Integer | Integer | Integer | 174 | | 4 | Integer | Integer | Integer | Integer | Integer | Integer | Integer | 175 | | 5 | Integer | Integer | Integer | Integer | Integer | Integer | Integer | 176 | | 6 | Integer | Integer | Integer | Integer | Integer | Integer | Integer | 177 | | 7 | Integer | Integer | Integer | Integer | Integer | Integer | Integer | 178 | | 8 | Integer | Integer | Integer | Integer | Integer | Integer | Integer | 179 | | Error | Integer | Integer | Integer | Integer | Integer | Integer | Integer | 180 | | 9 | 0 | 9 | 9 | 9 | 9 | 9 | 9 | 181 | | Operator | Operator | Operator | Operator | Operator | Operator | Operator | Operator | 182 | | Delimiter | Delimiter | Delimiter | Delimiter | Delimiter | Delimiter | Delimiter | Delimiter | 183 | | 13 | Identifier | Identifier | Identifier | 13 | 13 | Identifier | Identifier | 184 | | 14 | Identifier | Identifier | Identifier | 14 | 14 | Identifier | Identifier | 185 | | 15 | Identifier | Identifier | Identifier | 15 | 15 | Identifier | Identifier | 186 | | 16 | Identifier | Identifier | Identifier | 16 | 16 | Identifier | Identifier | 187 | | 17 | Identifier | Identifier | Identifier | 17 | 17 | Identifier | Identifier | 188 | | 18 | Identifier | Identifier | Identifier | 18 | 18 | Identifier | Identifier | 189 | | 19 | Identifier | Identifier | Identifier | 19 | 19 | Identifier | Identifier | 190 | | Error | Identifier | Identifier | Identifier | Error | Error | Identifier | Identifier | 191 | 192 | ### BNF 193 | 194 | The parser enforces the following grammar rules. 
195 | 196 | \<S\> -> **program** \<vars\> \<block\> 197 | 198 | \<block\> -> **start** \<vars\> \<stats\> **end** 199 | 200 | \<vars\> -> **var** **Identifier** \<vars\> | **empty** 201 | 202 | \<expr\> -> \<H\> **+** \<expr\> | \<H\> **-** \<expr\> | \<H\> **/** \<expr\> | \<H\> **\*** \<expr\> | \<H\> 203 | 204 | \<H\> -> **#** \<R\> | \<R\> 205 | 206 | \<R\> -> ( \<expr\> ) | **Identifier** | **Integer** 207 | 208 | \<stats\> -> \<stat\> \<m_stat\> 209 | 210 | \<m_stat\> -> \<stats\> | **empty** 211 | 212 | \<stat\> -> \<in\> **,** | \<out\> **,** | \<block\> **,** | \<ifstat\> **,** | \<loop\> **,** | \<assign\> **,** 213 | 214 | \<in\> -> **read** **Identifier** 215 | 216 | \<out\> -> **print** \<expr\> 217 | 218 | \<ifstat\> -> **if** **(** \<expr\> \<O\> \<expr\> **)** \<stat\> 219 | 220 | \<loop\> -> **iter** **(** \<expr\> \<O\> \<expr\> **)** \<stat\> 221 | 222 | \<assign\> -> **let** **Identifier** **=** \<expr\> 223 | 224 | \<O\> -> **<** | **>** | **:** 225 | 226 | ## Backend 227 | 228 | The backend of our compiler is composed of three parts: 229 | 230 | 1. Static semantics 231 | 2. Code generation 232 | 3. Optimization 233 | 234 | ### Static Semantics 235 | 236 | The only static semantics imposed by the compiler are proper use of variables. Before using a variable, you must first declare it using the **var** keyword. 237 | 238 | In our language scopes are imposed by blocks denoted by **start** and **end**, conditionals denoted by **if**, and loops denoted by **iter**. 239 | 240 | For our compiler, we implement **local scoping** in contrast to global scoping. 241 | 242 | ### Code Generation 243 | We traverse the decorated parse tree and, for each node, generate the corresponding assembly code. 244 | 245 | ### Optimization 246 | For optimization, we remove redundant assembly code statements that read from stack memory when we have just written to that same location in stack memory. 
247 | 248 | For example, consider the following program: 249 | 250 | ``` 251 | program 252 | var id1 253 | start 254 | let id1 = 2 , 255 | print id1 , 256 | end 257 | ``` 258 | 259 | For which the compiler generates the following assembly code: 260 | 261 | ``` 262 | PUSH 263 | PUSH 264 | LOAD 2 265 | STACKW 1 266 | STACKR 1 267 | STORE T0 268 | WRITE T0 269 | POP 270 | POP 271 | STOP 272 | T0 0 273 | ``` 274 | 275 | The optimization removes the `STACKR 1` statement since it is immediately preceded by `STACKW 1`. 276 | -------------------------------------------------------------------------------- /src/compiler/backend/backend.cpp: --------------------------------------------------------------------------------
#include "backend.h"
#include <iostream>

// NOTE(review): the element type of the two sets below was lost when this
// file was extracted; std::string is assumed -- confirm against backend.h.

/** Labels of nodes for which traverse() opens a scope on entry and closes it on exit. */
const std::set<std::string> Backend::new_scope_labels = {BLOCK, IFSTAT, LOOP};

/** Labels of nodes whose children are walked by a dedicated check_for_* handler. */
const std::set<std::string> Backend::labels_containing_expr = {ASSIGN, OUT, IFSTAT, LOOP};

/**
 * Creates the backend and the code generator it writes through.
 *
 * @param base_filename forwarded unchanged to the CodeGenerator constructor
 */
Backend::Backend(std::string base_filename)
{
    // NOTE(review): allocated with new; no matching delete is visible in
    // this file -- confirm that ownership is released elsewhere.
    code_generator = new CodeGenerator(base_filename);
}

/**
 * Visits a node of the parse tree: runs every check_for_* handler on it,
 * then walks its children unless one of the handlers already did so.
 *
 * Nodes whose label is in new_scope_labels are bracketed by PUSH/POP on the
 * target and by push()/pop() on the variable stack. After the START node has
 * been processed the remaining variables are popped, STOP is emitted, the
 * temporary variables are written out and the target is closed.
 *
 * @param node node to visit; NULL is ignored
 */
void Backend::traverse(Node* node)
{
    if (node == NULL) {
        return;
    }

    const bool opens_scope = introduces_new_scope(node);

    if (opens_scope) { // Entering a scope
        var_stack.push();
        code_generator->print_to_target(PUSH);
    }

    // Each handler is a no-op unless the node carries the label it handles.
    check_for_variables(node);
    check_for_print_statements(node);
    check_for_negation(node);
    check_for_r_letter(node);
    check_for_expr(node);
    check_for_ifstat(node);
    check_for_loop(node);

    // Expression-bearing nodes had their children walked by a handler above.
    if (does_not_contain_expression(node)) {
        traverse_children(node);
    }

    if (opens_scope) { // Exiting a scope
        pop_vars_in_current_scope();
        var_stack.pop();
        code_generator->print_to_target(POP);
    }

    if (node->label == START) {
        pop_vars_in_current_scope(); // Print POP for all global vars
        code_generator->print_to_target(STOP);
        code_generator->set_temp_vars_to_zero();
        code_generator->close_target();
    }
}

/**
 * Emits one POP for every variable the variable stack reports in the
 * current scope.
 */
void Backend::pop_vars_in_current_scope()
{
    int remaining = var_stack.num_vars_in_current_scope();
    while (remaining > 0) {
        code_generator->print_to_target(POP);
        remaining--;
    }
}

/**
 * Processes every identifier token attached to the node.
 *
 * On a VARS node each identifier is declared: PUSH is emitted and the token
 * is inserted into the variable stack. On any other node the identifier must
 * already be on the stack; its location is then handed to the assignment and
 * input handlers. Either violation prints an error and exits.
 */
void Backend::check_for_variables(Node* node)
{
    const bool is_declaration = contains_variable_declarations(node);

    for (auto id_token : node->get_identifier_tokens()) {
        int location = var_stack.find(id_token);

        if (is_declaration) {
            // NOTE(review): only location 0 is treated as a duplicate.
            // Whether that covers every redeclaration in the current scope
            // depends on what var_stack.find returns -- confirm.
            if (location == 0) {
                std::string msg = "Duplicate variable declaration";
                print_error_and_exit(msg, id_token);
            }
            code_generator->print_to_target(PUSH);
            var_stack.insert(id_token);
            continue;
        }

        if (location == -1) {
            std::string msg = "Undeclared variable";
            print_error_and_exit(msg, id_token);
        }
        check_for_assignments(node, location);
        check_for_input_statements(node, location);
    }
}

/**
 * Writes the message followed by the offending token to standard error and
 * terminates the process with EXIT_FAILURE.
 */
void Backend::print_error_and_exit(std::string msg, Token token)
{
    std::cerr << msg << " " << token << std::endl;
    exit(EXIT_FAILURE);
}

/**
 * Visits the child of node at child_index.
 *
 * The child itself is visited, not only its children. Previously this called
 * traverse_children(child), which skipped the child's own handlers: a nested
 * two-operand expression such as the "2 + 3" in "1 + 2 + 3" lost its
 * operator, and a negated left operand such as "#1 + 2" lost its negation.
 * For every other kind of child, visiting the node only forwards to its
 * children, so the generated code is unchanged. A NULL child is now ignored
 * instead of being dereferenced.
 */
void Backend::traverse_child(Node* node, int child_index)
{
    Node* child = node->children[child_index];
    traverse(child);
}

/**
 * Visits every child of node in order.
 */
void Backend::traverse_children(Node* node)
{
    for (Node* child : node->children) {
        traverse(child);
    }
}

/** True when the node's label is one of new_scope_labels. */
bool Backend::introduces_new_scope(Node* node)
{
    return new_scope_labels.count(node->label) == 1;
}

/** True when the node is a VARS node. */
bool Backend::contains_variable_declarations(Node* node)
{
    return node->label == VARS;
}

/** Negation of contains_expression(). */
bool Backend::does_not_contain_expression(Node* node)
{
    return !contains_expression(node);
}

bool Backend::contains_expression(Node* node) 125 | { 126 | return labels_containing_expr.count(node->label) == 1 || 127 | (node->label == EXPR && node->children.size() == 2) || // -> does not contain 128 | (node->label == HASH && node->tokens.size() == 1); 129 | } 130 | 131 | void Backend::check_for_assignments(Node* node, int location) 132 | { 133 | if (node->label == ASSIGN) { 134 | traverse_children(node); 135 | std::string str = STACK_WRITE + " " + std::to_string(location); 136 | code_generator->print_to_target(str); 137 | } 138 | } 139 | 140 | void Backend::check_for_input_statements(Node* node, int location) 141 | { 142 | if (node->label == IN) { 143 | std::string temp_var = code_generator->get_temp_var(); 144 | std::string str = READ + " " + temp_var; 145 | code_generator->print_to_target(str); 146 | str = LOAD + " " + temp_var; 147 | code_generator->print_to_target(str); 148 | str = STACK_WRITE + " " + std::to_string(location); 149 | code_generator->print_to_target(str); 150 | } 151 | } 152 | 153 | void Backend::check_for_negation(Node* node) 154 | { 155 | if (node->label == HASH && node->tokens.size() == 1) { 156 | traverse_children(node); 157 | code_generator->print_to_target(MULT + " -1"); 158 | } 159 | } 160 | 161 | void Backend::check_for_r_letter(Node* node) 162 | { 163 | if (node->label == R_LETTER) { 164 | for (auto token : node->tokens) { 165 | if (token.is_identifier()) { 166 | int location = var_stack.find(token); 167 | std::string str = STACK_READ + " " + std::to_string(location); 168 | code_generator->print_to_target(str); 169 | } 170 | if (token.is_integer()) { 171 | std::string str = LOAD + " " + token.get_value(); 172 | code_generator->print_to_target(str); 173 | } 174 | } 175 | } 176 | } 177 | 178 | void Backend::check_for_expr(Node* node) 179 | { 180 | if (node->label == EXPR and node->children.size() == 2) { 181 | traverse_child(node, 1); 182 | 183 | std::string temp_var = code_generator->get_and_store_temp_var(); 184 | 185 | 
traverse_child(node, 0); 186 | 187 | Token operator_token = node->tokens[0]; 188 | std::string operation = get_operation(operator_token); 189 | 190 | code_generator->print_to_target(operation + " " + temp_var); 191 | } 192 | } 193 | 194 | std::string Backend::get_operation(Token operator_token) 195 | { 196 | std::string operation; 197 | if (OperatorToken::is_addition_token(operator_token)) { 198 | operation = ADD; 199 | } else if (OperatorToken::is_subtraction_token(operator_token)) { 200 | operation = SUB; 201 | } else if (OperatorToken::is_multiplication_token(operator_token)) { 202 | operation = MULT; 203 | } else if (OperatorToken::is_division_token(operator_token)) { 204 | operation = DIV; 205 | } 206 | return operation; 207 | } 208 | 209 | void Backend::check_for_print_statements(Node* node) 210 | { 211 | if (node->label == OUT) { 212 | traverse_children(node); 213 | std::string temp_var = code_generator->get_and_store_temp_var(); 214 | code_generator->print_to_target(WRITE + " " + temp_var); 215 | } 216 | } 217 | 218 | void Backend::check_for_ifstat(Node* node) 219 | { 220 | if (node->label == IFSTAT) { 221 | std::string label = code_generator->get_label(); 222 | 223 | evaluate_condition(node, label); 224 | 225 | code_generator->print_label(label); 226 | } 227 | } 228 | 229 | void Backend::check_for_loop(Node* node) 230 | { 231 | if (node->label == LOOP) { 232 | std::string loop_label = code_generator->get_label(); 233 | std::string out_label = code_generator->get_label(); 234 | 235 | code_generator->print_label(loop_label); 236 | 237 | evaluate_condition(node, out_label); 238 | 239 | code_generator->print_to_target(BREAK + " " + loop_label); 240 | code_generator->print_label(out_label); 241 | } 242 | } 243 | 244 | std::vector Backend::get_break_conditions(Token relational_operator) 245 | { 246 | std::vector break_conditions; 247 | if (OperatorToken::is_less_than_token(relational_operator)) { 248 | break_conditions.push_back(BREAK_ZERO_OR_POSITIVE); 249 | } 
else if (OperatorToken::is_greater_than_token(relational_operator)) { 250 | break_conditions.push_back(BREAK_ZERO_OR_NEGATIVE); 251 | } else if (OperatorToken::is_equals_token(relational_operator)) { 252 | break_conditions.push_back(BREAK_NEGATIVE); 253 | break_conditions.push_back(BREAK_POSITIVE); 254 | } 255 | return break_conditions; 256 | } 257 | 258 | /** 259 | * Evaluates conditions for and nodes 260 | */ 261 | void Backend::evaluate_condition(Node* node, std::string out_label) 262 | { 263 | traverse_child(node, 2); 264 | 265 | std::string temp_var = code_generator->get_and_store_temp_var(); 266 | 267 | traverse_child(node, 0); 268 | 269 | code_generator->print_to_target(SUB + " " + temp_var); 270 | 271 | Node* second_child = node->children[1]; 272 | Token relational_operator = second_child->tokens[0]; 273 | 274 | std::vector break_conditions = get_break_conditions(relational_operator); 275 | for (auto break_condition : break_conditions) { 276 | code_generator->print_to_target(break_condition + " " + out_label); 277 | } 278 | 279 | traverse_child(node, 3); 280 | } -------------------------------------------------------------------------------- /src/compiler/frontend/parser/parser.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #include "parser.h" 4 | #include "../scanner/scanner.h" 5 | #include "../../token/token.h" 6 | 7 | Parser::Parser(std::string filename) 8 | { 9 | scanner = new Scanner(filename); 10 | } 11 | 12 | Parser::~Parser() 13 | { 14 | delete scanner; 15 | } 16 | 17 | Node* Parser::parse() 18 | { 19 | Node* root; 20 | token = scanner->read(); 21 | root = S(); 22 | if (token.is_eof()) { 23 | return root; 24 | } 25 | print_error_and_exit(); 26 | } 27 | 28 | /** 29 | * -> program 30 | */ 31 | Node* Parser::S() 32 | { 33 | int level = 1; 34 | if (KeywordToken::is_program_token(token)) { 35 | Node* node = Node::of(START, level); 36 | token = scanner->read(); 37 | 
node->append_child(vars(level)); 38 | node->append_child(block(level)); 39 | return node; 40 | } 41 | print_error_and_exit(); 42 | } 43 | 44 | /** 45 | * -> start end 46 | */ 47 | Node* Parser::block(int level) 48 | { 49 | level++; 50 | if (KeywordToken::is_start_token(token)) { 51 | Node* node = Node::of(BLOCK, level); 52 | token = scanner->read(); 53 | node->append_child(vars(level)); 54 | node->append_child(stats(level)); 55 | if (KeywordToken::is_end_token(token)) { 56 | token = scanner->read(); 57 | return node; 58 | } 59 | } 60 | print_error_and_exit(); 61 | } 62 | 63 | /** 64 | * -> var Identifier | empty 65 | */ 66 | Node* Parser::vars(int level) 67 | { 68 | if (KeywordToken::is_var_token(token)) { 69 | level++; 70 | Node* node = Node::of(VARS, level); 71 | token = scanner->read(); 72 | if (token.is_identifier()) { 73 | node->append_token(token); 74 | token = scanner->read(); 75 | node->append_child(vars(level)); 76 | return node; 77 | } else { 78 | print_error_and_exit(); 79 | } 80 | } else { 81 | return NULL; 82 | } 83 | } 84 | 85 | /** 86 | * -> + | - 87 | * -> / | * 88 | * -> 89 | */ 90 | Node* Parser::expr(int level) 91 | { 92 | level++; 93 | Node* node = Node::of(EXPR, level); 94 | node->append_child(H(level)); 95 | if (OperatorToken::is_addition_token(token)) { 96 | node->append_token(token); 97 | token = scanner->read(); 98 | node->append_child(expr(level)); 99 | return node; 100 | } else if (OperatorToken::is_subtraction_token(token)) { 101 | node->append_token(token); 102 | token = scanner->read(); 103 | node->append_child(expr(level)); 104 | return node; 105 | } else if (OperatorToken::is_division_token(token)) { 106 | node->append_token(token); 107 | token = scanner->read(); 108 | node->append_child(expr(level)); 109 | return node; 110 | } else if (OperatorToken::is_multiplication_token(token)) { 111 | node->append_token(token); 112 | token = scanner->read(); 113 | node->append_child(expr(level)); 114 | return node; 115 | } 116 | return node; 
117 | } 118 | 119 | /** 120 | * -> # | 121 | */ 122 | Node* Parser::H(int level) 123 | { 124 | level++; 125 | Node* node = Node::of(HASH, level); 126 | if (OperatorToken::is_negation_token(token)) { 127 | node->append_token(token); 128 | token = scanner->read(); 129 | node->append_child(R(level)); 130 | return node; 131 | } else { 132 | node->append_child(R(level)); 133 | return node; 134 | } 135 | } 136 | 137 | /** 138 | * -> ( ) | Identifier | Integer 139 | */ 140 | Node* Parser::R(int level) 141 | { 142 | level++; 143 | Node* node = Node::of(R_LETTER, level); 144 | if (DelimiterToken::is_left_parentheses_token(token)) { 145 | token = scanner->read(); 146 | node->append_child(expr(level)); 147 | if (DelimiterToken::is_right_parentheses_token(token)) { 148 | token = scanner->read(); 149 | return node; 150 | } 151 | } else if (token.is_identifier()) { 152 | node->append_token(token); 153 | token = scanner->read(); 154 | return node; 155 | } else if (token.is_integer()) { 156 | node->append_token(token); 157 | token = scanner->read(); 158 | return node; 159 | } 160 | print_error_and_exit(); 161 | } 162 | 163 | /** 164 | * -> 165 | */ 166 | Node* Parser::stats(int level) 167 | { 168 | level++; 169 | Node* node = Node::of(STAT, level); 170 | node->append_child(stat(level)); 171 | node->append_child(m_stat(level)); 172 | return node; 173 | } 174 | 175 | /** 176 | * -> | empty 177 | */ 178 | Node* Parser::m_stat(int level) 179 | { 180 | if (is_first_of_stats(token)) { 181 | level++; 182 | Node* node = Node::of(M_STAT, level); 183 | node->append_child(stats(level)); 184 | return node; 185 | } else { 186 | return NULL; 187 | } 188 | } 189 | 190 | bool Parser::is_first_of_stats(Token token) 191 | { 192 | return KeywordToken::is_read_token(token) || 193 | KeywordToken::is_print_token(token) || 194 | KeywordToken::is_start_token(token) || 195 | KeywordToken::is_if_token(token) || 196 | KeywordToken::is_iter_token(token) || 197 | KeywordToken::is_let_token(token); 198 | } 199 
| 200 | /** 201 | * -> , | , | , | , | , | , 202 | */ 203 | Node* Parser::stat(int level) 204 | { 205 | level++; 206 | Node* node = Node::of(STAT, level); 207 | if (KeywordToken::is_read_token(token)) { 208 | node->append_child(in(level)); 209 | check_for_comma_token(); 210 | return node; 211 | } else if (KeywordToken::is_print_token(token)) { 212 | node->append_child(out(level)); 213 | check_for_comma_token(); 214 | return node; 215 | } else if (KeywordToken::is_start_token(token)) { 216 | node->append_child(block(level)); 217 | check_for_comma_token(); 218 | return node; 219 | } else if (KeywordToken::is_if_token(token)) { 220 | node->append_child(ifstat(level)); 221 | check_for_comma_token(); 222 | return node; 223 | } else if (KeywordToken::is_iter_token(token)) { 224 | node->append_child(loop(level)); 225 | check_for_comma_token(); 226 | return node; 227 | } else if (KeywordToken::is_let_token(token)) { 228 | node->append_child(assign(level)); 229 | check_for_comma_token(); 230 | return node; 231 | } 232 | print_error_and_exit(); 233 | } 234 | 235 | void Parser::check_for_comma_token() 236 | { 237 | if (DelimiterToken::is_comma_token(token)) { 238 | token = scanner->read(); 239 | } else { 240 | print_error_and_exit(); 241 | } 242 | } 243 | 244 | /** 245 | * -> read Identifier 246 | */ 247 | Node* Parser::in(int level) 248 | { 249 | if (KeywordToken::is_read_token(token)) { 250 | level++; 251 | Node* node = Node::of(IN, level); 252 | token = scanner->read(); 253 | if (token.is_identifier()) { 254 | node->append_token(token); 255 | token = scanner->read(); 256 | return node; 257 | } 258 | } 259 | print_error_and_exit(); 260 | } 261 | 262 | /** 263 | * -> print 264 | */ 265 | Node* Parser::out(int level) 266 | { 267 | level++; 268 | if (KeywordToken::is_print_token(token)) { 269 | Node* node = Node::of(OUT, level); 270 | token = scanner->read(); 271 | node->append_child(expr(level)); 272 | return node; 273 | } 274 | print_error_and_exit(); 275 | } 276 | 277 | /** 
278 | * -> if ( ) 279 | */ 280 | Node* Parser::ifstat(int level) 281 | { 282 | level++; 283 | if (KeywordToken::is_if_token(token)) { 284 | Node* node = Node::of(IFSTAT, level); 285 | token = scanner->read(); 286 | if (DelimiterToken::is_left_parentheses_token(token)) { 287 | token = scanner->read(); 288 | node->append_child(expr(level)); 289 | node->append_child(O(level)); 290 | node->append_child(expr(level)); 291 | if (DelimiterToken::is_right_parentheses_token(token)) { 292 | token = scanner->read(); 293 | node->append_child(stat(level)); 294 | return node; 295 | } 296 | } 297 | } 298 | print_error_and_exit(); 299 | } 300 | 301 | /** 302 | * -> iter ( ) 303 | */ 304 | Node* Parser::loop(int level) 305 | { 306 | level++; 307 | if (KeywordToken::is_iter_token(token)) { 308 | Node* node = Node::of(LOOP, level); 309 | token = scanner->read(); 310 | if (DelimiterToken::is_left_parentheses_token(token)) { 311 | token = scanner->read(); 312 | node->append_child(expr(level)); 313 | node->append_child(O(level)); 314 | node->append_child(expr(level)); 315 | if (DelimiterToken::is_right_parentheses_token(token)) { 316 | token = scanner->read(); 317 | node->append_child(stat(level)); 318 | return node; 319 | } 320 | } 321 | } 322 | print_error_and_exit(); 323 | } 324 | 325 | /** 326 | * -> let Identifier = 327 | */ 328 | Node* Parser::assign(int level) 329 | { 330 | level++; 331 | if (KeywordToken::is_let_token(token)) { 332 | Node* node = Node::of(ASSIGN, level); 333 | token = scanner->read(); 334 | if (token.is_identifier()) { 335 | node->append_token(token); 336 | token = scanner->read(); 337 | if (OperatorToken::is_assignment_token(token)) { 338 | token = scanner->read(); 339 | node->append_child(expr(level)); 340 | return node; 341 | } 342 | } 343 | } 344 | print_error_and_exit(); 345 | } 346 | 347 | /** 348 | * -> < | > | : 349 | */ 350 | Node* Parser::O(int level) 351 | { 352 | level++; 353 | Node* node = Node::of(OPERATOR, level); 354 | if (is_O_token(token)) { 355 
| node->append_token(token); 356 | token = scanner->read(); 357 | return node; 358 | } 359 | print_error_and_exit(); 360 | } 361 | 362 | bool Parser::is_O_token(Token token) 363 | { 364 | return OperatorToken::is_less_than_token(token) || 365 | OperatorToken::is_greater_than_token(token) || 366 | OperatorToken::is_equals_token(token); 367 | } 368 | 369 | void Parser::print_error_and_exit() 370 | { 371 | std::cerr << "Parser print_error_and_exit: " << token << std::endl; 372 | exit(1); 373 | } --------------------------------------------------------------------------------