├── .clang-format ├── .gitignore ├── LICENSE ├── Makefile ├── README.md └── src ├── error_handler ├── error_handler.cpp └── error_handler.hpp ├── main.cpp ├── parser ├── parser.cpp └── parser.hpp ├── scanner ├── scanner.cpp ├── scanner.hpp ├── token.cpp └── token.hpp └── tools ├── ast_generator.cpp └── ast_printer.hpp /.clang-format: -------------------------------------------------------------------------------- 1 | Language: Cpp 2 | # BasedOnStyle: LLVM 3 | # This was generated using: $ clang-format -style=llvm -dump-config > .clang-format 4 | # and then small modifications were made on top. 5 | AccessModifierOffset: -2 6 | AlignAfterOpenBracket: Align 7 | AlignConsecutiveAssignments: true 8 | AlignConsecutiveDeclarations: false 9 | AlignEscapedNewlinesLeft: false 10 | AlignOperands: true 11 | AlignTrailingComments: true 12 | AllowAllParametersOfDeclarationOnNextLine: true 13 | AllowShortBlocksOnASingleLine: false 14 | AllowShortCaseLabelsOnASingleLine: false 15 | AllowShortFunctionsOnASingleLine: Empty 16 | AllowShortIfStatementsOnASingleLine: false 17 | AllowShortLoopsOnASingleLine: false 18 | AlwaysBreakAfterDefinitionReturnType: None 19 | AlwaysBreakAfterReturnType: None 20 | AlwaysBreakBeforeMultilineStrings: false 21 | AlwaysBreakTemplateDeclarations: false 22 | BinPackArguments: true 23 | BinPackParameters: true 24 | BraceWrapping: 25 | AfterClass: false 26 | AfterControlStatement: false 27 | AfterEnum: false 28 | AfterFunction: false 29 | AfterNamespace: false 30 | AfterObjCDeclaration: false 31 | AfterStruct: false 32 | AfterUnion: false 33 | BeforeCatch: false 34 | BeforeElse: false 35 | IndentBraces: false 36 | BreakBeforeBinaryOperators: None 37 | BreakBeforeBraces: Attach 38 | BreakBeforeTernaryOperators: true 39 | BreakConstructorInitializersBeforeComma: true 40 | ColumnLimit: 80 41 | CommentPragmas: '^ IWYU pragma:' 42 | ConstructorInitializerAllOnOneLineOrOnePerLine: false 43 | ConstructorInitializerIndentWidth: 4 44 | ContinuationIndentWidth: 4 45 | Cpp11BracedListStyle: true 46 | DerivePointerAlignment: false 47 | DisableFormat: false 48 | ExperimentalAutoDetectBinPacking: false 49 | ForEachMacros: [ foreach, Q_FOREACH, BOOST_FOREACH ] 50 | IncludeCategories: 51 | - Regex: '^"(llvm|llvm-c|clang|clang-c)/' 52 | Priority: 2 53 | - Regex: '^(<|"(gtest|isl|json)/)' 54 | Priority: 3 55 | - Regex: '.*' 56 | Priority: 1 57 | IndentCaseLabels: true 58 | IndentWidth: 4 59 | IndentWrappedFunctionNames: false 60 | KeepEmptyLinesAtTheStartOfBlocks: true 61 | MacroBlockBegin: '' 62 | MacroBlockEnd: '' 63 | MaxEmptyLinesToKeep: 1 64 | NamespaceIndentation: All 65 | ObjCBlockIndentWidth: 2 66 | ObjCSpaceAfterProperty: false 67 | ObjCSpaceBeforeProtocolList: true 68 | PenaltyBreakBeforeFirstCallParameter: 19 69 | PenaltyBreakComment: 300 70 | PenaltyBreakFirstLessLess: 120 71 | PenaltyBreakString: 1000 72 | PenaltyExcessCharacter: 1000000 73 | PenaltyReturnTypeOnItsOwnLine: 60 74 | PointerAlignment: Left 75 | ReflowComments: true 76 | SortIncludes: true 77 | SpaceAfterCStyleCast: false 78 | SpaceBeforeAssignmentOperators: true 79 | SpaceBeforeParens: ControlStatements 80 | SpaceInEmptyParentheses: false 81 | SpacesBeforeTrailingComments: 1 82 | SpacesInAngles: false 83 | SpacesInContainerLiterals: true 84 | SpacesInCStyleCastParentheses: false 85 | SpacesInParentheses: false 86 | SpacesInSquareBrackets: false 87 | Standard: Cpp11 88 | TabWidth: 8 89 | UseTab: Never -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | build/ 2 | src/Expr.hpp -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2017 Syed Paymaan Raza 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | CC := clang++ 2 | CFLAGS := -c -g -Werror -std=c++14 3 | SRC_DIR := src 4 | BUILD_DIR := build 5 | 6 | all: pre_setup format $(BUILD_DIR)/lox 7 | 8 | $(BUILD_DIR)/lox: $(BUILD_DIR)/main.o $(BUILD_DIR)/scanner.o $(BUILD_DIR)/token.o $(BUILD_DIR)/error_handler.o $(BUILD_DIR)/parser.o 9 | $(CC) $^ -o $@ 10 | 11 | $(BUILD_DIR)/main.o: $(SRC_DIR)/main.cpp 12 | $(CC) $(CFLAGS) $< -o $@ 13 | 14 | $(BUILD_DIR)/scanner.o: $(SRC_DIR)/scanner/scanner.cpp 15 | $(CC) $(CFLAGS) $< -o $@ 16 | 17 | $(BUILD_DIR)/token.o: $(SRC_DIR)/scanner/token.cpp 18 | $(CC) $(CFLAGS) $< -o $@ 19 | 20 | $(BUILD_DIR)/error_handler.o: $(SRC_DIR)/error_handler/error_handler.cpp 21 | $(CC) $(CFLAGS) $< -o $@ 22 | 23 | $(BUILD_DIR)/parser.o: $(SRC_DIR)/parser/parser.cpp $(BUILD_DIR)/token.o 24 | $(CC) $(CFLAGS) $< -o $@ 25 | 26 | format: 27 | find . -type f -name "*.?pp" | xargs clang-format -i 28 | 29 | pre_setup: 30 | mkdir -p $(BUILD_DIR) 31 | $(CC) -std=c++14 $(SRC_DIR)/tools/ast_generator.cpp -o $(BUILD_DIR)/ast_generator 32 | ./$(BUILD_DIR)/ast_generator $(SRC_DIR) 33 | 34 | clean: 35 | rm -rf $(BUILD_DIR) 36 | 37 | # Run the lox interpreter 38 | run: 39 | ./$(BUILD_DIR)/lox 40 | 41 | .PHONY: pre_setup 42 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # lox-cpp 2 | A C++ port of the Lox programming language 3 | 4 | [Lox](http://www.craftinginterpreters.com/the-lox-language.html) is part of the [Crafting Interpreters](http://www.craftinginterpreters.com/) book by [Bob Nystrom](https://github.com/munificent). 5 | 6 | The original Java implementation can be found [here](https://github.com/munificent/craftinginterpreters). 7 | -------------------------------------------------------------------------------- /src/error_handler/error_handler.cpp: -------------------------------------------------------------------------------- 1 | #include "error_handler.hpp" 2 | #include 3 | 4 | using namespace lox; 5 | 6 | ErrorHandler::ErrorHandler() 7 | : errorList() 8 | , foundError(false) {} 9 | 10 | void ErrorHandler::report() const { 11 | for (const auto error : errorList) { 12 | std::cout << "[line " + std::to_string(error.line) + "] Error " + 13 | error.where + ": " + error.message 14 | << std::endl; 15 | } 16 | } 17 | 18 | void ErrorHandler::add(int line, const std::string& where, 19 | const std::string& message) { 20 | errorList.push_back({line, where, message}); 21 | foundError = true; 22 | } 23 | 24 | void ErrorHandler::clear() { 25 | errorList.clear(); 26 | } 27 | -------------------------------------------------------------------------------- /src/error_handler/error_handler.hpp: -------------------------------------------------------------------------------- 1 | #ifndef ERROR_HANDLER_HPP 2 | #define ERROR_HANDLER_HPP 3 | 4 | #include 5 | #include 6 | 7 | namespace lox { 8 | class ErrorHandler { 9 | public: 10 | struct ErrorInfo { 11 | int line; 12 | std::string where; 13 | std::string message; 14 | }; 15 | ErrorHandler(); 16 | void report() const; 17 | void add(int line, const std::string& where, 18 | const std::string& message); 19 | void clear(); 20 | bool foundError; 21 | 22 | private: 23 | std::vector errorList; 24 | }; 25 | } // namespace lox 26 | 27 | #endif // ERROR_HANDLER_HPP 28 | -------------------------------------------------------------------------------- /src/main.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | 6 | #include "error_handler/error_handler.hpp" 7 | #include "parser/parser.hpp" 8 | #include "scanner/scanner.hpp" 9 | #include "tools/ast_printer.hpp" 10 | 11 | namespace lox { 12 | static void run(const std::string& source, ErrorHandler& errorHandler) { 13 | /// scanner 14 | Scanner scanner(source, errorHandler); 15 | const auto tokens = scanner.scanAndGetTokens(); 16 | // if found error during scanning, report 17 | if (errorHandler.foundError) { 18 | errorHandler.report(); 19 | return; 20 | } 21 | /// parser 22 | Parser parser(tokens, errorHandler); 23 | auto expr = parser.parse(); 24 | // if found error during parsing, report 25 | if (errorHandler.foundError) { 26 | errorHandler.report(); 27 | return; 28 | } 29 | /// print ast 30 | ASTPrinter pp; 31 | pp.print(expr); 32 | std::cout << std::endl; 33 | } 34 | 35 | static void runFile(const std::string& path, ErrorHandler& errorHandler) { 36 | std::ifstream file(path); 37 | std::ostringstream stream; 38 | stream << file.rdbuf(); 39 | file.close(); 40 | run(stream.str(), errorHandler); 41 | } 42 | 43 | static void runPrompt(ErrorHandler& errorHandler) { 44 | while (true) { 45 | std::cout << "> "; 46 | std::string line; 47 | getline(std::cin, line); 48 | run(line, errorHandler); 49 | if (errorHandler.foundError) { 50 | errorHandler.clear(); 51 | } 52 | } 53 | } 54 | } // namespace lox 55 | 56 | int main(int argc, char** argv) { 57 | lox::ErrorHandler errorHandler; 58 | if (argc > 2) { 59 | std::cout << "Usage: lox [filename]" << std::endl; 60 | } else if (argc == 2) { 61 | lox::runFile(argv[1], errorHandler); 62 | } else { 63 | lox::runPrompt(errorHandler); 64 | } 65 | return 0; 66 | } 67 | -------------------------------------------------------------------------------- /src/parser/parser.cpp: -------------------------------------------------------------------------------- 1 | #include "parser.hpp" 2 | #include "../error_handler/error_handler.hpp" 3 | #include 4 | 5 | using namespace lox; 6 | 7 | ParseError::ParseError(std::string msg, Token token) 8 | : std::runtime_error(msg) 9 | , token_(token) {} 10 | 11 | Parser::Parser(const std::vector& tokens, ErrorHandler& errorHandler) 12 | : current(0) 13 | , tokens_(tokens) 14 | , errorHandler_(errorHandler) {} 15 | 16 | Expr* Parser::expression() { 17 | return equality(); 18 | } 19 | 20 | Expr* Parser::equality() { 21 | Expr* expr = comparison(); 22 | while (match({TokenType::BANG_EQUAL, TokenType::EQUAL_EQUAL})) { 23 | Token Operator = previous(); 24 | Expr* right = comparison(); 25 | expr = new BinaryExpr(expr, Operator, right); 26 | } 27 | return expr; 28 | } 29 | 30 | Expr* Parser::comparison() { 31 | Expr* expr = term(); 32 | while ( 33 | match({TokenType::GREATER, TokenType::LESS, TokenType::LESS_EQUAL})) { 34 | Token Operator = previous(); 35 | Expr* right = term(); 36 | expr = new BinaryExpr(expr, Operator, right); 37 | } 38 | return expr; 39 | } 40 | 41 | Expr* Parser::term() { 42 | Expr* expr = factor(); 43 | while (match({TokenType::MINUS, TokenType::PLUS})) { 44 | Token Operator = previous(); 45 | Expr* right = factor(); 46 | expr = new BinaryExpr(expr, Operator, right); 47 | } 48 | return expr; 49 | } 50 | 51 | Expr* Parser::factor() { 52 | Expr* expr = unary(); 53 | while (match({TokenType::SLASH, TokenType::STAR})) { 54 | Token Operator = previous(); 55 | Expr* right = unary(); 56 | expr = new BinaryExpr(expr, Operator, right); 57 | } 58 | return expr; 59 | } 60 | 61 | Expr* Parser::unary() { 62 | if (match({TokenType::BANG, TokenType::MINUS})) { 63 | Token Operator = previous(); 64 | Expr* right = unary(); 65 | return new UnaryExpr(Operator, right); 66 | } 67 | return primary(); 68 | } 69 | 70 | Expr* Parser::primary() { 71 | if (match({TokenType::FALSE})) 72 | return new LiteralExpr("false"); 73 | if (match({TokenType::TRUE})) 74 | return new LiteralExpr("true"); 75 | if (match({TokenType::NIL})) 76 | return new LiteralExpr("nil"); 77 | if (match({TokenType::NUMBER, TokenType::STRING})) 78 | return new LiteralExpr(previous().literal); 79 | if (match({TokenType::LEFT_PAREN})) { 80 | Expr* expr = expression(); 81 | consume(TokenType::RIGHT_PAREN, "Exppect ')' after expression."); 82 | return new GroupingExpr(expr); 83 | } 84 | throw error(peek(), "Expect expression."); 85 | return nullptr; 86 | } 87 | 88 | Expr* Parser::parse() { 89 | try { 90 | return expression(); 91 | } catch (ParseError error) { 92 | return nullptr; 93 | } 94 | } 95 | Token Parser::consume(TokenType type, std::string message) { 96 | if (check(type)) 97 | return advance(); 98 | throw error(peek(), message); 99 | } 100 | 101 | ParseError Parser::error(Token token, std::string message) { 102 | if (token.type == TokenType::END_OF_FILE) { 103 | errorHandler_.add(token.line, " at end", message); 104 | } else { 105 | errorHandler_.add(token.line, "at '" + token.lexeme + "'", message); 106 | } 107 | errorHandler_.report(); 108 | return *new ParseError(message, token); 109 | } 110 | 111 | bool Parser::match(const std::vector& types) { 112 | for (auto type : types) { 113 | if (check(type)) { 114 | advance(); 115 | return true; 116 | } 117 | } 118 | return false; 119 | } 120 | 121 | Token Parser::previous() { 122 | return tokens_[current - 1]; 123 | } 124 | 125 | Token Parser::advance() { 126 | if (!isAtEnd()) 127 | ++current; 128 | return previous(); 129 | } 130 | 131 | Token Parser::peek() { 132 | return tokens_[current]; 133 | } 134 | 135 | bool Parser::isAtEnd() { 136 | return peek().type == TokenType::END_OF_FILE; 137 | } 138 | 139 | bool Parser::check(TokenType type) { 140 | if (isAtEnd()) 141 | return false; 142 | return peek().type == type; 143 | } 144 | -------------------------------------------------------------------------------- /src/parser/parser.hpp: -------------------------------------------------------------------------------- 1 | #ifndef PARSER_HPP 2 | #define PARSER_HPP 3 | 4 | #include "../Expr.hpp" 5 | #include "../scanner/token.hpp" 6 | #include 7 | #include 8 | #include 9 | 10 | namespace lox { 11 | // forward declarations 12 | class ErrorHandler; 13 | 14 | class ParseError : public std::runtime_error { 15 | public: 16 | ParseError(std::string msg, Token token); 17 | Token token_; 18 | }; 19 | 20 | class Parser { 21 | public: 22 | Parser(const std::vector& tokens, ErrorHandler& errorHandler); 23 | size_t current; 24 | Expr* expression(); 25 | Expr* equality(); 26 | Expr* comparison(); 27 | Expr* term(); 28 | Expr* factor(); 29 | Expr* unary(); 30 | Expr* primary(); 31 | Expr* parse(); 32 | ParseError error(Token token, std::string message); 33 | 34 | private: 35 | bool match(const std::vector& types); 36 | Token previous(); 37 | Token advance(); 38 | Token peek(); 39 | bool isAtEnd(); 40 | bool check(TokenType type); 41 | Token consume(TokenType type, std::string message); 42 | ErrorHandler& errorHandler_; 43 | std::vector tokens_; 44 | }; 45 | } // namespace lox 46 | 47 | #endif // PARSER_HPP 48 | -------------------------------------------------------------------------------- /src/scanner/scanner.cpp: -------------------------------------------------------------------------------- 1 | #include "scanner.hpp" 2 | #include "../error_handler/error_handler.hpp" 3 | 4 | using namespace lox; 5 | 6 | Scanner::Scanner(const std::string& aSource, ErrorHandler& aErrorHandler) 7 | : start(0) 8 | , current(0) 9 | , line(1) 10 | , source(aSource) 11 | , errorHandler(aErrorHandler) { 12 | // initialize reserved keywords map 13 | reservedKeywords["and"] = TokenType::AND; 14 | reservedKeywords["class"] = TokenType::CLASS; 15 | reservedKeywords["else"] = TokenType::ELSE; 16 | reservedKeywords["false"] = TokenType::FALSE; 17 | reservedKeywords["for"] = TokenType::FOR; 18 | reservedKeywords["fun"] = TokenType::FUN; 19 | reservedKeywords["if"] = TokenType::IF; 20 | reservedKeywords["nil"] = TokenType::NIL; 21 | reservedKeywords["or"] = TokenType::OR; 22 | reservedKeywords["print"] = TokenType::PRINT; 23 | reservedKeywords["return"] = TokenType::RETURN; 24 | reservedKeywords["super"] = TokenType::SUPER; 25 | reservedKeywords["this"] = TokenType::THIS; 26 | reservedKeywords["true"] = TokenType::TRUE; 27 | reservedKeywords["var"] = TokenType::VAR; 28 | reservedKeywords["while"] = TokenType::WHILE; 29 | } 30 | 31 | char Scanner::advanceAndGetChar() { 32 | ++current; 33 | return source[current - 1]; 34 | } 35 | 36 | void Scanner::scanAndAddToken() { 37 | const char c = advanceAndGetChar(); 38 | switch (c) { 39 | case '(': 40 | addToken(TokenType::LEFT_PAREN); 41 | break; 42 | case ')': 43 | addToken(TokenType::RIGHT_PAREN); 44 | break; 45 | case '{': 46 | addToken(TokenType::LEFT_BRACE); 47 | break; 48 | case '}': 49 | addToken(TokenType::RIGHT_BRACE); 50 | break; 51 | case ',': 52 | addToken(TokenType::COMMA); 53 | break; 54 | case '.': 55 | addToken(TokenType::DOT); 56 | break; 57 | case '-': 58 | addToken(TokenType::MINUS); 59 | break; 60 | case '+': 61 | addToken(TokenType::PLUS); 62 | break; 63 | case ';': 64 | addToken(TokenType::SEMICOLON); 65 | break; 66 | case '*': 67 | addToken(TokenType::STAR); 68 | break; 69 | case '!': 70 | addToken(matchAndAdvance('=') ? TokenType::BANG_EQUAL 71 | : TokenType::BANG); 72 | break; 73 | case '=': 74 | addToken(matchAndAdvance('=') ? TokenType::EQUAL_EQUAL 75 | : TokenType::EQUAL); 76 | break; 77 | case '<': 78 | addToken(matchAndAdvance('=') ? TokenType::LESS_EQUAL 79 | : TokenType::LESS); 80 | break; 81 | case '>': 82 | addToken(matchAndAdvance('=') ? TokenType::GREATER_EQUAL 83 | : TokenType::GREATER); 84 | break; 85 | case '/': 86 | if (matchAndAdvance('/')) { 87 | // a comment goes until the end of the line. 88 | while (peek() != '\n' && !isAtEnd()) 89 | (void)advanceAndGetChar(); 90 | } else { 91 | addToken(TokenType::SLASH); 92 | } 93 | break; 94 | case '"': 95 | string(); 96 | break; 97 | case ' ': 98 | case '\r': 99 | case '\t': 100 | // ignore whitespace 101 | break; 102 | case '\n': 103 | ++line; 104 | break; 105 | default: { 106 | if (isDigit(c)) { 107 | number(); 108 | } else if (isAlpha(c)) { 109 | identifier(); 110 | } else { 111 | std::string errorMessage = "Unexpected character: "; 112 | errorMessage += c; 113 | errorHandler.add(line, "", errorMessage); 114 | break; 115 | } 116 | } 117 | } 118 | } 119 | 120 | bool Scanner::isAlpha(const char c) const { 121 | return (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || c == '_'; 122 | } 123 | 124 | bool Scanner::isAlphaNumeric(const char c) const { 125 | return isAlpha(c) || isDigit(c); 126 | } 127 | 128 | void Scanner::identifier() { 129 | // using "maximal munch" 130 | // e.g. match "orchid" not "or" keyword and "chid" 131 | while (isAlphaNumeric(peek())) 132 | (void)advanceAndGetChar(); 133 | // see if the identifier is a reserved keyword 134 | const size_t identifierLength = current - start; 135 | const std::string identifier = source.substr(start, identifierLength); 136 | const bool isReservedKeyword = 137 | reservedKeywords.find(identifier) != reservedKeywords.end(); 138 | if (isReservedKeyword) { 139 | addToken(reservedKeywords[identifier]); 140 | } else { 141 | addToken(TokenType::IDENTIFIER); 142 | } 143 | } 144 | 145 | bool Scanner::isDigit(const char c) const { 146 | return c >= '0' && c <= '9'; 147 | } 148 | 149 | void Scanner::number() { 150 | while (isDigit(peek())) 151 | (void)advanceAndGetChar(); 152 | // look for fractional part 153 | if (peek() == '.' && isDigit(peekNext())) { 154 | // consume the "." 155 | (void)advanceAndGetChar(); 156 | while (isDigit(peek())) 157 | (void)advanceAndGetChar(); 158 | } 159 | const size_t numberLength = current - start; 160 | const std::string numberLiteral = source.substr(start, numberLength); 161 | addToken(TokenType::NUMBER, numberLiteral); 162 | } 163 | 164 | void Scanner::string() { 165 | while (peek() != '"' && !isAtEnd()) { 166 | if (peek() == '\n') 167 | ++line; 168 | (void)advanceAndGetChar(); 169 | } 170 | // unterminated string 171 | if (isAtEnd()) { 172 | errorHandler.add(line, "", "Unterminated string."); 173 | return; 174 | } 175 | // closing " 176 | (void)advanceAndGetChar(); 177 | const size_t stringSize = current - start; 178 | // trim the surrounding quotes 179 | const std::string stringLiteral = source.substr(start + 1, stringSize - 2); 180 | addToken(TokenType::STRING, stringLiteral); 181 | } 182 | 183 | void Scanner::addToken(const TokenType aTokenType, const std::string& value) { 184 | const size_t lexemeSize = current - start; 185 | const auto lexeme = source.substr(start, lexemeSize); 186 | tokens.push_back(Token(aTokenType, lexeme, value, line)); 187 | } 188 | 189 | void Scanner::addToken(const TokenType aTokenType) { 190 | addToken(aTokenType, ""); 191 | } 192 | 193 | bool Scanner::isAtEnd() const { 194 | return current >= source.size(); 195 | } 196 | 197 | bool Scanner::matchAndAdvance(const char aExpected) { 198 | if (isAtEnd()) 199 | return false; 200 | if (source[current] != aExpected) 201 | return false; 202 | ++current; 203 | return true; 204 | } 205 | 206 | char Scanner::peekNext() const { 207 | if (current + 1 >= source.length()) 208 | return '\0'; 209 | return source[current + 1]; 210 | } 211 | char Scanner::peek() const { 212 | if (isAtEnd()) 213 | return '\0'; 214 | return source[current]; 215 | } 216 | 217 | std::vector Scanner::scanAndGetTokens() { 218 | while (!isAtEnd()) { 219 | // we are at the beginning of the next lexeme 220 | start = current; 221 | scanAndAddToken(); 222 | } 223 | tokens.push_back(Token(TokenType::END_OF_FILE, "", "", line)); 224 | return tokens; 225 | } 226 | -------------------------------------------------------------------------------- /src/scanner/scanner.hpp: -------------------------------------------------------------------------------- 1 | #ifndef SCANNER_HPP 2 | #define SCANNER_HPP 3 | 4 | #include 5 | #include 6 | #include 7 | 8 | #include "token.hpp" 9 | 10 | namespace lox { 11 | // forward declarations 12 | class ErrorHandler; 13 | 14 | class Scanner { 15 | public: 16 | Scanner(const std::string& aSource, ErrorHandler& aErrorHandler); 17 | std::vector scanAndGetTokens(); 18 | 19 | private: 20 | /// @brief advance and get current char 21 | char advanceAndGetChar(); 22 | ///@brief scans and adds tokens 23 | void scanAndAddToken(); 24 | /// @brief adds token to tokens list 25 | void addToken(TokenType); 26 | /// @brief adds token to token list with the corresponding value (used 27 | /// for literals mostly) 28 | void addToken(TokenType, const std::string&); 29 | 30 | /// @brief scans the entire source and calls processToken on each 31 | bool isAtEnd() const; 32 | /// @brief Returns true iff the given char matches the current char. 33 | /// Also advances current if there is a match. 34 | bool matchAndAdvance(char); 35 | /// @brief Like advance but doesn't consume character - basically 36 | /// "looksahead" 37 | char peek() const; 38 | char peekNext() const; 39 | bool isDigit(char) const; 40 | bool isAlpha(char) const; 41 | bool isAlphaNumeric(char) const; 42 | void string(); 43 | void number(); 44 | void identifier(); 45 | 46 | /// @brief index in source string to first character in current lexeme 47 | size_t start; 48 | /// @brief index in source string to the current lexeme 49 | size_t current; 50 | /// @brief line number of current lexeme 51 | size_t line; 52 | /// @brief string containing the entire lox source code 53 | std::string source; 54 | /// @brief list of all tokens 55 | std::vector tokens; 56 | /// @brief error handler for adding errors when found 57 | ErrorHandler& errorHandler; 58 | /// @brief map of reserved keywords e.g. and, or, for, else, nil etc. 59 | std::unordered_map reservedKeywords; 60 | }; 61 | } // namespace lox 62 | 63 | #endif // SCANNER_HPP 64 | -------------------------------------------------------------------------------- /src/scanner/token.cpp: -------------------------------------------------------------------------------- 1 | #include "token.hpp" 2 | 3 | using namespace lox; 4 | 5 | Token::Token(const TokenType aType, const std::string& aLexeme, 6 | const std::string& aLiteral, const int aLine) 7 | : type(aType) 8 | , lexeme(aLexeme) 9 | , literal(aLiteral) 10 | , line(aLine) {} 11 | 12 | std::string Token::toString() const { 13 | // for string and number literals, use actual value 14 | if (type == TokenType::STRING || type == TokenType::NUMBER) { 15 | return literal; 16 | } 17 | 18 | return lexeme; 19 | } 20 | -------------------------------------------------------------------------------- /src/scanner/token.hpp: -------------------------------------------------------------------------------- 1 | #ifndef TOKEN_HPP 2 | #define TOKEN_HPP 3 | 4 | #include 5 | 6 | namespace lox { 7 | enum class TokenType { 8 | // Single-character tokens. 9 | LEFT_PAREN, 10 | RIGHT_PAREN, 11 | LEFT_BRACE, 12 | RIGHT_BRACE, 13 | COMMA, 14 | DOT, 15 | MINUS, 16 | PLUS, 17 | SEMICOLON, 18 | SLASH, 19 | STAR, 20 | 21 | // One or two character tokens. 22 | BANG, 23 | BANG_EQUAL, 24 | EQUAL, 25 | EQUAL_EQUAL, 26 | GREATER, 27 | GREATER_EQUAL, 28 | LESS, 29 | LESS_EQUAL, 30 | 31 | // Literals. 32 | IDENTIFIER, // user-defined (e.g. variable/type name) or 33 | // language-defined (reserved keyword) 34 | STRING, 35 | NUMBER, 36 | 37 | // Reserved Keywords. 38 | // Reserved keywords ARE identifiers but have seperate token types 39 | AND, 40 | CLASS, 41 | ELSE, 42 | FALSE, 43 | FUN, 44 | FOR, 45 | IF, 46 | NIL, 47 | OR, 48 | PRINT, 49 | RETURN, 50 | SUPER, 51 | THIS, 52 | TRUE, 53 | VAR, 54 | WHILE, 55 | 56 | END_OF_FILE 57 | }; 58 | 59 | class Token { 60 | public: 61 | Token(TokenType aType, const std::string& aLexeme, 62 | const std::string& aLiteral, int aLine); 63 | std::string toString() const; 64 | std::string lexeme; 65 | // @brief literal can be of 3 types: string, number, or identifier 66 | // number literals are tricky and it may seem odd that i'm storing them 67 | // in a string here, but having a "polymorphic" type for literal is more 68 | // work which is why i'm using a string for now and will convert to 69 | // number if needed. 70 | std::string literal; 71 | TokenType type; 72 | int line; 73 | }; 74 | } // namespace lox 75 | 76 | #endif // TOKEN_HPP 77 | -------------------------------------------------------------------------------- /src/tools/ast_generator.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | 7 | namespace so_utils { 8 | // source: 9 | // https://stackoverflow.com/questions/14265581/parse-split-a-string-in-c-using-string-delimiter-standard-c 10 | static std::vector split(const std::string& str, 11 | const std::string& delim) { 12 | std::vector tokens; 13 | size_t prev = 0, pos = 0; 14 | do { 15 | pos = str.find(delim, prev); 16 | if (pos == std::string::npos) 17 | pos = str.length(); 18 | auto token = str.substr(prev, pos - prev); 19 | if (!token.empty()) 20 | tokens.push_back(token); 21 | prev = pos + delim.length(); 22 | } while (pos < str.length() && prev < str.length()); 23 | return tokens; 24 | } 25 | } // namespace so_utils 26 | 27 | class ASTGenerator { 28 | public: 29 | using ASTSpecification = std::pair>; 30 | ASTGenerator(const std::string& aDir, const ASTSpecification aSpec) 31 | : outDir(aDir) 32 | , astSpec(aSpec) {} 33 | void generate() { 34 | std::cout << outDir << std::endl; 35 | defineAST(); 36 | } 37 | void defineAST() { 38 | auto baseName = astSpec.first; 39 | auto path = outDir + "/" + baseName + ".hpp"; 40 | std::ofstream file(path); 41 | if (!file.is_open()) { 42 | std::cout << "Unable to open file." << std::endl; 43 | return; 44 | } 45 | 46 | /// #ifndef guard 47 | file << "#ifndef " + baseName + "_HPP" << std::endl; 48 | file << "#define " + baseName + "_HPP" << std::endl; 49 | 50 | // Expr base abstract interface 51 | file << "#include \"scanner/token.hpp\"" << std::endl; 52 | file << "using namespace lox;" << std::endl; 53 | 54 | // forward declarations 55 | file << "class " << baseName << "; // forward declare" << std::endl; 56 | for (auto type : astSpec.second) { 57 | auto className = type.substr(0, type.find(":")); 58 | file << "class " << className << "; // forward declare" 59 | << std::endl; 60 | } 61 | 62 | defineVisitor(file, baseName); 63 | 64 | file << "class " << baseName << " {" << std::endl; 65 | file << "public:" << std::endl; 66 | file << "virtual ~" << baseName << "() {}" << std::endl; 67 | file << "virtual void accept(" << baseName + "Visitor* visitor) = 0;" 68 | << std::endl; 69 | file << "};" << std::endl; 70 | 71 | // Derived concrete classes 72 | for (auto type : astSpec.second) { 73 | auto className = type.substr(0, type.find(":")); 74 | auto fields = type.substr(type.find(":") + 1, type.size()); 75 | defineType(file, baseName, className, fields); 76 | } 77 | 78 | /// #endif for #ifndef 79 | file << "#endif" << std::endl; 80 | 81 | file.close(); 82 | } 83 | void defineType(std::ofstream& file, const std::string& baseName, 84 | const std::string& className, const std::string fields) { 85 | file << "class " + className + " : public " + baseName + " { " 86 | << std::endl; 87 | file << "public: " << std::endl; 88 | file << className + "("; 89 | auto fieldList = so_utils::split(fields, ","); 90 | bool first = true; 91 | for (auto field : fieldList) { 92 | if (!first) 93 | file << ", "; 94 | if (first) 95 | first = false; 96 | auto fieldType = so_utils::split(field, " ")[0]; 97 | auto fieldName = so_utils::split(field, " ")[1]; 98 | if (!fieldType.compare(baseName)) { 99 | file << fieldType + "* " + fieldName; 100 | } else { 101 | file << fieldType + " " + fieldName; 102 | } 103 | } 104 | file << ") : "; 105 | first = true; 106 | for (auto field : fieldList) { 107 | if (!first) 108 | file << ", "; 109 | if (first) 110 | first = false; 111 | auto fieldName = so_utils::split(field, " ")[1]; 112 | file << fieldName + "(" + fieldName + ")"; 113 | } 114 | file << " {}" << std::endl; 115 | file << "void accept(" << baseName + "Visitor* visitor) override {" 116 | << std::endl; 117 | file << "visitor->visit" << className << "(this);" << std::endl; 118 | file << "}" << std::endl; 119 | file << "public: " << std::endl; 120 | for (auto field : fieldList) { 121 | auto fieldType = so_utils::split(field, " ")[0]; 122 | auto fieldName = so_utils::split(field, " ")[1]; 123 | if (!fieldType.compare(baseName)) { 124 | file << fieldType + "* " + fieldName + ";" << std::endl; 125 | } else { 126 | file << fieldType + " " + fieldName + ";" << std::endl; 127 | } 128 | } 129 | file << "};" << std::endl; 130 | } 131 | void defineVisitor(std::ofstream& file, const std::string& baseName) { 132 | auto visitorClassName = baseName + "Visitor"; 133 | file << "class " << visitorClassName << " {" << std::endl; 134 | file << "public:" << std::endl; 135 | file << "virtual ~" << visitorClassName << "() {}" << std::endl; 136 | for (auto type : astSpec.second) { 137 | auto className = type.substr(0, type.find(":")); 138 | file << "virtual void " 139 | << "visit" + className << "(" << className << "* " << baseName 140 | << ") = 0;" << std::endl; 141 | } 142 | file << "};" << std::endl; 143 | } 144 | 145 | private: 146 | const std::string outDir; 147 | const ASTSpecification astSpec; 148 | }; 149 | 150 | int main(int argc, char** argv) { 151 | if (argc != 2) { 152 | std::cout << "Usage: ast_generator " << std::endl; 153 | } else { 154 | const std::string outDir = argv[1]; 155 | const ASTGenerator::ASTSpecification astSpec = { 156 | "Expr", 157 | {"BinaryExpr :Expr left,Token Operator,Expr right", 158 | "GroupingExpr :Expr expression", "LiteralExpr :std::string value", 159 | "UnaryExpr :Token Operator,Expr right"}}; 160 | ASTGenerator astGenerator(outDir, astSpec); 161 | astGenerator.generate(); 162 | } 163 | return 0; 164 | } 165 | -------------------------------------------------------------------------------- /src/tools/ast_printer.hpp: -------------------------------------------------------------------------------- 1 | #include "../Expr.hpp" 2 | #include "../scanner/token.hpp" 3 | #include 4 | #include 5 | #include 6 | #include 7 | 8 | namespace lox { 9 | class ASTPrinter : public ExprVisitor { 10 | public: 11 | void print(Expr* expr) { 12 | return expr->accept(this); 13 | } 14 | void visitBinaryExpr(BinaryExpr* expr) override { 15 | return parenthesize(expr->Operator.lexeme, 16 | {expr->left, expr->right}); 17 | } 18 | void visitGroupingExpr(GroupingExpr* expr) override { 19 | return parenthesize("group", {expr->expression}); 20 | } 21 | void visitLiteralExpr(LiteralExpr* expr) override { 22 | if (expr->value.empty()) 23 | std::cout << "nil"; 24 | std::cout << " " << expr->value; 25 | } 26 | void visitUnaryExpr(UnaryExpr* expr) override { 27 | return parenthesize(expr->Operator.lexeme, {expr->right}); 28 | } 29 | void parenthesize(std::string name, std::vector exprs) { 30 | std::string pp = "(" + name; 31 | // print 32 | std::cout << pp; 33 | for (auto expr : exprs) { 34 | expr->accept(this); 35 | } 36 | std::cout << ")"; 37 | } 38 | }; 39 | } // namespace lox 40 | 41 | /// EXAMPLE USE: 42 | // int main() { 43 | // std::unique_ptr rootExpr( 44 | // new BinaryExpr(new UnaryExpr(*new Token(TokenType::MINUS, "-", "", 45 | // 1), 46 | // new LiteralExpr("123")), 47 | // *new Token(TokenType::STAR, "*", "", 1), 48 | // new GroupingExpr(new LiteralExpr("45.67")))); 49 | // ASTPrinter pp; 50 | // pp.print(rootExpr.get()); 51 | // std::cout << std::endl; 52 | // return 0; 53 | // } 54 | --------------------------------------------------------------------------------