├── .github └── workflows │ └── build.yml ├── .gitignore ├── .gitmodules ├── CMakeLists.txt ├── LICENSE ├── README.md ├── forge.lua ├── lalr.forge └── src └── lalr ├── AddLexerActionHandler.hpp ├── AddLexerActionHandler.ipp ├── AddParserActionHandler.hpp ├── AddParserActionHandler.ipp ├── Associativity.hpp ├── ErrorCode.hpp ├── ErrorPolicy.cpp ├── ErrorPolicy.hpp ├── Grammar.cpp ├── Grammar.hpp ├── GrammarAction.cpp ├── GrammarAction.hpp ├── GrammarCompiler.cpp ├── GrammarCompiler.hpp ├── GrammarGenerator.cpp ├── GrammarGenerator.hpp ├── GrammarItem.hpp ├── GrammarItem.ipp ├── GrammarLookahead.hpp ├── GrammarLookahead.ipp ├── GrammarParser.cpp ├── GrammarParser.hpp ├── GrammarProduction.hpp ├── GrammarProduction.ipp ├── GrammarProductionLess.hpp ├── GrammarProductionLess.ipp ├── GrammarState.cpp ├── GrammarState.hpp ├── GrammarStateLess.hpp ├── GrammarStateLess.ipp ├── GrammarSymbol.cpp ├── GrammarSymbol.hpp ├── GrammarSymbol.ipp ├── GrammarSymbolSet.cpp ├── GrammarSymbolSet.hpp ├── GrammarTransition.cpp ├── GrammarTransition.hpp ├── LexemeType.hpp ├── Lexer.hpp ├── Lexer.ipp ├── LexerAction.hpp ├── LexerState.hpp ├── LexerStateMachine.hpp ├── LexerTransition.hpp ├── Parser.hpp ├── Parser.ipp ├── ParserAction.hpp ├── ParserNode.hpp ├── ParserNode.ipp ├── ParserState.hpp ├── ParserStateMachine.hpp ├── ParserSymbol.hpp ├── ParserTransition.hpp ├── ParserUserData.hpp ├── ParserUserData.ipp ├── PositionIterator.hpp ├── RegexAction.cpp ├── RegexAction.hpp ├── RegexCharacter.cpp ├── RegexCharacter.hpp ├── RegexCompiler.cpp ├── RegexCompiler.hpp ├── RegexGenerator.cpp ├── RegexGenerator.hpp ├── RegexItem.cpp ├── RegexItem.hpp ├── RegexNode.cpp ├── RegexNode.hpp ├── RegexNodeLess.cpp ├── RegexNodeLess.hpp ├── RegexNodeType.hpp ├── RegexParser.cpp ├── RegexParser.hpp ├── RegexState.cpp ├── RegexState.hpp ├── RegexStateLess.cpp ├── RegexStateLess.hpp ├── RegexSyntaxTree.cpp ├── RegexSyntaxTree.hpp ├── RegexToken.cpp ├── RegexToken.hpp ├── RegexTokenType.hpp ├── 
RegexTransition.cpp ├── RegexTransition.hpp ├── SymbolType.hpp ├── ThreadPool.cpp ├── ThreadPool.hpp ├── TransitionType.hpp ├── assert.hpp ├── block_comment.hpp ├── forge └── lalr │ ├── Lalrc.lua │ └── init.lua ├── lalr.forge ├── lalr_examples ├── error_handling_calculator.g ├── json.cpp ├── json.g ├── lalr_calculator_example.cpp ├── lalr_error_handling_calculator_example.cpp ├── lalr_examples.cpp ├── lalr_examples.forge ├── lalr_hello_world_example.cpp ├── lalr_json_example.cpp ├── lalr_json_example.json ├── lalr_xml_example.cpp ├── xml.cpp └── xml.g ├── lalr_test ├── TestParsers.cpp ├── TestPositionIterator.cpp ├── TestPrecedenceDirectives.cpp ├── TestRegularExpressions.cpp ├── lalr_test.forge └── main.cpp ├── lalrc ├── dot.cpp ├── dot.hpp ├── lalrc.cpp └── lalrc.forge ├── line_comment.hpp └── string_literal.hpp /.github/workflows/build.yml: -------------------------------------------------------------------------------- 1 | name: Build 2 | 3 | on: 4 | push: 5 | branches: [ main ] 6 | pull_request: 7 | branches: [ main ] 8 | 9 | jobs: 10 | build: 11 | runs-on: ${{ matrix.os }} 12 | strategy: 13 | matrix: 14 | os: [macos-latest, windows-latest, ubuntu-latest] 15 | steps: 16 | - name: Checkout 17 | uses: actions/checkout@v1 18 | with: 19 | submodules: true 20 | - name: Install Forge 21 | run: | 22 | OS=$(echo ${RUNNER_OS} |tr '[:upper:]' '[:lower:]') 23 | curl -L https://github.com/cwbaker/forge/releases/download/v0.9.7/forge-${OS}-v0.9.7.tar.gz >forge.tar.gz 24 | tar -xvf forge.tar.gz 25 | shell: bash 26 | - name: Build 27 | run: | 28 | forge/bin/forge variant=release 29 | shell: bash 30 | - name: Test 31 | run: | 32 | release/bin/lalr_examples 33 | release/bin/lalr_test 34 | shell: bash 35 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | 2 | /bin 3 | /lib 4 | /obj 5 | /sweet/parser.html 6 | /sweet/local_settings.lua 7 | 
/sweet_parser.xcodeproj 8 | /sweet/sweet_parser.sln 9 | /*.sln 10 | /.vscode/ 11 | /.vs 12 | /out 13 | debug 14 | release 15 | shipping 16 | local_settings.lua 17 | *.sublime-project 18 | *.sublime-workspace 19 | *.xcodeproj 20 | -------------------------------------------------------------------------------- /.gitmodules: -------------------------------------------------------------------------------- 1 | [submodule "unittest-cpp"] 2 | path = src/unittest-cpp 3 | url = https://github.com/cwbaker/unittest-cpp.git 4 | -------------------------------------------------------------------------------- /CMakeLists.txt: -------------------------------------------------------------------------------- 1 | project(LALR) 2 | 3 | ### ---------------------- Static Library --------------------- ### 4 | file(GLOB SOURCES "./src/lalr/*.cpp") 5 | file(GLOB HEADERS "./src/lalr/*.hpp") 6 | add_library(${PROJECT_NAME} STATIC ${SOURCES} ${HEADERS}) 7 | target_include_directories(${PROJECT_NAME} PUBLIC ./src/) 8 | 9 | ### -------------------------- offline compiler -------------------------- ### 10 | file(GLOB COMPILER_SOURCES "./src/lalr/lalrc/*.cpp") 11 | add_executable(${PROJECT_NAME}Compiler ${COMPILER_SOURCES}) 12 | target_link_libraries(${PROJECT_NAME}Compiler ${PROJECT_NAME}) 13 | 14 | ### -------------------------- Tests -------------------------- ### 15 | add_subdirectory(src/unittest-cpp) 16 | 17 | enable_testing() 18 | file(GLOB TEST_SOURCES "./src/lalr/lalr_test/*.cpp") 19 | add_executable(${PROJECT_NAME}Tests ${TEST_SOURCES}) 20 | target_link_libraries(${PROJECT_NAME}Tests ${PROJECT_NAME} UnitTest++) 21 | add_test(NAME ${PROJECT_NAME}Tests COMMAND ${PROJECT_NAME}Tests) -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2011-2018 Charles Baker 4 | 5 | Permission is hereby granted, free of charge, to any person 
obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /forge.lua: -------------------------------------------------------------------------------- 1 | 2 | local paths = { 3 | package.path; 4 | root( 'src/lalr/?.lua' ); 5 | root( 'src/lalr/?/init.lua' ); 6 | }; 7 | package.path = table.concat( paths, ';' ); 8 | 9 | variant = variant or 'debug'; 10 | 11 | local forge = require( 'forge' ):load( variant ); 12 | 13 | local cc = forge.Toolset 'cc_${platform}' { 14 | platform = operating_system(); 15 | bin = root( ('%s/bin'):format(variant) ); 16 | lib = root( ('%s/lib'):format(variant) ); 17 | obj = root( ('%s/obj'):format(variant) ); 18 | include_directories = { 19 | root( 'src' ); 20 | root( 'src/unittest-cpp' ); 21 | }; 22 | library_directories = { 23 | root( ('%s/lib'):format(variant) ); 24 | }; 25 | defines = { 26 | ('BUILD_PLATFORM_%s'):format( upper(platform) ); 27 | ('BUILD_VARIANT_%s'):format( upper(variant) ); 28 | ('BUILD_VERSION="\\"%s\\""'):format( version ); 29 | }; 30 | 31 | assertions = variant ~= 'shipping'; 32 | debug = variant ~= 'shipping'; 33 | debuggable = variant ~= 'shipping'; 34 | exceptions = true; 35 | fast_floating_point = variant ~= 'debug'; 36 | incremental_linking = variant == 'debug'; 37 | link_time_code_generation = variant == 'shipping'; 38 | minimal_rebuild = variant == 'debug'; 39 | optimization = variant ~= 'debug'; 40 | run_time_checks = variant == 'debug'; 41 | runtime_library = variant == 'debug' and 'static_debug' or 'static_release'; 42 | run_time_type_info = true; 43 | stack_size = 1048576; 44 | standard = 'c++14'; 45 | string_pooling = variant == 'shipping'; 46 | strip = false; 47 | warning_level = 3; 48 | warnings_as_errors = true; 49 | }; 50 | 51 | cc:install( 'forge.cc' ); 52 | 53 | -- Bump the C++ standard to c++14 when building on Windows as that is the 54 | -- lowest standard supported by Microsoft Visual C++. 
55 | if cc.platform == 'windows' then 56 | cc.standard = 'c++14'; 57 | end 58 | 59 | local lalr = require 'forge.lalr'; 60 | cc:install( lalr ); 61 | 62 | buildfile 'src/lalr/lalr.forge'; 63 | buildfile 'src/unittest-cpp/unittest-cpp.forge'; 64 | 65 | cc:all { 66 | 'src/lalr/all', 67 | 'src/lalr/lalrc/all', 68 | 'src/lalr/lalr_examples/all', 69 | 'src/lalr/lalr_test/all' 70 | }; 71 | -------------------------------------------------------------------------------- /lalr.forge: -------------------------------------------------------------------------------- 1 | 2 | buildfile 'src/lalr/lalr.forge'; 3 | 4 | for _, cc in toolsets('^cc.*') do 5 | cc:all { 6 | 'src/lalr/all'; 7 | 'src/lalr/lalrc/all'; 8 | 'src/lalr/lalr_test/all'; 9 | }; 10 | end 11 | -------------------------------------------------------------------------------- /src/lalr/AddLexerActionHandler.hpp: -------------------------------------------------------------------------------- 1 | #ifndef LALR_ADDLEXERACTIONHANDLER_HPP_INCLUDED 2 | #define LALR_ADDLEXERACTIONHANDLER_HPP_INCLUDED 3 | 4 | #include 5 | 6 | namespace lalr 7 | { 8 | 9 | template class PositionIterator; 10 | template class Lexer; 11 | 12 | /** 13 | // A helper that provides a convenient syntax for adding handlers to a %Lexer. 14 | */ 15 | template 16 | class AddLexerActionHandler 17 | { 18 | typedef std::function (const PositionIterator& begin, const PositionIterator& end, std::basic_string* lexeme, const void** symbol)> LexerActionFunction; 19 | Lexer* lexer_; ///< The Lexer to add handlers to. 
20 | 21 | public: 22 | AddLexerActionHandler( Lexer* lexer ); 23 | const AddLexerActionHandler& operator()( const char* identifier, LexerActionFunction function ) const; 24 | }; 25 | 26 | } 27 | 28 | #endif 29 | -------------------------------------------------------------------------------- /src/lalr/AddLexerActionHandler.ipp: -------------------------------------------------------------------------------- 1 | #ifndef LALR_ADDLEXERACTIONHANDLER_IPP_INCLUDED 2 | #define LALR_ADDLEXERACTIONHANDLER_IPP_INCLUDED 3 | 4 | #include "AddLexerActionHandler.hpp" 5 | #include "assert.hpp" 6 | 7 | namespace lalr 8 | { 9 | 10 | /** 11 | // Constructor. 12 | // 13 | // @param lexer 14 | // The %Lexer to add actions to (assumed not null). 15 | */ 16 | template 17 | AddLexerActionHandler::AddLexerActionHandler( Lexer* lexer ) 18 | : lexer_( lexer ) 19 | { 20 | LALR_ASSERT( lexer_ ); 21 | } 22 | 23 | 24 | /** 25 | // Set the function to call when the lexer action \e identifier is taken. 26 | // 27 | // @param identifier 28 | // The identifier of the lexer action to assign a function to (matches the 29 | // identifier specified between ':' characters in a regular expression). 30 | // 31 | // @param function 32 | // The function to call when the lexer action needs to be taken. 33 | // 34 | // @return 35 | // This %AddLexerActionHandler. 
36 | */ 37 | template 38 | const AddLexerActionHandler& 39 | AddLexerActionHandler::operator()( const char* identifier, LexerActionFunction function ) const 40 | { 41 | LALR_ASSERT( identifier ); 42 | LALR_ASSERT( lexer_ ); 43 | lexer_->set_action_handler( identifier, function ); 44 | return *this; 45 | } 46 | 47 | } 48 | 49 | #endif 50 | -------------------------------------------------------------------------------- /src/lalr/AddParserActionHandler.hpp: -------------------------------------------------------------------------------- 1 | #ifndef LALR_ADDPARSERACTIONHANDLER_HPP_INCLUDED 2 | #define LALR_ADDPARSERACTIONHANDLER_HPP_INCLUDED 3 | 4 | #include "ParserNode.hpp" 5 | #include 6 | 7 | namespace lalr 8 | { 9 | 10 | class ParserSymbol; 11 | template class Parser; 12 | 13 | /** 14 | // A helper that provides a convenient syntax for adding handlers to a %Parser. 15 | */ 16 | template 17 | class AddParserActionHandler 18 | { 19 | typedef std::function* nodes, size_t length)> ParserActionFunction; 20 | Parser* parser_; ///< The Parser to add handlers to. 21 | 22 | public: 23 | AddParserActionHandler( Parser* parser ); 24 | const AddParserActionHandler& default_action( ParserActionFunction function ) const; 25 | const AddParserActionHandler& operator()( const char* identifier, ParserActionFunction function ) const; 26 | }; 27 | 28 | } 29 | 30 | #endif 31 | -------------------------------------------------------------------------------- /src/lalr/AddParserActionHandler.ipp: -------------------------------------------------------------------------------- 1 | #ifndef LALR_ADDHANDLER_IPP_INCLUDED 2 | #define LALR_ADDHANDLER_IPP_INCLUDED 3 | 4 | #include "AddParserActionHandler.hpp" 5 | #include "assert.hpp" 6 | 7 | namespace lalr 8 | { 9 | 10 | /** 11 | // Constructor. 12 | // 13 | // @param parser 14 | // The %Parser to add actions to (assumed not null). 
15 | */ 16 | template 17 | AddParserActionHandler::AddParserActionHandler( Parser* parser ) 18 | : parser_( parser ) 19 | { 20 | LALR_ASSERT( parser_ ); 21 | } 22 | 23 | /** 24 | // Set the function to call for the default action (taken on a reduction when 25 | // no action is specified in the grammar). 26 | // 27 | // @param function 28 | // The function to call for the default action. 29 | // 30 | // @return 31 | // This %AddParserActionHandler. 32 | */ 33 | template 34 | const AddParserActionHandler& 35 | AddParserActionHandler::default_action( ParserActionFunction function ) const 36 | { 37 | LALR_ASSERT( parser_ ); 38 | parser_->set_default_action_handler( function ); 39 | return *this; 40 | } 41 | 42 | /** 43 | // Set the function to call when the %parser action \e identifier is taken. 44 | // 45 | // @param identifier 46 | // The identifier of the %parser action to assign a function to (matches the 47 | // identifier specified between '[' and ']' characters at the end of a 48 | // production). 49 | // 50 | // @param function 51 | // The function to call when the %parser action needs to be taken. 52 | // 53 | // @return 54 | // This AddParserActionHandler. 55 | */ 56 | template 57 | const AddParserActionHandler& 58 | AddParserActionHandler::operator()( const char* identifier, ParserActionFunction function ) const 59 | { 60 | LALR_ASSERT( identifier ); 61 | LALR_ASSERT( parser_ ); 62 | parser_->set_action_handler( identifier, function ); 63 | return *this; 64 | } 65 | 66 | } 67 | 68 | #endif 69 | -------------------------------------------------------------------------------- /src/lalr/Associativity.hpp: -------------------------------------------------------------------------------- 1 | #ifndef LALR_ASSOCIATIVITY_HPP_INCLUDED 2 | #define LALR_ASSOCIATIVITY_HPP_INCLUDED 3 | 4 | namespace lalr 5 | { 6 | 7 | /** 8 | // The associativity of a symbol. 
9 | // 10 | // @relates Symbol 11 | */ 12 | enum Associativity 13 | { 14 | ASSOCIATE_NULL, ///< The symbol has undefined associativity. 15 | ASSOCIATE_NONE, ///< The symbol has no associativity. 16 | ASSOCIATE_LEFT, ///< The symbol associates to the left. 17 | ASSOCIATE_RIGHT ///< The symbol associates to the right. 18 | }; 19 | 20 | } 21 | 22 | #endif 23 | -------------------------------------------------------------------------------- /src/lalr/ErrorCode.hpp: -------------------------------------------------------------------------------- 1 | #ifndef LALR_ERRORCODE_HPP_INCLUDED 2 | #define LALR_ERRORCODE_HPP_INCLUDED 3 | 4 | namespace lalr 5 | { 6 | 7 | /** 8 | // Error codes reported by the %parser library. 9 | */ 10 | enum ErrorCode 11 | { 12 | PARSER_ERROR_NONE, ///< No %error. 13 | LALR_ERROR_SYNTAX, ///< Syntax %error occurred while parsing input. 14 | LALR_ERROR_UNTERMINATED_LITERAL, ///< Unterminated literal in an lalr grammar. 15 | LALR_ERROR_EMPTY_LITERAL, ///< Empty literal in an lalr grammar. 16 | LEXER_ERROR_MISSING_ACTION_HANDLER, ///< A lexer action hasn't been bound to a function. 17 | LEXER_ERROR_SYNTAX, ///< Syntax %error occurred while parsing some input. 18 | LEXER_ERROR_SYMBOL_CONFLICT, ///< A lexer state matches more than one symbol. 19 | LEXER_ERROR_LEXICAL_ERROR, ///< A lexical error occurred while scanning an input sequence. 20 | PARSER_ERROR_OPENING_FILE_FAILED, ///< Opening a grammar file failed. 21 | PARSER_ERROR_PARSING_FAILED, ///< Parsing a grammar failed. 22 | PARSER_ERROR_UNEXPECTED, ///< An unexpected %error occurred. 23 | PARSER_ERROR_SYNTAX, ///< Syntax %error occurred while parsing some input. 24 | PARSER_ERROR_PARSE_TABLE_CONFLICT, ///< A shift-reduce or reduce-reduce conflict was found in the parse table. 25 | PARSER_ERROR_UNDEFINED_SYMBOL, ///< A grammar symbol is referenced but not defined. 26 | PARSER_ERROR_UNREFERENCED_SYMBOL, ///< A grammar symbol is defined but not referenced. 
27 | PARSER_ERROR_ERROR_SYMBOL_ON_LEFT_HAND_SIDE, ///< The 'error' symbol has been used on the left hand side of a production. 28 | PARSER_ERROR_DUPLICATE_ASSOCIATION_ON_IMPLICIT_TERMINAL ///< Both implicit terminal forms specify associativity and precedence. 29 | }; 30 | 31 | } 32 | 33 | #endif 34 | -------------------------------------------------------------------------------- /src/lalr/ErrorPolicy.cpp: -------------------------------------------------------------------------------- 1 | // 2 | // ErrorPolicy.cpp 3 | // Copyright (c) Charles Baker. All rights reserved. 4 | // 5 | 6 | #include "ErrorPolicy.hpp" 7 | #include 8 | 9 | using namespace lalr; 10 | 11 | /** 12 | // Destructor. 13 | */ 14 | ErrorPolicy::~ErrorPolicy() 15 | { 16 | } 17 | 18 | /** 19 | // An %error has occurred in the *lalr* library; this default implementation prints it to stderr as "lalr (line:column): ERROR: <message>" followed by a newline. 20 | // 21 | // @param line 22 | // The line number that the %error occurred on. 23 | // 24 | // @param column 25 | // The column number that the %error occurred on. 26 | // 27 | // @param error 28 | // The error code (ignored by this default implementation). 29 | // 30 | // @param format 31 | // A printf-style format string that describes the error. 32 | // 33 | // @param args 34 | // Arguments as described by *format*. 35 | */ 36 | void ErrorPolicy::lalr_error( int line, int column, int /*error*/, const char* format, va_list args ) 37 | { 38 | fprintf( stderr, "lalr (%d:%d): ERROR: ", line, column ); 39 | vfprintf( stderr, format, args ); 40 | fprintf( stderr, "\n" ); 41 | } 42 | 43 | /** 44 | // Debug output has been sent from the *lalr* library. 45 | // 46 | // @param format 47 | // The printf-style format string that describes the text to print. 48 | // 49 | // @param args 50 | // Arguments as described by \e format. 
51 | */ 52 | void ErrorPolicy::lalr_vprintf( const char* format, va_list args ) 53 | { 54 | printf( "lalr: " ); 55 | vprintf( format, args ); 56 | } 57 | -------------------------------------------------------------------------------- /src/lalr/ErrorPolicy.hpp: -------------------------------------------------------------------------------- 1 | #ifndef LALR_ERRORPOLICY_HPP_INCLUDED 2 | #define LALR_ERRORPOLICY_HPP_INCLUDED 3 | 4 | #include 5 | 6 | namespace lalr 7 | { 8 | 9 | /** 10 | // An interface to be implemented by classes that wish to be notified of 11 | // errors and %debug information from the *lalr* library. 12 | */ 13 | class ErrorPolicy 14 | { 15 | public: 16 | virtual ~ErrorPolicy(); 17 | virtual void lalr_error( int line, int column, int error, const char* format, va_list args ); 18 | virtual void lalr_vprintf( const char* format, va_list args ); 19 | }; 20 | 21 | } 22 | 23 | #endif 24 | -------------------------------------------------------------------------------- /src/lalr/Grammar.hpp: -------------------------------------------------------------------------------- 1 | #ifndef LALR_GRAMMAR_HPP_INCLUDED 2 | #define LALR_GRAMMAR_HPP_INCLUDED 3 | 4 | #include "SymbolType.hpp" 5 | #include "LexemeType.hpp" 6 | #include "Associativity.hpp" 7 | #include "RegexToken.hpp" 8 | #include 9 | #include 10 | 11 | namespace lalr 12 | { 13 | 14 | class GrammarSymbol; 15 | class GrammarProduction; 16 | class GrammarAction; 17 | class LexerErrorPolicy; 18 | class RegexCompiler; 19 | class ParserErrorPolicy; 20 | class GrammarCompiler; 21 | class ParserStateMachine; 22 | 23 | class Grammar 24 | { 25 | std::string identifier_; 26 | std::vector> symbols_; ///< The symbols in the grammar. 27 | std::vector> productions_; ///< The productions in the grammar. 28 | std::vector> actions_; ///< The actions in the grammar. 29 | std::vector whitespace_tokens_; ///< Regular expressions that define whitespace in this grammar. 
30 | bool active_whitespace_directive_; ///< True iff a whitespace directive is active. 31 | bool active_precedence_directive_; ///< True iff a precedence directive is active. 32 | Associativity associativity_; ///< Most recently set associativity. 33 | int precedence_; ///< Current precedence. 34 | GrammarProduction* active_production_; ///< Currently active production. 35 | GrammarSymbol* active_symbol_; ///< Currently active symbol. 36 | GrammarSymbol* start_symbol_; ///< The start symbol. 37 | GrammarSymbol* end_symbol_; ///< The end symbol. 38 | GrammarSymbol* error_symbol_; ///< The error symbol. 39 | GrammarSymbol* whitespace_symbol_; ///< The whitespace symbol. 40 | 41 | public: 42 | Grammar(); 43 | ~Grammar(); 44 | const std::string& identifier() const; 45 | std::vector>& symbols(); 46 | std::vector>& productions(); 47 | std::vector>& actions(); 48 | const std::vector& whitespace_tokens() const; 49 | GrammarSymbol* start_symbol() const; 50 | GrammarSymbol* end_symbol() const; 51 | GrammarSymbol* error_symbol() const; 52 | GrammarSymbol* whitespace_symbol() const; 53 | Grammar& grammar( const std::string& identifier ); 54 | Grammar& left( int line ); 55 | Grammar& right( int line ); 56 | Grammar& none( int line ); 57 | Grammar& whitespace(); 58 | Grammar& precedence(); 59 | Grammar& production( const char* identifier, int line, int column ); 60 | Grammar& end_production(); 61 | Grammar& end_expression( int line, int column ); 62 | Grammar& error( int line, int column ); 63 | Grammar& action( const char* identifier, int line, int column ); 64 | Grammar& literal( const char* literal, int line, int column ); 65 | Grammar& regex( const char* regex, int line, int column ); 66 | Grammar& identifier( const char* identifier, int line, int column ); 67 | 68 | private: 69 | GrammarSymbol* literal_symbol( const char* lexeme, int line , int column ); 70 | GrammarSymbol* regex_symbol( const char* lexeme, int line , int column ); 71 | GrammarSymbol* non_terminal_symbol( 
const char* lexeme, int line , int column ); 72 | GrammarSymbol* add_symbol( const char* lexeme, int line, int column, LexemeType lexeme_type, SymbolType symbol_type ); 73 | GrammarProduction* add_production( GrammarSymbol* symbol, int line , int column ); 74 | GrammarAction* add_action( const char* id ); 75 | }; 76 | 77 | } 78 | 79 | #endif 80 | -------------------------------------------------------------------------------- /src/lalr/GrammarAction.cpp: -------------------------------------------------------------------------------- 1 | // 2 | // GrammarAction.cpp 3 | // Copyright (c) Charles Baker. All rights reserved. 4 | // 5 | 6 | #include "GrammarAction.hpp" 7 | #include "assert.hpp" 8 | 9 | using namespace lalr; 10 | 11 | /** 12 | // Constructor. 13 | // 14 | // @param index 15 | // The index of this action (assumed >= 0). 16 | // 17 | // @param identifier 18 | // The identifier of this action (assumed not empty). 19 | */ 20 | GrammarAction::GrammarAction( int index, const std::string& identifier ) 21 | : index_( index ) 22 | , identifier_( identifier ) 23 | { 24 | LALR_ASSERT( index_ >= 0 ); 25 | LALR_ASSERT( !identifier_.empty() ); 26 | } 27 | 28 | int GrammarAction::index() const 29 | { 30 | return index_; 31 | } 32 | 33 | const std::string& GrammarAction::identifier() const 34 | { 35 | return identifier_; 36 | } 37 | -------------------------------------------------------------------------------- /src/lalr/GrammarAction.hpp: -------------------------------------------------------------------------------- 1 | #ifndef LALR_GRAMMARACTION_HPP_INCLUDED 2 | #define LALR_GRAMMARACTION_HPP_INCLUDED 3 | 4 | #include 5 | 6 | namespace lalr 7 | { 8 | 9 | /** 10 | // An action that is attached to a %parser reduction. 11 | */ 12 | class GrammarAction 13 | { 14 | int index_; ///< The index of this action. 15 | std::string identifier_; ///< The identifier of this action. 
16 | 17 | public: 18 | GrammarAction( int index, const std::string& identifier ); 19 | int index() const; 20 | const std::string& identifier() const; 21 | static const int INVALID_INDEX = -1; 22 | }; 23 | 24 | } 25 | 26 | #endif 27 | -------------------------------------------------------------------------------- /src/lalr/GrammarCompiler.cpp: -------------------------------------------------------------------------------- 1 | // 2 | // GrammarCompiler.cpp 3 | // Copyright (c) Charles Baker. All rights reserved. 4 | // 5 | 6 | #include "GrammarCompiler.hpp" 7 | #include "Grammar.hpp" 8 | #include "GrammarParser.hpp" 9 | #include "GrammarGenerator.hpp" 10 | #include "GrammarSymbol.hpp" 11 | #include "GrammarAction.hpp" 12 | #include "GrammarState.hpp" 13 | #include "GrammarTransition.hpp" 14 | #include "RegexGenerator.hpp" 15 | #include "RegexToken.hpp" 16 | #include "ParserStateMachine.hpp" 17 | #include "ParserSymbol.hpp" 18 | #include "ParserState.hpp" 19 | #include "ParserAction.hpp" 20 | #include "ParserTransition.hpp" 21 | #include "RegexCompiler.hpp" 22 | #include "ErrorPolicy.hpp" 23 | #include "assert.hpp" 24 | #include 25 | #include 26 | 27 | using std::set; 28 | using std::vector; 29 | using std::copy; 30 | using std::back_inserter; 31 | using std::unique_ptr; 32 | using std::shared_ptr; 33 | using namespace lalr; 34 | 35 | GrammarCompiler::GrammarCompiler() 36 | : strings_() 37 | , actions_() 38 | , symbols_() 39 | , transitions_() 40 | , states_() 41 | , lexer_() 42 | , whitespace_lexer_() 43 | , parser_state_machine_() 44 | , labels_enabled_{ false } 45 | { 46 | lexer_.reset( new RegexCompiler ); 47 | whitespace_lexer_.reset( new RegexCompiler ); 48 | parser_state_machine_.reset( new ParserStateMachine ); 49 | memset( parser_state_machine_.get(), 0, sizeof(*parser_state_machine_) ); 50 | } 51 | 52 | GrammarCompiler::~GrammarCompiler() 53 | { 54 | } 55 | 56 | const RegexCompiler* GrammarCompiler::lexer() const 57 | { 58 | return lexer_.get(); 59 | } 60 
| 61 | const RegexCompiler* GrammarCompiler::whitespace_lexer() const 62 | { 63 | return whitespace_lexer_.get(); 64 | } 65 | 66 | const ParserStateMachine* GrammarCompiler::parser_state_machine() const 67 | { 68 | return parser_state_machine_.get(); 69 | } 70 | 71 | void GrammarCompiler::labels_enabled( bool enabled ) 72 | { 73 | labels_enabled_ = enabled; 74 | } 75 | 76 | int GrammarCompiler::compile( const char* begin, const char* end, ErrorPolicy* error_policy ) 77 | { 78 | Grammar grammar; 79 | 80 | GrammarParser parser; 81 | int errors = parser.parse( begin, end, error_policy, &grammar ); 82 | if ( errors == 0 ) 83 | { 84 | GrammarGenerator generator; 85 | errors = generator.generate( grammar, error_policy ); 86 | 87 | if ( errors == 0 ) 88 | { 89 | populate_parser_state_machine( grammar, generator ); 90 | 91 | // Generate tokens for generating the lexical analyzer from each of 92 | // the terminal symbols in the grammar. 93 | vector tokens; 94 | int column = 1; 95 | const vector>& grammar_symbols = generator.symbols(); 96 | for ( size_t i = 0; i < grammar_symbols.size(); ++i, ++column ) 97 | { 98 | const GrammarSymbol* grammar_symbol = grammar_symbols[i].get(); 99 | LALR_ASSERT( grammar_symbol ); 100 | if ( grammar_symbol->symbol_type() == SYMBOL_TERMINAL ) 101 | { 102 | const ParserSymbol* symbol = &symbols_[i]; 103 | LALR_ASSERT( symbol ); 104 | int line = grammar_symbol->line(); 105 | RegexTokenType token_type = grammar_symbol->lexeme_type() == LEXEME_REGULAR_EXPRESSION ? 
TOKEN_REGULAR_EXPRESSION : TOKEN_LITERAL; 106 | tokens.emplace_back( RegexToken(token_type, line, column, symbol, symbol->lexeme) ); 107 | } 108 | } 109 | 110 | errors += lexer_->compile( tokens, error_policy ); 111 | if ( errors == 0 ) 112 | { 113 | parser_state_machine_->lexer_state_machine = lexer_->state_machine(); 114 | } 115 | 116 | const vector& grammar_whitespace_tokens = grammar.whitespace_tokens(); 117 | if ( !grammar_whitespace_tokens.empty() ) 118 | { 119 | const int WHITESPACE_SYMBOL = 3; 120 | const ParserSymbol* whitespace_symbol = &symbols_[WHITESPACE_SYMBOL]; 121 | LALR_ASSERT( whitespace_symbol ); 122 | 123 | vector whitespace_tokens; 124 | whitespace_tokens.reserve( grammar_whitespace_tokens.size() ); 125 | for ( const RegexToken& token : grammar_whitespace_tokens ) 126 | { 127 | whitespace_tokens.emplace_back( RegexToken(token, whitespace_symbol) ); 128 | } 129 | 130 | errors += whitespace_lexer_->compile( whitespace_tokens, error_policy ); 131 | if ( errors == 0 ) 132 | { 133 | parser_state_machine_->whitespace_lexer_state_machine = whitespace_lexer_->state_machine(); 134 | } 135 | } 136 | } 137 | } 138 | return errors; 139 | } 140 | 141 | const char* GrammarCompiler::add_string( const std::string& string ) 142 | { 143 | strings_.push_back( string ); 144 | return strings_.back().c_str(); 145 | } 146 | 147 | void GrammarCompiler::set_actions( std::unique_ptr& actions, int actions_size ) 148 | { 149 | LALR_ASSERT( parser_state_machine_ ); 150 | LALR_ASSERT( actions || actions_size == 0 ); 151 | LALR_ASSERT( actions_size >= 0 ); 152 | actions_ = std::move( actions ); 153 | parser_state_machine_->actions_size = actions_size; 154 | parser_state_machine_->actions = actions_.get(); 155 | } 156 | 157 | void GrammarCompiler::set_symbols( std::unique_ptr& symbols, int symbols_size ) 158 | { 159 | LALR_ASSERT( parser_state_machine_ ); 160 | LALR_ASSERT( symbols ); 161 | LALR_ASSERT( symbols_size >= 3 ); 162 | symbols_ = std::move( symbols ); 163 | 
parser_state_machine_->symbols_size = symbols_size; 164 | parser_state_machine_->symbols = symbols_.get(); 165 | parser_state_machine_->start_symbol = &symbols_[0]; 166 | parser_state_machine_->end_symbol = &symbols_[1]; 167 | parser_state_machine_->error_symbol = &symbols_[2]; 168 | parser_state_machine_->whitespace_symbol = &symbols_[3]; 169 | } 170 | 171 | void GrammarCompiler::set_transitions( std::unique_ptr& transitions, int transitions_size ) 172 | { 173 | LALR_ASSERT( transitions ); 174 | LALR_ASSERT( transitions_size >= 0 ); 175 | transitions_ = std::move( transitions ); 176 | parser_state_machine_->transitions_size = transitions_size; 177 | parser_state_machine_->transitions = transitions_.get(); 178 | } 179 | 180 | void GrammarCompiler::set_states( std::unique_ptr& states, int states_size, const ParserState* start_state ) 181 | { 182 | LALR_ASSERT( states ); 183 | LALR_ASSERT( states_size >= 0 ); 184 | LALR_ASSERT( start_state ); 185 | states_ = std::move( states ); 186 | parser_state_machine_->states_size = states_size; 187 | parser_state_machine_->states = states_.get(); 188 | parser_state_machine_->start_state = start_state; 189 | } 190 | 191 | void GrammarCompiler::populate_parser_state_machine( const Grammar& grammar, const GrammarGenerator& generator ) 192 | { 193 | const vector>& grammar_actions = generator.actions(); 194 | int actions_size = int(grammar_actions.size()); 195 | unique_ptr actions( new ParserAction [actions_size] ); 196 | for ( int i = 0; i < actions_size; ++i ) 197 | { 198 | const GrammarAction* grammar_action = grammar_actions[i].get(); 199 | LALR_ASSERT( grammar_action ); 200 | ParserAction* action = &actions[i]; 201 | LALR_ASSERT( action ); 202 | action->index = grammar_action->index(); 203 | action->identifier = add_string( grammar_action->identifier() ); 204 | } 205 | 206 | const vector>& grammar_symbols = generator.symbols(); 207 | int symbols_size = int(grammar_symbols.size()); 208 | unique_ptr symbols( new ParserSymbol 
[symbols_size] ); 209 | for ( int i = 0; i < symbols_size; ++i ) 210 | { 211 | const GrammarSymbol* source_symbol = grammar_symbols[i].get(); 212 | LALR_ASSERT( source_symbol ); 213 | ParserSymbol* symbol = &symbols[i]; 214 | LALR_ASSERT( symbol ); 215 | symbol->index = source_symbol->index(); 216 | symbol->identifier = add_string( source_symbol->identifier() ); 217 | symbol->lexeme = add_string( source_symbol->lexeme() ); 218 | symbol->label = source_symbol->literal() ? symbol->lexeme : symbol->identifier; 219 | symbol->type = source_symbol->symbol_type(); 220 | } 221 | 222 | const set, GrammarStateLess>& grammar_states = generator.states(); 223 | int states_size = int(grammar_states.size()); 224 | unique_ptr states( new ParserState [states_size] ); 225 | 226 | int transitions_size = 0; 227 | for ( auto i = grammar_states.begin(); i != grammar_states.end(); ++i ) 228 | { 229 | const GrammarState* grammar_state = i->get(); 230 | LALR_ASSERT( grammar_state ); 231 | transitions_size += grammar_state->count_valid_transitions(); 232 | } 233 | unique_ptr transitions( new ParserTransition [transitions_size] ); 234 | 235 | const ParserState* start_state = nullptr; 236 | int state_index = 0; 237 | int transition_index = 0; 238 | for ( auto i = grammar_states.begin(); i != grammar_states.end(); ++i ) 239 | { 240 | const GrammarState* grammar_state = i->get(); 241 | LALR_ASSERT( grammar_state ); 242 | ParserState* state = &states[state_index]; 243 | LALR_ASSERT( state ); 244 | const vector& source_transitions = grammar_state->transitions(); 245 | state->index = state_index; 246 | state->length = grammar_state->count_valid_transitions(); 247 | state->transitions = &transitions[transition_index]; 248 | state->label = labels_enabled_ ? 
add_string( generator.label_state(*grammar_state) ) : nullptr; 249 | if ( grammar_state == generator.start_state() ) 250 | { 251 | start_state = state; 252 | } 253 | for ( auto j = source_transitions.begin(); j != source_transitions.end(); ++j ) 254 | { 255 | const GrammarTransition* source_transition = *j; 256 | if ( source_transition ) 257 | { 258 | const GrammarSymbol* source_symbol = source_transition->symbol(); 259 | LALR_ASSERT( source_symbol ); 260 | const GrammarState* state_transitioned_to = source_transition->state(); 261 | const GrammarSymbol* reduced_symbol = source_transition->reduced_symbol(); 262 | ParserTransition* transition = &transitions[transition_index]; 263 | transition->symbol = &symbols[source_symbol->index()]; 264 | transition->state = state_transitioned_to ? &states[state_transitioned_to->index()] : nullptr; 265 | transition->reduced_symbol = reduced_symbol ? &symbols[reduced_symbol->index()] : nullptr; 266 | transition->reduced_length = source_transition->reduced_length(); 267 | transition->precedence = source_transition->precedence(); 268 | transition->action = source_transition->action(); 269 | transition->index = transition_index; 270 | ++transition_index; 271 | } 272 | } 273 | ++state_index; 274 | } 275 | 276 | parser_state_machine_->identifier = add_string( grammar.identifier() ); 277 | set_actions( actions, actions_size ); 278 | set_symbols( symbols, symbols_size ); 279 | set_transitions( transitions, transitions_size ); 280 | set_states( states, states_size, start_state ); 281 | } 282 | -------------------------------------------------------------------------------- /src/lalr/GrammarCompiler.hpp: -------------------------------------------------------------------------------- 1 | #ifndef LALR_GRAMMARCOMPILER_HPP_INCLUDED 2 | #define LALR_GRAMMARCOMPILER_HPP_INCLUDED 3 | 4 | #include 5 | #include 6 | #include 7 | 8 | namespace lalr 9 | { 10 | 11 | class ErrorPolicy; 12 | class Grammar; 13 | class GrammarState; 14 | class 
GrammarItem; 15 | class GrammarGenerator; 16 | class ParserAction; 17 | class ParserSymbol; 18 | class ParserTransition; 19 | class ParserState; 20 | class ParserStateMachine; 21 | class RegexCompiler; 22 | 23 | class GrammarCompiler 24 | { 25 | std::deque strings_; 26 | std::unique_ptr actions_; ///< The parser actions for this ParserStateMachine. 27 | std::unique_ptr symbols_; ///< The symbols in the grammar for this ParserStateMachine. 28 | std::unique_ptr transitions_; ///< The transitions in the state machine for this ParserStateMachine. 29 | std::unique_ptr states_; ///< The states in the state machine for this ParserStateMachine. 30 | std::unique_ptr lexer_; ///< Allocated lexer state machine. 31 | std::unique_ptr whitespace_lexer_; ///< Allocated whitespace lexer state machine. 32 | std::unique_ptr parser_state_machine_; ///< Allocated parser state machine. 33 | bool labels_enabled_; ///< True to generate labels for states and symbols. 34 | 35 | public: 36 | GrammarCompiler(); 37 | ~GrammarCompiler(); 38 | const RegexCompiler* lexer() const; 39 | const RegexCompiler* whitespace_lexer() const; 40 | const ParserStateMachine* parser_state_machine() const; 41 | void labels_enabled( bool enabled ); 42 | int compile( const char* begin, const char* end, ErrorPolicy* error_policy = nullptr ); 43 | 44 | private: 45 | const char* add_string( const std::string& string ); 46 | void set_actions( std::unique_ptr& actions, int actions_size ); 47 | void set_symbols( std::unique_ptr& symbols, int symbols_size ); 48 | void set_transitions( std::unique_ptr& transitions, int transitions_size ); 49 | void set_states( std::unique_ptr& states, int states_size, const ParserState* start_state ); 50 | void populate_parser_state_machine( const Grammar& grammar, const GrammarGenerator& generator ); 51 | }; 52 | 53 | } 54 | 55 | #endif 56 | -------------------------------------------------------------------------------- /src/lalr/GrammarGenerator.hpp: 
-------------------------------------------------------------------------------- 1 | #ifndef LALR_GRAMMARGENERATOR_HPP_INCLUDED 2 | #define LALR_GRAMMARGENERATOR_HPP_INCLUDED 3 | 4 | #include "RegexToken.hpp" 5 | #include "GrammarStateLess.hpp" 6 | #include "GrammarProductionLess.hpp" 7 | #include "GrammarSymbolSet.hpp" 8 | #include "GrammarLookahead.hpp" 9 | #include 10 | #include 11 | #include 12 | #include 13 | 14 | namespace lalr 15 | { 16 | 17 | class ErrorPolicy; 18 | #ifndef LALR_NO_THREADS 19 | class ThreadPool; 20 | #endif 21 | class LexerStateMachine; 22 | class GrammarCompiler; 23 | class GrammarAction; 24 | class GrammarSymbol; 25 | class GrammarTransition; 26 | class GrammarItem; 27 | class GrammarState; 28 | class GrammarProduction; 29 | class Grammar; 30 | 31 | /** 32 | // @internal 33 | // 34 | // %Parser state machine generator. 35 | */ 36 | class GrammarGenerator 37 | { 38 | ErrorPolicy* error_policy_; ///< The event sink to report errors to and print with or null to ignore errors and prints. 39 | #ifndef LALR_NO_THREADS 40 | ThreadPool* thread_pool_; ///< The pool of threads to use to generate the parser. 41 | #endif 42 | std::string identifier_; ///< The identifier of the parser. 43 | std::vector> actions_; ///< The actions in the parser. 44 | std::vector> productions_; ///< The productions in the parser. 45 | std::vector> symbols_; ///< The symbols in the parser. 46 | std::set, GrammarStateLess> states_; ///< The states in the parser's state machine. 47 | std::vector> transitions_; ///< The transitions in the parser. 48 | std::vector lookaheads_; // Lookaheads for each item in each state. 49 | GrammarSymbol* start_symbol_; ///< The start symbol. 50 | GrammarSymbol* end_symbol_; ///< The end symbol. 51 | GrammarSymbol* error_symbol_; ///< The error symbol. 52 | GrammarSymbol* whitespace_symbol_; ///< The whitespace symbol. 53 | GrammarState* start_state_; ///< The start state. 
54 | int errors_; ///< The number of errors that occured during parsing and generation. 55 | 56 | public: 57 | GrammarGenerator(); 58 | ~GrammarGenerator(); 59 | const std::vector>& actions() const; 60 | const std::vector>& symbols() const; 61 | const std::set, GrammarStateLess>& states() const; 62 | const std::vector>& transitions() const; 63 | const GrammarState* start_state() const; 64 | const GrammarSymbol* whitespace_symbol() const; 65 | std::string label_state( const GrammarState& state ) const; 66 | std::string label_item( const GrammarItem& item ) const; 67 | int generate( Grammar& grammar, ErrorPolicy* error_policy ); 68 | 69 | private: 70 | void error( int line, int error, const char* format, ... ); 71 | GrammarTransition* shift_transition( const GrammarSymbol* symbol, GrammarState* state ); 72 | GrammarTransition* reduce_transition( const GrammarSymbol* symbol, const GrammarProduction* production ); 73 | GrammarSymbolSet spontaneous_lookaheads( const GrammarItem& item ) const; 74 | void closure( const std::shared_ptr& state ); 75 | std::shared_ptr goto_( const GrammarState* state, const GrammarSymbol& symbol ); 76 | void replace_references_to_symbol( GrammarSymbol* to_symbol, GrammarSymbol* with_symbol ); 77 | void check_for_undefined_symbol_errors(); 78 | void check_for_unreferenced_symbol_errors(); 79 | void check_for_error_symbol_on_left_hand_side_errors(); 80 | void check_for_implicit_terminal_duplicate_associativity(); 81 | void calculate_identifiers(); 82 | void calculate_terminal_and_non_terminal_symbols(); 83 | void calculate_implicit_terminal_symbols(); 84 | void calculate_precedence_of_productions(); 85 | void calculate_symbol_indices(); 86 | void calculate_first(); 87 | void calculate_follow(); 88 | void calculate_reachable_productions(); 89 | void calculate_reachable_productions_for_symbol( const GrammarSymbol& symbol, std::set* productions ); 90 | void generate_spontaneous_lookaheads(); 91 | void generate_goto_items(); 92 | void 
generate_propagated_lookaheads(); 93 | void generate_states( const GrammarSymbol* start_symbol, const GrammarSymbol* end_symbol, const std::vector>& symbols ); 94 | void generate_indices_for_states(); 95 | void generate_reduce_transitions(); 96 | void generate_reduce_transition( GrammarState* state, const GrammarSymbol* symbol, const GrammarProduction* production ); 97 | void generate_indices_for_transitions(); 98 | }; 99 | 100 | } 101 | 102 | #endif 103 | -------------------------------------------------------------------------------- /src/lalr/GrammarItem.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | namespace lalr 4 | { 5 | 6 | class GrammarProduction; 7 | 8 | /** 9 | // An item that defines the positions in the grammar that a state 10 | // represents. 11 | */ 12 | class GrammarItem 13 | { 14 | unsigned short production_; ///< The index of the production of this item. 15 | unsigned short position_; ///< The position of the dot in this item. 16 | mutable int index_; 17 | 18 | public: 19 | inline GrammarItem(); 20 | inline GrammarItem( GrammarProduction* production, int position ); 21 | inline int index() const; 22 | inline int production() const; 23 | inline int position() const; 24 | inline bool dot_at_beginning() const; 25 | inline bool operator<( const GrammarItem& item ) const; 26 | inline void set_index( int index ) const; 27 | }; 28 | 29 | } 30 | 31 | #include "GrammarItem.ipp" 32 | -------------------------------------------------------------------------------- /src/lalr/GrammarItem.ipp: -------------------------------------------------------------------------------- 1 | // 2 | // GrammarItem.ipp 3 | // Copyright (c) Charles Baker. All rights reserved. 
4 | // 5 | #pragma once 6 | 7 | #include "GrammarItem.hpp" 8 | #include "GrammarSymbol.hpp" 9 | #include "GrammarProduction.hpp" 10 | #include "assert.hpp" 11 | #include 12 | #include 13 | 14 | namespace lalr 15 | { 16 | 17 | /** 18 | // Constructor. 19 | */ 20 | GrammarItem::GrammarItem() 21 | : production_( 0 ) 22 | , position_( 0 ) 23 | , index_( 0 ) 24 | { 25 | } 26 | 27 | /** 28 | // Constructor. 29 | // 30 | // @param production 31 | // The production that this item is for. 32 | // 33 | // @param position 34 | // The position of the dot in this item. 35 | */ 36 | GrammarItem::GrammarItem( GrammarProduction* production, int position ) 37 | : production_( production->index() ) 38 | , position_( position ) 39 | , index_( 0 ) 40 | { 41 | using std::numeric_limits; 42 | LALR_ASSERT( production_ >= 0 && production_ < numeric_limits::max() ); 43 | LALR_ASSERT( position_ >= 0 && position_ < numeric_limits::max() ); 44 | LALR_ASSERT( position_ >= 0 && position_ < production->length() + 1 ); 45 | } 46 | 47 | /** 48 | // Get the index of this item. 49 | // 50 | // @return 51 | // The index of this item. 52 | */ 53 | int GrammarItem::index() const 54 | { 55 | return index_; 56 | } 57 | 58 | /** 59 | // Get the index of the production for this item. 60 | */ 61 | int GrammarItem::production() const 62 | { 63 | return production_; 64 | } 65 | 66 | /** 67 | // Get the position of the dot in this item. 68 | // 69 | // @return 70 | // The position of the dot in this item. 71 | */ 72 | int GrammarItem::position() const 73 | { 74 | return position_; 75 | } 76 | 77 | /** 78 | // Is the dot that marks the position of this item at the beginning of the 79 | // production? 80 | // 81 | // @return 82 | // True if the dot is at the beginning of the production otherwise false. 83 | */ 84 | bool GrammarItem::dot_at_beginning() const 85 | { 86 | return position_ == 0; 87 | } 88 | 89 | /** 90 | // Less than operator. 91 | // 92 | // @param item 93 | // The item to compare this item with. 
94 | // 95 | // @return 96 | // True if this item's production is less than \e item's or if this item's 97 | // production is the same as \e item's then if this item's next nodes 98 | // are lexically less than item's otherwise false. 99 | */ 100 | bool GrammarItem::operator<( const GrammarItem& item ) const 101 | { 102 | return 103 | production_ < item.production_ || 104 | (production_ == item.production_ && position_ < item.position_) 105 | ; 106 | } 107 | 108 | /** 109 | // Set the index of this item. 110 | // 111 | // @param index 112 | // The index to set this item (assumed >= 0). 113 | */ 114 | void GrammarItem::set_index( int index ) const 115 | { 116 | index_ = index; 117 | } 118 | 119 | } 120 | -------------------------------------------------------------------------------- /src/lalr/GrammarLookahead.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "GrammarSymbolSet.hpp" 4 | #include 5 | 6 | namespace lalr 7 | { 8 | 9 | class GrammarItem; 10 | 11 | class GrammarLookahead 12 | { 13 | GrammarItem* item_; 14 | std::vector propagate_to_; 15 | GrammarSymbolSet lookaheads_; 16 | 17 | public: 18 | inline GrammarLookahead( GrammarItem* item, size_t symbols ); 19 | inline GrammarLookahead( GrammarLookahead&& item ); 20 | 21 | inline GrammarItem* item() const; 22 | inline const std::vector& propagate_to() const; 23 | inline const GrammarSymbolSet& lookaheads() const; 24 | inline void set_item( GrammarItem* item ); 25 | inline void add_propagate_to( GrammarLookahead* propagate_to ); 26 | inline size_t add_lookaheads( const GrammarSymbolSet& lookaheads ); 27 | 28 | GrammarLookahead( const GrammarLookahead& item ) = delete; 29 | GrammarLookahead& operator=( const GrammarLookahead& item ) = delete; 30 | }; 31 | 32 | } 33 | 34 | #include "GrammarLookahead.ipp" 35 | -------------------------------------------------------------------------------- /src/lalr/GrammarLookahead.ipp: 
-------------------------------------------------------------------------------- 1 | // 2 | // GrammarLookahead.ipp 3 | // Copyright (c) Charles Baker. All rights reserved. 4 | // 5 | #pragma once 6 | 7 | #include "GrammarLookahead.hpp" 8 | #include "GrammarItem.hpp" 9 | #include "assert.hpp" 10 | #include 11 | 12 | namespace lalr 13 | { 14 | 15 | GrammarLookahead::GrammarLookahead( GrammarItem* item, size_t symbols ) 16 | : item_( item ) 17 | , propagate_to_() 18 | , lookaheads_{ symbols } 19 | { 20 | LALR_ASSERT( item_ ); 21 | } 22 | 23 | GrammarLookahead::GrammarLookahead( GrammarLookahead&& item ) 24 | : item_{ std::exchange(item.item_, nullptr) } 25 | , propagate_to_{ std::move(item.propagate_to_) } 26 | , lookaheads_{ std::move(item.lookaheads_) } 27 | { 28 | } 29 | 30 | GrammarItem* GrammarLookahead::item() const 31 | { 32 | return item_; 33 | } 34 | 35 | const std::vector& GrammarLookahead::propagate_to() const 36 | { 37 | return propagate_to_; 38 | } 39 | 40 | const GrammarSymbolSet& GrammarLookahead::lookaheads() const 41 | { 42 | return lookaheads_; 43 | } 44 | 45 | void GrammarLookahead::set_item( GrammarItem* item ) 46 | { 47 | LALR_ASSERT( item ); 48 | item_ = item; 49 | } 50 | 51 | void GrammarLookahead::add_propagate_to( GrammarLookahead* propagate_to ) 52 | { 53 | LALR_ASSERT( propagate_to ); 54 | LALR_ASSERT( propagate_to != this ); 55 | propagate_to_.push_back( propagate_to ); 56 | } 57 | 58 | size_t GrammarLookahead::add_lookaheads( const GrammarSymbolSet& lookaheads ) 59 | { 60 | return lookaheads_.insert( lookaheads ); 61 | } 62 | 63 | } 64 | -------------------------------------------------------------------------------- /src/lalr/GrammarParser.hpp: -------------------------------------------------------------------------------- 1 | #ifndef LALR_GRAMMARPARSER_HPP_INCLUDED 2 | #define LALR_GRAMMARPARSER_HPP_INCLUDED 3 | 4 | #include 5 | 6 | namespace lalr 7 | { 8 | 9 | class ErrorPolicy; 10 | class Grammar; 11 | class ParserStateMachine; 12 | 
13 | class GrammarParser 14 | { 15 | ErrorPolicy* error_policy_; ///< ErrorPolicy to report errors to. 16 | Grammar* grammar_; ///< Grammar to build from parsing input. 17 | const char* position_; ///< Current input position. 18 | const char* end_; ///< One past the last character of input to parse. 19 | const char* line_position_; ///< Current line position. 20 | int line_; ///< Current line number. 21 | std::string lexeme_; ///< Currently parsed lexeme. 22 | int errors_; ///< The number of errors that occured during parsing and generation. 23 | 24 | public: 25 | GrammarParser(); 26 | int parse( const char* start, const char* finish, ErrorPolicy* error_policy, Grammar* grammar ); 27 | 28 | private: 29 | bool match_grammar(); 30 | bool match_statements(); 31 | bool match_statement(); 32 | bool match_associativity_statement(); 33 | bool match_whitespace_statement(); 34 | bool match_production_statement(); 35 | bool match_symbols(); 36 | bool match_symbol(); 37 | bool match_associativity(); 38 | bool match_expressions(); 39 | bool match_expression(); 40 | bool match_precedence(); 41 | bool match_action(); 42 | bool match_error(); 43 | bool match_literal(); 44 | bool match_regex(); 45 | bool match_identifier(); 46 | bool match_whitespace_and_comments(); 47 | bool match_whitespace(); 48 | bool match_line_comment(); 49 | bool match_block_comment(); 50 | bool match_end(); 51 | bool match( const char* lexeme ); 52 | bool match_without_skipping_whitespace( const char* lexeme ); 53 | bool expect( const char* lexeme ); 54 | void error( int error, const char* format, ... 
); 55 | const char* new_line( const char* position ); 56 | static bool is_new_line( const char* position ); 57 | int column(); 58 | }; 59 | 60 | } 61 | 62 | #endif 63 | -------------------------------------------------------------------------------- /src/lalr/GrammarProduction.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "GrammarSymbol.hpp" 4 | #include "GrammarAction.hpp" 5 | #include 6 | #include 7 | #include 8 | 9 | namespace lalr 10 | { 11 | 12 | class GrammarAction; 13 | class GrammarSymbol; 14 | 15 | /** 16 | // A production specifying a symbol on the left hand side that is reduced 17 | // from zero or more nodes on the right hand side. 18 | */ 19 | class GrammarProduction 20 | { 21 | int index_; ///< The index of this GrammarProduction. 22 | GrammarSymbol* symbol_; ///< The symbol on the left hand side of this GrammarProduction. 23 | int line_; ///< The line in the grammar that this GrammarProduction was defined on. 24 | int column_; ///< The column in the grammar that this GrammarProduction was defined on. 25 | const GrammarAction* action_; ///< The action taken when this GrammarProduction is reduced or null if there is no action. 26 | std::vector symbols_; ///< The symbols on the right hand side of this production. 27 | const GrammarSymbol* precedence_symbol_; /// The symbol that defines precedence for this production or null to use the right most terminal. 
28 | 29 | public: 30 | inline GrammarProduction( int index, GrammarSymbol* symbol, int line, int column, const GrammarAction* action ); 31 | 32 | inline int index() const; 33 | inline GrammarSymbol* symbol() const; 34 | inline int line() const; 35 | inline int column() const; 36 | inline int count_references_to_symbol( const GrammarSymbol* symbol ) const; 37 | inline bool nullable_after( int position ) const; 38 | inline const GrammarSymbol* find_rightmost_terminal_symbol() const; 39 | inline const GrammarSymbol* symbol_by_position( int position ) const; 40 | inline const std::vector& symbols() const; 41 | inline int length() const; 42 | inline const GrammarAction* action() const; 43 | inline int action_index() const; 44 | inline const GrammarSymbol* precedence_symbol() const; 45 | inline int precedence() const; 46 | 47 | inline void append_symbol( GrammarSymbol* symbol ); 48 | inline void set_action( const GrammarAction* action ); 49 | inline void set_precedence_symbol( const GrammarSymbol* symbol ); 50 | inline void replace_references_to_symbol( GrammarSymbol* to_symbol, GrammarSymbol* with_symbol ); 51 | 52 | static const int INVALID_INDEX = -1; 53 | }; 54 | 55 | } 56 | 57 | #include "GrammarProduction.ipp" 58 | -------------------------------------------------------------------------------- /src/lalr/GrammarProduction.ipp: -------------------------------------------------------------------------------- 1 | // 2 | // GrammarProduction.ipp 3 | // Copyright (c) Charles Baker. All rights reserved. 4 | // 5 | #pragma once 6 | 7 | #include "GrammarProduction.hpp" 8 | #include "GrammarSymbol.hpp" 9 | #include "GrammarAction.hpp" 10 | #include "assert.hpp" 11 | 12 | namespace lalr 13 | { 14 | 15 | /* 16 | // Constructor. 17 | // 18 | // @param index 19 | // The index of this production. 20 | // 21 | // @param symbol 22 | // The symbol on the left-hand side of the production (assumed not null). 
23 | // 24 | // @param line 25 | // The line in the grammar that this production is defnied on. 26 | // 27 | // @param action 28 | // The action taken when the production is reduced or null if the production 29 | // has no action. 30 | */ 31 | GrammarProduction::GrammarProduction( int index, GrammarSymbol* symbol, int line, int column, const GrammarAction* action ) 32 | : index_( index ) 33 | , symbol_( symbol ) 34 | , line_( line ) 35 | , column_( column ) 36 | , action_( action ) 37 | , precedence_symbol_( nullptr ) 38 | { 39 | LALR_ASSERT( symbol_ ); 40 | } 41 | 42 | /** 43 | // Get the index of this production. 44 | // 45 | // @return 46 | // The index. 47 | */ 48 | int GrammarProduction::index() const 49 | { 50 | return index_; 51 | } 52 | 53 | /** 54 | // Get the symbol on the left-hand side of this production. 55 | // 56 | // @return 57 | // The symbol. 58 | */ 59 | GrammarSymbol* GrammarProduction::symbol() const 60 | { 61 | LALR_ASSERT( symbol_ ); 62 | return symbol_; 63 | } 64 | 65 | /** 66 | // Get the line in the grammar that this production is defined on. 67 | // 68 | // @return 69 | // The line. 70 | */ 71 | int GrammarProduction::line() const 72 | { 73 | return line_; 74 | } 75 | 76 | /** 77 | // Get the column in the grammar that this production is defined on. 78 | // 79 | // @return 80 | // The column. 81 | */ 82 | int GrammarProduction::column() const 83 | { 84 | return column_; 85 | } 86 | 87 | /** 88 | // Count the number of references the right-hand side of this production 89 | // makes to \e symbol. 90 | // 91 | // @param symbol 92 | // The symbol to count the number of references to. 93 | // 94 | // @return 95 | // The number of references. 96 | */ 97 | int GrammarProduction::count_references_to_symbol( const GrammarSymbol* symbol ) const 98 | { 99 | using std::vector; 100 | int references = 0; 101 | for ( vector::const_iterator i = symbols_.begin(); i != symbols_.end(); ++i ) 102 | { 103 | references += (symbol == *i ? 
1 : 0); 104 | } 105 | references += (precedence_symbol_ == symbol ? 1 : 0); 106 | return references; 107 | } 108 | 109 | /** 110 | // Are all symbols from *position* onwards nullable? 111 | // 112 | // @param position 113 | // The position to check symbols for being nullable from, assumed zero or 114 | // greater. 115 | // 116 | // @return 117 | // True if symbols from [position, length) are nullable otherwise false. 118 | */ 119 | bool GrammarProduction::nullable_after( int position ) const 120 | { 121 | using std::vector; 122 | LALR_ASSERT( position >= 0 && position <= length() ); 123 | if ( position >= 0 && position < length() ) 124 | { 125 | vector::const_iterator begin = symbols_.begin() + position; 126 | for ( vector::const_iterator i = begin; i != symbols_.end(); ++i ) 127 | { 128 | const GrammarSymbol* symbol = *i; 129 | LALR_ASSERT( symbol ); 130 | if ( !symbol->nullable() ) 131 | { 132 | return false; 133 | } 134 | } 135 | } 136 | return true; 137 | } 138 | 139 | /** 140 | // Find the rightmost terminal symbol in this production. 141 | // 142 | // If there is more than one possible rightmost terminal symbol because of 143 | // an embedded or operators then this function returns null. In this case the precedence 144 | // and associativity of this production must be specified explicitly. 145 | // 146 | // @return 147 | // The rightmost terminal symbol or null if there was more than one potential 148 | // rightmost terminal symbol. 149 | */ 150 | const GrammarSymbol* GrammarProduction::find_rightmost_terminal_symbol() const 151 | { 152 | using std::vector; 153 | vector::const_reverse_iterator i = symbols_.rbegin(); 154 | while ( i != symbols_.rend() && (*i)->symbol_type() != SYMBOL_TERMINAL ) 155 | { 156 | ++i; 157 | } 158 | return i != symbols_.rend() ? *i : nullptr; 159 | } 160 | 161 | /** 162 | // Get the symbol at \e position on the right hand side of this production. 163 | // 164 | // @param position 165 | // The position to get the symbol from. 
166 | // 167 | // @return 168 | // The symbol at \e position or null if \e position refers past the end of 169 | // this production. 170 | */ 171 | const GrammarSymbol* GrammarProduction::symbol_by_position( int position ) const 172 | { 173 | return position >= 0 && position < int(symbols_.size()) ? symbols_[position] : nullptr; 174 | } 175 | 176 | /** 177 | // Get the symbols on the right hand side of this production. 178 | // 179 | // @return 180 | // The symbols. 181 | */ 182 | const std::vector& GrammarProduction::symbols() const 183 | { 184 | return symbols_; 185 | } 186 | 187 | /** 188 | // Get the length of the right-hand side of this production. 189 | // 190 | // @return 191 | // The length of the right-hand side of this production. 192 | */ 193 | int GrammarProduction::length() const 194 | { 195 | return int(symbols_.size()); 196 | } 197 | 198 | /** 199 | // Append \e symbol to the end of the right hand side of this production. 200 | // 201 | // @param symbol 202 | // The symbol to append (assumed not null). 203 | */ 204 | void GrammarProduction::append_symbol( GrammarSymbol* symbol ) 205 | { 206 | symbols_.push_back( symbol ); 207 | } 208 | 209 | /** 210 | // Set the action taken when this production is reduced. 211 | // 212 | // @param 213 | // The action to take when this production is reduced or null to set this 214 | // production to have no action. 215 | */ 216 | void GrammarProduction::set_action( const GrammarAction* action ) 217 | { 218 | action_ = action; 219 | } 220 | 221 | /** 222 | // Get the action taken when this production is reduced. 223 | // 224 | // @return 225 | // The action or null if this production doesn't have an action. 226 | */ 227 | const GrammarAction* GrammarProduction::action() const 228 | { 229 | return action_; 230 | } 231 | 232 | int GrammarProduction::action_index() const 233 | { 234 | return action_ ? 
action_->index() : GrammarAction::INVALID_INDEX; 235 | } 236 | 237 | const GrammarSymbol* GrammarProduction::precedence_symbol() const 238 | { 239 | return precedence_symbol_; 240 | } 241 | 242 | /** 243 | // Get the precedence of this production. 244 | // 245 | // @return 246 | // The precedence of this production. 247 | */ 248 | int GrammarProduction::precedence() const 249 | { 250 | return precedence_symbol_ ? precedence_symbol_->precedence() : 0; 251 | } 252 | 253 | /** 254 | // Set the symbol that this production gets its precedence from. 255 | // 256 | // @param symbol 257 | // The symbol to have this production inherit its precedence from (assumed 258 | // not null). 259 | */ 260 | void GrammarProduction::set_precedence_symbol( const GrammarSymbol* symbol ) 261 | { 262 | LALR_ASSERT( symbol ); 263 | LALR_ASSERT( !precedence_symbol_ ); 264 | precedence_symbol_ = symbol; 265 | } 266 | 267 | /** 268 | // Replace references to \e to_symbol in this production by \e with_symbol. 269 | // 270 | // @param to_symbol 271 | // The symbol to replace references to. 272 | // 273 | // @param with_symbol 274 | // The symbol to replace the references with. 
275 | */ 276 | void GrammarProduction::replace_references_to_symbol( GrammarSymbol* to_symbol, GrammarSymbol* with_symbol ) 277 | { 278 | if ( symbol_ == to_symbol ) 279 | { 280 | symbol_ = with_symbol; 281 | } 282 | 283 | if ( precedence_symbol_ == to_symbol ) 284 | { 285 | precedence_symbol_ = with_symbol; 286 | } 287 | 288 | using std::vector; 289 | for ( vector::iterator i = symbols_.begin(); i != symbols_.end(); ++i ) 290 | { 291 | if ( *i == to_symbol ) 292 | { 293 | *i = with_symbol; 294 | } 295 | } 296 | } 297 | 298 | } 299 | -------------------------------------------------------------------------------- /src/lalr/GrammarProductionLess.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | namespace lalr 4 | { 5 | 6 | class GrammarProduction; 7 | 8 | /** 9 | // Indirectly compare productions through pointers. 10 | */ 11 | class GrammarProductionLess 12 | { 13 | public: 14 | inline bool operator()( const GrammarProduction* lhs, const GrammarProduction* rhs ) const; 15 | }; 16 | 17 | } 18 | 19 | #include "GrammarProductionLess.ipp" 20 | -------------------------------------------------------------------------------- /src/lalr/GrammarProductionLess.ipp: -------------------------------------------------------------------------------- 1 | // 2 | // GrammarProductionLess.cpp 3 | // Copyright (c) Charles Baker. All rights reserved. 
4 | // 5 | #pragma once 6 | 7 | #include "GrammarProductionLess.hpp" 8 | #include "GrammarProduction.hpp" 9 | #include "assert.hpp" 10 | 11 | namespace lalr 12 | { 13 | 14 | bool GrammarProductionLess::operator()( const GrammarProduction* lhs, const GrammarProduction* rhs ) const 15 | { 16 | LALR_ASSERT( lhs->index() >= 0 ); 17 | LALR_ASSERT( rhs->index() >= 0 ); 18 | return lhs->index() < rhs->index(); 19 | } 20 | 21 | } 22 | -------------------------------------------------------------------------------- /src/lalr/GrammarState.cpp: -------------------------------------------------------------------------------- 1 | // 2 | // GrammarState.cpp 3 | // Copyright (c) Charles Baker. All rights reserved. 4 | // 5 | 6 | #include "GrammarState.hpp" 7 | #include "GrammarItem.hpp" 8 | #include "GrammarTransition.hpp" 9 | #include "GrammarSymbol.hpp" 10 | #include "GrammarProduction.hpp" 11 | #include "assert.hpp" 12 | #include 13 | 14 | using std::set; 15 | using std::string; 16 | using namespace lalr; 17 | 18 | /** 19 | // Constructor. 20 | */ 21 | GrammarState::GrammarState() 22 | : items_() 23 | , transitions_by_symbol_index_() 24 | , transitions_() 25 | , index_( INVALID_INDEX ) 26 | { 27 | } 28 | 29 | GrammarState::GrammarState( GrammarState&& state ) 30 | : items_() 31 | , transitions_by_symbol_index_() 32 | , transitions_() 33 | , index_( INVALID_INDEX ) 34 | { 35 | std::swap( items_, state.items_ ); 36 | std::swap( transitions_by_symbol_index_, state.transitions_by_symbol_index_ ); 37 | std::swap( transitions_, state.transitions_ ); 38 | std::swap( index_, state.index_ ); 39 | } 40 | 41 | GrammarState::GrammarState( const GrammarState& state ) 42 | : items_( state.items_ ) 43 | , transitions_by_symbol_index_( state.transitions_by_symbol_index_ ) 44 | , transitions_( state.transitions_ ) 45 | , index_( state.index_ ) 46 | { 47 | } 48 | 49 | /** 50 | // Find the item for *production* at *position* in this state. 
51 | // 52 | // @param production 53 | // The production to find the item for, assumed not null. 54 | // 55 | // @param position 56 | // The position to find the item for, assumed to be in the range [0, production->length()]. 57 | // 58 | // @return 59 | // The matching item (returned non-const even though this function is const) or null if no such item was found. 60 | */ 61 | GrammarItem* GrammarState::find_item( GrammarProduction* production, int position ) const 62 | { 63 | LALR_ASSERT( production ); 64 | LALR_ASSERT( position >= 0 && position <= production->length() ); 65 | set::iterator i = items_.find( GrammarItem(production, position) ); 66 | return i != items_.end() ? const_cast(&(*i)) : nullptr; 67 | } 68 | 69 | /** 70 | // Get the items in this state. 71 | // 72 | // @return 73 | // The items. 74 | */ 75 | const std::set& GrammarState::items() const 76 | { 77 | return items_; 78 | } 79 | 80 | /** 81 | // Find a transition on \e symbol from this state. 82 | // 83 | // @param symbol 84 | // The symbol to find a transition for (may be null). 85 | // 86 | // @return 87 | // The transition or null if \e symbol is null, its index is out of range, or there is no transition on \e symbol from this 88 | // state. 89 | */ 90 | const GrammarTransition* GrammarState::find_transition_by_symbol( const GrammarSymbol* symbol ) const 91 | { 92 | if ( symbol ) 93 | { 94 | int index = symbol->index(); 95 | if ( index >= 0 && index < int(transitions_by_symbol_index_.size()) ) 96 | { 97 | const GrammarTransition* transition = transitions_by_symbol_index_[index]; 98 | LALR_ASSERT( !transition || transition->symbol() == symbol ); 99 | return transition; 100 | } 101 | } 102 | return nullptr; 103 | } 104 | 105 | /** 106 | // Get the transitions from this state. 107 | // 108 | // @return 109 | // The transitions.
110 | */ 111 | const std::vector& GrammarState::transitions() const 112 | { 113 | return transitions_; 114 | } 115 | /** // Count the entries in the transitions of this state that are not null. // // @return // The number of non-null transitions. */ 116 | int GrammarState::count_valid_transitions() const 117 | { 118 | int valid_transitions = 0; 119 | for ( const GrammarTransition* transition : transitions_ ) 120 | { 121 | valid_transitions += transition != nullptr; 122 | } 123 | return valid_transitions; 124 | } 125 | 126 | /** 127 | // Get the index of this state. 128 | // 129 | // @return 130 | // The index of this state. 131 | */ 132 | int GrammarState::index() const 133 | { 134 | return index_; 135 | } 136 | 137 | /** 138 | // Less than operator. 139 | // 140 | // @param state 141 | // The state to compare this state with. 142 | // 143 | // @return 144 | // True if the items in this state are lexicographically less than the items in \e state. 145 | */ 146 | bool GrammarState::operator<( const GrammarState& state ) const 147 | { 148 | return std::lexicographical_compare( items_.begin(), items_.end(), state.items_.begin(), state.items_.end() ); 149 | } 150 | 151 | /** 152 | // Add an item to this state. 153 | // 154 | // @param production 155 | // The production of the item to add (assumed not null). 156 | // 157 | // @param position 158 | // The position of the dot in the item to add. 159 | // 160 | // @return 161 | // The number of items added (0 or 1). 162 | */ 163 | int GrammarState::add_item( GrammarProduction* production, int position ) 164 | { 165 | LALR_ASSERT( production ); 166 | return items_.insert( GrammarItem(production, position) ).second ? 1 : 0; 167 | } 168 | 169 | /** 170 | // Add the shift transition \e transition to this state. 171 | // 172 | // @param transition 173 | // The transition to add (assumed not null and to be a shift). 174 | // 175 | // There is assumed to be no existing transition from this state on the 176 | // symbol of \e transition.
177 | */ 178 | void GrammarState::add_shift_transition( GrammarTransition* transition ) 179 | { 180 | LALR_ASSERT( transition ); 181 | LALR_ASSERT( transition->is_shift() ); 182 | LALR_ASSERT( !find_transition_by_symbol(transition->symbol()) ); 183 | add_transition( transition ); 184 | } 185 | 186 | /** 187 | // Add the reduce transition \e transition to this state. 188 | // 189 | // If a transition on the same symbol already exists it is overridden in 190 | // place to reduce by the production of \e transition; otherwise \e transition is added. 191 | // 192 | // @param transition 193 | // The transition to add (assumed not null and to be a reduction). 194 | */ 195 | void GrammarState::add_reduce_transition( GrammarTransition* transition ) 196 | { 197 | LALR_ASSERT( transition ); 198 | LALR_ASSERT( transition->is_reduce() ); 199 | GrammarTransition* existing_transition = find_transition_by_symbol( transition->symbol() ); 200 | if ( existing_transition ) 201 | { 202 | existing_transition->override_shift_to_reduce( transition->production() ); 203 | } 204 | else 205 | { 206 | add_transition( transition ); 207 | } 208 | } 209 | 210 | /** 211 | // Find a transition on \e symbol from this state. 212 | // 213 | // @param symbol 214 | // The symbol to find a transition for (may be null). 215 | // 216 | // @return 217 | // The transition or null if \e symbol is null, its index is out of range, or there is no transition on \e symbol from this 218 | // state. 219 | */ 220 | GrammarTransition* GrammarState::find_transition_by_symbol( const GrammarSymbol* symbol ) 221 | { 222 | if ( symbol ) 223 | { 224 | int index = symbol->index(); 225 | if ( index >= 0 && index < int(transitions_by_symbol_index_.size()) ) 226 | { 227 | GrammarTransition* transition = transitions_by_symbol_index_[index]; 228 | LALR_ASSERT( !transition || transition->symbol() == symbol ); 229 | return transition; 230 | } 231 | } 232 | return nullptr; 233 | } 234 | 235 | /** 236 | // Set the index of this state. 237 | // 238 | // @param index 239 | // The value to set the index of this state to.
240 | */ 241 | void GrammarState::set_index( int index ) 242 | { 243 | index_ = index; 244 | } 245 | /** // Record \e transition both in the table of transitions by symbol index, // growing that table with nulls as needed, and in the list of transitions. // // @param transition // The transition to add (assumed not null, to have a symbol whose index is // zero or greater, and not to duplicate an existing transition on that symbol). */ 246 | void GrammarState::add_transition( GrammarTransition* transition ) 247 | { 248 | LALR_ASSERT( transition ); 249 | LALR_ASSERT( transition->symbol() ); 250 | int index = transition->symbol()->index(); 251 | if ( index >= int(transitions_by_symbol_index_.size()) ) 252 | { 253 | int transitions_to_insert = index - int(transitions_by_symbol_index_.size()) + 1; 254 | transitions_by_symbol_index_.insert( transitions_by_symbol_index_.end(), transitions_to_insert, nullptr ); 255 | } 256 | LALR_ASSERT( index >= 0 && index < int(transitions_by_symbol_index_.size()) ); 257 | LALR_ASSERT( !transitions_by_symbol_index_[index] ); 258 | transitions_by_symbol_index_[index] = transition; 259 | transitions_.push_back( transition ); 260 | } 261 | -------------------------------------------------------------------------------- /src/lalr/GrammarState.hpp: -------------------------------------------------------------------------------- 1 | #ifndef LALR_GRAMMARSTATE_HPP_INCLUDED 2 | #define LALR_GRAMMARSTATE_HPP_INCLUDED 3 | 4 | #include "GrammarItem.hpp" 5 | #include "GrammarTransition.hpp" 6 | #include 7 | #include 8 | 9 | namespace lalr 10 | { 11 | 12 | class GrammarSymbol; 13 | class GrammarProduction; 14 | 15 | /** 16 | // A state in a parser's state machine. 17 | */ 18 | class GrammarState 19 | { 20 | std::set items_; ///< The items that define the positions within the grammar that this state represents. 21 | std::vector transitions_by_symbol_index_; ///< Transitions from this state by symbol index (null where there is no transition). 22 | std::vector transitions_; ///< Transitions from this state in the order they were added. 23 | int index_; ///< The index of this state.
24 | 25 | public: 26 | GrammarState(); 27 | GrammarState( GrammarState&& state ); 28 | GrammarState( const GrammarState& state ); 29 | 30 | GrammarItem* find_item( GrammarProduction* production, int position ) const; 31 | const std::set& items() const; 32 | const GrammarTransition* find_transition_by_symbol( const GrammarSymbol* symbol ) const; 33 | const std::vector& transitions() const; 34 | int count_valid_transitions() const; 35 | std::string label() const; 36 | int index() const; 37 | bool operator<( const GrammarState& state ) const; 38 | 39 | int add_item( GrammarProduction* production, int position ); 40 | void add_shift_transition( GrammarTransition* transition ); 41 | void add_reduce_transition( GrammarTransition* transition ); 42 | GrammarTransition* find_transition_by_symbol( const GrammarSymbol* symbol ); 43 | void set_index( int index ); 44 | 45 | static const int INVALID_INDEX = -1; 46 | 47 | private: 48 | void add_transition( GrammarTransition* transition ); 49 | 50 | GrammarState& operator=( GrammarState&& state ) = delete; 51 | GrammarState& operator=( const GrammarState& state ) = delete; 52 | }; 53 | 54 | } 55 | 56 | #endif 57 | -------------------------------------------------------------------------------- /src/lalr/GrammarStateLess.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "GrammarState.hpp" 4 | #include 5 | 6 | namespace lalr 7 | { 8 | 9 | class GrammarState; 10 | 11 | /** 12 | // Indirectly compare states through two shared pointers.
13 | */ 14 | class GrammarStateLess 15 | { 16 | public: 17 | inline bool operator()( const std::shared_ptr& lhs, const std::shared_ptr& rhs ) const; 18 | }; 19 | 20 | } 21 | 22 | #include "GrammarStateLess.ipp" 23 | -------------------------------------------------------------------------------- /src/lalr/GrammarStateLess.ipp: -------------------------------------------------------------------------------- 1 | // 2 | // GrammarStateLess.ipp 3 | // Copyright (c) Charles Baker. All rights reserved. 4 | // 5 | #pragma once 6 | 7 | #include "GrammarStateLess.hpp" 8 | #include "GrammarState.hpp" 9 | 10 | namespace lalr 11 | { 12 | /** // Compare the states pointed to by \e lhs and \e rhs with GrammarState's less // than operator; both pointers are dereferenced without a null check. */ 13 | bool GrammarStateLess::operator()( const std::shared_ptr& lhs, const std::shared_ptr& rhs ) const 14 | { 15 | return *lhs < *rhs; 16 | } 17 | 18 | } 19 | -------------------------------------------------------------------------------- /src/lalr/GrammarSymbol.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "GrammarSymbolSet.hpp" 4 | #include "SymbolType.hpp" 5 | #include "LexemeType.hpp" 6 | #include "Associativity.hpp" 7 | #include 8 | #include 9 | #include 10 | #include 11 | 12 | namespace lalr 13 | { 14 | 15 | class GrammarProduction; 16 | 17 | class GrammarSymbol 18 | { 19 | std::string lexeme_; ///< The lexeme of this symbol. 20 | std::string identifier_; ///< The identifier for this symbol (generated from its lexeme). 21 | SymbolType symbol_type_; ///< The type of this symbol (terminal, non-terminal, or end). 22 | LexemeType lexeme_type_; ///< The type of this symbol's lexeme (regular expression or literal). 23 | Associativity associativity_; ///< The associativity of this symbol. 24 | int precedence_; ///< The precedence of this symbol. 25 | int line_; ///< The line that this symbol is defined on. 26 | int column_; ///< The column that this symbol is defined on. 27 | int index_; ///< The index of this symbol among all symbols.
28 | bool nullable_; ///< True if this symbol is nullable otherwise false. 29 | bool referenced_in_precedence_directive_; ///< True if this symbol is referenced by a %precedence directive. 30 | GrammarSymbolSet first_; ///< The symbols that can start this symbol in a production or regular expression. 31 | GrammarSymbolSet follow_; ///< The symbols that can follow this symbol in a production or regular expression. 32 | std::vector productions_; ///< The productions that reduce to this symbol. 33 | std::multimap reachable_productions_by_first_symbol_; ///< The productions reachable by right-most derivation from this symbol by their first symbol. 34 | 35 | public: 36 | GrammarSymbol( const char* lexeme ); 37 | 38 | inline const std::string& lexeme() const; 39 | inline const std::string& identifier() const; 40 | inline SymbolType symbol_type() const; 41 | inline LexemeType lexeme_type() const; 42 | inline bool literal() const; 43 | inline Associativity associativity() const; 44 | inline int precedence() const; 45 | inline int line() const; 46 | inline int column() const; 47 | inline int index() const; 48 | inline bool nullable() const; 49 | inline bool referenced_in_precedence_directive() const; 50 | inline const GrammarSymbolSet& first() const; 51 | inline const GrammarSymbolSet& follow() const; 52 | inline const std::vector& productions() const; 53 | inline const std::multimap& reachable_productions_by_first_symbol() const; 54 | std::multimap::const_iterator find_reachable_productions( const GrammarSymbol& first_symbol ) const; 55 | GrammarSymbol* implicit_terminal() const; 56 | bool matches( const char* lexeme, SymbolType symbol_type ) const; 57 | 58 | void set_lexeme( const std::string& lexeme ); 59 | void set_identifier( const std::string& identifier ); 60 | void set_symbol_type( SymbolType symbol_type ); 61 | void set_lexeme_type( LexemeType lexeme_type ); 62 | void set_associativity( Associativity associativity ); 63 | void set_precedence( int precedence ); 64 |
void set_line( int line ); 65 | void set_column( int column ); 66 | void set_index( int index ); 67 | void set_nullable( bool nullable ); 68 | void set_referenced_in_precedence_directive( bool referenced_in_precedence_directive ); 69 | void append_production( GrammarProduction* production ); 70 | void append_reachable_production( GrammarProduction* production ); 71 | void calculate_identifier(); 72 | void replace_by_non_terminal( const GrammarSymbol* non_terminal_symbol ); 73 | int add_symbol_to_first( const GrammarSymbol* symbol ); 74 | int add_symbols_to_first( const GrammarSymbolSet& symbols ); 75 | int add_symbol_to_follow( const GrammarSymbol* symbol ); 76 | int add_symbols_to_follow( const GrammarSymbolSet& symbols ); 77 | int calculate_first(); 78 | int calculate_follow(); 79 | }; 80 | 81 | } 82 | 83 | #include "GrammarSymbol.ipp" 84 | -------------------------------------------------------------------------------- /src/lalr/GrammarSymbol.ipp: -------------------------------------------------------------------------------- 1 | // 2 | // GrammarSymbol.ipp 3 | // Copyright (c) Charles Baker. All rights reserved.
4 | // 5 | #pragma once 6 | 7 | #include "GrammarSymbol.hpp" 8 | #include "GrammarProduction.hpp" 9 | #include "assert.hpp" 10 | #include 11 | 12 | namespace lalr 13 | { 14 | /** // Accessors for GrammarSymbol. // // Each accessor below returns the member of the same name unchanged, except // literal() which returns true when the lexeme type is LEXEME_LITERAL. */ 15 | const std::string& GrammarSymbol::lexeme() const 16 | { 17 | return lexeme_; 18 | } 19 | 20 | const std::string& GrammarSymbol::identifier() const 21 | { 22 | return identifier_; 23 | } 24 | 25 | SymbolType GrammarSymbol::symbol_type() const 26 | { 27 | return symbol_type_; 28 | } 29 | 30 | LexemeType GrammarSymbol::lexeme_type() const 31 | { 32 | return lexeme_type_; 33 | } 34 | 35 | bool GrammarSymbol::literal() const 36 | { 37 | return lexeme_type_ == LEXEME_LITERAL; 38 | } 39 | 40 | Associativity GrammarSymbol::associativity() const 41 | { 42 | return associativity_; 43 | } 44 | 45 | int GrammarSymbol::precedence() const 46 | { 47 | return precedence_; 48 | } 49 | 50 | int GrammarSymbol::line() const 51 | { 52 | return line_; 53 | } 54 | 55 | int GrammarSymbol::column() const 56 | { 57 | return column_; 58 | } 59 | 60 | int GrammarSymbol::index() const 61 | { 62 | return index_; 63 | } 64 | 65 | bool GrammarSymbol::nullable() const 66 | { 67 | return nullable_; 68 | } 69 | 70 | bool GrammarSymbol::referenced_in_precedence_directive() const 71 | { 72 | return referenced_in_precedence_directive_; 73 | } 74 | 75 | const GrammarSymbolSet& GrammarSymbol::first() const 76 | { 77 | return first_; 78 | } 79 | 80 | const GrammarSymbolSet& GrammarSymbol::follow() const 81 | { 82 | return follow_; 83 | } 84 | 85 | const std::vector& GrammarSymbol::productions() const 86 | { 87 | return productions_; 88 | } 89 | 90 | const std::multimap& GrammarSymbol::reachable_productions_by_first_symbol() const 91 | { 92 | return reachable_productions_by_first_symbol_; 93 | } 94 | 95 | } 96 | -------------------------------------------------------------------------------- /src/lalr/GrammarSymbolSet.cpp: -------------------------------------------------------------------------------- 1 | // 2 | //
GrammarSymbolSet.cpp 3 | // Copyright (c) Charles Baker. All rights reserved. 4 | // 5 | 6 | #include "GrammarSymbolSet.hpp" 7 | #include "GrammarSymbol.hpp" 8 | #include "assert.hpp" 9 | #include 10 | #include 11 | 12 | using std::min; 13 | using std::max; 14 | using std::vector; 15 | using std::numeric_limits; 16 | using namespace lalr; 17 | 18 | static const size_t ONE = 1; 19 | static const size_t BITS_PER_ELEMENT = sizeof(size_t) * 8; 20 | /** // Construct an empty set with storage for \e symbols bits. // // The word count is rounded up so that every index in [0, symbols) has // storage from the start. An empty set is marked by a minimum word that // is greater than its maximum word. // // @param symbols // The number of symbol indices to allocate storage for. */ 21 | GrammarSymbolSet::GrammarSymbolSet( size_t symbols ) 22 | : set_() 23 | , minimum_( numeric_limits::max() ) 24 | , maximum_( numeric_limits::min() ) 25 | { 26 | set_.resize( symbols / BITS_PER_ELEMENT + (symbols % BITS_PER_ELEMENT != 0 ? 1 : 0) ); 27 | } 28 | 29 | GrammarSymbolSet::GrammarSymbolSet( GrammarSymbolSet&& set ) 30 | : set_( std::move(set.set_) ) 31 | , minimum_( std::exchange(set.minimum_, numeric_limits::max()) ) 32 | , maximum_( std::exchange(set.maximum_, numeric_limits::min()) ) 33 | { 34 | } 35 | 36 | GrammarSymbolSet::GrammarSymbolSet( const GrammarSymbolSet& set ) 37 | : set_( set.set_ ) 38 | , minimum_( set.minimum_ ) 39 | , maximum_( set.maximum_ ) 40 | { 41 | } 42 | 43 | GrammarSymbolSet& GrammarSymbolSet::operator=( GrammarSymbolSet&& set ) 44 | { 45 | if ( this != &set ) 46 | { 47 | std::swap( set_, set.set_ ); 48 | std::swap( minimum_, set.minimum_ ); 49 | std::swap( maximum_, set.maximum_ ); 50 | } 51 | return *this; 52 | } 53 | 54 | GrammarSymbolSet& GrammarSymbolSet::operator=( const GrammarSymbolSet& set ) 55 | { 56 | if ( this != &set ) 57 | { 58 | set_ = set.set_; 59 | minimum_ = set.minimum_; 60 | maximum_ = set.maximum_; 61 | } 62 | return *this; 63 | } 64 | /** // Get the symbol index of the first bit of the lowest word recorded as // containing a symbol; this is a word-granular lower bound, not the index // of an actual member. */ 65 | int GrammarSymbolSet::minimum_index() const 66 | { 67 | return int(minimum_ * BITS_PER_ELEMENT); 68 | } 69 | /** // Get one past the symbol index of the last bit of the highest word // recorded as containing a symbol (a word-granular upper bound). */ 70 | int GrammarSymbolSet::maximum_index() const 71 | { 72 | return int((maximum_ + 1) * BITS_PER_ELEMENT); 73 | } 74 | /** // Is the bit for \e symbol_index set? // // @return // True if \e symbol_index falls in a word between the recorded minimum // and maximum words and its bit is set, otherwise false. */ 75 | bool GrammarSymbolSet::contains( int symbol_index ) const 76 | { 77 | size_t index = symbol_index / BITS_PER_ELEMENT; 78 | if ( index >=
minimum_ && index <= maximum_ ) 79 | { 80 | size_t mask = ONE << (symbol_index % BITS_PER_ELEMENT); 81 | return (set_[index] & mask) != 0; 82 | } 83 | return false; 84 | } 85 | /** // Add \e symbol to this set, growing the storage if its index lies beyond it. // // @param symbol // The symbol to add (null is ignored). // // @return // True if the symbol was not already in this set, otherwise false. */ 86 | bool GrammarSymbolSet::insert( const GrammarSymbol* symbol ) 87 | { 88 | if ( symbol ) 89 | { 90 | size_t index = symbol->index() / BITS_PER_ELEMENT; 91 | if ( index >= set_.size() ) 92 | { 93 | set_.insert( set_.end(), index - set_.size() + 1, 0 ); 94 | } 95 | size_t mask = ONE << (symbol->index() % BITS_PER_ELEMENT); 96 | if ( !(set_[index] & mask) ) 97 | { 98 | set_[index] |= mask; 99 | minimum_ = min( minimum_, index ); 100 | maximum_ = max( maximum_, index ); 101 | return true; 102 | } 103 | } 104 | return false; 105 | } 106 | /** // Merge the symbols in \e set into this set. // // @param set // The set to merge into this set. // // @return // The number of storage words that changed, which is not the number of // symbols added; it is non-zero exactly when at least one symbol was added. */ 107 | int GrammarSymbolSet::insert( const GrammarSymbolSet& set ) 108 | { 109 | if ( set_.size() < set.set_.size() ) 110 | { 111 | set_.insert( set_.end(), set.set_.size() - set_.size(), 0 ); 112 | } 113 | 114 | int added = 0; 115 | for ( size_t i = set.minimum_; i < set.maximum_ + 1; ++i ) 116 | { 117 | size_t mask = set_[i]; 118 | size_t new_mask = mask | set.set_[i]; 119 | if ( mask != new_mask ) 120 | { 121 | set_[i] = new_mask; 122 | ++added; 123 | } 124 | } 125 | minimum_ = min( minimum_, set.minimum_ ); 126 | maximum_ = max( maximum_, set.maximum_ ); 127 | return added; 128 | } 129 | -------------------------------------------------------------------------------- /src/lalr/GrammarSymbolSet.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | #include 6 | 7 | namespace lalr 8 | { 9 | 10 | class GrammarSymbol; 11 | 12 | class GrammarSymbolSet 13 | { 14 | std::vector set_; ///< Storage words holding one bit per symbol index. 15 | size_t minimum_; ///< The lowest word of set_ that has had a bit set. 16 | size_t maximum_; ///< The highest word of set_ that has had a bit set. 17 | 18 | public: 19 | GrammarSymbolSet( size_t symbols ); 20 | GrammarSymbolSet( GrammarSymbolSet&& set ); 21 | GrammarSymbolSet( const GrammarSymbolSet& set ); 22 | GrammarSymbolSet& operator=( GrammarSymbolSet&& set ); 23 | GrammarSymbolSet& operator=( const
GrammarSymbolSet& set ); 24 | 25 | int minimum_index() const; 26 | int maximum_index() const; 27 | bool contains( int symbol_index ) const; 28 | bool insert( const GrammarSymbol* symbol ); 29 | int insert( const GrammarSymbolSet& set ); 30 | }; 31 | 32 | } 33 | -------------------------------------------------------------------------------- /src/lalr/GrammarTransition.cpp: -------------------------------------------------------------------------------- 1 | // 2 | // GrammarTransition.cpp 3 | // Copyright (c) Charles Baker. All rights reserved. 4 | // 5 | 6 | #include "GrammarTransition.hpp" 7 | #include "GrammarProduction.hpp" 8 | #include "GrammarSymbol.hpp" 9 | #include "GrammarAction.hpp" 10 | #include "assert.hpp" 11 | #include 12 | 13 | using namespace lalr; 14 | 15 | /** 16 | // Construct a reduce transition. 17 | // 18 | // @param symbol 19 | // The symbol that this transition is made on (assumed not null). 20 | // 21 | // @param production 22 | // The GrammarProduction that this transition reduces (assumed not null). 23 | */ 24 | GrammarTransition::GrammarTransition( const GrammarSymbol* symbol, const GrammarProduction* production ) 25 | : symbol_( symbol ) 26 | , state_( nullptr ) 27 | , production_( production ) 28 | { 29 | LALR_ASSERT( symbol_ ); 30 | LALR_ASSERT( production_ ); 31 | } 32 | 33 | /** 34 | // Construct a shift transition. 35 | // 36 | // @param symbol 37 | // The symbol that this transition is made on (assumed not null). 38 | // 39 | // @param state 40 | // The state that this transition is to (assumed not null). 41 | */ 42 | GrammarTransition::GrammarTransition( const GrammarSymbol* symbol, GrammarState* state ) 43 | : symbol_( symbol ) 44 | , state_( state ) 45 | , production_( nullptr ) 46 | { 47 | LALR_ASSERT( symbol_ ); 48 | LALR_ASSERT( state_ ); 49 | } 50 | 51 | /** 52 | // Get the state that this transition is to. 53 | // 54 | // @return 55 | // The state or null if this transition is a reduction.
56 | */ 57 | GrammarState* GrammarTransition::state() const 58 | { 59 | return state_; 60 | } 61 | 62 | /** 63 | // Get the production that this transition reduces. 64 | // 65 | // @return 66 | // The reduced production or null if this transition is a shift. 67 | */ 68 | const GrammarProduction* GrammarTransition::production() const 69 | { 70 | return production_; 71 | } 72 | /** // Is this transition a shift (does it have a state to transition to)? */ 73 | bool GrammarTransition::is_shift() const 74 | { 75 | return state_ != nullptr; 76 | } 77 | /** // Is this transition a reduction (does it have a production to reduce)? */ 78 | bool GrammarTransition::is_reduce() const 79 | { 80 | return production_ != nullptr; 81 | } 82 | /** // Get the symbol of the reduced production or null if there is no production. */ 83 | const GrammarSymbol* GrammarTransition::reduced_symbol() const 84 | { 85 | return production_ ? production_->symbol() : nullptr; 86 | } 87 | /** // Get the length of the reduced production or 0 if there is no production. */ 88 | int GrammarTransition::reduced_length() const 89 | { 90 | return production_ ? production_->length() : 0; 91 | } 92 | /** // Get the precedence of the reduced production or 0 if there is no production. */ 93 | int GrammarTransition::precedence() const 94 | { 95 | return production_ ? production_->precedence() : 0; 96 | } 97 | /** // Get the action index of the reduced production or 0 if there is no production. */ 98 | int GrammarTransition::action() const 99 | { 100 | return production_ ? production_->action_index() : 0; 101 | } 102 | 103 | /** 104 | // Is this transition made on \e symbol? 105 | // 106 | // @param symbol 107 | // The symbol to check this transition being made on. 108 | // 109 | // @return 110 | // True if this transition is taken on \e symbol otherwise false. 111 | */ 112 | bool GrammarTransition::taken_on_symbol( const GrammarSymbol* symbol ) const 113 | { 114 | return symbol_ == symbol; 115 | } 116 | 117 | /** 118 | // Get the symbol that this transition is on. 119 | // 120 | // @return 121 | // The symbol. 122 | */ 123 | const GrammarSymbol* GrammarTransition::symbol() const 124 | { 125 | return symbol_; 126 | } 127 | 128 | /** 129 | // Change this transition from a shift into a reduction. 130 | // 131 | // @param production 132 | // The production to reduce by when this transition is taken (assumed not null).
133 | */ 134 | void GrammarTransition::override_shift_to_reduce( const GrammarProduction* production ) const 135 | { 136 | LALR_ASSERT( state_ ); 137 | LALR_ASSERT( !production_ ); 138 | LALR_ASSERT( production ); 139 | state_ = nullptr; 140 | production_ = production; 141 | } 142 | 143 | /** 144 | // Change this transition from being a reduce transition for one production 145 | // into a reduce transition for a different production. 146 | // 147 | // @param production 148 | // The production to reduce by when this transition is taken (assumed not null). 149 | */ 150 | void GrammarTransition::override_reduce_to_reduce( const GrammarProduction* production ) const 151 | { 152 | LALR_ASSERT( !state_ ); 153 | LALR_ASSERT( production ); 154 | production_ = production; 155 | } 156 | -------------------------------------------------------------------------------- /src/lalr/GrammarTransition.hpp: -------------------------------------------------------------------------------- 1 | #ifndef LALR_GRAMMARTRANSITION_HPP_INCLUDED 2 | #define LALR_GRAMMARTRANSITION_HPP_INCLUDED 3 | 4 | #include "TransitionType.hpp" 5 | #include 6 | #include 7 | 8 | namespace lalr 9 | { 10 | 11 | class GrammarState; 12 | class GrammarSymbol; 13 | class GrammarProduction; 14 | 15 | /** 16 | // A transition in a parser's state machine. 17 | */ 18 | class GrammarTransition 19 | { 20 | const GrammarSymbol* symbol_; ///< The symbol that the transition is taken on. 21 | mutable GrammarState* state_; ///< The state that is transitioned to or null for reductions.
22 | mutable const GrammarProduction* production_; ///< The production that is reduced or null for shifts. 23 | 24 | public: 25 | GrammarTransition( const GrammarSymbol* symbol, const GrammarProduction* production ); 26 | GrammarTransition( const GrammarSymbol* symbol, GrammarState* state ); 27 | GrammarState* state() const; 28 | const GrammarProduction* production() const; 29 | bool is_shift() const; 30 | bool is_reduce() const; 31 | const GrammarSymbol* reduced_symbol() const; 32 | int reduced_length() const; 33 | int precedence() const; 34 | int action() const; 35 | bool taken_on_symbol( const GrammarSymbol* symbol ) const; 36 | const GrammarSymbol* symbol() const; 37 | 38 | void override_shift_to_reduce( const GrammarProduction* production ) const; 39 | void override_reduce_to_reduce( const GrammarProduction* production ) const; 40 | }; 41 | 42 | } 43 | 44 | #endif 45 | -------------------------------------------------------------------------------- /src/lalr/LexemeType.hpp: -------------------------------------------------------------------------------- 1 | #ifndef LALR_LEXEMETYPE_HPP_INCLUDED 2 | #define LALR_LEXEMETYPE_HPP_INCLUDED 3 | 4 | namespace lalr 5 | { 6 | 7 | enum LexemeType 8 | { 9 | LEXEME_NULL, ///< Null lexeme type. 10 | LEXEME_LITERAL, ///< Literal. 11 | LEXEME_REGULAR_EXPRESSION, ///< Regular expression. 12 | LEXEME_TYPE_COUNT ///< The number of lexeme types. 13 | }; 14 | 15 | } 16 | 17 | #endif 18 | -------------------------------------------------------------------------------- /src/lalr/Lexer.hpp: -------------------------------------------------------------------------------- 1 | #ifndef LALR_LEXER_HPP_INCLUDED 2 | #define LALR_LEXER_HPP_INCLUDED 3 | 4 | #include "PositionIterator.hpp" 5 | #include 6 | #include 7 | 8 | namespace lalr 9 | { 10 | 11 | class ErrorPolicy; 12 | class LexerAction; 13 | class LexerTransition; 14 | class LexerState; 15 | class LexerStateMachine; 16 | 17 | /** 18 | // A lexical analyzer.
19 | */ 20 | template ::value_type, class Traits = typename std::char_traits, class Allocator = typename std::allocator > 21 | class Lexer 22 | { 23 | typedef std::function (const PositionIterator& begin, const PositionIterator& end, std::basic_string* lexeme, const void** symbol)> LexerActionFunction; 24 | 25 | struct LexerActionHandler 26 | { 27 | const LexerAction* action_; 28 | LexerActionFunction function_; 29 | LexerActionHandler( const LexerAction* action, LexerActionFunction function ); 30 | }; 31 | 32 | const LexerStateMachine* state_machine_; ///< The state machine for this lexer. 33 | const LexerStateMachine* whitespace_state_machine_; ///< The whitespace state machine for this lexer. 34 | const void* end_symbol_; ///< The value to return to indicate that the end of the input has been reached. 35 | ErrorPolicy* error_policy_; ///< The error policy this lexer uses to report errors and debug information. 36 | std::vector action_handlers_; ///< The action handlers for this Lexer. 37 | std::vector whitespace_action_handlers_; ///< The whitespace action handlers for this Lexer. 38 | PositionIterator position_; ///< The current position of this Lexer in its input sequence. 39 | Iterator end_; ///< One past the last position of the input sequence for this Lexer. 40 | std::basic_string lexeme_; ///< The most recently matched lexeme. 41 | int line_; ///< The line number at the start of the most recently matched lexeme. 42 | int column_; ///< The column number at the start of the most recently matched lexeme. 43 | const void* symbol_; ///< The most recently matched symbol or null if no symbol has been matched. 44 | bool full_; ///< True when this Lexer scanned all of its input otherwise false. 45 | PositionIterator matched_position_; ///< The position at the end of the most recent match. 46 | std::basic_string matched_lexeme_; ///< The lexeme at the most recent match. 47 | const void* matched_symbol_; ///< The symbol at the most recent match.
48 | 49 | public: 50 | Lexer( const LexerStateMachine* state_machine, const LexerStateMachine* whitespace_state_machine = nullptr, const void* end_symbol = nullptr, ErrorPolicy* error_policy = nullptr ); 51 | const std::basic_string& lexeme() const; 52 | int line() const; 53 | int column() const; 54 | const void* symbol() const; 55 | const Iterator& position() const; 56 | bool full() const; 57 | bool valid() const; 58 | void set_action_handler( const char* identifier, LexerActionFunction function ); 59 | void reset( Iterator start, Iterator finish ); 60 | void advance(); 61 | 62 | private: 63 | void skip(); 64 | const void* run(); 65 | void error(); 66 | void fire_error( int line, int column, int error, const char* format, ... ) const; 67 | const LexerTransition* find_transition_by_character( const LexerState* state, int character ) const; 68 | }; 69 | 70 | } 71 | 72 | #include "Lexer.ipp" 73 | 74 | #endif 75 | -------------------------------------------------------------------------------- /src/lalr/LexerAction.hpp: -------------------------------------------------------------------------------- 1 | #ifndef LALR_LEXERACTION_HPP_INCLUDED 2 | #define LALR_LEXERACTION_HPP_INCLUDED 3 | 4 | namespace lalr 5 | { 6 | 7 | /** 8 | // An action that is attached to a lexical analyzer. 9 | */ 10 | class LexerAction 11 | { 12 | public: 13 | static const int INVALID_INDEX = -1; 14 | int index; ///< The index of this action. 15 | const char* identifier; ///< The identifier of this action. 16 | }; 17 | 18 | } 19 | 20 | #endif 21 | -------------------------------------------------------------------------------- /src/lalr/LexerState.hpp: -------------------------------------------------------------------------------- 1 | #ifndef LALR_LEXERSTATE_HPP_INCLUDED 2 | #define LALR_LEXERSTATE_HPP_INCLUDED 3 | 4 | namespace lalr 5 | { 6 | 7 | class LexerTransition; 8 | 9 | /** 10 | // A state in a lexical analyzer's state machine.
11 | */ 12 | class LexerState 13 | { 14 | public: 15 | int index; ///< Index of this state. 16 | int length; ///< Number of transitions from this state. 17 | const LexerTransition* transitions; ///< Transitions from this state (presumably an array of length entries; confirm against the generator). 18 | const void* symbol; ///< The symbol that this state recognizes or null if this state doesn't recognize a symbol. 19 | }; 20 | 21 | } 22 | 23 | #endif 24 | -------------------------------------------------------------------------------- /src/lalr/LexerStateMachine.hpp: -------------------------------------------------------------------------------- 1 | #ifndef LALR_LEXERSTATEMACHINE_HPP_INCLUDED 2 | #define LALR_LEXERSTATEMACHINE_HPP_INCLUDED 3 | 4 | #include 5 | 6 | namespace lalr 7 | { 8 | 9 | class LexerErrorPolicy; 10 | class LexerAction; 11 | class LexerTransition; 12 | class LexerState; 13 | 14 | /** 15 | // The data that defines the state machine for a lexical analyzer. 16 | // // All members are public data: three sizes, pointers to the actions, // transitions, and states, and a pointer to the start state. 17 | */ 18 | class LexerStateMachine 19 | { 20 | public: 21 | int actions_size; ///< The number of actions. 22 | int transitions_size; ///< The total number of transitions. 23 | int states_size; ///< The number of states. 24 | const LexerAction* actions; ///< The actions in this lexer. 25 | const LexerTransition* transitions; ///< The transitions in this lexer (for all states). 26 | const LexerState* states; ///< The states in this lexer. 27 | const LexerState* start_state; ///< The starting state in this lexer. 28 | }; 29 | 30 | } 31 | 32 | #endif 33 | -------------------------------------------------------------------------------- /src/lalr/LexerTransition.hpp: -------------------------------------------------------------------------------- 1 | #ifndef LALR_LEXERTRANSITION_HPP_INCLUDED 2 | #define LALR_LEXERTRANSITION_HPP_INCLUDED 3 | 4 | namespace lalr 5 | { 6 | 7 | class LexerState; 8 | class LexerAction; 9 | 10 | /** 11 | // A transition in a lexical analyzer's state machine.
12 | */ 13 | class LexerTransition 14 | { 15 | public: 16 | int begin; ///< The first character that the transition can be made on. 17 | int end; ///< One past the last character that the transition can be made on. 18 | const LexerState* state; ///< The state that is transitioned to. 19 | const LexerAction* action; ///< The action that is taken on the transition or null if no action is taken. 20 | }; 21 | 22 | } 23 | 24 | #endif 25 | -------------------------------------------------------------------------------- /src/lalr/Parser.hpp: -------------------------------------------------------------------------------- 1 | #ifndef LALR_PARSER_HPP_INCLUDED 2 | #define LALR_PARSER_HPP_INCLUDED 3 | 4 | #include "ParserNode.hpp" 5 | #include "ParserUserData.hpp" 6 | #include "AddParserActionHandler.hpp" 7 | #include "AddLexerActionHandler.hpp" 8 | #include "Lexer.hpp" 9 | #include 10 | 11 | namespace error 12 | { 13 | 14 | class Error; 15 | 16 | } 17 | 18 | namespace lalr 19 | { 20 | 21 | class ErrorPolicy; 22 | class ParserAction; 23 | class ParserSymbol; 24 | class ParserTransition; 25 | class ParserState; 26 | class ParserStateMachine; 27 | class ErrorPolicy; 28 | 29 | /** 30 | // A %parser. 
31 | */ 32 | template ::value_type> >, class Char = typename std::iterator_traits::value_type, class Traits = typename std::char_traits, class Allocator = typename std::allocator > 33 | class Parser 34 | { 35 | public: 36 | typedef lalr::ParserNode ParserNode; 37 | typedef typename std::vector::const_iterator ParserNodeConstIterator; 38 | typedef std::function (const PositionIterator& begin, const PositionIterator& end, std::basic_string* lexeme, const void** symbol)> LexerActionFunction; 39 | typedef std::function ParserActionFunction; 40 | 41 | private: 42 | struct ParserActionHandler 43 | { 44 | const ParserAction* action_; 45 | ParserActionFunction function_; 46 | ParserActionHandler( const ParserAction* action, ParserActionFunction function ); 47 | }; 48 | 49 | const ParserStateMachine* state_machine_; ///< The data that defines the state machine used by this parser. 50 | ErrorPolicy* error_policy_; ///< The error policy this parser uses to report errors and debug information. 51 | std::vector nodes_; ///< The stack of nodes that store symbols that are shifted and reduced during parsing. 52 | std::vector user_data_; ///< User data stack matching the stack of nodes. 53 | Lexer lexer_; ///< The lexical analyzer used during parsing. 54 | std::vector action_handlers_; ///< The action handlers for parser actions taken during reduction. 55 | ParserActionFunction default_action_handler_; ///< The default action handler for reductions that don't specify any action. 56 | bool debug_enabled_; ///< True if shift and reduce operations should be printed otherwise false. 57 | bool accepted_; ///< True if the parser accepted its input otherwise false. 58 | bool full_; ///< True if the parser processed all of its input otherwise false. 
59 | 60 | public: 61 | Parser( const ParserStateMachine* state_machine, ErrorPolicy* error_policy = nullptr ); 62 | 63 | bool accepted() const; 64 | bool full() const; 65 | bool valid() const; 66 | const UserData& user_data() const; 67 | const Lexer& lexer() const; 68 | void fire_error(int line, int column, int error, const char* format, ... ) const; 69 | void fire_printf( const char* format, ... ) const; 70 | bool is_debug_enabled() const; 71 | 72 | AddParserActionHandler parser_action_handlers(); 73 | AddLexerActionHandler lexer_action_handlers(); 74 | void set_default_action_handler( ParserActionFunction function ); 75 | void set_action_handler( const char* identifier, ParserActionFunction function ); 76 | void set_lexer_action_handler( const char* identifier, LexerActionFunction function ); 77 | void set_debug_enabled( bool debug_enabled ); 78 | void reset(); 79 | void parse( Iterator start, Iterator finish ); 80 | bool parse( const void* symbol, const std::basic_string& lexeme, int line, int column ); 81 | bool parse( const ParserSymbol* symbol, const std::basic_string& lexeme, int line, int column ); 82 | 83 | private: 84 | const ParserTransition* find_transition( const ParserSymbol* symbol, const ParserState* state ) const; 85 | typename std::vector::iterator find_node_to_reduce_to( const ParserTransition* transition, std::vector& nodes ); 86 | void debug_shift( const ParserNode& node ) const; 87 | void debug_reduce( const ParserSymbol* reduced_symbol, std::ptrdiff_t start, std::ptrdiff_t finish ) const; 88 | UserData handle( const ParserTransition* transition, std::ptrdiff_t start, std::ptrdiff_t finish ) const; 89 | std::string expected_symbols( const ParserState* state ) const; 90 | void shift( const ParserTransition* transition, const std::basic_string& lexeme, int line, int column ); 91 | void reduce( const ParserTransition* transition, bool* accepted, bool* rejected ); 92 | void error( bool* accepted, bool* rejected, int line, int column ); 93 | }; 94 
| 95 | } 96 | 97 | #include "Parser.ipp" 98 | 99 | #endif 100 | -------------------------------------------------------------------------------- /src/lalr/ParserAction.hpp: -------------------------------------------------------------------------------- 1 | #ifndef LALR_PARSERACTION_HPP_INCLUDED 2 | #define LALR_PARSERACTION_HPP_INCLUDED 3 | 4 | #include 5 | 6 | namespace lalr 7 | { 8 | 9 | /** 10 | // An action that is attached to a %parser reduction. 11 | */ 12 | class ParserAction 13 | { 14 | public: 15 | static const int INVALID_INDEX = -1; 16 | int index; ///< The index of this action. 17 | const char* identifier; ///< The identifier of this action. 18 | }; 19 | 20 | } 21 | 22 | #endif 23 | -------------------------------------------------------------------------------- /src/lalr/ParserNode.hpp: -------------------------------------------------------------------------------- 1 | #ifndef LALR_PARSERNODE_HPP_INCLUDED 2 | #define LALR_PARSERNODE_HPP_INCLUDED 3 | 4 | #include 5 | #include 6 | #include 7 | 8 | namespace lalr 9 | { 10 | 11 | class ParserSymbol; 12 | class ParserState; 13 | 14 | /** 15 | // An element in the parser's stack when parsing. 16 | */ 17 | template , class Allocator = std::allocator> 18 | class ParserNode 19 | { 20 | const ParserState* state_; ///< The state at this node. 21 | const ParserSymbol* symbol_; ///< The symbol at this node. 22 | std::basic_string lexeme_; ///< The lexeme at this node (empty if this node's symbol is non-terminal). 23 | int line_; ///< The line number at the start of the lexeme at this node. 24 | int column_; ///< The column number at the start of the lexeme at this node. 
25 | 26 | public: 27 | ParserNode( const ParserState* state, const ParserSymbol* symbol, int line, int column ); 28 | ParserNode( const ParserState* state, const ParserSymbol* symbol, const std::basic_string& lexeme, int line, int column ); 29 | const ParserState* state() const; 30 | const ParserSymbol* symbol() const; 31 | const std::basic_string& lexeme() const; 32 | int line() const; 33 | int column() const; 34 | }; 35 | 36 | } 37 | 38 | #endif 39 | -------------------------------------------------------------------------------- /src/lalr/ParserNode.ipp: -------------------------------------------------------------------------------- 1 | #ifndef LALR_PARSERNODE_IPP_INCLUDED 2 | #define LALR_PARSERNODE_IPP_INCLUDED 3 | 4 | #include "ParserNode.hpp" 5 | #include "assert.hpp" 6 | 7 | namespace lalr 8 | { 9 | 10 | /** 11 | // Constructor. 12 | // 13 | // Constructs a node with an empty lexeme; use the other constructor to 14 | // store a lexeme at the node. 15 | // 16 | // @param state 17 | // The ParserState of the parser at this node (assumed not null). 18 | // 19 | // @param symbol 20 | // The symbol at this node. 21 | // 22 | // @param line 23 | // The line number of the symbol at this node (assumed >= 0). 24 | // 25 | // @param column 26 | // The column number of the symbol at this node (assumed >= 1). 27 | */ 28 | template 29 | ParserNode::ParserNode( const ParserState* state, const ParserSymbol* symbol, int line, int column ) 30 | : state_( state ) 31 | , symbol_( symbol ) 32 | , lexeme_() 33 | , line_( line ) 34 | , column_( column ) 35 | { 36 | LALR_ASSERT( state ); 37 | LALR_ASSERT( line >= 0 ); 38 | LALR_ASSERT( column >= 1 ); 39 | } 40 | 41 | /** 42 | // Constructor. 43 | // 44 | // Constructs a node that stores a copy of \e lexeme along with its state 45 | // and symbol. 46 | // 47 | // @param state 48 | // The state at this node (assumed not null). 49 | // 50 | // @param symbol 51 | // The symbol at this node. 52 | // 53 | // @param lexeme 54 | // The lexeme at this node. 55 | // 56 | // @param line 57 | // The line number at the start of the lexeme (assumed >= 0).
58 | // 59 | // @param column 60 | // The column number at the start of the lexeme (assumed >= 1). 61 | */ 62 | template 63 | ParserNode::ParserNode( const ParserState* state, const ParserSymbol* symbol, const std::basic_string& lexeme, int line, int column ) 64 | : state_( state ) 65 | , symbol_( symbol ) 66 | , lexeme_( lexeme ) 67 | , line_( line ) 68 | , column_( column ) 69 | { 70 | LALR_ASSERT( state ); 71 | LALR_ASSERT( line >= 0 ); 72 | LALR_ASSERT( column >= 1 ); 73 | } 74 | 75 | /** 76 | // Get the state at this node. 77 | // 78 | // @return 79 | // The state. 80 | */ 81 | template 82 | const ParserState* ParserNode::state() const 83 | { 84 | return state_; 85 | } 86 | 87 | /** 88 | // Get the symbol at this node. 89 | // 90 | // @return 91 | // The symbol. 92 | */ 93 | template 94 | const ParserSymbol* ParserNode::symbol() const 95 | { 96 | return symbol_; 97 | } 98 | 99 | /** 100 | // Get the lexeme at this node. 101 | // 102 | // @return 103 | // The lexeme (empty if the node was constructed without one). 104 | */ 105 | template 106 | const std::basic_string& ParserNode::lexeme() const 107 | { 108 | return lexeme_; 109 | } 110 | 111 | /** 112 | // Get the line number at the start of this node's lexeme. 113 | // 114 | // @return 115 | // The line number. 116 | */ 117 | template 118 | int ParserNode::line() const 119 | { 120 | return line_; 121 | } 122 | 123 | /** 124 | // Get the column number at the start of this node's lexeme. 125 | // 126 | // @return 127 | // The column number.
128 | */ 129 | template 130 | int ParserNode::column() const 131 | { 132 | return column_; 133 | } 134 | 135 | } 136 | 137 | #endif 138 | -------------------------------------------------------------------------------- /src/lalr/ParserState.hpp: -------------------------------------------------------------------------------- 1 | #ifndef LALR_PARSERSTATE_HPP_INCLUDED 2 | #define LALR_PARSERSTATE_HPP_INCLUDED 3 | 4 | namespace lalr 5 | { 6 | 7 | class ParserSymbol; 8 | class ParserTransition; 9 | 10 | /** 11 | // A state in a parser's state machine. 12 | */ 13 | class ParserState 14 | { 15 | public: 16 | int index; ///< The index of this state. 17 | int length; ///< The number of transitions in this state. 18 | const ParserTransition* transitions; ///< The available transitions from this state. 19 | const char* label; ///< The state's items as a text label for debugging 20 | }; 21 | 22 | } 23 | 24 | #endif 25 | -------------------------------------------------------------------------------- /src/lalr/ParserStateMachine.hpp: -------------------------------------------------------------------------------- 1 | #ifndef LALR_PARSERSTATEMACHINE_HPP_INCLUDED 2 | #define LALR_PARSERSTATEMACHINE_HPP_INCLUDED 3 | 4 | namespace lalr 5 | { 6 | 7 | class LexerStateMachine; 8 | class LexerErrorPolicy; 9 | 10 | } 11 | 12 | namespace lalr 13 | { 14 | 15 | class ParserAction; 16 | class ParserSymbol; 17 | class ParserTransition; 18 | class ParserState; 19 | 20 | /** 21 | // The data that defines the state machine for a %parser. 22 | */ 23 | class ParserStateMachine 24 | { 25 | public: 26 | const char* identifier; ///< The identifier of this parser. 27 | int actions_size; ///< The number of actions. 28 | int symbols_size; ///< The number of symbols. 29 | int transitions_size; ///< The total number of transitions. 30 | int states_size; ///< The number of states. 31 | const ParserAction* actions; ///< The parser actions for this ParserStateMachine. 
32 | const ParserSymbol* symbols; ///< The symbols in the grammar for this ParserStateMachine. 33 | const ParserTransition* transitions; ///< The transitions in the state machine for this ParserStateMachine. 34 | const ParserState* states; ///< The states in the state machine for this ParserStateMachine. 35 | const ParserSymbol* start_symbol; ///< The start symbol. 36 | const ParserSymbol* end_symbol; ///< The end symbol. 37 | const ParserSymbol* error_symbol; ///< The error symbol. 38 | const ParserSymbol* whitespace_symbol; ///< The whitespace symbol. 39 | const ParserState* start_state; ///< The start state. 40 | const LexerStateMachine* lexer_state_machine; ///< The state machine used by the lexer to match tokens 41 | const LexerStateMachine* whitespace_lexer_state_machine; ///< The state machine used by the lexer to skip whitespace 42 | }; 43 | 44 | } 45 | 46 | #endif 47 | -------------------------------------------------------------------------------- /src/lalr/ParserSymbol.hpp: -------------------------------------------------------------------------------- 1 | #ifndef LALR_PARSERSYMBOL_HPP_INCLUDED 2 | #define LALR_PARSERSYMBOL_HPP_INCLUDED 3 | 4 | #include "SymbolType.hpp" 5 | 6 | namespace lalr 7 | { 8 | 9 | /** 10 | // A symbol used by the parser 11 | */ 12 | class ParserSymbol 13 | { 14 | public: 15 | int index; ///< The index of this symbol. 16 | SymbolType type; ///< The type of this symbol. 17 | const char* identifier; ///< The identifier of this symbol. 18 | const char* lexeme; ///< The lexeme of this symbol or null if this symbol is non-terminal. 19 | const char* label; ///< The human-readable label for this symbol. 
20 | }; 21 | 22 | } 23 | 24 | #endif 25 | -------------------------------------------------------------------------------- /src/lalr/ParserTransition.hpp: -------------------------------------------------------------------------------- 1 | #ifndef LALR_PARSERTRANSITION_HPP_INCLUDED 2 | #define LALR_PARSERTRANSITION_HPP_INCLUDED 3 | 4 | #include "TransitionType.hpp" 5 | 6 | namespace lalr 7 | { 8 | 9 | class ParserState; 10 | class ParserSymbol; 11 | 12 | /** 13 | // A transition in a parser's state machine. 14 | */ 15 | class ParserTransition 16 | { 17 | public: 18 | const ParserSymbol* symbol; ///< The symbol that the transition is taken on. 19 | const ParserState* state; ///< The state that is transitioned to. 20 | const ParserSymbol* reduced_symbol; ///< The symbol that is reduced to or null if this isn't a reducing transition. 21 | int reduced_length; ///< The number of symbols on the right-hand side of the reduced production. 22 | int precedence; ///< The precedence of the reduce production or 0 for the default precedence or no reduction. 23 | int action; ///< The index of the action taken on reduce or ParserAction::INVALID_INDEX if there is no action associated with the reduce. 24 | int index; ///< The index of this transition. 25 | }; 26 | 27 | } 28 | 29 | #endif 30 | -------------------------------------------------------------------------------- /src/lalr/ParserUserData.hpp: -------------------------------------------------------------------------------- 1 | #ifndef LALR_PARSERUSERDATA_HPP_INCLUDED 2 | #define LALR_PARSERUSERDATA_HPP_INCLUDED 3 | 4 | #include 5 | #include 6 | #include 7 | 8 | namespace lalr 9 | { 10 | 11 | class ParserSymbol; 12 | template class ParserNode; 13 | 14 | /** 15 | // The default implementation for data stored in a parser's stack.
16 | */ 17 | template , class Allocator = typename std::allocator > 18 | class ParserUserData 19 | { 20 | typedef ParserNode TemplatedParserNode; 21 | const ParserSymbol* symbol_; ///< The symbol at this user data's node. 22 | std::basic_string lexeme_; ///< The lexeme at this user data's node. 23 | std::vector > > user_datas_; ///< Children in the parse tree. 24 | 25 | public: 26 | ParserUserData( const ParserSymbol* symbol, const std::basic_string& lexeme ); 27 | ParserUserData( const ParserSymbol* symbol, size_t user_datas ); 28 | ParserUserData( const ParserSymbol* symbol, const TemplatedParserNode* start, const TemplatedParserNode* finish ); 29 | const ParserSymbol* symbol() const; 30 | const std::basic_string& lexeme() const; 31 | void append_user_data( std::shared_ptr user_data ); 32 | const std::vector > >& user_datas() const; 33 | }; 34 | 35 | } 36 | 37 | #endif 38 | -------------------------------------------------------------------------------- /src/lalr/ParserUserData.ipp: -------------------------------------------------------------------------------- 1 | #ifndef LALR_PARSERUSERDATA_IPP_INCLUDED 2 | #define LALR_PARSERUSERDATA_IPP_INCLUDED 3 | 4 | #include "ParserUserData.hpp" 5 | #include "assert.hpp" 6 | #include 7 | 8 | namespace lalr 9 | { 10 | 11 | /** 12 | // Constructor. 13 | // 14 | // @param symbol 15 | // The symbol at this user data (assumed not null). 16 | // 17 | // @param lexeme 18 | // The lexeme at this user data. 19 | */ 20 | template 21 | ParserUserData::ParserUserData( const ParserSymbol* symbol, const std::basic_string& lexeme ) 22 | : symbol_( symbol ) 23 | , lexeme_( lexeme ) 24 | , user_datas_() 25 | { 26 | LALR_ASSERT( symbol_ ); 27 | } 28 | 29 | /** 30 | // Constructor. 31 | // 32 | // @param symbol 33 | // The symbol at this user data (assumed not null). 34 | // 35 | // @param user_datas 36 | // The number of user datas that will be added as children of this user data. 
37 | */ 38 | template 39 | ParserUserData::ParserUserData( const ParserSymbol* symbol, size_t user_datas ) 40 | : symbol_( symbol ) 41 | , lexeme_() 42 | , user_datas_() 43 | { 44 | LALR_ASSERT( symbol_ ); 45 | user_datas_.reserve( user_datas ); 46 | } 47 | 48 | /** 49 | // Constructor that adds one child user data per node in [start, finish). 50 | // NOTE(review): calls node->user_data(), which the ParserNode class in ParserNode.hpp does not declare -- confirm this still compiles when instantiated. 51 | // @param symbol 52 | // The symbol at this user data (assumed not null). 53 | // 54 | // @param start 55 | // The first node to get user data from to add as a child of this user data. 56 | // 57 | // @param finish 58 | // One past the last node to get user data from to add as a child of this 59 | // user data. 60 | */ 61 | template 62 | ParserUserData::ParserUserData( const ParserSymbol* symbol, const TemplatedParserNode* start, const TemplatedParserNode* finish ) 63 | : symbol_( symbol ) 64 | , lexeme_() 65 | , user_datas_() 66 | { 67 | LALR_ASSERT( start ); 68 | LALR_ASSERT( finish ); 69 | LALR_ASSERT( start <= finish ); 70 | LALR_ASSERT( symbol_ ); 71 | 72 | for ( const TemplatedParserNode* node = start; node != finish; ++node ) 73 | { 74 | if ( node->user_data() ) 75 | { 76 | user_datas_.push_back( node->user_data() ); 77 | } 78 | else 79 | { 80 | std::shared_ptr > user_data( new ParserUserData(node->symbol(), node->lexeme()) ); 81 | user_datas_.push_back( user_data ); 82 | } 83 | } 84 | } 85 | 86 | /** 87 | // Get the symbol at this user data. 88 | // 89 | // @return 90 | // The symbol. 91 | */ 92 | template 93 | const ParserSymbol* ParserUserData::symbol() const 94 | { 95 | return symbol_; 96 | } 97 | 98 | /** 99 | // Get the lexeme at this user data. 100 | // 101 | // @return 102 | // The lexeme. 103 | */ 104 | template 105 | const std::basic_string& ParserUserData::lexeme() const 106 | { 107 | return lexeme_; 108 | } 109 | 110 | /** 111 | // Append user data as a child of this user data. 112 | // 113 | // @param user_data 114 | // The user data to append to this user data.
115 | */ 116 | template 117 | void ParserUserData::append_user_data( std::shared_ptr user_data ) 118 | { 119 | LALR_ASSERT( std::find(user_datas_.begin(), user_datas_.end(), user_data) == user_datas_.end() ); 120 | user_datas_.push_back( user_data ); 121 | } 122 | 123 | /** 124 | // Get the user datas that are part of this user data. 125 | // 126 | // @return 127 | // The user datas. 128 | */ 129 | template 130 | const std::vector > >& ParserUserData::user_datas() const 131 | { 132 | return user_datas_; 133 | } 134 | 135 | } 136 | 137 | #endif 138 | -------------------------------------------------------------------------------- /src/lalr/PositionIterator.hpp: -------------------------------------------------------------------------------- 1 | #ifndef LALR_POSITIONITERATOR_HPP_INCLUDED 2 | #define LALR_POSITIONITERATOR_HPP_INCLUDED 3 | 4 | #include "assert.hpp" 5 | #include 6 | 7 | namespace lalr 8 | { 9 | 10 | /** 11 | // An iterator wrapper that adds line and column number tracking to 12 | // iterators or pointers. 13 | */ 14 | template 15 | class PositionIterator 16 | { 17 | public: 18 | typedef std::forward_iterator_tag iterator_category; ///< The type of this iterator (always forward regardless of the underlying iterator). 19 | typedef typename std::iterator_traits::difference_type difference_type; ///< The type to represent the difference between two iterators. 20 | typedef typename std::iterator_traits::value_type value_type; ///< The type of the element the iterator can point to. 21 | typedef typename std::iterator_traits::pointer pointer; ///< The type of a pointer to an element the iterator can point to. 22 | typedef typename std::iterator_traits::reference reference; ///< Type to represent a reference to an element pointed to by the iterator. 23 | 24 | private: 25 | Iterator position_; ///< The current position of this iterator. 26 | Iterator end_; ///< One past the last position of the input sequence for this iterator.
27 | bool ended_; ///< True if this iterator has reached its end. 28 | int line_; ///< The current line number of this iterator. 29 | int column_; ///< The current column number of this iterator. 30 | 31 | public: 32 | PositionIterator() 33 | : position_() 34 | , end_() 35 | , ended_( true ) 36 | , line_( 1 ) 37 | , column_( 1 ) 38 | { 39 | } 40 | 41 | PositionIterator( Iterator begin, Iterator end ) 42 | : position_( begin ) 43 | , end_( end ) 44 | , ended_( begin == end ) 45 | , line_( 1 ) 46 | , column_( 1 ) 47 | { 48 | } 49 | 50 | PositionIterator( const PositionIterator& iterator ) 51 | : position_( iterator.position_ ) 52 | , end_( iterator.end_ ) 53 | , ended_( iterator.ended_ ) 54 | , line_( iterator.line_ ) 55 | , column_(iterator.column_) 56 | { 57 | } 58 | 59 | PositionIterator& operator=( const PositionIterator& iterator ) 60 | { 61 | if ( this != &iterator ) 62 | { 63 | position_ = iterator.position_; 64 | end_ = iterator.end_; 65 | ended_ = iterator.ended_; 66 | line_ = iterator.line_; 67 | column_ = iterator.column_; 68 | } 69 | 70 | return *this; 71 | } 72 | 73 | PositionIterator& operator++() 74 | { 75 | int character = *position_; 76 | ++position_; 77 | ++column_; 78 | 79 | if ( character == '\n' ) 80 | { 81 | ++line_; 82 | column_ = 1; 83 | } 84 | else if ( character == '\r' ) 85 | { 86 | if ( position_ != end_ && *position_ == '\n' ) 87 | { 88 | ++position_; 89 | } 90 | ++line_; 91 | column_ = 1; 92 | } 93 | 94 | ended_ = position_ == end_; 95 | return *this; 96 | } 97 | 98 | value_type operator*() const 99 | { 100 | return *position_; 101 | } 102 | 103 | bool ended() const 104 | { 105 | return ended_; 106 | } 107 | 108 | const Iterator& position() const 109 | { 110 | return position_; 111 | } 112 | 113 | const Iterator& end() const 114 | { 115 | return end_; 116 | } 117 | 118 | int line() const 119 | { 120 | return line_; 121 | } 122 | 123 | int column() const 124 | { 125 | return column_; 126 | } 127 | 128 | bool operator!=( const 
PositionIterator& iterator ) const 129 | { 130 | return ended_ || iterator.ended_ ? ended_ != iterator.ended_ : position_ != iterator.position_; 131 | } 132 | 133 | bool operator==( const PositionIterator& iterator ) const 134 | { 135 | return ended_ || iterator.ended_ ? ended_ == iterator.ended_ : position_ == iterator.position_; 136 | } 137 | 138 | void skip( Iterator position, int lines ) 139 | { 140 | LALR_ASSERT( lines >= 0 ); 141 | position_ = position; 142 | ended_ = position_ == end_; 143 | line_ += lines; 144 | column_ = 1; 145 | } 146 | }; 147 | 148 | } 149 | 150 | #endif 151 | -------------------------------------------------------------------------------- /src/lalr/RegexAction.cpp: -------------------------------------------------------------------------------- 1 | // 2 | // RegexAction.cpp 3 | // Copyright (c) Charles Baker. All rights reserved. 4 | // 5 | 6 | #include "RegexAction.hpp" 7 | 8 | using namespace lalr; 9 | 10 | /** 11 | // Constructor. 12 | // 13 | // @param index 14 | // The index of this action. 15 | // 16 | // @param identifier 17 | // The identifier of this action. 18 | */ 19 | RegexAction::RegexAction( int index, const std::string& identifier ) 20 | : index_( index ) 21 | , identifier_( identifier ) 22 | { 23 | } 24 | 25 | /** 26 | // Get the index of this action. 27 | // 28 | // @return 29 | // The index. 30 | */ 31 | int RegexAction::index() const 32 | { 33 | return index_; 34 | } 35 | 36 | /** 37 | // Get the identifier of this action. 38 | // 39 | // @return 40 | // The identifier of this action. 
41 | */ 42 | const std::string& RegexAction::identifier() const 43 | { 44 | return identifier_; 45 | } 46 | -------------------------------------------------------------------------------- /src/lalr/RegexAction.hpp: -------------------------------------------------------------------------------- 1 | #ifndef LALR_REGEXACTION_HPP_INCLUDED 2 | #define LALR_REGEXACTION_HPP_INCLUDED 3 | 4 | #include 5 | 6 | namespace lalr 7 | { 8 | 9 | /** 10 | // An action that is attached to a lexical analyzer. 11 | */ 12 | class RegexAction 13 | { 14 | int index_; ///< The index of this action. 15 | std::string identifier_; ///< The identifier of this action. 16 | 17 | public: 18 | RegexAction( int index, const std::string& identifier ); 19 | int index() const; 20 | const std::string& identifier() const; 21 | 22 | static const int INVALID_INDEX = -1; 23 | }; 24 | 25 | } 26 | 27 | #endif 28 | -------------------------------------------------------------------------------- /src/lalr/RegexCharacter.cpp: -------------------------------------------------------------------------------- 1 | // 2 | // RegexCharacter.cpp 3 | // Copyright (c) Charles Baker. All rights reserved. 4 | // 5 | 6 | #include "RegexCharacter.hpp" 7 | #include "assert.hpp" 8 | 9 | using namespace lalr; 10 | 11 | /** 12 | // Constructor. 13 | // 14 | // @param begin_character 15 | // The first character in the represented interval. 16 | // 17 | // @param end_character 18 | // One past the last character in the represented interval. 19 | */ 20 | RegexCharacter::RegexCharacter( int begin_character, int end_character ) 21 | : begin_character_( begin_character ) 22 | , end_character_( end_character ) 23 | { 24 | LALR_ASSERT( begin_character_ < end_character_ ); 25 | } 26 | 27 | /** 28 | // Get the first character in the represented interval. 29 | // 30 | // @return 31 | // The begin character. 
32 | */ 33 | int RegexCharacter::begin_character() const 34 | { 35 | return begin_character_; 36 | } 37 | 38 | /** 39 | // Get the character one past the last character in the represented interval. 40 | // 41 | // @return 42 | // The end character. 43 | */ 44 | int RegexCharacter::end_character() const 45 | { 46 | return end_character_; 47 | } 48 | 49 | /** 50 | // Less than operator. 51 | // 52 | // @param regex_character 53 | // The character to compare with. 54 | // 55 | // @return 56 | // True if the end of the interval represented by this character is less 57 | // than the beginning of the interval represented by \e regex_character. 58 | */ 59 | bool RegexCharacter::operator<( const RegexCharacter& regex_character ) const 60 | { 61 | return end_character_ < regex_character.begin_character_; 62 | } 63 | -------------------------------------------------------------------------------- /src/lalr/RegexCharacter.hpp: -------------------------------------------------------------------------------- 1 | #ifndef LALR_REGEXCHARACTER_HPP_INCLUDED 2 | #define LALR_REGEXCHARACTER_HPP_INCLUDED 3 | 4 | namespace lalr 5 | { 6 | 7 | /** 8 | // @internal 9 | // 10 | // A range of characters in a regular expression. 11 | */ 12 | class RegexCharacter 13 | { 14 | int begin_character_; ///< The first character in the range of characters. 15 | int end_character_; ///< One past the last character in the range of characters. 
16 | 17 | public: 18 | RegexCharacter( int begin_character, int end_character ); 19 | int begin_character() const; 20 | int end_character() const; 21 | bool operator<( const RegexCharacter& regex_character ) const; 22 | }; 23 | 24 | } 25 | 26 | #endif 27 | -------------------------------------------------------------------------------- /src/lalr/RegexCompiler.cpp: -------------------------------------------------------------------------------- 1 | 2 | #include "RegexCompiler.hpp" 3 | #include "RegexGenerator.hpp" 4 | #include "RegexState.hpp" 5 | #include "RegexTransition.hpp" 6 | #include "RegexAction.hpp" 7 | #include "LexerStateMachine.hpp" 8 | #include "LexerState.hpp" 9 | #include "LexerTransition.hpp" 10 | #include "LexerAction.hpp" 11 | #include "assert.hpp" 12 | #include 13 | 14 | using std::set; 15 | using std::vector; 16 | using std::unique_ptr; 17 | using namespace lalr; 18 | /* Construct an empty compiler; state_machine_ stays empty until populate_lexer_state_machine() runs. */ 19 | RegexCompiler::RegexCompiler() 20 | : strings_() 21 | , actions_() 22 | , transitions_() 23 | , states_() 24 | , state_machine_() 25 | { 26 | } 27 | 28 | RegexCompiler::~RegexCompiler() 29 | { 30 | } 31 | /* Return the compiled state machine, or null if none has been populated yet. */ 32 | const LexerStateMachine* RegexCompiler::state_machine() const 33 | { 34 | return state_machine_.get(); 35 | } 36 | /* Compile a single regular expression for \e symbol. The state machine is (re)built only when generation reports 0 errors. Returns the error count. */ 37 | int RegexCompiler::compile( const std::string& regular_expression, void* symbol, ErrorPolicy* error_policy ) 38 | { 39 | RegexGenerator generator; 40 | int errors = generator.generate( regular_expression, symbol, error_policy ); 41 | if ( errors == 0 ) 42 | { 43 | populate_lexer_state_machine( generator ); 44 | } 45 | return errors; 46 | } 47 | /* Compile a collection of tokens. The state machine is (re)built only when generation reports 0 errors. Returns the error count. */ 48 | int RegexCompiler::compile( const std::vector& tokens, ErrorPolicy* error_policy ) 49 | { 50 | RegexGenerator generator; 51 | int errors = generator.generate( tokens, error_policy ); 52 | if ( errors == 0 ) 53 | { 54 | populate_lexer_state_machine( generator ); 55 | } 56 | return errors; 57 | } 58 | /* Store a copy of \e string in strings_ and return a pointer to the stored characters. strings_ is declared as a std::deque, whose push_back() does not invalidate pointers to existing elements, so previously returned pointers stay usable. */ 59 | const char* RegexCompiler::add_string( const std::string& string ) 60 | { 61 | strings_.push_back( string
); 62 | return strings_.back().c_str(); 63 | } 64 | /* Take ownership of \e actions and point the state machine at the array. NOTE(review): dereferences state_machine_, which is only allocated in populate_lexer_state_machine(); calling this public method before then dereferences an empty pointer. */ 65 | void RegexCompiler::set_actions( std::unique_ptr& actions, int actions_size ) 66 | { 67 | actions_ = std::move( actions ); 68 | state_machine_->actions_size = actions_size; 69 | state_machine_->actions = actions_.get(); 70 | } 71 | /* Take ownership of \e transitions and point the state machine at the array. NOTE(review): requires state_machine_ to have been allocated already. */ 72 | void RegexCompiler::set_transitions( std::unique_ptr& transitions, int transitions_size ) 73 | { 74 | transitions_ = std::move( transitions ); 75 | state_machine_->transitions_size = transitions_size; 76 | state_machine_->transitions = transitions_.get(); 77 | } 78 | /* Take ownership of \e states, point the state machine at the array, and record \e start_state. NOTE(review): requires state_machine_ to have been allocated already. */ 79 | void RegexCompiler::set_states( std::unique_ptr& states, int states_size, const LexerState* start_state ) 80 | { 81 | states_ = std::move( states ); 82 | state_machine_->states_size = states_size; 83 | state_machine_->states = states_.get(); 84 | state_machine_->start_state = start_state; 85 | } 86 | /* Rebuild state_machine_ from \e generator: allocate a fresh LexerStateMachine, copy the generator's actions, states, and transitions into flat arrays, then install them via set_actions(), set_transitions(), and set_states(). */ 87 | void RegexCompiler::populate_lexer_state_machine( const RegexGenerator& generator ) 88 | { 89 | state_machine_.reset( new LexerStateMachine ); 90 | memset( state_machine_.get(), 0, sizeof(*state_machine_) ); 91 | /* NOTE(review): the memset above assumes LexerStateMachine has no non-trivial members - confirm in LexerStateMachine.hpp. */ 92 | const vector>& source_actions = generator.actions(); 93 | const set, RegexStateLess>& source_states = generator.states(); 94 | /* First pass: total the transitions over all states to size the shared transitions array. */ 95 | size_t transitions_size = 0; 96 | for ( auto i = source_states.begin(); i != source_states.end(); ++i ) 97 | { 98 | const RegexState* source_state = i->get(); 99 | LALR_ASSERT( source_state ); 100 | transitions_size += source_state->get_transitions().size(); 101 | } 102 | /* Allocate the arrays that the state machine will point into. */ 103 | unique_ptr actions( new LexerAction [source_actions.size()] ); 104 | unique_ptr transitions( new LexerTransition [transitions_size] ); 105 | unique_ptr states( new LexerState [source_states.size()] ); 106 | /* Copy each action; identifiers are copied through add_string() so this compiler owns the characters. */ 107 | for ( size_t i = 0; i < source_actions.size(); ++i ) 108 | { 109 | const RegexAction* source_action = source_actions[i].get(); 110 | LALR_ASSERT( source_action ); 111 | LexerAction* action = &actions[i]; 112 | action->index = source_action->index(); 113 | action->identifier = 
add_string( source_action->identifier() ); 114 | } 115 | /* Second pass: emit one LexerState per source state in iteration order, with each state's transitions stored contiguously in the shared transitions array. */ 116 | int state_index = 0; 117 | int transition_index = 0; 118 | const LexerState* start_state = nullptr; 119 | for ( auto i = source_states.begin(); i != source_states.end(); ++i ) 120 | { 121 | const RegexState* source_state = i->get(); 122 | LALR_ASSERT( source_state ); 123 | LexerState* state = &states[state_index]; 124 | LALR_ASSERT( state ); 125 | const set& source_transitions = source_state->get_transitions(); 126 | state->index = state_index; 127 | state->length = int(source_transitions.size()); 128 | state->transitions = &transitions[transition_index]; 129 | state->symbol = source_state->get_symbol(); 130 | if ( source_state == generator.start_state() ) 131 | { 132 | start_state = state; 133 | } 134 | for ( auto j = source_transitions.begin(); j != source_transitions.end(); ++j ) 135 | { 136 | const RegexTransition* source_transition = &(*j); 137 | LALR_ASSERT( source_transition ); 138 | const RegexState* state_transitioned_to = source_transition->state(); 139 | const RegexAction* action = source_transition->action(); 140 | LexerTransition* transition = &transitions[transition_index]; 141 | transition->begin = source_transition->begin(); 142 | transition->end = source_transition->end(); 143 | transition->state = state_transitioned_to ? &states[state_transitioned_to->index()] : nullptr; /* NOTE(review): looks up the target by RegexState::index(), while states are stored here by iteration position - assumes the two agree; confirm in RegexGenerator::generate_indices_for_states(). */ 144 | transition->action = action ? 
&actions[action->index()] : nullptr; 145 | ++transition_index; 146 | } 147 | ++state_index; 148 | } 149 | /* Hand the arrays over to this compiler and wire them into the state machine. */ 150 | set_actions( actions, int(source_actions.size()) ); 151 | set_transitions( transitions, int(transitions_size) ); 152 | set_states( states, int(source_states.size()), start_state ); 153 | } 154 | -------------------------------------------------------------------------------- /src/lalr/RegexCompiler.hpp: -------------------------------------------------------------------------------- 1 | #ifndef LALR_LEXERALLOCATIONS_HPP_INCLUDED 2 | #define LALR_LEXERALLOCATIONS_HPP_INCLUDED 3 | 4 | #include "RegexToken.hpp" 5 | #include 6 | #include 7 | #include 8 | #include 9 | 10 | namespace lalr 11 | { 12 | 13 | class ErrorPolicy; 14 | class LexerAction; 15 | class LexerTransition; 16 | class LexerState; 17 | class LexerStateMachine; 18 | class RegexGenerator; 19 | 20 | /** 21 | // Compiles regular expressions into a LexerStateMachine and owns the strings, actions, transitions, and states that the machine points to. 22 | */ 23 | class RegexCompiler 24 | { 25 | std::deque strings_; /* Copies of action identifiers; add_string() returns pointers into these. */ 26 | std::unique_ptr actions_; /* Owns the array that state_machine_->actions points to. */ 27 | std::unique_ptr transitions_; /* Owns the array that state_machine_->transitions points to. */ 28 | std::unique_ptr states_; /* Owns the array that state_machine_->states points to. */ 29 | std::unique_ptr state_machine_; /* The compiled state machine; empty until populate_lexer_state_machine() runs. */ 30 | 31 | public: 32 | RegexCompiler(); 33 | ~RegexCompiler(); 34 | const LexerStateMachine* state_machine() const; 35 | int compile( const std::string& regular_expression, void* symbol, ErrorPolicy* error_policy = nullptr ); 36 | int compile( const std::vector& tokens, ErrorPolicy* error_policy = nullptr ); 37 | const char* add_string( const std::string& string ); 38 | void set_actions( std::unique_ptr& actions, int actions_size ); 39 | void set_transitions( std::unique_ptr& transitions, int transitions_size ); 40 | void set_states( std::unique_ptr& states, int states_size, const LexerState* start_state ); 41 | void populate_lexer_state_machine( const RegexGenerator& generator ); 42 | }; 43 | 44 | } 45 | 46 | #endif 47 | -------------------------------------------------------------------------------- /src/lalr/RegexGenerator.hpp: 
-------------------------------------------------------------------------------- 1 | #ifndef LALR_LEXERGENERATOR_HPP_INCLUDED 2 | #define LALR_LEXERGENERATOR_HPP_INCLUDED 3 | 4 | #include "RegexToken.hpp" 5 | #include "RegexStateLess.hpp" 6 | #include 7 | #include 8 | #include 9 | 10 | namespace error 11 | { 12 | 13 | class Error; 14 | 15 | } 16 | 17 | namespace lalr 18 | { 19 | 20 | class ErrorPolicy; 21 | class RegexState; 22 | class RegexAction; 23 | class RegexSyntaxTree; 24 | 25 | /** 26 | // @internal 27 | // 28 | // Generate a lexical analyzer. 29 | */ 30 | class RegexGenerator 31 | { 32 | ErrorPolicy* error_policy_; ///< The error policy to report errors and debug information to or null to ignore errors and debug information. 33 | RegexSyntaxTree* syntax_tree_; ///< The syntax tree generated from parsing regular expression(s). 34 | std::vector> actions_; ///< The lexical analyzer actions. 35 | std::set, RegexStateLess> states_; ///< The states generated for the lexical analyzer. 36 | RegexState* start_state_; ///< The starting state for the lexical analyzer. 37 | std::vector> ranges_; ///< Ranges generated for the current transition while generating. 38 | 39 | public: 40 | RegexGenerator(); 41 | ~RegexGenerator(); 42 | const std::vector>& actions() const; 43 | const std::set, RegexStateLess>& states() const; 44 | const RegexState* start_state() const; 45 | void fire_error( int line, int column, int error, const char* format, ... ) const; 46 | void fire_printf( const char* format, ... 
) const; 47 | const RegexAction* add_lexer_action( const std::string& identifier ); 48 | int generate( const std::string& regular_expression, void* symbol, ErrorPolicy* error_policy = nullptr ); 49 | int generate( const std::vector& tokens, ErrorPolicy* error_policy = nullptr ); 50 | 51 | private: 52 | std::unique_ptr goto_( const RegexState* state, int begin, int end ); 53 | void generate_states( const RegexSyntaxTree& syntax_tree ); 54 | void generate_indices_for_states(); 55 | void generate_symbol_for_state( RegexState* state ) const; 56 | void clear(); 57 | void insert( int begin, int end ); 58 | }; 59 | 60 | } 61 | 62 | #endif 63 | -------------------------------------------------------------------------------- /src/lalr/RegexItem.cpp: -------------------------------------------------------------------------------- 1 | // 2 | // RegexItem.cpp 3 | // Copyright (c) Charles Baker. All rights reserved. 4 | // 5 | 6 | #include "RegexItem.hpp" 7 | #include "RegexNode.hpp" 8 | #include "assert.hpp" 9 | #include 10 | 11 | using namespace lalr; 12 | 13 | /** 14 | // Constructor. 15 | */ 16 | RegexItem::RegexItem() 17 | : next_nodes_() 18 | { 19 | } 20 | 21 | /** 22 | // Constructor. 23 | // 24 | // @param next_nodes 25 | // The nodes that appear after the dot in this item. 26 | */ 27 | RegexItem::RegexItem( const std::set& next_nodes ) 28 | : next_nodes_( next_nodes ) 29 | { 30 | } 31 | 32 | /** 33 | // Get the nodes that appear after the dot in this item. 34 | // 35 | // @return 36 | // The next nodes. 37 | */ 38 | const std::set& RegexItem::next_nodes() const 39 | { 40 | return next_nodes_; 41 | } 42 | 43 | /** 44 | // Calculate the nodes that can be visited after [\e begin, \e end) is 45 | // transitioned on from this item. 46 | // 47 | // These nodes then become the core of a new item for the state that 48 | // the lexical analyzer is in after accepting [\e begin, \e end) from the 49 | // state that this item is part of. 
50 | // 51 | // Action nodes are special in that they are only able to be transitioned on 52 | // if no other nodes can be visited after [\e begin, \e end) is transitioned 53 | // on. Nodes are ordered by type and then index and the action type 54 | // (LEXER_NODE_ACTION) is the highest so that action nodes sort to the end 55 | // of the nodes in an item. 56 | // 57 | // @param begin 58 | // The begin character of the interval to calculate the next nodes for. 59 | // 60 | // @param end 61 | // The end character of the interval to calculate the next nodes for. 62 | // 63 | // @return 64 | // The next nodes after [\e begin, \e end) is transitioned on from this item. 65 | */ 66 | std::set RegexItem::next_nodes( int begin, int end ) const 67 | { 68 | std::set next_nodes; 69 | 70 | // 71 | // Add transitions from non-action nodes to the nodes that are visitable 72 | // next. 73 | // 74 | std::set::const_iterator i = next_nodes_.begin(); 75 | while ( i != next_nodes_.end() && (*i)->get_type() != LEXER_NODE_ACTION ) 76 | { 77 | const RegexNode* node = *i; 78 | LALR_ASSERT( node ); 79 | LALR_ASSERT( node->get_type() < LEXER_NODE_ACTION ); 80 | 81 | if ( node->is_match(begin, end) ) 82 | { 83 | next_nodes.insert( node->get_follow_positions().begin(), node->get_follow_positions().end() ); 84 | } 85 | 86 | ++i; 87 | } 88 | 89 | // 90 | // If there were no transitions from any other nodes then add transitions 91 | // from action nodes to the nodes that are visitable next. 
92 | // 93 | if ( next_nodes.empty() ) 94 | { 95 | while ( i != next_nodes_.end() ) 96 | { 97 | const RegexNode* node = *i; 98 | LALR_ASSERT( node ); 99 | LALR_ASSERT( node->get_type() == LEXER_NODE_ACTION ); 100 | 101 | if ( node->is_match(begin, end) ) 102 | { 103 | next_nodes.insert( node->get_follow_positions().begin(), node->get_follow_positions().end() ); 104 | } 105 | 106 | ++i; 107 | } 108 | } 109 | 110 | return next_nodes; 111 | } 112 | 113 | /** 114 | // Find the action that should be taken after [\e begin, \e end) is 115 | // transitioned on from this item. 116 | // 117 | // @param begin 118 | // The begin character of the interval to find the action for. 119 | // 120 | // @param end 121 | // The end character of the interval to find the action for. 122 | // 123 | // @return 124 | // The action that should be taken after [\e begin, \e end) is 125 | // transitioned on or null if there is no such action. 126 | */ 127 | const RegexAction* RegexItem::find_action_by_interval( int begin, int end ) const 128 | { 129 | std::set::const_iterator i = next_nodes_.begin(); 130 | while ( i != next_nodes_.end() && !(*i)->is_action() && !(*i)->is_match(begin, end) ) 131 | { 132 | ++i; 133 | } 134 | 135 | if ( i != next_nodes_.end() && (*i)->is_action() ) 136 | { 137 | while ( i != next_nodes_.end() && !(*i)->is_match(begin, end) ) 138 | { 139 | LALR_ASSERT( (*i)->is_action() ); 140 | ++i; 141 | } 142 | } 143 | 144 | return i != next_nodes_.end() ? (*i)->get_action() : nullptr; 145 | } 146 | 147 | /** 148 | // Less than operator. 149 | // 150 | // @return 151 | // True if the next nodes of this item are less than the next nodes of 152 | // \e item. 
153 | */ 154 | bool RegexItem::operator<( const RegexItem& item ) const 155 | { 156 | return std::lexicographical_compare( next_nodes_.begin(), next_nodes_.end(), item.next_nodes_.begin(), item.next_nodes_.end(), RegexNodeLess() ); 157 | } 158 | -------------------------------------------------------------------------------- /src/lalr/RegexItem.hpp: -------------------------------------------------------------------------------- 1 | #ifndef LALR_REGEXITEM_HPP_INCLUDED 2 | #define LALR_REGEXITEM_HPP_INCLUDED 3 | 4 | #include "RegexNodeLess.hpp" 5 | #include 6 | #include 7 | 8 | namespace lalr 9 | { 10 | 11 | class RegexNode; 12 | class RegexAction; 13 | 14 | /** 15 | // An item that defines the positions in a regular expression that a state 16 | // represents. 17 | */ 18 | class RegexItem 19 | { 20 | std::set next_nodes_; ///< The nodes that appear after the dot in this item. 21 | 22 | public: 23 | RegexItem(); 24 | RegexItem( const std::set& next_nodes ); 25 | const std::set& next_nodes() const; 26 | std::set next_nodes( int begin, int end ) const; 27 | const RegexAction* find_action_by_interval( int begin, int end ) const; 28 | bool operator<( const RegexItem& item ) const; 29 | }; 30 | 31 | } 32 | 33 | #endif 34 | -------------------------------------------------------------------------------- /src/lalr/RegexNode.hpp: -------------------------------------------------------------------------------- 1 | #ifndef LALR_REGEXNODE_HPP_INCLUDED 2 | #define LALR_REGEXNODE_HPP_INCLUDED 3 | 4 | #include "RegexNodeLess.hpp" 5 | #include "RegexNodeType.hpp" 6 | #include 7 | #include 8 | #include 9 | 10 | namespace lalr 11 | { 12 | 13 | /** 14 | // @internal 15 | // 16 | // The character that marks the beginning of the valid character range. 17 | // 18 | // @relates RegexNode 19 | */ 20 | extern const int BEGIN_CHARACTER; 21 | 22 | /** 23 | // @internal 24 | // 25 | // The character that marks the end of the valid character range. 
26 | // 27 | // @relates RegexNode 28 | */ 29 | extern const int END_CHARACTER; 30 | 31 | /** 32 | // @internal 33 | // 34 | // The character that marks the beginning of the invalid character range. 35 | // 36 | // @relates RegexNode 37 | */ 38 | extern const int INVALID_BEGIN_CHARACTER; 39 | 40 | /** 41 | // @internal 42 | // 43 | // The character that marks the end of the invalid character range. 44 | // 45 | // @relates RegexNode 46 | */ 47 | extern const int INVALID_END_CHARACTER; 48 | 49 | class RegexToken; 50 | class RegexAction; 51 | 52 | /** 53 | // @internal 54 | // 55 | // A node in a parsed regular expression. 56 | */ 57 | class RegexNode : public std::enable_shared_from_this 58 | { 59 | int index_; ///< The index of the node. 60 | RegexNodeType type_; ///< The type of the node. 61 | int begin_character_; ///< The first character in the interval of characters represented by the node. 62 | int end_character_; ///< One past the last character in the interval of characters represented by the node. 63 | const RegexToken* token_; ///< The token recognized at the node or null if the node doesn't recognize a token. 64 | const RegexAction* action_; ///< The action taken at the node or null if no action is taken at the node. 65 | std::vector > nodes_; ///< The child nodes. 66 | bool nullable_; ///< True if the node is nullable otherwise false. 67 | std::set first_positions_; ///< The first positions at the node. 68 | std::set last_positions_; ///< The last positions at the node. 69 | std::set follow_positions_; ///< The follow positions at the node. 
70 | 71 | public: 72 | RegexNode( int index, RegexNodeType type ); 73 | RegexNode( int index, int begin_character, int end_character ); 74 | RegexNode( int index, int begin_character, int end_character, const RegexToken* token ); 75 | RegexNode( int index, const RegexAction* action ); 76 | 77 | int get_index() const; 78 | RegexNodeType get_type() const; 79 | const char* get_lexeme() const; 80 | int get_begin_character() const; 81 | int get_end_character() const; 82 | const RegexToken* get_token() const; 83 | const RegexAction* get_action() const; 84 | bool is_match( int begin, int end ) const; 85 | bool is_end() const; 86 | bool is_action() const; 87 | 88 | void add_node( const std::shared_ptr& node ); 89 | RegexNode* get_node( int index ) const; 90 | const std::vector >& get_nodes() const; 91 | 92 | bool is_nullable() const; 93 | const std::set& get_first_positions() const; 94 | const std::set& get_last_positions() const; 95 | const std::set& get_follow_positions() const; 96 | const std::set& get_next_positions() const; 97 | 98 | void calculate_nullable(); 99 | void calculate_first_positions(); 100 | void calculate_last_positions(); 101 | void calculate_follow_positions(); 102 | 103 | bool operator<( const RegexNode& node ) const; 104 | 105 | void print( const std::set& dot_nodes ) const; 106 | }; 107 | 108 | } 109 | 110 | #endif 111 | -------------------------------------------------------------------------------- /src/lalr/RegexNodeLess.cpp: -------------------------------------------------------------------------------- 1 | // 2 | // RegexNodeLess.cpp 3 | // Copyright (c) Charles Baker. All rights reserved. 4 | // 5 | 6 | #include "RegexNodeLess.hpp" 7 | #include "RegexNode.hpp" 8 | #include "assert.hpp" 9 | 10 | using namespace lalr; 11 | 12 | /** 13 | // Compare two RegexNodes. 14 | // 15 | // @param lhs 16 | // The first RegexNode to compare. 17 | // 18 | // @param rhs 19 | // The second RegexNode to compare. 
20 | // 21 | // @return 22 | // True if \e lhs orders before \e rhs according to RegexNode::operator< 23 | // (both pointers are asserted to be non-null) otherwise false. 24 | */ 25 | bool RegexNodeLess::operator()( const RegexNode* lhs, const RegexNode* rhs ) const 26 | { 27 | LALR_ASSERT( lhs ); 28 | LALR_ASSERT( rhs ); 29 | return *lhs < *rhs; 30 | } 31 | -------------------------------------------------------------------------------- /src/lalr/RegexNodeLess.hpp: -------------------------------------------------------------------------------- 1 | #ifndef LALR_REGEXNODELESS_HPP_INCLUDED 2 | #define LALR_REGEXNODELESS_HPP_INCLUDED 3 | 4 | namespace lalr 5 | { 6 | 7 | class RegexNode; 8 | 9 | /** 10 | // @internal 11 | // 12 | // Compare two RegexNodes through pointers by applying RegexNode::operator< to the pointed-to nodes. 13 | */ 14 | struct RegexNodeLess 15 | { 16 | bool operator()( const RegexNode* lhs, const RegexNode* rhs ) const; 17 | }; 18 | 19 | } 20 | 21 | #endif 22 | -------------------------------------------------------------------------------- /src/lalr/RegexNodeType.hpp: -------------------------------------------------------------------------------- 1 | #ifndef LALR_REGEXNODETYPE_HPP_INCLUDED 2 | #define LALR_REGEXNODETYPE_HPP_INCLUDED 3 | 4 | namespace lalr 5 | { 6 | 7 | /** 8 | // @internal 9 | // 10 | // The type of a node in a parsed regular expression. 
11 | // 12 | // @relates RegexNode 13 | */ 14 | enum RegexNodeType 15 | { 16 | LEXER_NODE_NULL, 17 | LEXER_NODE_CAT, 18 | LEXER_NODE_OR, 19 | LEXER_NODE_STAR, 20 | LEXER_NODE_PLUS, 21 | LEXER_NODE_OPTIONAL, 22 | LEXER_NODE_SYMBOL, 23 | LEXER_NODE_ACTION, 24 | LEXER_NODE_COUNT 25 | }; 26 | 27 | } 28 | 29 | #endif 30 | -------------------------------------------------------------------------------- /src/lalr/RegexParser.hpp: -------------------------------------------------------------------------------- 1 | #ifndef LALR_REGEXPARSER_HPP_INCLUDED 2 | #define LALR_REGEXPARSER_HPP_INCLUDED 3 | 4 | namespace lalr 5 | { 6 | 7 | class RegexSyntaxTree; 8 | 9 | class RegexParser 10 | { 11 | RegexSyntaxTree* syntax_tree_; 12 | const char* position_; 13 | const char* end_; 14 | const char* lexeme_begin_; 15 | const char* lexeme_end_; 16 | bool successful_; 17 | 18 | public: 19 | RegexParser( RegexSyntaxTree* syntax_tree ); 20 | bool parse( const char* begin, const char* end ); 21 | 22 | private: 23 | bool match_or_expression(); 24 | bool match_cat_expression(); 25 | bool match_postfix_expression(); 26 | bool match_base_expression(); 27 | bool match_negative_bracket_expression(); 28 | bool match_bracket_expression(); 29 | bool match_action_expression(); 30 | bool match_compound_expression(); 31 | bool match_character_expression(); 32 | bool match_item(); 33 | bool match_negative_item(); 34 | bool match_character(); 35 | bool match_character_in_character_class(); 36 | bool match_character( const char* metacharacters ); 37 | bool match_bracket_expression_character(); 38 | bool match_end_of_range(); 39 | bool match_identifier(); 40 | bool match( const char* lexeme ); 41 | bool expect( const char* lexeme ); 42 | int escape( const char* start, const char* finish ) const; 43 | }; 44 | 45 | } 46 | 47 | #endif 48 | -------------------------------------------------------------------------------- /src/lalr/RegexState.cpp: 
-------------------------------------------------------------------------------- 1 | // 2 | // RegexState.cpp 3 | // Copyright (c) Charles Baker. All rights reserved. 4 | // 5 | 6 | #include "RegexState.hpp" 7 | #include "RegexItem.hpp" 8 | #include "RegexTransition.hpp" 9 | #include "assert.hpp" 10 | #include 11 | 12 | using namespace lalr; 13 | 14 | #if defined(BUILD_PLATFORM_MSVC) 15 | #define snprintf _snprintf 16 | #endif 17 | 18 | /** 19 | // Constructor. 20 | */ 21 | RegexState::RegexState() 22 | : items_() 23 | , transitions_() 24 | , symbol_( nullptr ) 25 | , processed_( false ) 26 | , index_( -1 ) 27 | { 28 | } 29 | 30 | /** 31 | // Get the items that make up this state. 32 | // 33 | // @return 34 | // The items that make up this state. 35 | */ 36 | const std::set& RegexState::get_items() const 37 | { 38 | return items_; 39 | } 40 | 41 | /** 42 | // Find the transition from this state on \e character. 43 | // 44 | // @param character 45 | // The character to find a transition from this state on. 46 | // 47 | // @return 48 | // The transition to make on \e character or null if there is no transition 49 | // from this state on \e character. 50 | */ 51 | const RegexTransition* RegexState::find_transition_by_character( int character ) const 52 | { 53 | std::set::const_iterator transition = transitions_.begin(); 54 | while ( transition != transitions_.end() && !transition->on_character(character) ) 55 | { 56 | ++transition; 57 | } 58 | 59 | return transition != transitions_.end() ? &(*transition) : nullptr; 60 | } 61 | 62 | /** 63 | // Get the transitions from this state. 64 | // 65 | // @return 66 | // The transitions from this state. 67 | */ 68 | const std::set& RegexState::get_transitions() const 69 | { 70 | return transitions_; 71 | } 72 | 73 | /** 74 | // Get the symbol that this state matches. 75 | // 76 | // @return 77 | // The symbol that this state matches or null if this state doesn't match a 78 | // symbol. 
79 | */ 80 | const void* RegexState::get_symbol() const 81 | { 82 | return symbol_; 83 | } 84 | 85 | /** 86 | // Has this state been processed? 87 | // 88 | // @return 89 | // True if this state has been processed otherwise false. 90 | */ 91 | bool RegexState::is_processed() const 92 | { 93 | return processed_; 94 | } 95 | 96 | /** 97 | // Get the index of this state. 98 | // 99 | // @return 100 | // The index of this state (-1 until set_index() has been called). 101 | */ 102 | int RegexState::index() const 103 | { 104 | return index_; 105 | } 106 | 107 | /** 108 | // Less than operator. 109 | // 110 | // @return 111 | // True if the items of this state are lexicographically less than the items of 112 | // \e state. 113 | */ 114 | bool RegexState::operator<( const RegexState& state ) const 115 | { 116 | return std::lexicographical_compare( items_.begin(), items_.end(), state.items_.begin(), state.items_.end() ); 117 | } 118 | 119 | /** 120 | // Add an item to this state. 121 | // 122 | // This only records the item; it does not mark the state as accepting (the 123 | // matched symbol is assigned separately through set_symbol()). Nothing is 124 | // added if an equal item is already present. 125 | // 126 | // @param next_nodes 127 | // The nodes that appear after the dot in the item to add. 128 | // 129 | // @return 130 | // The number of items added (0 or 1). 131 | */ 132 | int RegexState::add_item( const std::set& next_nodes ) 133 | { 134 | return items_.insert( RegexItem(next_nodes) ).second ? 1 : 0; 135 | } 136 | 137 | /** 138 | // Add a transition from this state to \e state on [\e begin, \e end), carrying the first action that any item of this state reports for that interval. 139 | // 140 | // @param begin 141 | // The begin character in the range to transition on. 142 | // 143 | // @param end 144 | // The end character in the range to transition on. 145 | // 146 | // @param state 147 | // The state to add a transition to (assumed not null). 
148 | */ 149 | void RegexState::add_transition( int begin, int end, RegexState* state ) 150 | { 151 | const RegexAction* action = nullptr; 152 | std::set::const_iterator item = items_.begin(); 153 | while ( item != items_.end() && !action ) 154 | { 155 | action = item->find_action_by_interval( begin, end ); 156 | ++item; 157 | } 158 | /* A failed insert (an equal transition already exists) is treated as a programming error. The (void) cast keeps `inserted` referenced, presumably for builds where LALR_ASSERT expands to nothing - confirm in assert.hpp. */ 159 | bool inserted = transitions_.insert( RegexTransition(begin, end, state, action) ).second; 160 | LALR_ASSERT( inserted ); 161 | (void) inserted; 162 | } 163 | 164 | /** 165 | // Set the symbol that this state matches. Asserts that no symbol has already been set on this state. 166 | // 167 | // @param symbol 168 | // The symbol to set this state as matching (assumed not null). 169 | */ 170 | void RegexState::set_symbol( const void* symbol ) 171 | { 172 | LALR_ASSERT( !symbol_ ); 173 | symbol_ = symbol; 174 | } 175 | 176 | /** 177 | // Set whether or not this state has been processed. 178 | // 179 | // @param processed 180 | // True to set this state as processed. 181 | */ 182 | void RegexState::set_processed( bool processed ) 183 | { 184 | processed_ = processed; 185 | } 186 | 187 | /** 188 | // Set the index of this state. 189 | // 190 | // @param index 191 | // The value to set the index of this state to. 192 | */ 193 | void RegexState::set_index( int index ) 194 | { 195 | index_ = index; 196 | } 197 | -------------------------------------------------------------------------------- /src/lalr/RegexState.hpp: -------------------------------------------------------------------------------- 1 | #ifndef LALR_REGEXSTATE_HPP_INCLUDED 2 | #define LALR_REGEXSTATE_HPP_INCLUDED 3 | 4 | #include "RegexItem.hpp" 5 | #include "RegexTransition.hpp" 6 | #include 7 | #include 8 | 9 | namespace lalr 10 | { 11 | 12 | class RegexNode; 13 | 14 | /** 15 | // A state in a lexical analyzer's state machine. 16 | */ 17 | class RegexState 18 | { 19 | std::set items_; ///< The items that define the positions within the regular expressions that this state represents. 
20 | std::set transitions_; ///< The available transitions from this state to other states. 21 | const void* symbol_; ///< The symbol that this state recognizes or null if this state doesn't recognize a symbol. 22 | bool processed_; ///< True if this state has been processed during state machine generation otherwise false. 23 | int index_; ///< The index of this state. 24 | 25 | public: 26 | RegexState(); 27 | const std::set& get_items() const; 28 | const RegexTransition* find_transition_by_character( int character ) const; 29 | const std::set& get_transitions() const; 30 | const void* get_symbol() const; 31 | bool is_processed() const; 32 | int index() const; 33 | bool operator<( const RegexState& state ) const; 34 | int add_item( const std::set& next_nodes ); 35 | void add_transition( int begin, int end, RegexState* state ); 36 | void set_symbol( const void* symbol ); 37 | void set_processed( bool processed ); 38 | void set_index( int index ); 39 | }; 40 | 41 | } 42 | 43 | #endif 44 | -------------------------------------------------------------------------------- /src/lalr/RegexStateLess.cpp: -------------------------------------------------------------------------------- 1 | // 2 | // RegexStateLess.cpp 3 | // Copyright (c) Charles Baker. All rights reserved 4 | // 5 | 6 | #include "RegexStateLess.hpp" 7 | #include "RegexState.hpp" 8 | 9 | using namespace lalr; 10 | 11 | bool RegexStateLess::operator()( const std::unique_ptr& lhs, const std::unique_ptr& rhs ) const 12 | { 13 | return *lhs < *rhs; 14 | } 15 | -------------------------------------------------------------------------------- /src/lalr/RegexStateLess.hpp: -------------------------------------------------------------------------------- 1 | #ifndef LALR_REGEXSTATELESS_HPP_INCLUDED 2 | #define LALR_REGEXSTATELESS_HPP_INCLUDED 3 | 4 | #include 5 | 6 | namespace lalr 7 | { 8 | 9 | class RegexState; 10 | 11 | /** 12 | // Indirectly compare objects through two `std::unique_ptr<>` objects. 
13 | */ 14 | class RegexStateLess 15 | { 16 | public: 17 | bool operator()( const std::unique_ptr& lhs, const std::unique_ptr& rhs ) const; 18 | }; 19 | 20 | } 21 | 22 | #endif 23 | -------------------------------------------------------------------------------- /src/lalr/RegexSyntaxTree.hpp: -------------------------------------------------------------------------------- 1 | #ifndef LALR_REGEXSYNTAXTREE_HPP_INCLUDED 2 | #define LALR_REGEXSYNTAXTREE_HPP_INCLUDED 3 | 4 | #include "RegexCharacter.hpp" 5 | #include "RegexNodeLess.hpp" 6 | #include "RegexNodeType.hpp" 7 | #include "RegexToken.hpp" 8 | #include 9 | #include 10 | #include 11 | #include 12 | 13 | namespace lalr 14 | { 15 | 16 | class RegexGenerator; 17 | class RegexAction; 18 | class RegexNode; 19 | 20 | /** 21 | // @internal 22 | // 23 | // Parse regular expressions. 24 | */ 25 | class RegexSyntaxTree 26 | { 27 | RegexGenerator* generator_; ///< The RegexGenerator to retrieve actions from and report errors and debug information to. 28 | std::set bracket_expression_characters_; ///< The characters in the current bracket expression. 29 | int index_; ///< The current node index. 30 | std::vector> nodes_; ///< The current nodes. 31 | int errors_; ///< The number of errors that have occurred. 
32 | 33 | public: 34 | RegexSyntaxTree(); 35 | 36 | bool empty() const; 37 | int errors() const; 38 | const std::shared_ptr& node() const; 39 | void print() const; 40 | 41 | void reset(); 42 | void reset( const RegexToken& token, RegexGenerator* generator ); 43 | void reset( const std::vector& tokens, RegexGenerator* generator ); 44 | 45 | void cat_expression(); 46 | void or_expression(); 47 | void star_expression(); 48 | void plus_expression(); 49 | void optional_expression(); 50 | void begin_bracket_expression(); 51 | void begin_negative_bracket_expression(); 52 | bool end_bracket_expression(); 53 | void action_expression( const std::string& identifier ); 54 | void character( int character ); 55 | void dot(); 56 | void item_range( int begin, int end ); 57 | void item_character( int character ); 58 | void item_alnum(); 59 | void item_word(); 60 | void item_alpha(); 61 | void item_blank(); 62 | void item_cntrl(); 63 | void item_digit(); 64 | void item_graph(); 65 | void item_lower(); 66 | void item_print(); 67 | void item_punct(); 68 | void item_space(); 69 | void item_upper(); 70 | void item_xdigit(); 71 | void negative_item_range( int begin, int end ); 72 | void negative_item_character( int character ); 73 | void negative_item_alnum(); 74 | void negative_item_word(); 75 | void negative_item_alpha(); 76 | void negative_item_blank(); 77 | void negative_item_cntrl(); 78 | void negative_item_digit(); 79 | void negative_item_graph(); 80 | void negative_item_lower(); 81 | void negative_item_print(); 82 | void negative_item_punct(); 83 | void negative_item_space(); 84 | void negative_item_upper(); 85 | void negative_item_xdigit(); 86 | 87 | private: 88 | std::shared_ptr regex_node( RegexNodeType type ); 89 | std::shared_ptr regex_node( int begin, int end ); 90 | std::shared_ptr regex_node( int begin, int end, const RegexToken* token ); 91 | std::shared_ptr regex_node( const RegexAction* action ); 92 | 93 | void print_positions( const std::set& positions ) const; 94 | 
void print_nodes( const std::vector >& nodes, int level ) const; 95 | 96 | void calculate_symbols_for_characters_start_and_end(); 97 | void calculate_combined_parse_tree( const std::vector& tokens ); 98 | void calculate_nullable_first_last_and_follow(); 99 | void parse_regular_expression( const RegexToken& token ); 100 | void parse_literal( const RegexToken& token ); 101 | 102 | int escape( std::string::const_iterator start, std::string::const_iterator end, std::string::const_iterator* next ) const; 103 | void insert_characters( int begin, int end ); 104 | void erase_characters( int begin, int end ); 105 | void insert_characters( const char* characters ); 106 | void erase_characters( const char* characters ); 107 | }; 108 | 109 | } 110 | 111 | #endif 112 | -------------------------------------------------------------------------------- /src/lalr/RegexToken.cpp: -------------------------------------------------------------------------------- 1 | // 2 | // RegexToken.cpp 3 | // Copyright (c) Charles Baker. All rights reserved. 
4 | // 5 | 6 | #include "RegexToken.hpp" 7 | #include 8 | 9 | using std::find; 10 | using std::vector; 11 | using namespace lalr; 12 | 13 | RegexToken::RegexToken( RegexTokenType type, int line, int column, const void* symbol, const std::string& lexeme ) 14 | : type_( type ) 15 | , line_( line ) 16 | , column_( column ) 17 | , symbol_( symbol ) 18 | , lexeme_( lexeme ) 19 | , conflicted_with_() 20 | { 21 | } 22 | 23 | RegexToken::RegexToken( const RegexToken& token, const void* symbol ) 24 | : type_( token.type_ ) 25 | , line_( token.line_ ) 26 | , column_( token.column_ ) 27 | , symbol_( symbol ) 28 | , lexeme_( token.lexeme_ ) 29 | , conflicted_with_() 30 | { 31 | } 32 | 33 | RegexTokenType RegexToken::type() const 34 | { 35 | return type_; 36 | } 37 | 38 | int RegexToken::line() const 39 | { 40 | return line_; 41 | } 42 | 43 | int RegexToken::column() const 44 | { 45 | return column_; 46 | } 47 | 48 | const void* RegexToken::symbol() const 49 | { 50 | return symbol_; 51 | } 52 | 53 | const std::string& RegexToken::lexeme() const 54 | { 55 | return lexeme_; 56 | } 57 | 58 | bool RegexToken::conflicted_with( const RegexToken* token ) const 59 | { 60 | return find( conflicted_with_.begin(), conflicted_with_.end(), token ) != conflicted_with_.end(); 61 | } 62 | 63 | void RegexToken::add_conflicted_with( const RegexToken* token ) const 64 | { 65 | if ( !conflicted_with(token) ) 66 | { 67 | conflicted_with_.push_back( token ); 68 | } 69 | } 70 | -------------------------------------------------------------------------------- /src/lalr/RegexToken.hpp: -------------------------------------------------------------------------------- 1 | #ifndef LALR_LEXERTOKEN_HPP_INCLUDED 2 | #define LALR_LEXERTOKEN_HPP_INCLUDED 3 | 4 | #include "RegexTokenType.hpp" 5 | #include 6 | #include 7 | 8 | namespace lalr 9 | { 10 | 11 | /** 12 | // A regular expression or literal to be recognized by a lexical analyzer. 
13 | */ 14 | class RegexToken 15 | { 16 | RegexTokenType type_; ///< The type of this RegexToken (literal or regular expression). 17 | int line_; ///< The line to use when resolving token conflicts and reporting errors. 18 | int column_; ///< The column to use when resolving token conflicts and reporting errors. 19 | const void* symbol_; ///< The symbol to return when this token is matched in input. 20 | std::string lexeme_; ///< The literal or regular expression pattern to match for this token. 21 | mutable std::vector conflicted_with_; ///< The RegexTokens that this RegexToken has conflicted with. 22 | 23 | public: 24 | RegexToken( RegexTokenType type, int line, int column, const void* symbol, const std::string& lexeme ); 25 | RegexToken( const RegexToken& token, const void* symbol ); 26 | RegexTokenType type() const; 27 | int line() const; 28 | int column() const; 29 | const void* symbol() const; 30 | const std::string& lexeme() const; 31 | bool conflicted_with( const RegexToken* token ) const; 32 | void add_conflicted_with( const RegexToken* token ) const; 33 | }; 34 | 35 | } 36 | 37 | #endif 38 | -------------------------------------------------------------------------------- /src/lalr/RegexTokenType.hpp: -------------------------------------------------------------------------------- 1 | #ifndef LALR_REGEXTOKENTYPE_HPP_INCLUDED 2 | #define LALR_REGEXTOKENTYPE_HPP_INCLUDED 3 | 4 | namespace lalr 5 | { 6 | 7 | /* 8 | // The type of a token. 9 | // 10 | // The %RegexTokenType enumeration is specified in order of increasing priority 11 | // so that states that can match both a literal and a regular expression 12 | // result in the literal being matched. 
13 | // 14 | // @relates RegexToken 15 | */ 16 | enum RegexTokenType 17 | { 18 | TOKEN_NULL, 19 | TOKEN_REGULAR_EXPRESSION, 20 | TOKEN_LITERAL 21 | }; 22 | 23 | } 24 | 25 | #endif 26 | -------------------------------------------------------------------------------- /src/lalr/RegexTransition.cpp: -------------------------------------------------------------------------------- 1 | // 2 | // RegexTransition.cpp 3 | // Copyright (c) Charles Baker. All rights reserved. 4 | // 5 | 6 | #include "RegexTransition.hpp" 7 | #include "RegexAction.hpp" 8 | #include "RegexState.hpp" 9 | #include "assert.hpp" 10 | 11 | using namespace lalr; 12 | 13 | /** 14 | // Constructor. 15 | // 16 | // @param begin 17 | // The first character in the interval that this transition can be taken on. 18 | // 19 | // @param end 20 | // One past the last character in the interval that this transition can be 21 | // taken on. 22 | // 23 | // @param state 24 | // The state that is transitioned to (assumed not null). 25 | // 26 | // @param action 27 | // The action to take when this transition is taken or null if this 28 | // transition doesn't take an action. 29 | */ 30 | RegexTransition::RegexTransition( int begin, int end, const RegexState* state, const RegexAction* action ) 31 | : begin_( begin ) 32 | , end_( end ) 33 | , state_( state ) 34 | , action_( action ) 35 | { 36 | LALR_ASSERT( begin_ < end_ ); 37 | LALR_ASSERT( state_ ); 38 | } 39 | 40 | /** 41 | // Is this transition taken on \e character? 42 | // 43 | // @return 44 | // True if this transition is taken on character otherwise false. 45 | */ 46 | bool RegexTransition::on_character( int character ) const 47 | { 48 | return character >= begin_ && character < end_; 49 | } 50 | 51 | /** 52 | // Get the first character in the interval that this transition can be taken 53 | // on. 54 | // 55 | // @return 56 | // The first character. 
57 | */ 58 | int RegexTransition::begin() const 59 | { 60 | return begin_; 61 | } 62 | 63 | /** 64 | // Get the character that is one past the last character in the interval 65 | // that this transition can be taken on. 66 | // 67 | // @return 68 | // One past the last character. 69 | */ 70 | int RegexTransition::end() const 71 | { 72 | return end_; 73 | } 74 | 75 | /** 76 | // Get the action that is taken when this transition is taken. 77 | // 78 | // @return 79 | // The action or null if this transition doesn't have an action. 80 | */ 81 | const RegexAction* RegexTransition::action() const 82 | { 83 | return action_; 84 | } 85 | 86 | /** 87 | // Get the state that this transition is to. 88 | // 89 | // @return 90 | // The state. 91 | */ 92 | const RegexState* RegexTransition::state() const 93 | { 94 | LALR_ASSERT( state_ ); 95 | return state_; 96 | } 97 | 98 | /** 99 | // Less than operator. 100 | // 101 | // @return 102 | // True if this transition's interval begins before \e transition's interval 103 | // and ends at or before the beginning of \e transition's interval. 104 | */ 105 | bool RegexTransition::operator<( const RegexTransition& transition ) const 106 | { 107 | return begin_ < transition.begin_ && end_ <= transition.begin_; 108 | } 109 | -------------------------------------------------------------------------------- /src/lalr/RegexTransition.hpp: -------------------------------------------------------------------------------- 1 | #ifndef LALR_REGEXTRANSITION_HPP_INCLUDED 2 | #define LALR_REGEXTRANSITION_HPP_INCLUDED 3 | 4 | namespace lalr 5 | { 6 | 7 | class RegexAction; 8 | class RegexState; 9 | 10 | /** 11 | // A transition in a lexical analyzer's state machine. 12 | */ 13 | class RegexTransition 14 | { 15 | int begin_; ///< The first character that the transition can be made on. 16 | int end_; ///< One past the last character that the transition can be made on. 17 | const RegexState* state_; ///< The state that is transitioned to.
18 | const RegexAction* action_; ///< The action that is taken on the transition or null if no action is taken. 19 | 20 | public: 21 | RegexTransition( int begin, int end, const RegexState* state, const RegexAction* action ); 22 | int begin() const; 23 | int end() const; 24 | bool on_character( int character ) const; 25 | const RegexState* state() const; 26 | const RegexAction* action() const; 27 | bool operator<( const RegexTransition& transition ) const; 28 | }; 29 | 30 | } 31 | 32 | #endif 33 | -------------------------------------------------------------------------------- /src/lalr/SymbolType.hpp: -------------------------------------------------------------------------------- 1 | #ifndef LALR_SYMBOLTYPE_HPP_INCLUDED 2 | #define LALR_SYMBOLTYPE_HPP_INCLUDED 3 | 4 | namespace lalr 5 | { 6 | 7 | /** 8 | // The type of a symbol. 9 | // 10 | // @relates Symbol 11 | */ 12 | enum SymbolType 13 | { 14 | SYMBOL_NULL, ///< Null type. 15 | SYMBOL_TERMINAL, ///< Terminal symbols. 16 | SYMBOL_NON_TERMINAL, ///< Non-terminal symbols. 17 | SYMBOL_END, ///< The end symbol. 18 | SYMBOL_TYPE_COUNT ///< The number of symbol types. 19 | }; 20 | 21 | } 22 | 23 | #endif 24 | -------------------------------------------------------------------------------- /src/lalr/ThreadPool.cpp: -------------------------------------------------------------------------------- 1 | // 2 | // ThreadPool.cpp 3 | // Copyright (c) Charles Baker. All rights reserved. 
4 | // 5 | 6 | #include "ThreadPool.hpp" 7 | #include "assert.hpp" 8 | #include 9 | 10 | using std::deque; 11 | using std::mutex; 12 | using std::thread; 13 | using std::lock_guard; 14 | using std::unique_lock; 15 | using namespace lalr; 16 | 17 | ThreadPool::ThreadPool() 18 | : mutex_() 19 | , jobs_empty_() 20 | , jobs_ready_() 21 | , jobs_() 22 | , threads_() 23 | , active_jobs_( 0 ) 24 | , running_( false ) 25 | { 26 | } 27 | 28 | ThreadPool::~ThreadPool() 29 | { 30 | stop(); 31 | } 32 | 33 | void ThreadPool::start( int threads ) 34 | { 35 | LALR_ASSERT( threads > 0 ); 36 | if ( !running_ ) 37 | { 38 | unique_lock lock( mutex_ ); 39 | active_jobs_ = 0; 40 | running_ = true; 41 | threads_.reserve( threads ); 42 | for ( int i = 0; i < threads; ++i ) 43 | { 44 | threads_.push_back( thread(&ThreadPool::thread_process, this) ); 45 | } 46 | } 47 | } 48 | // Wait for queued jobs to drain, tell the workers to exit, then join and discard them; does nothing if no threads were started. 49 | void ThreadPool::stop() 50 | { 51 | if ( !threads_.empty() ) 52 | { 53 | { 54 | unique_lock lock( mutex_ ); 55 | while ( !jobs_.empty() ) // re-check after every wakeup so a spurious wakeup can't stop the pool with jobs still queued 56 | { 57 | jobs_empty_.wait( lock ); 58 | } 59 | running_ = false; 60 | jobs_ready_.notify_all(); 61 | } 62 | LALR_ASSERT( !running_ && jobs_.empty() ); 63 | for ( thread& thread : threads_ ) 64 | { 65 | thread.join(); 66 | } 67 | threads_.clear(); 68 | } 69 | } 70 | // Queue \e job, count it as active, and wake one worker thread. 71 | void ThreadPool::push_job( std::function&& job ) 72 | { 73 | { 74 | lock_guard lock( mutex_ ); 75 | jobs_.push_back( std::move(job) ); // job is a named rvalue reference (an lvalue here); move it instead of copying 76 | ++active_jobs_; 77 | } 78 | jobs_ready_.notify_one(); 79 | } 80 | 81 | void ThreadPool::wait_idle() 82 | { 83 | unique_lock lock( mutex_ ); 84 | while ( running_ && active_jobs_ > 0 ) 85 | { 86 | jobs_empty_.wait( lock ); 87 | } 88 | } 89 | 90 | void ThreadPool::thread_process() 91 | { 92 | unique_lock lock( mutex_ ); 93 | while ( running_ ) 94 | { 95 | while ( running_ && jobs_.empty() ) 96 | { 97 | jobs_ready_.wait( lock ); 98 | } 99 | 100 | while ( running_ && !jobs_.empty() ) 101 | { 102 | std::function job = std::move( jobs_.front() ); 103 | jobs_.pop_front(); 104 | lock.unlock();
105 | job(); 106 | lock.lock(); 107 | --active_jobs_; 108 | if ( active_jobs_ <= 0 ) 109 | { 110 | jobs_empty_.notify_all(); 111 | } 112 | } 113 | } 114 | } 115 | -------------------------------------------------------------------------------- /src/lalr/ThreadPool.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | 10 | namespace lalr 11 | { 12 | 13 | /** 14 | // A thread pool. 15 | */ 16 | class ThreadPool 17 | { 18 | std::mutex mutex_; 19 | std::condition_variable jobs_empty_; 20 | std::condition_variable jobs_ready_; 21 | std::deque> jobs_; 22 | std::vector threads_; 23 | int active_jobs_; 24 | bool running_; 25 | 26 | public: 27 | ThreadPool(); 28 | ~ThreadPool(); 29 | void start( int threads ); 30 | void stop(); 31 | void push_job( std::function&& job ); 32 | void wait_idle(); 33 | 34 | private: 35 | void thread_process(); 36 | }; 37 | 38 | } 39 | -------------------------------------------------------------------------------- /src/lalr/TransitionType.hpp: -------------------------------------------------------------------------------- 1 | #ifndef LALR_TRANSITIONTYPE_HPP_INCLUDED 2 | #define LALR_TRANSITIONTYPE_HPP_INCLUDED 3 | 4 | namespace lalr 5 | { 6 | 7 | /** 8 | // The type of a transition in a parser's state machine. 9 | */ 10 | enum TransitionType 11 | { 12 | TRANSITION_SHIFT, ///< Shift a terminal symbol onto the parser's stack. 13 | TRANSITION_REDUCE ///< Reduce one or more symbols on the parser's stack into a non terminal. 
14 | }; 15 | 16 | } 17 | 18 | #endif 19 | -------------------------------------------------------------------------------- /src/lalr/assert.hpp: -------------------------------------------------------------------------------- 1 | #ifndef LALR_ASSERT_HPP_INCLUDED 2 | #define LALR_ASSERT_HPP_INCLUDED 3 | 4 | #include 5 | // A client-supplied LALR_ASSERT is left untouched; otherwise it maps to assert() or, under NDEBUG, to a no-op. 6 | #if !defined LALR_ASSERT && !defined NDEBUG 7 | #define LALR_ASSERT( condition ) assert((condition)) 8 | #elif !defined LALR_ASSERT 9 | #define LALR_ASSERT( condition ) ((void) 0) 10 | #endif 11 | 12 | #endif 13 | -------------------------------------------------------------------------------- /src/lalr/block_comment.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | #include 6 | 7 | namespace lalr 8 | { 9 | // Advance from \e begin to just past the first '*' followed by '/', or to \e end if there is none; clears \e lexeme and leaves the symbol untouched. 10 | template ::value_type, class Traits = typename std::char_traits, class Allocator = typename std::allocator> 11 | PositionIterator block_comment( const PositionIterator& begin, const PositionIterator& end, std::basic_string* lexeme, const void** /*symbol*/ ) 12 | { 13 | LALR_ASSERT( lexeme ); 14 | lexeme->clear(); 15 | 16 | bool done = false; 17 | PositionIterator i = begin; 18 | while ( i != end && !done ) 19 | { 20 | while ( i != end && *i != '*' ) 21 | { 22 | ++i; 23 | } 24 | 25 | if ( i != end ) 26 | { 27 | LALR_ASSERT( *i == '*' ); 28 | 29 | ++i; 30 | if ( i != end && *i == '/' ) // the input may end right after '*'; never dereference end 31 | { 32 | ++i; 33 | done = true; 34 | } 35 | } 36 | } 37 | return i; 38 | } 39 | 40 | } 41 | -------------------------------------------------------------------------------- /src/lalr/forge/lalr/Lalrc.lua: -------------------------------------------------------------------------------- 1 | 2 | local Lalrc = PatternRule( 'Lalrc' ); 3 | 4 | function Lalrc.created( toolset, target ) 5 | local lalrc = toolset:interpolate( toolset.lalr.lalrc ); 6 | target:add_dependency( toolset:Target(lalrc) ); 7 | end 8 | 9 | function Lalrc.build( toolset, target ) 10 | local lalrc = target:dependency(1); 11 | local
filename = target:dependency(2):filename(); 12 | printf( leaf(filename) ); 13 | run( lalrc, ('lalrc -o "%s" "%s"'):format(target, filename) ); 14 | end 15 | 16 | return Lalrc; 17 | -------------------------------------------------------------------------------- /src/lalr/forge/lalr/init.lua: -------------------------------------------------------------------------------- 1 | 2 | local lalr = {}; 3 | 4 | function lalr.configure( toolset, lalr_settings ) 5 | return { 6 | lalrc = lalr_settings.lalrc or '${bin}/lalrc'; 7 | }; 8 | end 9 | 10 | function lalr.install( toolset ) 11 | toolset:configure_once( 'lalr', lalr.configure ); 12 | toolset.Lalrc = require( 'forge.lalr.Lalrc' ); 13 | return true; 14 | end 15 | 16 | return lalr; 17 | -------------------------------------------------------------------------------- /src/lalr/lalr.forge: -------------------------------------------------------------------------------- 1 | 2 | buildfile 'lalrc/lalrc.forge'; 3 | buildfile 'lalr_examples/lalr_examples.forge'; 4 | buildfile 'lalr_test/lalr_test.forge'; 5 | 6 | for _, cc in toolsets('^cc.*') do 7 | cc:all { 8 | cc:StaticLibrary '${lib}/lalr_${platform}_${architecture}' { 9 | cc:Cxx '${obj}/%1' { 10 | 'ErrorPolicy.cpp', 11 | 'ThreadPool.cpp'; 12 | }; 13 | 14 | cc:Cxx '${obj}/%1' { 15 | 'Grammar.cpp', 16 | 'GrammarAction.cpp', 17 | 'GrammarCompiler.cpp', 18 | 'GrammarGenerator.cpp', 19 | 'GrammarParser.cpp', 20 | 'GrammarState.cpp', 21 | 'GrammarSymbol.cpp', 22 | 'GrammarSymbolSet.cpp', 23 | 'GrammarTransition.cpp' 24 | }; 25 | 26 | cc:Cxx '${obj}/%1' { 27 | 'RegexAction.cpp', 28 | 'RegexCharacter.cpp', 29 | 'RegexCompiler.cpp', 30 | 'RegexGenerator.cpp', 31 | 'RegexItem.cpp', 32 | 'RegexNode.cpp', 33 | 'RegexNodeLess.cpp', 34 | 'RegexParser.cpp', 35 | 'RegexState.cpp', 36 | 'RegexStateLess.cpp', 37 | 'RegexSyntaxTree.cpp', 38 | 'RegexToken.cpp', 39 | 'RegexTransition.cpp' 40 | }; 41 | }; 42 | }; 43 | end 44 | 
-------------------------------------------------------------------------------- /src/lalr/lalr_examples/error_handling_calculator.g: -------------------------------------------------------------------------------- 1 | error_handling_calculator { 2 | %whitespace "[ \t\r\n]*"; 3 | %none error; 4 | %left '(' ')'; 5 | %left '+' '-'; 6 | %left '*' '/'; 7 | %none integer; 8 | stmts: stmts stmt | stmt | %precedence '('; 9 | stmt: 10 | expr ';' [result] | 11 | error ';' [unexpected_error] 12 | ; 13 | expr: 14 | expr '+' expr [add] | 15 | expr '-' expr [subtract] | 16 | expr '*' expr [multiply] | 17 | expr '/' expr [divide] | 18 | expr error expr [unknown_operator_error] | 19 | '(' expr ')' [compound] | 20 | integer [integer] 21 | ; 22 | integer: "[0-9]+"; 23 | } 24 | -------------------------------------------------------------------------------- /src/lalr/lalr_examples/json.g: -------------------------------------------------------------------------------- 1 | json { 2 | %whitespace "[ \t\r\n]*"; 3 | document: '{' attributes '}' [document] | ; 4 | attributes: attributes ',' attribute [add_to_object] | attribute [create_object] | ; 5 | attribute: name ':' value [attribute]; 6 | elements: elements ',' value [add_to_array] | value [create_array] | ; 7 | value: 8 | null [null] | 9 | boolean [value] | 10 | integer [value] | 11 | real [value] | 12 | string [value] | 13 | '{' attributes '}' [object] | 14 | '[' elements ']' [array] 15 | ; 16 | name: "[\"']:string:"; 17 | null: 'null'; 18 | boolean: "true|false"; 19 | integer: "(\+|\-)?[0-9]+"; 20 | real: "(\+|\-)?[0-9]+(\.[0-9]+)?((e|E)(\+|\-)?[0-9]+)?"; 21 | string: "[\"']:string:"; 22 | } 23 | -------------------------------------------------------------------------------- /src/lalr/lalr_examples/lalr_calculator_example.cpp: -------------------------------------------------------------------------------- 1 | 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | 8 | using namespace std; 9 | using namespace lalr; 
10 | 11 | void lalr_calculator_example() 12 | { 13 | const char* calculator_grammar = 14 | "calculator { \n" 15 | " %left '+' '-'; \n" 16 | " %left '*' '/'; \n" 17 | " %none integer; \n" 18 | " %whitespace \"[ \\t\\r\\n]*\"; \n" 19 | " expr: \n" 20 | " expr '+' expr [add] | \n" 21 | " expr '-' expr [subtract] | \n" 22 | " expr '*' expr [multiply] | \n" 23 | " expr '/' expr [divide] | \n" 24 | " '(' expr ')' [compound] | \n" 25 | " integer [integer] \n" 26 | " ; \n" 27 | " integer: \"[0-9]+\"; \n" 28 | "} \n" 29 | ; 30 | 31 | GrammarCompiler compiler; 32 | compiler.compile( calculator_grammar, calculator_grammar + strlen(calculator_grammar) ); 33 | Parser parser( compiler.parser_state_machine() ); 34 | parser.parser_action_handlers() 35 | ( "add", [] ( const int* data, const ParserNode<>* nodes, size_t length ) 36 | { 37 | return data[0] + data[2]; 38 | } 39 | ) 40 | ( "subtract", [] ( const int* data, const ParserNode<>* nodes, size_t length ) 41 | { 42 | return data[0] - data[2]; 43 | } 44 | ) 45 | ( "multiply", [] ( const int* data, const ParserNode<>* nodes, size_t length ) 46 | { 47 | return data[0] * data[2]; 48 | } 49 | ) 50 | ( "divide", [] ( const int* data, const ParserNode<>* nodes, size_t length ) 51 | { 52 | return data[0] / data[2]; 53 | } 54 | ) 55 | ( "compound", [] ( const int* data, const ParserNode<>* nodes, size_t length ) 56 | { 57 | return data[1]; 58 | } 59 | ) 60 | ( "integer", [] ( const int* data, const ParserNode<>* nodes, size_t length ) 61 | { 62 | return ::atoi( nodes[0].lexeme().c_str() ); 63 | } 64 | ) 65 | ; 66 | 67 | const char* input = "1 + 2 * (3 + 4) + 5"; 68 | parser.parse( input, input + strlen(input) ); 69 | printf( "1 + 2 * (3 + 4) + 5 = %d\n", parser.user_data() ); 70 | LALR_ASSERT( parser.accepted() ); 71 | LALR_ASSERT( parser.full() ); 72 | LALR_ASSERT( parser.user_data() == 20 ); 73 | } 74 | -------------------------------------------------------------------------------- 
/src/lalr/lalr_examples/lalr_error_handling_calculator_example.cpp: -------------------------------------------------------------------------------- 1 | 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | 8 | using namespace std; 9 | using namespace lalr; 10 | 11 | void lalr_error_handling_calculator_example() 12 | { 13 | bool error = false; 14 | extern const lalr::ParserStateMachine* error_handling_calculator_parser_state_machine; 15 | Parser parser( error_handling_calculator_parser_state_machine ); 16 | parser.parser_action_handlers() 17 | ( "add", [] ( const int* data, const ParserNode<>* nodes, size_t length ) 18 | { 19 | return data[0] + data[2]; 20 | } 21 | ) 22 | ( "subtract", [] ( const int* data, const ParserNode<>* nodes, size_t length ) 23 | { 24 | return data[0] - data[2]; 25 | } 26 | ) 27 | ( "multiply", [] ( const int* data, const ParserNode<>* nodes, size_t length ) 28 | { 29 | return data[0] * data[2]; 30 | } 31 | ) 32 | ( "divide", [] ( const int* data, const ParserNode<>* nodes, size_t length ) 33 | { 34 | return data[0] / data[2]; 35 | } 36 | ) 37 | ( "compound", [] ( const int* data, const ParserNode<>* nodes, size_t length ) 38 | { 39 | return data[1]; 40 | } 41 | ) 42 | ( "integer", [] ( const int* data, const ParserNode<>* nodes, size_t length ) 43 | { 44 | return ::atoi( nodes[0].lexeme().c_str() ); 45 | } 46 | ) 47 | ( "result", [&error] ( const int* data, const ParserNode<>* nodes, size_t length ) 48 | { 49 | if ( !error ) 50 | { 51 | printf( "%d\n", data[0] ); 52 | } 53 | error = false; 54 | return data[0]; 55 | } 56 | ) 57 | ( "unexpected_error", [&error] ( const int* data, const ParserNode<>* nodes, size_t length ) 58 | { 59 | error = true; 60 | printf( "Unexpected error!\n" ); 61 | return 0; 62 | } 63 | ) 64 | ( "unknown_operator_error", [&error] ( const int* data, const ParserNode<>* nodes, size_t length ) 65 | { 66 | error = true; 67 | printf( "Unknown operator error!\n" ); 68 | return 0; 69 | } 70 | ) 71 | ; 72 | 73 
| const char* input = "1 + 2 * (3 + 4) + 5; 2 ^ 3; 3 * 4; foo!;"; 74 | parser.parse( input, input + strlen(input) ); 75 | LALR_ASSERT( parser.accepted() ); 76 | LALR_ASSERT( parser.full() ); 77 | } 78 | -------------------------------------------------------------------------------- /src/lalr/lalr_examples/lalr_examples.cpp: -------------------------------------------------------------------------------- 1 | 2 | int main() 3 | { 4 | extern void lalr_hello_world_example(); 5 | lalr_hello_world_example(); 6 | 7 | extern void lalr_calculator_example(); 8 | lalr_calculator_example(); 9 | 10 | extern void lalr_error_handling_calculator_example(); 11 | lalr_error_handling_calculator_example(); 12 | 13 | extern void lalr_xml_example(); 14 | lalr_xml_example(); 15 | 16 | extern void lalr_json_example(); 17 | lalr_json_example(); 18 | 19 | return 0; 20 | } 21 | -------------------------------------------------------------------------------- /src/lalr/lalr_examples/lalr_examples.forge: -------------------------------------------------------------------------------- 1 | 2 | for _, cc in toolsets('^cc.*') do 3 | local libraries; 4 | if cc:platform_matches('linux') then 5 | libraries = { 6 | 'pthread'; 7 | }; 8 | end 9 | 10 | local cc = cc:inherit { 11 | warning_level = 0; 12 | }; 13 | 14 | local lalr_examples = cc:Executable '${bin}/lalr_examples' { 15 | libraries = libraries; 16 | '${lib}/lalr_${platform}_${architecture}'; 17 | cc:Cxx '${obj}/%1' { 18 | defines = { 19 | ([[LALR_EXAMPLES=\"%s/\"]]):format( pwd() ); 20 | }; 21 | "lalr_examples.cpp", 22 | "lalr_error_handling_calculator_example.cpp", 23 | "lalr_hello_world_example.cpp", 24 | "lalr_json_example.cpp", 25 | "lalr_calculator_example.cpp", 26 | "lalr_xml_example.cpp" 27 | }; 28 | }; 29 | 30 | -- If the Lalrc target prototype has been defined then use it to rebuild 31 | -- the example parse tables from grammars. Otherwise just use the source 32 | -- files directly. 
33 | if cc.Lalrc then 34 | lalr_examples { 35 | cc:Cxx '${obj}/%1' { 36 | cc:Lalrc '${obj}/%1.cpp' { 37 | 'error_handling_calculator.g'; 38 | 'json.g'; 39 | 'xml.g'; 40 | }; 41 | }; 42 | }; 43 | else 44 | lalr_examples { 45 | cc:Cxx '${obj}/%1' { 46 | 'calculator.cpp', -- NOTE(review): the Lalrc branch above generates error_handling_calculator.cpp from error_handling_calculator.g; confirm that calculator.cpp exists and is the intended pre-generated equivalent 47 | 'json.cpp', 48 | 'xml.cpp', 49 | }; 50 | }; 51 | end 52 | 53 | cc:all { 54 | lalr_examples; 55 | }; 56 | end 57 | -------------------------------------------------------------------------------- /src/lalr/lalr_examples/lalr_hello_world_example.cpp: -------------------------------------------------------------------------------- 1 | 2 | #include 3 | #include 4 | #include 5 | #include 6 | 7 | using namespace std; 8 | using namespace lalr; 9 | // Compile a one-rule grammar at runtime, parse "Hello World!" with it, and print the greeting from the rule's action handler. 10 | void lalr_hello_world_example() 11 | { 12 | const char* hello_world_grammar = 13 | "hello_world {\n" 14 | " hello_world: 'Hello World!' [hello_world];\n" 15 | "}\n" 16 | ; 17 | 18 | GrammarCompiler compiler; 19 | compiler.compile( hello_world_grammar, hello_world_grammar + strlen(hello_world_grammar) ); 20 | Parser parser( compiler.parser_state_machine() ); 21 | parser.parser_action_handlers() 22 | ( "hello_world", [] (const shared_ptr>* data, const ParserNode<>* nodes, size_t length) 23 | { 24 | printf( "Hello World!\n" ); 25 | return shared_ptr>(); 26 | } 27 | ); 28 | ; 29 | 30 | const char* input = "Hello World!"; 31 | parser.parse( input, input + strlen(input) ); 32 | LALR_ASSERT( parser.accepted() ); 33 | LALR_ASSERT( parser.full() ); 34 | } 35 | -------------------------------------------------------------------------------- /src/lalr/lalr_examples/lalr_json_example.cpp: -------------------------------------------------------------------------------- 1 | 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include 12 | #ifdef __APPLE__ 13 | #include 14 | #else 15 | #include 16 | #endif 17 | 18 | using namespace std; 19 | using namespace lalr; 20 | 21 | namespace 22 | { 23 | 24 |
typedef std::basic_string String; 25 | 26 | struct Attribute; 27 | 28 | struct Value 29 | { 30 | String value_; 31 | std::vector> attributes_; 32 | std::vector> elements_; 33 | 34 | Value() 35 | : value_() 36 | , attributes_() 37 | , elements_() 38 | { 39 | } 40 | 41 | Value( const String& value ) 42 | : value_( value ) 43 | , attributes_() 44 | , elements_() 45 | { 46 | } 47 | }; 48 | 49 | struct Attribute 50 | { 51 | String name_; 52 | shared_ptr value_; 53 | 54 | Attribute() 55 | : name_() 56 | , value_() 57 | { 58 | } 59 | 60 | Attribute( const String& name, const shared_ptr& value ) 61 | : name_( name ) 62 | , value_( value ) 63 | { 64 | } 65 | }; 66 | 67 | struct JsonUserData 68 | { 69 | String name_; 70 | shared_ptr value_; 71 | 72 | JsonUserData() 73 | : name_() 74 | , value_() 75 | { 76 | } 77 | 78 | JsonUserData( shared_ptr value ) 79 | : name_() 80 | , value_( value ) 81 | { 82 | } 83 | 84 | JsonUserData( const String& name, shared_ptr value ) 85 | : name_( name ) 86 | , value_( value ) 87 | { 88 | } 89 | }; 90 | 91 | static JsonUserData document( const JsonUserData* start, const ParserNode* nodes, size_t length ) 92 | { 93 | return start[1]; 94 | } 95 | 96 | static JsonUserData attribute( const JsonUserData* start, const ParserNode* nodes, size_t length ) 97 | { 98 | const shared_ptr& attribute = start[2].value_; 99 | return JsonUserData( nodes[0].lexeme(), attribute ); 100 | } 101 | 102 | static JsonUserData null( const JsonUserData* start, const ParserNode* nodes, size_t length ) 103 | { 104 | shared_ptr null_value = make_shared(); 105 | return JsonUserData( null_value ); 106 | } 107 | 108 | static JsonUserData value( const JsonUserData* start, const ParserNode* nodes, size_t length ) 109 | { 110 | shared_ptr value = make_shared( nodes[0].lexeme() ); 111 | return JsonUserData( value ); 112 | } 113 | 114 | static JsonUserData object( const JsonUserData* start, const ParserNode* nodes, size_t length ) 115 | { 116 | return start[1]; 117 | } 118 | 119 | 
static JsonUserData add_to_object( const JsonUserData* start, const ParserNode* nodes, size_t length ) 120 | { 121 | const shared_ptr& object = start[0].value_; 122 | shared_ptr attribute = make_shared( start[2].name_, start[2].value_ ); 123 | object->attributes_.push_back( attribute ); 124 | return JsonUserData( object ); 125 | } 126 | 127 | static JsonUserData create_object( const JsonUserData* start, const ParserNode* nodes, size_t length ) 128 | { 129 | shared_ptr object = make_shared(); 130 | shared_ptr attribute = make_shared( start[0].name_, start[0].value_ ); 131 | object->attributes_.push_back( attribute ); 132 | return JsonUserData( object ); 133 | } 134 | 135 | static JsonUserData add_to_array( const JsonUserData* start, const ParserNode* nodes, size_t length ) 136 | { 137 | const shared_ptr& array = start[0].value_; 138 | const shared_ptr& element = start[2].value_; 139 | array->elements_.push_back( element ); 140 | return JsonUserData( array ); 141 | } 142 | 143 | static JsonUserData create_array( const JsonUserData* start, const ParserNode* nodes, size_t length ) 144 | { 145 | shared_ptr array = make_shared(); 146 | const shared_ptr& element = start[0].value_; 147 | array->elements_.push_back( element ); 148 | return JsonUserData( array ); 149 | } 150 | 151 | static void indent( int level ) 152 | { 153 | for ( int i = 0; i < level; ++i ) 154 | { 155 | printf( " " ); 156 | } 157 | } 158 | 159 | static void print( const Value& value, int level ) 160 | { 161 | std::setlocale( LC_ALL, "en_US.UTF-8" ); 162 | std::wstring_convert, char32_t> utf8; 163 | 164 | for ( const shared_ptr& attribute : value.attributes_ ) 165 | { 166 | LALR_ASSERT( attribute ); 167 | const String& name = attribute->name_; 168 | const Value& value = *attribute->value_; 169 | if ( value.attributes_.empty() && value.elements_.empty() ) 170 | { 171 | indent( level + 1 ); 172 | printf( "%s='%s'\n", 173 | utf8.to_bytes(attribute->name_).c_str(), 174 | 
utf8.to_bytes(attribute->value_->value_).c_str() 175 | ); 176 | } 177 | else 178 | { 179 | indent( level + 1 ); 180 | printf( "%s:\n", 181 | utf8.to_bytes(attribute->name_).c_str() 182 | ); 183 | print( value, level + 1 ); 184 | } 185 | } 186 | 187 | int index = 0; 188 | for ( const shared_ptr& element : value.elements_ ) 189 | { 190 | LALR_ASSERT( element ); 191 | const Value& value = *element; 192 | if ( value.attributes_.empty() && value.elements_.empty() ) 193 | { 194 | indent( level + 1 ); 195 | printf( "%d: '%s'\n", 196 | index, 197 | utf8.to_bytes(value.value_).c_str() 198 | ); 199 | } 200 | else 201 | { 202 | indent( level + 1 ); 203 | printf( "%d:\n", index ); 204 | print( value, level + 1 ); 205 | } 206 | ++index; 207 | } 208 | } 209 | 210 | } 211 | 212 | void lalr_json_example() 213 | { 214 | extern const lalr::ParserStateMachine* json_parser_state_machine; 215 | Parser, JsonUserData> parser( json_parser_state_machine ); 216 | parser.set_lexer_action_handler( "string", &string_literal> ); 217 | parser.parser_action_handlers() 218 | ( "document", &document ) 219 | ( "add_to_object", &add_to_object ) 220 | ( "create_object", &create_object ) 221 | ( "attribute", &attribute ) 222 | ( "add_to_array", &add_to_array ) 223 | ( "create_array", &create_array ) 224 | ( "null", &null ) 225 | ( "value", &value ) 226 | ( "object", &object ) 227 | ( "array", &object ) 228 | ; 229 | 230 | using std::locale; 231 | using std::codecvt; 232 | using std::basic_ifstream; 233 | using std::istreambuf_iterator; 234 | std::basic_ifstream file( LALR_EXAMPLES "lalr_json_example.json", std::ios_base::binary ); 235 | file.imbue( locale(file.getloc(), new codecvt) ); 236 | istreambuf_iterator input( file ); 237 | istreambuf_iterator input_end; 238 | 239 | parser.parse( input, input_end ); 240 | LALR_ASSERT( parser.accepted() ); 241 | LALR_ASSERT( parser.full() ); 242 | print( *parser.user_data().value_, 0 ); 243 | } 244 | 
-------------------------------------------------------------------------------- /src/lalr/lalr_examples/lalr_json_example.json: -------------------------------------------------------------------------------- 1 | { 2 | "model": { 3 | "format": "Model", 4 | "version": 1, 5 | "address": "0017FAB0", 6 | "items": { 7 | "name": "Albert" 8 | }, 9 | "more_items": ["one", 2, 3, "to prove that it's really UTF-8... 😁 😄!"] 10 | } 11 | } 12 | -------------------------------------------------------------------------------- /src/lalr/lalr_examples/lalr_xml_example.cpp: -------------------------------------------------------------------------------- 1 | 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | 9 | using namespace std; 10 | using namespace lalr; 11 | 12 | namespace 13 | { 14 | 15 | typedef std::basic_string String; 16 | 17 | struct Attribute 18 | { 19 | String name_; 20 | String value_; 21 | 22 | Attribute( const String& name, const String& value ) 23 | : name_( name ) 24 | , value_( value ) 25 | { 26 | } 27 | }; 28 | 29 | struct Element 30 | { 31 | String name_; 32 | std::list > attributes_; 33 | std::list > elements_; 34 | 35 | Element() 36 | : name_(), 37 | attributes_(), 38 | elements_() 39 | { 40 | } 41 | }; 42 | 43 | struct XmlUserData 44 | { 45 | shared_ptr attribute_; 46 | shared_ptr element_; 47 | 48 | XmlUserData() 49 | : attribute_(), 50 | element_() 51 | { 52 | } 53 | 54 | XmlUserData( shared_ptr attribute ) 55 | : attribute_( attribute ), 56 | element_() 57 | { 58 | } 59 | 60 | XmlUserData( shared_ptr element ) 61 | : attribute_(), 62 | element_( element ) 63 | { 64 | } 65 | }; 66 | 67 | static XmlUserData document( const XmlUserData* start, const ParserNode* nodes, size_t length ) 68 | { 69 | const XmlUserData* end = start + length; 70 | while ( start != end && !start[0].element_ ) 71 | { 72 | ++start; 73 | } 74 | return start != end ? 
start[0] : XmlUserData(); 75 | } 76 | 77 | static XmlUserData add_element( const XmlUserData* start, const ParserNode* nodes, size_t length ) 78 | { 79 | shared_ptr element = start[0].element_; 80 | element->elements_.push_back( start[1].element_ ); 81 | return XmlUserData( element ); 82 | } 83 | 84 | static XmlUserData create_element( const XmlUserData* start, const ParserNode* nodes, size_t length ) 85 | { 86 | shared_ptr element( new Element() ); 87 | element->elements_.push_back( start[0].element_ ); 88 | return XmlUserData( element ); 89 | } 90 | 91 | static XmlUserData short_element( const XmlUserData* start, const ParserNode* nodes, size_t length ) 92 | { 93 | shared_ptr element = start[2].element_; 94 | element->name_ = nodes[1].lexeme(); 95 | return XmlUserData( element ); 96 | } 97 | 98 | static XmlUserData long_element( const XmlUserData* start, const ParserNode* nodes, size_t length ) 99 | { 100 | shared_ptr element = start[2].element_; 101 | if ( !element ) 102 | { 103 | element.reset( new Element() ); 104 | } 105 | 106 | element->name_ = nodes[1].lexeme(); 107 | if ( start[4].element_ ) 108 | { 109 | swap( element->elements_, start[4].element_->elements_ ); 110 | } 111 | return XmlUserData( element ); 112 | } 113 | 114 | static XmlUserData add_attribute( const XmlUserData* start, const ParserNode* nodes, size_t length ) 115 | { 116 | LALR_ASSERT( start[0].element_ ); 117 | shared_ptr element = start[0].element_; 118 | LALR_ASSERT( start[1].attribute_ ); 119 | element->attributes_.push_back( start[1].attribute_ ); 120 | return XmlUserData( element ); 121 | } 122 | 123 | static XmlUserData create_attribute( const XmlUserData* start, const ParserNode* nodes, size_t length ) 124 | { 125 | LALR_ASSERT( start[0].attribute_ ); 126 | shared_ptr element( new Element() ); 127 | element->attributes_.push_back( start[0].attribute_ ); 128 | return XmlUserData( element ); 129 | } 130 | 131 | static XmlUserData attribute( const XmlUserData* start, const ParserNode* 
nodes, size_t length ) 132 | { 133 | shared_ptr attribute( new Attribute(nodes[0].lexeme(), nodes[2].lexeme()) ); 134 | return XmlUserData( attribute ); 135 | } 136 | 137 | static void indent( int level ) 138 | { 139 | for ( int i = 0; i < level; ++i ) 140 | { 141 | printf( " " ); 142 | } 143 | } 144 | 145 | static void print( const Element* element, int level ) 146 | { 147 | LALR_ASSERT( element ); 148 | indent( level ); 149 | printf( "%s\n", element->name_.c_str() ); 150 | 151 | for ( list >::const_iterator i = element->attributes_.begin(); i != element->attributes_.end(); ++i ) 152 | { 153 | const Attribute* attribute = i->get(); 154 | LALR_ASSERT( attribute ); 155 | indent( level + 1 ); 156 | printf( "%s='%s'\n", attribute->name_.c_str(), attribute->value_.c_str() ); 157 | } 158 | 159 | for ( list >::const_iterator i = element->elements_.begin(); i != element->elements_.end(); ++i ) 160 | { 161 | const Element* element = i->get(); 162 | LALR_ASSERT( element ); 163 | print( element, level + 1 ); 164 | } 165 | } 166 | 167 | } 168 | 169 | void lalr_xml_example() 170 | { 171 | extern const lalr::ParserStateMachine* xml_parser_state_machine; 172 | Parser parser( xml_parser_state_machine ); 173 | parser.lexer_action_handlers() 174 | ( "string", &string_literal ) 175 | ; 176 | parser.parser_action_handlers() 177 | ( "document", &document ) 178 | ( "add_element", &add_element ) 179 | ( "create_element", &create_element ) 180 | ( "short_element", &short_element ) 181 | ( "long_element", &long_element ) 182 | ( "add_attribute", &add_attribute ) 183 | ( "create_attribute", &create_attribute ) 184 | ( "attribute", &attribute ) 185 | ; 186 | 187 | const char* input = 188 | "\n" 189 | " \n" 190 | " \n" 191 | " \n" 192 | " \n" 193 | " " 194 | ; 195 | 196 | parser.parse( (const uint8_t*) input, (const uint8_t*) input + strlen(input) ); 197 | LALR_ASSERT( parser.accepted() ); 198 | LALR_ASSERT( parser.full() ); 199 | print( parser.user_data().element_.get(), 0 ); 200 | } 201 | 
-------------------------------------------------------------------------------- /src/lalr/lalr_examples/xml.g: -------------------------------------------------------------------------------- 1 | xml { 2 | %whitespace "[ \t\r\n]*"; 3 | %left '<' '>'; 4 | %left name; 5 | document: prolog element [document]; 6 | prolog: "<\?xml" attributes "\?>" | ; 7 | elements: elements element [add_element] | element [create_element] | %precedence '<'; 8 | element: '<' name attributes '/>' [short_element] | '<' name attributes '>' elements '' [long_element]; 9 | attributes: attributes attribute [add_attribute] | attribute [create_attribute] | %precedence name; 10 | attribute: name '=' value [attribute]; 11 | name: "[A-Za-z_:][A-Za-z0-9_:\.-]*"; 12 | value: "[\"']:string:"; 13 | } 14 | -------------------------------------------------------------------------------- /src/lalr/lalr_test/TestPositionIterator.cpp: -------------------------------------------------------------------------------- 1 | // 2 | // TestPositionIterator.cpp 3 | // Copyright (c) Charles Baker. All rights reserved. 
4 | // 5 | 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | 12 | using std::string; 13 | using namespace lalr; 14 | 15 | SUITE( PositionIterator ) 16 | { 17 | TEST( LineFeeds ) 18 | { 19 | const char* text = "1\n2\n3\n4\n5"; 20 | PositionIterator i( text, text + strlen(text) ); 21 | 22 | CHECK( !i.ended() ); 23 | CHECK_EQUAL( '1', *i ); 24 | CHECK_EQUAL( 1, i.line() ); 25 | ++i; 26 | 27 | CHECK( !i.ended() ); 28 | CHECK_EQUAL( '\n', *i ); 29 | CHECK_EQUAL( 1, i.line() ); 30 | ++i; 31 | 32 | CHECK( !i.ended() ); 33 | CHECK_EQUAL( '2', *i ); 34 | CHECK_EQUAL( 2, i.line() ); 35 | ++i; 36 | 37 | CHECK( !i.ended() ); 38 | CHECK_EQUAL( '\n', *i ); 39 | CHECK_EQUAL( 2, i.line() ); 40 | ++i; 41 | 42 | CHECK( !i.ended() ); 43 | CHECK_EQUAL( '3', *i ); 44 | CHECK_EQUAL( 3, i.line() ); 45 | ++i; 46 | 47 | CHECK( !i.ended() ); 48 | CHECK_EQUAL( '\n', *i ); 49 | CHECK_EQUAL( 3, i.line() ); 50 | ++i; 51 | 52 | CHECK( !i.ended() ); 53 | CHECK_EQUAL( '4', *i ); 54 | CHECK_EQUAL( 4, i.line() ); 55 | ++i; 56 | 57 | CHECK( !i.ended() ); 58 | CHECK_EQUAL( '\n', *i ); 59 | CHECK_EQUAL( 4, i.line() ); 60 | ++i; 61 | 62 | CHECK( !i.ended() ); 63 | CHECK_EQUAL( '5', *i ); 64 | CHECK_EQUAL( 5, i.line() ); 65 | 66 | ++i; 67 | CHECK( i.ended() ); 68 | } 69 | 70 | TEST( CarriageReturns ) 71 | { 72 | const char* text = "1\r2\r3\r4\r5"; 73 | PositionIterator i( text, text + strlen(text) ); 74 | 75 | CHECK( !i.ended() ); 76 | CHECK_EQUAL( '1', *i ); 77 | CHECK_EQUAL( 1, i.line() ); 78 | ++i; 79 | 80 | CHECK( !i.ended() ); 81 | CHECK_EQUAL( '\r', *i ); 82 | CHECK_EQUAL( 1, i.line() ); 83 | ++i; 84 | 85 | CHECK( !i.ended() ); 86 | CHECK_EQUAL( '2', *i ); 87 | CHECK_EQUAL( 2, i.line() ); 88 | ++i; 89 | 90 | CHECK( !i.ended() ); 91 | CHECK_EQUAL( '\r', *i ); 92 | CHECK_EQUAL( 2, i.line() ); 93 | ++i; 94 | 95 | CHECK( !i.ended() ); 96 | CHECK_EQUAL( '3', *i ); 97 | CHECK_EQUAL( 3, i.line() ); 98 | ++i; 99 | 100 | CHECK( !i.ended() ); 101 | CHECK_EQUAL( '\r', *i ); 102 | 
CHECK_EQUAL( 3, i.line() ); 103 | ++i; 104 | 105 | CHECK( !i.ended() ); 106 | CHECK_EQUAL( '4', *i ); 107 | CHECK_EQUAL( 4, i.line() ); 108 | ++i; 109 | 110 | CHECK( !i.ended() ); 111 | CHECK_EQUAL( '\r', *i ); 112 | CHECK_EQUAL( 4, i.line() ); 113 | ++i; 114 | 115 | CHECK( !i.ended() ); 116 | CHECK_EQUAL( '5', *i ); 117 | CHECK_EQUAL( 5, i.line() ); 118 | 119 | ++i; 120 | CHECK( i.ended() ); 121 | } 122 | 123 | TEST( NewLineCarriageReturns ) 124 | { 125 | const char* text = "1\n2\r3\n4\r5"; 126 | PositionIterator i( text, text + strlen(text) ); 127 | 128 | CHECK( !i.ended() ); 129 | CHECK_EQUAL( '1', *i ); 130 | CHECK_EQUAL( 1, i.line() ); 131 | ++i; 132 | 133 | CHECK( !i.ended() ); 134 | CHECK_EQUAL( '\n', *i ); 135 | CHECK_EQUAL( 1, i.line() ); 136 | ++i; 137 | 138 | CHECK( !i.ended() ); 139 | CHECK_EQUAL( '2', *i ); 140 | CHECK_EQUAL( 2, i.line() ); 141 | ++i; 142 | 143 | CHECK( !i.ended() ); 144 | CHECK_EQUAL( '\r', *i ); 145 | CHECK_EQUAL( 2, i.line() ); 146 | ++i; 147 | 148 | CHECK( !i.ended() ); 149 | CHECK_EQUAL( '3', *i ); 150 | CHECK_EQUAL( 3, i.line() ); 151 | ++i; 152 | 153 | CHECK( !i.ended() ); 154 | CHECK_EQUAL( '\n', *i ); 155 | CHECK_EQUAL( 3, i.line() ); 156 | ++i; 157 | 158 | CHECK( !i.ended() ); 159 | CHECK_EQUAL( '4', *i ); 160 | CHECK_EQUAL( 4, i.line() ); 161 | ++i; 162 | 163 | CHECK( !i.ended() ); 164 | CHECK_EQUAL( '\r', *i ); 165 | CHECK_EQUAL( 4, i.line() ); 166 | ++i; 167 | 168 | CHECK( !i.ended() ); 169 | ++i; 170 | CHECK( i.ended() ); 171 | } 172 | 173 | TEST( CarriageReturnLineFeeds ) 174 | { 175 | const char* text = "1\r\n2\r\n3\r\n4\r\n5"; 176 | PositionIterator i( text, text + strlen(text) ); 177 | 178 | CHECK( !i.ended() ); 179 | CHECK_EQUAL( '1', *i ); 180 | CHECK_EQUAL( 1, i.line() ); 181 | ++i; 182 | ++i; 183 | 184 | CHECK( !i.ended() ); 185 | CHECK_EQUAL( '2', *i ); 186 | CHECK_EQUAL( 2, i.line() ); 187 | ++i; 188 | ++i; 189 | 190 | CHECK( !i.ended() ); 191 | CHECK_EQUAL( '3', *i ); 192 | CHECK_EQUAL( 3, i.line() ); 193 | 
++i; 194 | ++i; 195 | 196 | CHECK( !i.ended() ); 197 | CHECK_EQUAL( '4', *i ); 198 | CHECK_EQUAL( 4, i.line() ); 199 | ++i; 200 | ++i; 201 | 202 | CHECK( !i.ended() ); 203 | ++i; 204 | CHECK( i.ended() ); 205 | } 206 | } 207 | -------------------------------------------------------------------------------- /src/lalr/lalr_test/TestPrecedenceDirectives.cpp: -------------------------------------------------------------------------------- 1 | // 2 | // TestPrecedenceDirectives.cpp 3 | // Copyright (c) Charles Baker. All rights reserved. 4 | // 5 | 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include 12 | #include 13 | #include 14 | #include 15 | 16 | using namespace lalr; 17 | 18 | SUITE( PrecedenceDirectives ) 19 | { 20 | struct EventSink : public ErrorPolicy 21 | { 22 | std::vector expected_errors_; 23 | int errors_; 24 | 25 | EventSink( int expected_error ) 26 | : expected_errors_() 27 | , errors_( 0 ) 28 | { 29 | expected_errors_.push_back( expected_error ); 30 | } 31 | 32 | EventSink( std::initializer_list expected_errors ) 33 | : expected_errors_() 34 | , errors_( 0 ) 35 | { 36 | expected_errors_.insert( expected_errors_.end(), expected_errors.begin(), expected_errors.end() ); 37 | } 38 | 39 | void lalr_error( int /*line*/, int /*column*/, int error, const char* /*format*/, va_list /*args*/ ) 40 | { 41 | CHECK( errors_ < int(expected_errors_.size()) ); 42 | if ( errors_ < int(expected_errors_.size()) ) 43 | { 44 | CHECK_EQUAL( expected_errors_[errors_], error ); 45 | } 46 | ++errors_; 47 | } 48 | }; 49 | 50 | TEST( ExpressionsThatRequireShiftReduceConflictResolution ) 51 | { 52 | const char* precedence_grammar = 53 | "precedence_grammar { \n" 54 | " %whitespace \"[ \\t\\r\\n]*\"; \n" 55 | " %left '+' '-'; \n" 56 | " %left '*' '/'; \n" 57 | " %none integer; \n" 58 | " unit: expr; \n" 59 | " expr: \n" 60 | " expr '+' expr | \n" 61 | " expr '-' expr | \n" 62 | " expr '*' expr | \n" 63 | " expr '/' expr | \n" 64 | " integer \n" 65 | " ; \n" 
66 | " integer: \"[0-9]+\"; \n" 67 | "} \n" 68 | ; 69 | 70 | EventSink event_sink( PARSER_ERROR_PARSE_TABLE_CONFLICT ); 71 | GrammarCompiler compiler; 72 | compiler.compile( precedence_grammar, precedence_grammar + strlen(precedence_grammar), &event_sink ); 73 | CHECK( event_sink.errors_ == 0 ); 74 | } 75 | 76 | TEST( MissingGrammarHeader ) 77 | { 78 | const char* grammar = 79 | " %left 'int' float' 'void'; \n" 80 | "} \n" 81 | ; 82 | EventSink event_sink( LALR_ERROR_SYNTAX ); 83 | GrammarCompiler compiler; 84 | compiler.compile( grammar, grammar + strlen(grammar), &event_sink ); 85 | } 86 | 87 | TEST( MissingGrammarFooter ) 88 | { 89 | const char* grammar = 90 | "missing_footer { \n" 91 | " %left 'int' 'float' 'void'; \n" 92 | ; 93 | EventSink event_sink( LALR_ERROR_SYNTAX ); 94 | GrammarCompiler compiler; 95 | compiler.compile( grammar, grammar + strlen(grammar), &event_sink ); 96 | } 97 | 98 | TEST( UnterminatedDirectiveLiterals ) 99 | { 100 | const char* grammar = 101 | "unterminated_directive_literals { \n" 102 | "%left 'int' float' 'void'; \n" 103 | "%left 'return' 'break' 'continue' 'if' 'while' 'for' identifier '{'; \n" 104 | "} \n" 105 | ; 106 | EventSink event_sink{ LALR_ERROR_UNTERMINATED_LITERAL, LALR_ERROR_SYNTAX, LALR_ERROR_SYNTAX }; 107 | GrammarCompiler compiler; 108 | int errors = compiler.compile( grammar, grammar + strlen(grammar), &event_sink ); 109 | CHECK( errors > 0 ); 110 | } 111 | } 112 | -------------------------------------------------------------------------------- /src/lalr/lalr_test/lalr_test.forge: -------------------------------------------------------------------------------- 1 | 2 | for _, cc in toolsets('^cc.*') do 3 | local libraries; 4 | if cc:platform_matches('linux') then 5 | libraries = { 6 | 'pthread'; 7 | }; 8 | end 9 | 10 | cc:all { 11 | cc:Executable '${bin}/lalr_test' { 12 | libraries = libraries; 13 | '${lib}/lalr_${platform}_${architecture}'; 14 | '${lib}/UnitTest++_${platform}_${architecture}'; 15 | cc:Cxx '${obj}/%1' 
{ 16 | 'main.cpp', 17 | 'TestParsers.cpp', 18 | 'TestPositionIterator.cpp', 19 | 'TestPrecedenceDirectives.cpp', 20 | 'TestRegularExpressions.cpp' 21 | }; 22 | }; 23 | }; 24 | end 25 | -------------------------------------------------------------------------------- /src/lalr/lalr_test/main.cpp: -------------------------------------------------------------------------------- 1 | // 2 | // main.cpp 3 | // Copyright (c) Charles Baker. All rights reserved. 4 | // 5 | 6 | #include 7 | #include 8 | 9 | int main( int /*argc*/, char** /*argv*/ ) 10 | { 11 | return UnitTest::RunAllTests(); 12 | } 13 | -------------------------------------------------------------------------------- /src/lalr/lalrc/dot.cpp: -------------------------------------------------------------------------------- 1 | 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | 9 | using namespace lalr; 10 | 11 | extern void write( const char* format, ... ); 12 | 13 | namespace lalr 14 | { 15 | 16 | void write_graphviz_dot( const ParserStateMachine* state_machine ) 17 | { 18 | LALR_ASSERT( state_machine ); 19 | 20 | write( "digraph %s {\n", state_machine->identifier ); 21 | write( " rankdir = LR;\n" ); 22 | write( " size = \"8, 5\";\n" ); 23 | write( " bgcolor = transparent;\n" ); 24 | write( "\n" ); 25 | write( " node [shape = circle];\n" ); 26 | 27 | const ParserState* states = state_machine->states; 28 | const ParserState* states_end = states + state_machine->states_size; 29 | for ( const ParserState* state = states; state != states_end; ++state ) 30 | { 31 | write( " node [label =\"%s\"] i%d;\n", 32 | state->label, 33 | state->index 34 | ); 35 | } 36 | write( "\n" ); 37 | 38 | for ( const ParserState* state = states; state != states_end; ++state ) 39 | { 40 | const ParserTransition* transitions = state->transitions; 41 | const ParserTransition* transitions_end = transitions + state->length; 42 | for ( const ParserTransition* transition = transitions; transition != 
transitions_end; ++transition ) 43 | { 44 | if ( transition->state ) 45 | { 46 | write( " i%d -> i%d [label =\"%s\"];\n", 47 | state->index, 48 | transition->state->index, 49 | transition->symbol->label 50 | ); 51 | } 52 | } 53 | } 54 | 55 | write( "}\n" ); 56 | } 57 | 58 | } 59 | -------------------------------------------------------------------------------- /src/lalr/lalrc/dot.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | namespace lalr 4 | { 5 | 6 | class ParserStateMachine; 7 | class LexerStateMachine; 8 | 9 | void write_graphviz_dot( const ParserStateMachine* state_machine ); 10 | 11 | } 12 | -------------------------------------------------------------------------------- /src/lalr/lalrc/lalrc.forge: -------------------------------------------------------------------------------- 1 | 2 | for _, cc in toolsets('^cc.*') do 3 | local libraries; 4 | if cc:platform_matches('linux') then 5 | libraries = { 6 | 'pthread'; 7 | }; 8 | end 9 | 10 | cc:all { 11 | cc:Executable '${bin}/lalrc' { 12 | libraries = libraries; 13 | '${lib}/lalr_${platform}_${architecture}'; 14 | cc:Cxx '${obj}/%1' { 15 | 'dot.cpp'; 16 | 'lalrc.cpp'; 17 | }; 18 | }; 19 | }; 20 | end 21 | -------------------------------------------------------------------------------- /src/lalr/line_comment.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | #include 6 | 7 | namespace lalr 8 | { 9 | 10 | template ::value_type, class Traits = typename std::char_traits, class Allocator = typename std::allocator> 11 | PositionIterator line_comment( const PositionIterator& begin, const PositionIterator& end, std::basic_string* lexeme, const void** /*symbol*/ ) 12 | { 13 | LALR_ASSERT( lexeme ); 14 | lexeme->clear(); 15 | 16 | PositionIterator i = begin; 17 | while ( i != end && *i != '\n' && *i != '\r' ) 18 | { 19 | ++i; 20 | } 21 | return i; 22 | } 23 | 24 | } 
-------------------------------------------------------------------------------- /src/lalr/string_literal.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | #include 6 | 7 | namespace lalr 8 | { 9 | 10 | template ::value_type, class Traits = typename std::char_traits, class Allocator = typename std::allocator> 11 | PositionIterator string_literal( const PositionIterator& begin, const PositionIterator& end, std::basic_string* lexeme, const void** /*symbol*/ ) 12 | { 13 | LALR_ASSERT( lexeme ); 14 | LALR_ASSERT( lexeme->length() == 1 ); 15 | 16 | int terminator = lexeme->at( 0 ); 17 | LALR_ASSERT( terminator == '\'' || terminator == '"' ); 18 | lexeme->clear(); 19 | 20 | PositionIterator i = begin; 21 | while ( *i != terminator && i != end ) 22 | { 23 | if ( *i != '\\' ) 24 | { 25 | *lexeme += *i; 26 | ++i; 27 | } 28 | else 29 | { 30 | ++i; 31 | if ( i != end ) 32 | { 33 | int character = *i; 34 | switch ( character ) 35 | { 36 | case 'a': 37 | character = '\a'; 38 | break; 39 | 40 | case 'b': 41 | character = '\b'; 42 | break; 43 | 44 | case 'f': 45 | character = '\f'; 46 | break; 47 | 48 | case 'n': 49 | character = '\n'; 50 | break; 51 | 52 | case 'r': 53 | character = '\r'; 54 | break; 55 | 56 | case 't': 57 | character = '\t'; 58 | break; 59 | 60 | case 'v': 61 | character = '\v'; 62 | break; 63 | 64 | default: 65 | break; 66 | } 67 | *lexeme += character; 68 | ++i; 69 | } 70 | } 71 | } 72 | 73 | if ( i != end ) 74 | { 75 | ++i; 76 | } 77 | 78 | return i; 79 | } 80 | 81 | } 82 | --------------------------------------------------------------------------------