├── .dockerignore ├── examples ├── demo.flow ├── flowlexer.cpp ├── cxx.klex ├── wordcount.cpp ├── flow.klax ├── flow.klex └── mathexpr.cpp ├── test ├── good.klex ├── overshadowed.klex └── multiple_conditions.klex ├── .gitignore ├── .gitmodules ├── .github └── FUNDING.yml ├── klex.pc.cmake ├── src └── klex │ ├── sysconfig.h.cmake │ ├── klex_test.cpp │ ├── regular │ ├── State_test.cpp │ ├── MultiDFA.h │ ├── DotVisitor.h │ ├── State.cpp │ ├── MultiDFA.cpp │ ├── DFABuilder_test.cpp │ ├── State.h │ ├── TransitionMap-inl.h │ ├── Alphabet.cpp │ ├── Alphabet.h │ ├── NFABuilder.h │ ├── DotWriter_test.cpp │ ├── DFAMinimizer.h │ ├── TransitionMap.h │ ├── DFABuilder.h │ ├── RegExpr.h │ ├── DotWriter.h │ ├── LexerDef.h │ ├── NFA_test.cpp │ ├── Symbols_test.cpp │ ├── RegExprParser.h │ ├── Compiler.h │ ├── DotWriter.cpp │ ├── NFABuilder.cpp │ ├── Rule.h │ ├── Symbols.cpp │ ├── DFA.h │ ├── RegExpr.cpp │ ├── RuleParser.h │ ├── DFA.cpp │ └── Symbols.h │ ├── cfg │ ├── Grammar-inl.h │ ├── GrammarValidator.h │ ├── GrammarValidator.cpp │ ├── GrammarParser.h │ ├── LeftRecursion.h │ ├── ll │ │ ├── SyntaxTable_test.cpp │ │ ├── README.md │ │ ├── SyntaxTable.h │ │ ├── Analyzer.h │ │ └── Analyzer_test.cpp │ ├── GrammarLexer_test.cpp │ ├── GrammarLexer.h │ ├── LeftRecursion_test.cpp │ ├── GrammarLexer.cpp │ ├── LeftRecursion.cpp │ └── GrammarParser_test.cpp │ ├── util │ ├── overloaded.h │ ├── IntVector.h │ ├── literals.h │ ├── UnboxedRange.h │ ├── iterator.h │ ├── AnsiColor.h │ ├── iterator-detail.h │ ├── iterator_test.cpp │ └── Flags.h │ ├── SourceLocation.cpp │ ├── SourceLocation.h │ ├── CharStream.h │ └── Report.cpp ├── .editorconfig ├── cmake ├── ClangTidy.cmake ├── EnableCcache.cmake └── mklex.cmake ├── .travis.yml ├── klex.vim ├── TODO.md ├── appveyor.yml ├── klax.vim ├── Dockerfile ├── autogen.sh ├── .circleci └── config.yml ├── klex2flex.sh ├── .clang-format ├── .clang-tidy ├── cmdlineTests.sh └── README.md /.dockerignore: 
-------------------------------------------------------------------------------- 1 | /build 2 | -------------------------------------------------------------------------------- /examples/demo.flow: -------------------------------------------------------------------------------- 1 | handler main { 2 | echo "hello"; 3 | } 4 | -------------------------------------------------------------------------------- /test/good.klex: -------------------------------------------------------------------------------- 1 | # vim:syntax=klex 2 | 3 | A ::= a 4 | B ::= b 5 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | /build 2 | /.vscode 3 | /.vs 4 | /.cache 5 | compile_commands.json 6 | -------------------------------------------------------------------------------- /test/overshadowed.klex: -------------------------------------------------------------------------------- 1 | # vim:syntax=klex 2 | 3 | Ident ::= [a-z]+ 4 | If ::= aa 5 | -------------------------------------------------------------------------------- /.gitmodules: -------------------------------------------------------------------------------- 1 | [submodule "fmtlib"] 2 | path = 3rdparty/fmt 3 | url = https://github.com/fmtlib/fmt.git 4 | -------------------------------------------------------------------------------- /.github/FUNDING.yml: -------------------------------------------------------------------------------- 1 | # These are supported funding model platforms 2 | 3 | github: [christianparpart] 4 | custom: ['https://paypal.me/ChristianParpart'] 5 | -------------------------------------------------------------------------------- /test/multiple_conditions.klex: -------------------------------------------------------------------------------- 1 | # vim:syntax=klex 2 | 3 | Main ::= main 4 | Cond ::= cond 5 | <*>Spacing(ignore) ::= [\t\n\s]+ 6 | 
-------------------------------------------------------------------------------- /klex.pc.cmake: -------------------------------------------------------------------------------- 1 | # klex library 2 | Name: klex 3 | Description: klex compiler frontend library 4 | Version: @klex_VERSION@ 5 | # Requires: 6 | # Conflicts: 7 | Libs: -L@CMAKE_INSTALL_PREFIX@/lib -lklex @LDFLAGS@ 8 | Cflags: -I@CMAKE_INSTALL_PREFIX@/include @CXXFLAGS@ 9 | -------------------------------------------------------------------------------- /src/klex/sysconfig.h.cmake: -------------------------------------------------------------------------------- 1 | // This file is part of the "klex" project, http://github.com/christianparpart/klex> 2 | // (c) 2018 Christian Parpart 3 | // 4 | // Licensed under the MIT License (the "License"); you may not use this 5 | // file except in compliance with the License. You may obtain a copy of 6 | // the License at: http://opensource.org/licenses/MIT 7 | 8 | #pragma once 9 | -------------------------------------------------------------------------------- /.editorconfig: -------------------------------------------------------------------------------- 1 | root = true 2 | 3 | [*] 4 | indent_style = space 5 | indent_size = 4 6 | insert_final_newline = true 7 | end_of_line = lf 8 | charset = utf-8 9 | trim_trailing_whitespace = true 10 | 11 | [*.md] 12 | indent_style = space 13 | indent_size = 2 14 | 15 | [*.xml] 16 | indent_style = space 17 | indent_size = 2 18 | 19 | [*.yml] 20 | indent_style = space 21 | indent_size = 4 22 | 23 | [.github/**/*.yml] 24 | indent_style = space 25 | indent_size = 2 26 | -------------------------------------------------------------------------------- /cmake/ClangTidy.cmake: -------------------------------------------------------------------------------- 1 | 2 | option(ENABLE_TIDY "Enable clang-tidy [default: OFF]" OFF) 3 | if(ENABLE_TIDY) 4 | find_program(CLANG_TIDY_EXE 5 | NAMES clang-tidy-8 clang-tidy-7 clang-tidy-6.0 clang-tidy 6 | 
// Entry point of the klex unit-test executable.
// Hands the command-line arguments unchanged to klex::util::testing::main()
// and uses its return value as the process exit code.
int main(int argc, const char* argv[])
{
    return klex::util::testing::main(argc, argv);
}
11 | set(ENV{CCACHE_SLOPPINESS} pch_defines,time_macros) 12 | endif() 13 | message(STATUS "[ccache] Enabled: ${CCACHE}") 14 | endif() 15 | endif() 16 | -------------------------------------------------------------------------------- /src/klex/regular/State_test.cpp: -------------------------------------------------------------------------------- 1 | // This file is part of the "klex" project, http://github.com/christianparpart/klex> 2 | // (c) 2018 Christian Parpart 3 | // 4 | // Licensed under the MIT License (the "License"); you may not use this 5 | // file except in compliance with the License. You may obtain a copy of 6 | // the License at: http://opensource.org/licenses/MIT 7 | 8 | #include 9 | #include 10 | 11 | #include 12 | 13 | TEST(regular_State, to_string) 14 | { 15 | klex::regular::StateIdVec v { 1, 2, 3 }; 16 | EXPECT_EQ("{n1, n2, n3}", fmt::format("{}", v)); 17 | } 18 | -------------------------------------------------------------------------------- /src/klex/cfg/Grammar-inl.h: -------------------------------------------------------------------------------- 1 | // This file is part of the "klex" project, http://github.com/christianparpart/klex> 2 | // (c) 2018 Christian Parpart 3 | // 4 | // Licensed under the MIT License (the "License"); you may not use this 5 | // file except in compliance with the License. 
You may obtain a copy of 6 | // the License at: http://opensource.org/licenses/MIT 7 | 8 | #include 9 | 10 | namespace klex::cfg { 11 | 12 | inline bool _Symbols::empty() const noexcept 13 | { 14 | return begin() == end(); 15 | } 16 | 17 | inline size_t _Symbols::size() const noexcept 18 | { 19 | return std::distance(begin(), end()); 20 | } 21 | 22 | } // namespace klex::cfg 23 | -------------------------------------------------------------------------------- /src/klex/cfg/GrammarValidator.h: -------------------------------------------------------------------------------- 1 | // This file is part of the "klex" project, http://github.com/christianparpart/klex> 2 | // (c) 2018 Christian Parpart 3 | // 4 | // Licensed under the MIT License (the "License"); you may not use this 5 | // file except in compliance with the License. You may obtain a copy of 6 | // the License at: http://opensource.org/licenses/MIT 7 | 8 | #pragma once 9 | 10 | #include 11 | 12 | namespace klex::cfg { 13 | 14 | struct Grammar; 15 | 16 | class GrammarValidator { 17 | public: 18 | GrammarValidator(Report* _report) : report_{_report} {} 19 | 20 | void validate(const Grammar& G); 21 | 22 | private: 23 | Report* report_; 24 | }; 25 | 26 | } // namespace klex::cfg 27 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: cpp 2 | dist: trusty 3 | 4 | compiler: 5 | - gcc 6 | 7 | before_install: 8 | - sudo add-apt-repository -y ppa:ubuntu-toolchain-r/test 9 | - sudo apt-get update -qq 10 | 11 | install: 12 | - sudo apt-get install -qqy g++-7 13 | - sudo update-alternatives --install /usr/bin/g++ g++ /usr/bin/g++-7 90 14 | - sudo apt install -qqy python-pip 15 | - sudo pip install codecov 16 | 17 | before_script: 18 | - git submodule update --init --recursive 19 | - mkdir build 20 | - cd build 21 | - cmake -DCMAKE_BUILD_TYPE=RelWithDebInfo -DKLEX_COVERAGE=ON .. 
/// Helper for conveniently using std::visit() with an arbitrary list of lambdas:
/// the resulting object inherits the call operator of every lambda it is built from,
/// so overload resolution picks the lambda that matches the variant's active alternative.
template <typename... Ts>
struct overloaded : Ts...
{
    using Ts::operator()...;
};

/// Deduction guide, so that `overloaded { lambda1, lambda2 }` deduces Ts from its arguments.
template <typename... Ts>
overloaded(Ts...) -> overloaded<Ts...>;
You may obtain a copy of 6 | // the License at: http://opensource.org/licenses/MIT 7 | 8 | #include 9 | 10 | #include 11 | 12 | using namespace std; 13 | 14 | namespace klex 15 | { 16 | 17 | string SourceLocation::source() const // TODO 18 | { 19 | string code; 20 | ifstream ifs(filename); 21 | ifs.seekg(offset, ifs.beg); 22 | code.resize(count); 23 | ifs.read(&code[0], count); 24 | return code; 25 | } 26 | 27 | } // namespace klex 28 | -------------------------------------------------------------------------------- /src/klex/regular/MultiDFA.h: -------------------------------------------------------------------------------- 1 | // This file is part of the "klex" project, http://github.com/christianparpart/klex> 2 | // (c) 2018 Christian Parpart 3 | // 4 | // Licensed under the MIT License (the "License"); you may not use this 5 | // file except in compliance with the License. You may obtain a copy of 6 | // the License at: http://opensource.org/licenses/MIT 7 | #pragma once 8 | 9 | #include 10 | #include 11 | #include 12 | #include 13 | #include 14 | 15 | namespace klex::regular { 16 | 17 | struct MultiDFA { 18 | using InitialStateMap = std::map; 19 | 20 | InitialStateMap initialStates; 21 | DFA dfa; 22 | }; 23 | 24 | MultiDFA constructMultiDFA(std::map many); 25 | 26 | } // namespace klex::regular 27 | -------------------------------------------------------------------------------- /cmake/mklex.cmake: -------------------------------------------------------------------------------- 1 | # mklex cmake integration 2 | 3 | function(klex_generate_cpp KLEX_FILE TOKEN_FILE TABLE_FILE) 4 | set(${TABLE_FILE} "${CMAKE_CURRENT_BINARY_DIR}/${KLEX_FILE}.table.cc") 5 | set(${TABLE_FILE} "${CMAKE_CURRENT_BINARY_DIR}/${KLEX_FILE}.table.cc" PARENT_SCOPE) 6 | set(dot_file "${CMAKE_CURRENT_BINARY_DIR}/${KLEX_FILE}.dot") 7 | set(klex_file "${CMAKE_CURRENT_SOURCE_DIR}/${KLEX_FILE}") 8 | 9 | add_custom_command( 10 | OUTPUT "${TOKEN_FILE}" "${${TABLE_FILE}}" 11 | COMMAND mklex -f 
"${klex_file}" -t "${${TABLE_FILE}}" -T "${TOKEN_FILE}" -x "${dot_file}" -p 12 | DEPENDS mklex ${klex_file} 13 | COMMENT "Generating lexer table and tokens for ${KLEX_FILE}" 14 | VERBATIM) 15 | set_source_files_properties(${TOKEN_FILE} PROPERTIES GENERATED TRUE) 16 | set_source_files_properties(${${TABLE_FILE}} PROPERTIES GENERATED TRUE) 17 | endfunction() 18 | 19 | -------------------------------------------------------------------------------- /src/klex/regular/DotVisitor.h: -------------------------------------------------------------------------------- 1 | // This file is part of the "klex" project, http://github.com/christianparpart/klex> 2 | // (c) 2018 Christian Parpart 3 | // 4 | // Licensed under the MIT License (the "License"); you may not use this 5 | // file except in compliance with the License. You may obtain a copy of 6 | // the License at: http://opensource.org/licenses/MIT 7 | 8 | #pragma once 9 | 10 | #include 11 | #include 12 | 13 | namespace klex::regular { 14 | 15 | class DotVisitor { 16 | public: 17 | virtual ~DotVisitor() {} 18 | 19 | virtual void start(StateId initialState) = 0; 20 | virtual void visitNode(StateId number, bool start, bool accept) = 0; 21 | virtual void visitEdge(StateId from, StateId to, Symbol s) = 0; 22 | virtual void endVisitEdge(StateId from, StateId to) = 0; 23 | virtual void end() = 0; 24 | }; 25 | 26 | } // namespace klex::regular 27 | -------------------------------------------------------------------------------- /src/klex/util/IntVector.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | /** 4 | * Encapsulates std::vector with speed improvements. 
/**
 * Wraps a std::vector<T> of integer-like values and keeps a running hash of everything
 * pushed so far, so that comparing two IntVectors can usually be decided by comparing
 * the two hashes before an element-wise comparison is needed.
 *
 * The hash is FNV-1a-like (xor, then multiply by the 32-bit FNV prime), applied once per
 * element rather than per byte. Equal element sequences always produce equal hashes.
 *
 * NOTE(review): no `#include <vector>` is visible in this header — confirm that it is
 * provided before this class is used.
 */
template <typename T>
class IntVector {
  public:
    using value_type = T;
    using Vector = std::vector<T>;
    using vector = Vector; // original spelling of the alias, kept for existing users
    using iterator = typename Vector::iterator;
    using const_iterator = typename Vector::const_iterator;
    using size_type = typename Vector::size_type;

    IntVector() : vector_ {}, hash_ { kHashSeed } {}

    /// Removes all elements and resets the hash to its initial value.
    void clear()
    {
        vector_.clear();
        hash_ = kHashSeed;
    }

    /// Appends @p v and folds it into the running hash.
    void push_back(T v)
    {
        vector_.push_back(v);

        hash_ ^= static_cast<unsigned>(v);
        hash_ *= kHashPrime;
    }

    /// Number of stored elements.
    size_type size() const noexcept { return vector_.size(); }

    /// True when no element is stored.
    bool empty() const noexcept { return vector_.empty(); }

    /// Read-only iteration over the stored elements, in insertion order.
    const_iterator begin() const noexcept { return vector_.begin(); }
    const_iterator end() const noexcept { return vector_.end(); }

    /// Equal when both hold the same elements in the same order; the hash check is only a fast reject.
    bool operator==(const IntVector& rhs) const noexcept
    {
        return hash_ == rhs.hash_ && vector_ == rhs.vector_;
    }

    bool operator!=(const IntVector& rhs) const noexcept
    {
        return !(*this == rhs);
    }

  private:
    static constexpr unsigned kHashSeed = 2166136261u; // 32-bit FNV offset basis
    static constexpr unsigned kHashPrime = 16777619u;  // 32-bit FNV prime

    Vector vector_;
    unsigned hash_;
};
You may obtain a copy of 6 | // the License at: http://opensource.org/licenses/MIT 7 | 8 | #include 9 | 10 | #include 11 | 12 | using namespace std; 13 | 14 | namespace klex::regular 15 | { 16 | 17 | string to_string(const StateIdVec& S, string_view stateLabelPrefix) 18 | { 19 | StateIdVec names = S; 20 | sort(names.begin(), names.end()); 21 | 22 | stringstream sstr; 23 | sstr << "{"; 24 | int i = 0; 25 | for (StateId name: names) 26 | { 27 | if (i) 28 | sstr << ", "; 29 | sstr << stateLabelPrefix << name; 30 | i++; 31 | } 32 | sstr << "}"; 33 | 34 | return sstr.str(); 35 | } 36 | 37 | } // namespace klex::regular 38 | -------------------------------------------------------------------------------- /klex.vim: -------------------------------------------------------------------------------- 1 | " klex syntax highlighting 2 | " 3 | 4 | " quit when a syntax file was already loaded 5 | if exists("b:current_syntax") 6 | finish 7 | endif 8 | 9 | " # comment LF 10 | " RuleName(option) ::= PATTERN 11 | 12 | " Options Section 13 | syn keyword klexTodo contained TODO FIXME XXX NOTE BUG 14 | syn match klexComment "#.*$" contains=klexTodo 15 | syn match klexOptions '^%\s*pragma\>.*$' 16 | syn match klexRuleName '^\s*\(<[a-zA-Z,]\+>\)\?[a-zA-Z_][a-zA-Z0-9_]*' 17 | syn match klexOperator "(\|)\||" 18 | syn match klexAssign "::=" 19 | syn match klexRulePattern /\".*\"/ 20 | syn match lexEof "<>" 21 | 22 | " The default highlighting. 
// Combines the DFAs in `many` into one MultiDFA.
//
// State 0 of the combined automaton becomes its initial state. Every entry of `many`
// is appended under the next free id (1, 2, ...), that id is recorded in
// `initialStates` under the entry's key, and state 0 gets a transition to it.
//
// NOTE(review): the template arguments of `map`, `pair` and `static_cast` are missing
// in the text under review; they must be restored from MultiDFA.h before this compiles.
MultiDFA constructMultiDFA(map many)
{
    MultiDFA multiDFA {};
    // One state per input DFA plus the shared initial state 0.
    multiDFA.dfa.createStates(1 + many.size());
    multiDFA.dfa.setInitialState(0);

    StateId q0 = 1;
    for (pair& p: many)
    {
        // The DFA is moved out of `many` (taken by value, so the caller's map is unaffected).
        multiDFA.dfa.append(move(p.second), q0);
        multiDFA.initialStates[p.first] = q0;
        // The transition from state 0 is labelled with the target id itself, cast to the symbol type.
        multiDFA.dfa.setTransition(0, static_cast(q0), q0);
        q0++;
    }

    return multiDFA;
}
and Token class 16 | 17 | # Incomplete TODO list 18 | 19 | - [ ] cfg::ll::SyntaxTable::dump() MUST NOT depend on Grammar 20 | - [ ] left-recursion-elimination (direct) 21 | - call it: struct LeftToRightRecursion {}; that can ideally be used with std::transform() 22 | - first, all left-recursive rules need to be collected 23 | - [ ] left-recursion-elimination (indirect) 24 | - [ ] Analyzer production matching hooks (check ANTLR) 25 | 26 | ### left-recursion 27 | 28 | ``` 29 | # left 30 | A ::= A b 31 | | b; 32 | 33 | # right 34 | A ::= A' b; 35 | A' ::= b A'; 36 | | ; 37 | 38 | # LEFT 39 | Expr ::= Expr '+' Term 40 | | Expr '-' Term 41 | | Term; 42 | 43 | Expr ::= Term Expr'; 44 | Expr' ::= '+' Expr' 45 | | '-' Expr'; 46 | ``` 47 | -------------------------------------------------------------------------------- /src/klex/cfg/GrammarValidator.cpp: -------------------------------------------------------------------------------- 1 | // This file is part of the "klex" project, http://github.com/christianparpart/klex> 2 | // (c) 2018 Christian Parpart 3 | // 4 | // Licensed under the MIT License (the "License"); you may not use this 5 | // file except in compliance with the License. 
// Checks grammar G for symbols that are used but never defined.
//
// Walks the symbols of every production's handle; for each symbol that passes the
// holds_alternative() test and for which G has no production, a type error
// "Non-terminal {} is missing a production rule." is sent to the report.
// Nothing is returned; problems are only visible through the report.
//
// NOTE(review): the template arguments of holds_alternative() and get() are missing
// in the text under review (presumably the grammar's non-terminal type — confirm).
void GrammarValidator::validate(const Grammar& G)
{
    for (const Production& p: G.productions)
        for (const Symbol b: symbols(p.handle))
            if (holds_alternative(b))
                if (!G.containsProduction(get(b)))
                    // The location is still a default-constructed placeholder (see TODO).
                    report_->typeError(SourceLocation { /*TODO: b.location()*/ },
                                       "Non-terminal {} is missing a production rule.",
                                       b);

    // TODO: check for unwanted infinite recursions
    // such as: E ::= E
}
// Verifies that compileDFA() reports a rule that can never match because an
// earlier rule already accepts the same input: here "true" also matches Identifier.
//
// NOTE(review): the template argument of std::make_unique() is missing in the text
// under review (some input stream type built from the grammar text — confirm).
TEST(regular_DFABuilder, shadowing)
{
    Compiler cc;
    cc.parse(std::make_unique(R"(
    Identifier ::= [a-z][a-z0-9]*
    TrueLiteral ::= "true"
  )"));
    // rule 2 is overshadowed by rule 1
    Compiler::OvershadowMap overshadows;
    DFA dfa = cc.compileDFA(&overshadows);
    // Exactly one overshadowing pair is expected.
    ASSERT_EQ(1, overshadows.size());
    EXPECT_EQ(2, overshadows[0].first); // overshadowee
    EXPECT_EQ(1, overshadows[0].second); // overshadower
}
" Options Section
"
" Match groups of the klax syntax. Every group defined here must use the exact
" name that the `hi def link` lines of this file refer to, otherwise the link
" has nothing to attach to and the text stays unhighlighted.
syn keyword klaxTodo contained TODO FIXME XXX NOTE BUG
syn match klaxComment "#.*$" contains=klaxTodo
syn match klaxOptions '^%\s*pragma\>.*$'
syn match klaxRuleName '^\s*\(<[a-zA-Z,]\+>\)\?[a-zA-Z_][a-zA-Z0-9_]*'
syn match klaxOperator "(\|)\||"
syn match klaxAssign "::="
" Was named `lexEof`, which no highlight link refers to; the file links `klaxEof`.
" NOTE(review): the pattern reads "<>" in the text under review; if the intended
" token is the end-of-file marker "<<EOF>>", restore it here.
syn match klaxEof "<>"
/**
 * Identifies a span of source text: @c count bytes starting at byte @c offset of @c filename.
 *
 * Ordering and equality look at @c filename first and at @c offset second;
 * @c count does not take part in any comparison.
 */
struct SourceLocation {
    std::string filename;
    size_t offset = 0;
    size_t count = 0;

    /**
     * Three-way comparison against @p other.
     *
     * @return for equal filenames the signed difference `offset - other.offset`,
     *         otherwise -1 or 1 according to the lexicographic order of the filenames.
     */
    [[nodiscard]] long long int compare(const SourceLocation& other) const noexcept
    {
        if (filename == other.filename)
            // Widen to long long before subtracting: `long` is only 32 bits wide on
            // LLP64 targets (e.g. 64-bit Windows) and would truncate large offsets.
            return static_cast<long long int>(offset) - static_cast<long long int>(other.offset);
        else if (filename < other.filename)
            return -1;
        else
            return 1;
    }

    /// Reads the referenced text back from the file (defined out of line).
    [[nodiscard]] std::string source() const;

    bool operator==(const SourceLocation& other) const noexcept { return compare(other) == 0; }
    bool operator!=(const SourceLocation& other) const noexcept { return compare(other) != 0; }
    bool operator<=(const SourceLocation& other) const noexcept { return compare(other) <= 0; }
    bool operator>=(const SourceLocation& other) const noexcept { return compare(other) >= 0; }
    bool operator<(const SourceLocation& other) const noexcept { return compare(other) < 0; }
    bool operator>(const SourceLocation& other) const noexcept { return compare(other) > 0; }
};
// Example driver: tokenizes a file (when exactly one argument is given) or standard
// input (otherwise) with the mklex-generated lexer definition, and prints one line
// per token to stderr.
//
// NOTE(review): the template arguments of std::make_unique() and static_cast() are
// missing in the text under review; restore them before building.
int main(int argc, const char* argv[])
{
    // argc == 2: lex the file named by argv[1]; any other argument count: lex std::cin.
    auto ls = argc == 2 ? klex::regular::Lexable { lexerDef,
                                                   std::make_unique(argv[1]) }
                        : klex::regular::Lexable { lexerDef, std::cin };

    for (const auto& token: ls)
    {
        // Prints "[begin-end]: token NAME ("literal")", where end is begin plus the literal's length.
        std::cerr << fmt::format("[{}-{}]: token {} (\"{}\")\n",
                                 token.offset,
                                 token.offset + token.literal.length(),
                                 lexerDef.tagName(static_cast(token.token)),
                                 token.literal);
    }

    return EXIT_SUCCESS;
}
You may obtain a copy of 6 | // the License at: http://opensource.org/licenses/MIT 7 | #pragma once 8 | 9 | #include 10 | 11 | #include 12 | 13 | #include 14 | #include 15 | #include 16 | #include 17 | #include 18 | #include 19 | 20 | namespace klex::regular { 21 | 22 | using Tag = int; 23 | using StateId = size_t; 24 | using StateIdVec = std::vector; 25 | 26 | using AcceptMap = std::map; 27 | 28 | /** 29 | * Returns a human readable string of @p S, such as "{n0, n1, n2}". 30 | */ 31 | std::string to_string(const StateIdVec& S, std::string_view stateLabelPrefix = "n"); 32 | 33 | } // namespace klex::regular 34 | 35 | namespace fmt { 36 | template <> 37 | struct formatter { 38 | template 39 | constexpr auto parse(ParseContext& ctx) 40 | { 41 | return ctx.begin(); 42 | } 43 | 44 | template 45 | constexpr auto format(const klex::regular::StateIdVec& v, FormatContext& ctx) 46 | { 47 | return format_to(ctx.out(), "{}", klex::regular::to_string(v)); 48 | } 49 | }; 50 | } // namespace fmt 51 | -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | FROM ubuntu:18.04 AS build 2 | MAINTAINER Christian Parpart 3 | 4 | RUN apt-get -qqy update 5 | RUN apt-get install -qqy cmake make g++-7 6 | 7 | WORKDIR /app/src 8 | 9 | COPY /3rdparty /app/src/3rdparty 10 | COPY /cmake /app/src/cmake 11 | COPY /src /app/src/src 12 | COPY /CMakeLists.txt $WORKDIR 13 | RUN ls -hlaF 14 | 15 | ARG BUILD_CONCURRENCY="0" 16 | 17 | RUN cmake -DCMAKE_BUILD_TYPE=Release \ 18 | -DKLEX_EXAMPLES=OFF \ 19 | -DKLEX_TESTS=OFF \ 20 | -DMKLEX_LINK_STATIC=ON \ 21 | -DCMAKE_CXX_COMPILER=g++-7 \ 22 | $WORKDIR 23 | 24 | RUN make \ 25 | -j$(awk "BEGIN { \ 26 | if (${BUILD_CONCURRENCY} != 0) { \ 27 | print(${BUILD_CONCURRENCY}); \ 28 | } else { \ 29 | x=($(grep -c ^processor /proc/cpuinfo) * 2/3); \ 30 | if (x > 1) { \ 31 | printf(\"%d\n\", x); \ 32 | } else { \ 33 | print(1); \ 34 | } \ 35 | } 
\ 36 | }") 37 | 38 | RUN strip mklex 39 | 40 | FROM scratch 41 | COPY --from=build /app/src/mklex /usr/bin/mklex 42 | ENTRYPOINT ["/usr/bin/mklex"] 43 | CMD ["--help"] 44 | -------------------------------------------------------------------------------- /src/klex/regular/TransitionMap-inl.h: -------------------------------------------------------------------------------- 1 | // This file is part of the "klex" project, http://github.com/christianparpart/klex> 2 | // (c) 2018 Christian Parpart 3 | // 4 | // Licensed under the MIT License (the "License"); you may not use this 5 | // file except in compliance with the License. You may obtain a copy of 6 | // the License at: http://opensource.org/licenses/MIT 7 | 8 | #include 9 | #include 10 | #include 11 | 12 | namespace klex::regular { 13 | 14 | inline void TransitionMap::define(StateId currentState, Symbol charCat, StateId nextState) 15 | { 16 | mapping_[currentState][charCat] = nextState; 17 | } 18 | 19 | inline StateId TransitionMap::apply(StateId currentState, Symbol charCat) const 20 | { 21 | if (auto i = mapping_.find(currentState); i != mapping_.end()) 22 | if (auto k = i->second.find(charCat); k != i->second.end()) 23 | return k->second; 24 | 25 | return ErrorState; 26 | } 27 | 28 | inline std::vector TransitionMap::states() const 29 | { 30 | std::vector v; 31 | v.reserve(mapping_.size()); 32 | for (const auto& i : mapping_) 33 | v.push_back(i.first); 34 | std::sort(v.begin(), v.end()); 35 | return v; 36 | } 37 | 38 | inline std::map TransitionMap::map(StateId s) const 39 | { 40 | std::map m; 41 | if (auto mapping = mapping_.find(s); mapping != mapping_.end()) 42 | for (const auto& i : mapping->second) 43 | m[i.first] = i.second; 44 | return m; 45 | } 46 | 47 | } // namespace klex::regular 48 | -------------------------------------------------------------------------------- /src/klex/regular/Alphabet.cpp: -------------------------------------------------------------------------------- 1 | // This file is part 
of the "klex" project, http://github.com/christianparpart/klex> 2 | // (c) 2018 Christian Parpart 3 | // 4 | // Licensed under the MIT License (the "License"); you may not use this 5 | // file except in compliance with the License. You may obtain a copy of 6 | // the License at: http://opensource.org/licenses/MIT 7 | 8 | #include 9 | #include 10 | 11 | #include 12 | #include 13 | #include 14 | 15 | using namespace std; 16 | 17 | namespace klex::regular 18 | { 19 | 20 | #if 0 21 | #define DEBUG(msg, ...) \ 22 | do \ 23 | { \ 24 | cerr << fmt::format(msg, __VA_ARGS__) << "\n"; \ 25 | } while (0) 26 | #else 27 | #define DEBUG(msg, ...) \ 28 | do \ 29 | { \ 30 | } while (0) 31 | #endif 32 | 33 | void Alphabet::insert(Symbol ch) 34 | { 35 | if (alphabet_.find(ch) == alphabet_.end()) 36 | { 37 | DEBUG("Alphabet: insert '{:}'", prettySymbol(ch)); 38 | alphabet_.insert(ch); 39 | } 40 | } 41 | 42 | string Alphabet::to_string() const 43 | { 44 | stringstream sstr; 45 | 46 | sstr << '{'; 47 | 48 | for (Symbol c: alphabet_) 49 | sstr << prettySymbol(c); 50 | 51 | sstr << '}'; 52 | 53 | return sstr.str(); 54 | } 55 | 56 | } // namespace klex::regular 57 | -------------------------------------------------------------------------------- /src/klex/regular/Alphabet.h: -------------------------------------------------------------------------------- 1 | // This file is part of the "klex" project, http://github.com/christianparpart/klex> 2 | // (c) 2018 Christian Parpart 3 | // 4 | // Licensed under the MIT License (the "License"); you may not use this 5 | // file except in compliance with the License. You may obtain a copy of 6 | // the License at: http://opensource.org/licenses/MIT 7 | #pragma once 8 | 9 | #include 10 | #include 11 | #include 12 | #include 13 | 14 | namespace klex::regular { 15 | 16 | /** 17 | * Represents the alphabet of a finite automaton or regular expression. 
18 | */ 19 | class Alphabet { 20 | public: 21 | using set_type = std::set; 22 | using iterator = set_type::iterator; 23 | 24 | size_t size() const noexcept { return alphabet_.size(); } 25 | 26 | void insert(Symbol ch); 27 | 28 | std::string to_string() const; 29 | 30 | const iterator begin() const { return alphabet_.begin(); } 31 | const iterator end() const { return alphabet_.end(); } 32 | 33 | private: 34 | set_type alphabet_; 35 | }; 36 | 37 | } // namespace klex::regular 38 | 39 | namespace fmt { 40 | template <> 41 | struct formatter { 42 | template 43 | constexpr auto parse(ParseContext& ctx) 44 | { 45 | return ctx.begin(); 46 | } 47 | 48 | template 49 | constexpr auto format(const klex::regular::Alphabet& v, FormatContext& ctx) 50 | { 51 | return format_to(ctx.out(), "{}", v.to_string()); 52 | } 53 | }; 54 | } // namespace fmt 55 | -------------------------------------------------------------------------------- /src/klex/cfg/GrammarParser.h: -------------------------------------------------------------------------------- 1 | // This file is part of the "klex" project, http://github.com/christianparpart/klex> 2 | // (c) 2018 Christian Parpart 3 | // 4 | // Licensed under the MIT License (the "License"); you may not use this 5 | // file except in compliance with the License. You may obtain a copy of 6 | // the License at: http://opensource.org/licenses/MIT 7 | 8 | #pragma once 9 | 10 | #include 11 | #include 12 | #include 13 | #include 14 | 15 | namespace klex { 16 | class Report; 17 | } 18 | 19 | namespace klex::cfg { 20 | 21 | /** 22 | * Parses a context-free-grammar specification. 
23 | */ 24 | class GrammarParser 25 | { 26 | public: 27 | GrammarParser(GrammarLexer&& lexer, Report* report); 28 | GrammarParser(std::string source, Report* report); 29 | 30 | Grammar parse(); 31 | void parseRule(); 32 | Handle parseHandle(); 33 | 34 | private: 35 | using Token = GrammarLexer::Token; 36 | 37 | void parseTokenBlock(); 38 | 39 | [[nodiscard]] const std::string& currentLiteral() const noexcept { return lexer_.currentLiteral(); } 40 | [[nodiscard]] Token currentToken() const noexcept { return lexer_.currentToken(); } 41 | void consumeToken(); 42 | void consumeToken(Token expectedToken); 43 | 44 | [[nodiscard]] std::optional findExplicitTerminal(const std::string& terminalName) const; 45 | 46 | private: 47 | Report* report_; 48 | GrammarLexer lexer_; 49 | Grammar grammar_; 50 | }; 51 | 52 | } // namespace klex::cfg 53 | 54 | // vim:ts=4:sw=4:noet 55 | -------------------------------------------------------------------------------- /examples/cxx.klex: -------------------------------------------------------------------------------- 1 | # vim:syntax=klex 2 | 3 | # keywords 4 | If ::= "if" 5 | Else ::= "else" 6 | While ::= "while" 7 | Do ::= "do" 8 | 9 | # builtin types 10 | Void ::= "void" 11 | Int ::= "int" 12 | Signed ::= "signed" 13 | Unsigned ::= "unsigned" 14 | 15 | Auto ::= "auto" 16 | Const ::= "const" 17 | ConstExpr ::= "constexpr" 18 | 19 | # symbols 20 | CurlyOpen ::= "{" 21 | CurlyClose ::= "}" 22 | RndOpen ::= "(" 23 | RndClose ::= ")" 24 | BrOpen ::= "[" 25 | BrClose ::= "]" 26 | Assign ::= "=" 27 | Not ::= "!" 
28 | NotEqual ::= "!=" 29 | Equal ::= "==" 30 | Less ::= "<" 31 | Greater ::= ">" 32 | LessEqu ::= "<=" 33 | GreaterEqu ::= ">=" 34 | Shl ::= "<<" 35 | Shr ::= ">>" 36 | Plus ::= "+" 37 | Minus ::= "-" 38 | Mul ::= "*" 39 | Div ::= "/" 40 | PlusPlus ::= "++" 41 | MinusMinus ::= "--" 42 | PlusAssign ::= "+=" 43 | MinusAssign ::= "-=" 44 | MulAssign ::= "*=" 45 | DivAssign ::= "/=" 46 | Modulo ::= "%" 47 | ModuloAssign ::= "%=" 48 | 49 | # Misc 50 | Spacing(ignore) ::= [\s\t\n]+ 51 | CxxComment(ignore) ::= "//"[^$]* 52 | CComment(ignore) ::= "/*".*"*/" 53 | Identifier ::= [a-zA-Z_][a-zA-Z_0-9]* 54 | NumberLiteral ::= [0-9]|0x[0-9a-fA-F]+ 55 | Eof ::= <> 56 | StringLiteral ::= \"([^\"\n]|\\\")*\" 57 | CharLiteral ::= '(.)' 58 | -------------------------------------------------------------------------------- /src/klex/cfg/LeftRecursion.h: -------------------------------------------------------------------------------- 1 | // This file is part of the "klex" project, http://github.com/christianparpart/klex> 2 | // (c) 2018 Christian Parpart 3 | // 4 | // Licensed under the MIT License (the "License"); you may not use this 5 | // file except in compliance with the License. You may obtain a copy of 6 | // the License at: http://opensource.org/licenses/MIT 7 | 8 | #pragma once 9 | 10 | #include 11 | #include 12 | #include 13 | #include 14 | 15 | namespace klex::cfg { 16 | 17 | /** 18 | * Eliminates left-recursion by rewriting a Grammar into an equivalent right-recursion grammar. 19 | * 20 | * @note This transformation is required for LL parsers. 21 | */ 22 | class LeftRecursion { 23 | public: 24 | explicit LeftRecursion(Grammar& _grammar); 25 | 26 | static bool isLeftRecursive(const Grammar& grammar); 27 | 28 | void direct(); 29 | void indirect(); 30 | 31 | private: 32 | std::list select(const NonTerminal& lhs, const NonTerminal& first); 33 | void eliminateDirect(const NonTerminal& nt); 34 | 35 | /** 36 | * Creates a unique nonterminal symbol that by name relates to @p nt. 
37 | */ 38 | [[nodiscard]] NonTerminal createRelatedNonTerminal(const NonTerminal& nt) const; 39 | 40 | /** 41 | * Splits all productions of the same nonterminal into a vector of left-recursives and the rest. 42 | */ 43 | [[nodiscard]] std::pair, std::vector> split(std::vector productions) const; 44 | 45 | private: 46 | Grammar& grammar_; 47 | }; 48 | 49 | } // namespace klex::cfg 50 | -------------------------------------------------------------------------------- /examples/wordcount.cpp: -------------------------------------------------------------------------------- 1 | // This file is part of the "klex" project, http://github.com/christianparpart/klex> 2 | // (c) 2018 Christian Parpart 3 | // 4 | // Licensed under the MIT License (the "License"); you may not use this 5 | // file except in compliance with the License. You may obtain a copy of 6 | // the License at: http://opensource.org/licenses/MIT 7 | 8 | #include 9 | #include 10 | 11 | #include 12 | #include 13 | #include 14 | #include 15 | #include 16 | 17 | int main(int argc, const char* argv[]) 18 | { 19 | klex::regular::Compiler cc; 20 | cc.parse(R"( 21 | Word ::= [a-zA-Z]+ 22 | LF ::= \n 23 | Other ::= . 
24 | Eof ::= <> 25 | )"); 26 | 27 | size_t words = 0; 28 | size_t chars = 0; 29 | size_t lines = 0; 30 | 31 | auto ld = cc.compile(); 32 | klex::regular::Lexable lexer { ld, std::cin }; 33 | for (const auto& ti: lexer) 34 | { 35 | switch (token(ti)) 36 | { 37 | case 4: // EOF 38 | break; 39 | case 3: // Other 40 | chars++; 41 | break; 42 | case 2: // LF 43 | chars++; 44 | lines++; 45 | break; 46 | case 1: // Word 47 | words++; 48 | chars += literal(ti).size(); 49 | break; 50 | } 51 | } 52 | 53 | std::cout << "newlines: " << lines << ", words: " << words << ", characters: " << chars << "\n"; 54 | 55 | return EXIT_SUCCESS; 56 | } 57 | -------------------------------------------------------------------------------- /src/klex/util/literals.h: -------------------------------------------------------------------------------- 1 | // This file is part of the "x0" project, http://github.com/christianparpart/x0> 2 | // (c) 2009-2018 Christian Parpart 3 | // 4 | // Licensed under the MIT License (the "License"); you may not use this 5 | // file except in compliance with the License. You may obtain a copy of 6 | // the License at: http://opensource.org/licenses/MIT 7 | 8 | #pragma once 9 | 10 | #include 11 | #include 12 | #include 13 | 14 | namespace klex::util::literals { 15 | 16 | /** 17 | * Strips a multiline string's indentation prefix. 
/**
 * Strips a multiline string's indentation prefix.
 *
 * The first character of the literal is taken as the line marker and is not
 * part of the result. Text is then copied up to and including each line feed;
 * after a line feed everything up to and including the next marker character
 * is dropped.
 *
 * Example:
 * \code
 * string s = R"(|line one
 *               |line two
 *               |line three
 *               )"_multiline;
 * fmt::print(s);
 * \endcode
 *
 * This prints three lines: @c "line one\nline two\nline three\n"
 *
 * @param text  the literal's characters (may contain embedded NUL bytes)
 * @param size  number of characters in @p text, excluding the terminator
 *
 * @returns the literal with marker and indentation removed; empty for an empty literal.
 */
inline std::string operator""_multiline(const char* text, size_t size)
{
    if (size == 0)
        return {};

    constexpr char LF = '\n';
    const char sep = text[0];

    std::string result;
    result.reserve(size - 1);

    // Walk the literal by its real length rather than stopping at the first
    // NUL byte, so embedded '\0' characters survive.
    bool skippingIndent = false;
    for (size_t i = 1; i < size; ++i)
    {
        const char ch = text[i];

        if (skippingIndent)
        {
            // Indentation ends at (and swallows) the marker character.
            if (ch == sep)
                skippingIndent = false;
            continue;
        }

        result += ch;
        if (ch == LF)
            skippingIndent = true;
    }

    return result;
}
NamedArg ::= IDENT ':' Expr 27 | 28 | EmptyStmt ::= ';' 29 | 30 | # expressions 31 | Expr ::= LogicExpr 32 | LogicExpr ::= NotExpr 'and' NotExpr 33 | | NotExpr 'xor' NotExpr 34 | | NotExpr 'or' NotExpr 35 | NotExpr ::= '?' NotExpr 36 | | RelExpr 37 | RelExpr ::= AddExpr _RelOp AddExpr 38 | | AddExpr 39 | _RelOp ::= '==' | '!=' | '<=' | '>=' | '<' | '>' | '=~' | '=^' | '=$' | 'in' 40 | 41 | # TODO the parser must automatically rewrite the rule 42 | AddExpr ::= MulExpr ('*' MulExpr)* 43 | 44 | MulExpr ::= BitNotExpr ('*' BitNotExpr)+ 45 | | BitNotExpr ('/' BitNotExpr)+ 46 | | BitNotExpr 47 | BitNotExpr ::= 48 | NegExpr ::= 49 | PrimaryExpr ::= 50 | LiteralExpr ::= Number | IPv4 51 | CastExpr ::= TypeName '(' Expr ')' 52 | TypeName ::= 'bool' | 'int' | 'string' 53 | -------------------------------------------------------------------------------- /src/klex/regular/NFABuilder.h: -------------------------------------------------------------------------------- 1 | // This file is part of the "klex" project, http://github.com/christianparpart/klex> 2 | // (c) 2018 Christian Parpart 3 | // 4 | // Licensed under the MIT License (the "License"); you may not use this 5 | // file except in compliance with the License. You may obtain a copy of 6 | // the License at: http://opensource.org/licenses/MIT 7 | #pragma once 8 | 9 | #include 10 | #include 11 | #include 12 | 13 | #include 14 | #include 15 | #include 16 | #include 17 | #include 18 | #include 19 | #include 20 | #include 21 | 22 | namespace klex::regular { 23 | 24 | class DFA; 25 | 26 | /*! 27 | * Generates a finite automaton from the given input (a regular expression). 
28 | */ 29 | class NFABuilder { 30 | public: 31 | explicit NFABuilder() : fa_{} {} 32 | 33 | NFA construct(const RegExpr& re, Tag tag); 34 | NFA construct(const RegExpr& re); 35 | void operator()(const LookAheadExpr& lookaheadExpr); 36 | void operator()(const ConcatenationExpr& concatenationExpr); 37 | void operator()(const AlternationExpr& alternationExpr); 38 | void operator()(const CharacterExpr& characterExpr); 39 | void operator()(const CharacterClassExpr& characterClassExpr); 40 | void operator()(const ClosureExpr& closureExpr); 41 | void operator()(const BeginOfLineExpr& bolExpr); 42 | void operator()(const EndOfLineExpr& eolExpr); 43 | void operator()(const EndOfFileExpr& eofExpr); 44 | void operator()(const DotExpr& dotExpr); 45 | void operator()(const EmptyExpr& emptyExpr); 46 | 47 | private: 48 | NFA fa_; 49 | std::optional acceptState_; 50 | }; 51 | 52 | } // namespace klex::regular 53 | -------------------------------------------------------------------------------- /autogen.sh: -------------------------------------------------------------------------------- 1 | #! 
#! /bin/bash
#
# Configures a debug build of the project with CMake.
#
#   autogen.sh clean      remove leftover autotools artefacts and exit
#   autogen.sh [ARGS...]  run cmake on the source root, forwarding ARGS

set -e

BUILDDIR="$(pwd)"
ROOT="$(dirname "$0")"

FILES=(
  /ar-lib
  /aclocal.m4
  /compile
  /autom4te.cache
  /configure
  /install-sh
  /missing
  /depcomp
)

if [[ "${1:-}" == "clean" ]]; then
  find "${ROOT}" -name Makefile.in -exec rm {} \;
  for file in "${FILES[@]}"; do rm -vrf "${ROOT}${file}"; done
  exit 0
fi

# Prints the path of the first candidate found in PATH; falls back to echoing
# the first candidate's name when none is installed.
findexe() {
  for exe in "$@"; do
    if which "$exe" 2>/dev/null; then
      return
    fi
  done
  echo "$1"
}

# Mac OSX has a special location for more recent LLVM/clang installations
#   $ brew tap homebrew/versions
#   $ brew install llvm
if [[ -d "/usr/local/opt/llvm/bin" ]]; then
  export PATH="/usr/local/opt/llvm/bin:${PATH}"
  export CXXFLAGS="$CXXFLAGS -nostdinc++ -I/usr/local/opt/llvm/include/c++/v1"
  export LDFLAGS="$LDFLAGS -L/usr/local/opt/llvm/lib"
fi

# Mac OS/X has `brew install zlib`'d its zlib.pc somewhere non-standard ;-)
pkgdirs=( "/usr/local/opt/zlib/lib/pkgconfig" )
for pkgdir in "${pkgdirs[@]}"; do
  if [[ -d "${pkgdir}" ]]; then
    export PKG_CONFIG_PATH="${PKG_CONFIG_PATH}${PKG_CONFIG_PATH:+:}${pkgdir}"
  fi
done

# $CXX is deliberately left unquoted: when it is unset it must vanish from the
# candidate list instead of becoming an empty first candidate.
export CXX=$(findexe $CXX g++-7 clang++-6.0 clang++ g++)
export CXXFLAGS="${CXXFLAGS:--O0 -g}"

echo "CXX = ${CXX}"
echo "CXXFLAGS = ${CXXFLAGS}"
echo "PKG_CONFIG_PATH = ${PKG_CONFIG_PATH}"

exec cmake "${ROOT}" \
           -DCMAKE_BUILD_TYPE="debug" \
           -DCMAKE_INSTALL_PREFIX="${HOME}/local" \
           -DCMAKE_VERBOSE_MAKEFILE=OFF \
           -DCMAKE_EXPORT_COMPILE_COMMANDS=ON \
           -DENABLE_TIDY=OFF \
           "${@}"
// 4 | // Licensed under the MIT License (the "License"); you may not use this 5 | // file except in compliance with the License. You may obtain a copy of 6 | // the License at: http://opensource.org/licenses/MIT 7 | 8 | #include 9 | #include 10 | #include 11 | #include 12 | #include 13 | #include 14 | 15 | using namespace std; 16 | using namespace klex; 17 | using namespace klex::cfg; 18 | using namespace klex::cfg::ll; 19 | using namespace klex::util::literals; 20 | 21 | TEST(cfg_ll_SyntaxTable, construct_right_recursive) 22 | { 23 | BufferedReport report; 24 | Grammar grammar = GrammarParser( 25 | GrammarLexer { 26 | R"(`token { 27 | ` Spacing(ignore) ::= [\s\t]+ 28 | ` Number ::= [0-9]+ 29 | `} 30 | ` 31 | `Start ::= Expr; 32 | `Expr ::= Term Expr_; 33 | `Expr_ ::= '+' Term Expr_ 34 | ` | ; 35 | `Term ::= Factor Term_; 36 | `Term_ ::= '*' Factor Term_ 37 | ` | ; 38 | `Factor ::= '(' Expr ')' 39 | ` | Number 40 | ` ; 41 | `)"_multiline }, 42 | &report) 43 | .parse(); 44 | 45 | ASSERT_FALSE(report.containsFailures()); 46 | 47 | grammar.finalize(); 48 | log("Grammar:"); 49 | log(grammar.dump()); 50 | 51 | ll::SyntaxTable st = ll::SyntaxTable::construct(grammar); 52 | 53 | log("Syntax Table:"); 54 | log(st.dump(grammar)); 55 | 56 | // TODO 57 | } 58 | 59 | // vim:ts=4:sw=4:noet 60 | -------------------------------------------------------------------------------- /src/klex/CharStream.h: -------------------------------------------------------------------------------- 1 | // This file is part of the "klex" project, http://github.com/christianparpart/klex> 2 | // (c) 2018 Christian Parpart 3 | // 4 | // Licensed under the MIT License (the "License"); you may not use this 5 | // file except in compliance with the License. 
namespace klex {

/**
 * Abstract character source with support for stepping back and restarting.
 */
class CharStream {
  public:
    virtual ~CharStream() = default;

    /// @returns true when no further character can be read.
    [[nodiscard]] virtual bool isEof() const noexcept = 0;

    /// Reads the next character and advances the read position.
    virtual char get() = 0;

    /// Moves the read position back by @p count characters.
    virtual void rollback(int count) = 0;

    /// Moves the read position back to where the stream started.
    virtual void rewind() = 0;
};

/**
 * CharStream over an in-memory string that the stream owns.
 */
class StringStream : public CharStream {
  public:
    explicit StringStream(std::string&& s) : source_{std::move(s)} {}

    [[nodiscard]] bool isEof() const noexcept override { return pos_ >= source_.size(); }

    /**
     * @returns the character at the read position, or '\0' once the position
     *          is at or beyond the end of the string.
     *
     * The position advances on every call, including calls past the end, so
     * rollback() counts stay in step with the number of get() calls.
     */
    char get() override
    {
        // Indexing beyond size() is undefined behaviour, so reads past the end
        // are answered without touching the buffer.
        const char ch = pos_ < source_.size() ? source_[pos_] : '\0';
        ++pos_;
        return ch;
    }

    void rollback(int count) override
    {
        // Stepping back further than was consumed would wrap the unsigned
        // offset around; stop at the start of the buffer instead.
        if (count > 0 && static_cast<size_t>(count) > pos_)
            pos_ = 0;
        else
            pos_ -= count;
    }

    void rewind() override { pos_ = 0; }

  private:
    std::string source_;
    size_t pos_ = 0;
};

/**
 * CharStream reading from a std::istream that the stream does not own.
 */
class StandardStream : public CharStream {
  public:
    explicit StandardStream(std::istream* source);

    [[nodiscard]] bool isEof() const noexcept override { return !source_->good(); }

    char get() override { return static_cast<char>(source_->get()); }

    void rollback(int count) override
    {
        // Reset the state flags first so the seek is not refused after a
        // failed or end-of-file read.
        source_->clear();
        source_->seekg(-count, std::ios::cur);
    }

    void rewind() override
    {
        source_->clear();
        source_->seekg(initialOffset_, std::ios::beg);
    }

  private:
    std::istream* source_;
    std::streamoff initialOffset_;
};

} // namespace klex
You may obtain a copy of 6 | // the License at: http://opensource.org/licenses/MIT 7 | 8 | #include 9 | #include 10 | 11 | #include 12 | 13 | using namespace std; 14 | using namespace klex::regular; 15 | 16 | TEST(regular_DotWriter, simple) 17 | { 18 | stringstream sstr; 19 | DotWriter dw(sstr, "n"); 20 | 21 | dw.start(0); 22 | dw.visitNode(0, true, true); 23 | dw.visitEdge(0, 1, 'a'); 24 | dw.endVisitEdge(0, 1); 25 | 26 | dw.visitNode(1, false, true); 27 | dw.visitEdge(1, 1, 'b'); 28 | dw.visitEdge(1, 1, '\r'); 29 | dw.visitEdge(1, 1, '\n'); 30 | dw.visitEdge(1, 1, '\t'); 31 | dw.visitEdge(1, 1, ' '); 32 | dw.endVisitEdge(1, 1); 33 | dw.end(); 34 | 35 | log(sstr.str()); 36 | ASSERT_TRUE(!sstr.str().empty()); 37 | // just make sure it processes 38 | } 39 | 40 | TEST(regular_DotWriter, multidfa_simple) 41 | { 42 | stringstream sstr; 43 | const MultiDFA::InitialStateMap mis { { "foo", 1 }, { "bar", 2 } }; 44 | DotWriter dw(sstr, "n", mis); 45 | 46 | dw.start(0); 47 | dw.visitNode(0, true, false); 48 | dw.visitNode(1, false, true); 49 | dw.visitNode(2, false, true); 50 | 51 | dw.visitEdge(0, 1, 0x01); 52 | dw.endVisitEdge(0, 1); 53 | 54 | dw.visitEdge(0, 2, 0x02); 55 | dw.endVisitEdge(0, 2); 56 | 57 | dw.visitEdge(1, 1, 'a'); 58 | dw.endVisitEdge(1, 1); 59 | 60 | dw.visitEdge(2, 2, 'a'); 61 | dw.endVisitEdge(2, 2); 62 | 63 | dw.end(); 64 | 65 | log(sstr.str()); 66 | ASSERT_TRUE(!sstr.str().empty()); 67 | // just make sure it processes 68 | } 69 | -------------------------------------------------------------------------------- /src/klex/regular/DFAMinimizer.h: -------------------------------------------------------------------------------- 1 | // This file is part of the "klex" project, http://github.com/christianparpart/klex> 2 | // (c) 2018 Christian Parpart 3 | // 4 | // Licensed under the MIT License (the "License"); you may not use this 5 | // file except in compliance with the License. 
You may obtain a copy of 6 | // the License at: http://opensource.org/licenses/MIT 7 | #pragma once 8 | 9 | #include 10 | #include 11 | #include 12 | 13 | #include 14 | #include 15 | #include 16 | #include 17 | #include 18 | #include 19 | 20 | namespace klex::regular { 21 | 22 | class DFA; 23 | 24 | class DFAMinimizer { 25 | public: 26 | explicit DFAMinimizer(const DFA& dfa); 27 | explicit DFAMinimizer(const MultiDFA& multiDFA); 28 | 29 | DFA constructDFA(); 30 | MultiDFA constructMultiDFA(); 31 | 32 | private: 33 | using PartitionVec = std::list; 34 | 35 | void constructPartitions(); 36 | StateIdVec nonAcceptStates() const; 37 | bool containsInitialState(const StateIdVec& S) const; 38 | bool isMultiInitialState(StateId s) const; 39 | PartitionVec::iterator findGroup(StateId s); 40 | int partitionId(StateId s) const; 41 | PartitionVec split(const StateIdVec& S) const; 42 | DFA constructFromPartitions(const PartitionVec& P) const; 43 | std::optional containsBacktrackState(const StateIdVec& Q) const; 44 | 45 | static void dumpGroups(const PartitionVec& T); 46 | 47 | StateId targetStateId(StateId oldId) const 48 | { 49 | auto i = targetStateIdMap_.find(oldId); 50 | assert(i != targetStateIdMap_.end()); 51 | return i->second; 52 | } 53 | 54 | private: 55 | const DFA& dfa_; 56 | const MultiDFA::InitialStateMap initialStates_; 57 | const Alphabet alphabet_; 58 | PartitionVec T; 59 | PartitionVec P; 60 | std::unordered_map targetStateIdMap_; 61 | }; 62 | 63 | } // namespace klex::regular 64 | -------------------------------------------------------------------------------- /src/klex/regular/TransitionMap.h: -------------------------------------------------------------------------------- 1 | // This file is part of the "klex" project, http://github.com/christianparpart/klex> 2 | // (c) 2018 Christian Parpart 3 | // 4 | // Licensed under the MIT License (the "License"); you may not use this 5 | // file except in compliance with the License. 
You may obtain a copy of 6 | // the License at: http://opensource.org/licenses/MIT 7 | #pragma once 8 | 9 | #include 10 | #include 11 | #include 12 | 13 | namespace klex::regular { 14 | 15 | using CharCatId = int; 16 | 17 | constexpr CharCatId ErrorCharCat = static_cast(-1); 18 | 19 | /** 20 | * Represents an error-state, such as invalid input character or unexpected EOF. 21 | */ 22 | constexpr StateId ErrorState{808080}; // static_cast(-1); 23 | 24 | /** 25 | * Transition mapping API to map the input (currentState, charCat) to (newState). 26 | */ 27 | class TransitionMap { 28 | public: 29 | using Container = std::map>; 30 | 31 | TransitionMap() : mapping_{} {} 32 | 33 | TransitionMap(Container mapping) : mapping_{std::move(mapping)} {} 34 | 35 | /** 36 | * Defines a new mapping for (currentState, charCat) to (nextState). 37 | */ 38 | void define(StateId currentState, Symbol charCat, StateId nextState); 39 | 40 | /** 41 | * Retrieves the next state for the input (currentState, charCat). 42 | * 43 | * @returns the transition from (currentState, charCat) to (nextState) or ErrorState if not defined. 44 | */ 45 | StateId apply(StateId currentState, Symbol charCat) const; 46 | 47 | /** 48 | * Retrieves a list of all available states. 49 | */ 50 | std::vector states() const; 51 | 52 | /** 53 | * Retrieves a map of all transitions from given state @p inputState. 54 | */ 55 | std::map map(StateId inputState) const; 56 | 57 | private: 58 | Container mapping_; 59 | }; 60 | 61 | } // namespace klex::regular 62 | 63 | #include 64 | -------------------------------------------------------------------------------- /src/klex/regular/DFABuilder.h: -------------------------------------------------------------------------------- 1 | // This file is part of the "klex" project, http://github.com/christianparpart/klex> 2 | // (c) 2018 Christian Parpart 3 | // 4 | // Licensed under the MIT License (the "License"); you may not use this 5 | // file except in compliance with the License. 
You may obtain a copy of 6 | // the License at: http://opensource.org/licenses/MIT 7 | #pragma once 8 | 9 | #include 10 | #include 11 | #include 12 | #include 13 | 14 | namespace klex::regular { 15 | 16 | class DFA; 17 | class State; 18 | 19 | class DFABuilder { 20 | public: 21 | //! Map of rules that shows which rule is overshadowed by which other rule. 22 | using OvershadowMap = std::vector>; 23 | 24 | explicit DFABuilder(NFA&& nfa) : nfa_{std::move(nfa)} {} 25 | 26 | /** 27 | * Constructs a DFA out of the NFA. 28 | * 29 | * @param overshadows if not nullptr, it will be used to store semantic information about 30 | * which rule tags have been overshadowed by which. 31 | */ 32 | DFA construct(OvershadowMap* overshadows = nullptr); 33 | 34 | private: 35 | struct TransitionTable; 36 | 37 | DFA constructDFA(const std::vector& Q, const TransitionTable& T, 38 | OvershadowMap* overshadows) const; 39 | 40 | /** 41 | * Finds @p t in @p Q and returns its offset (aka configuration number) or -1 if not found. 42 | */ 43 | static std::optional configurationNumber(const std::vector& Q, const StateIdVec& t); 44 | 45 | /** 46 | * Determines the tag to use for the deterministic state representing @p q from non-deterministic FA @p 47 | * fa. 
48 | * 49 | * @param q the set of states that reflect a single state in the DFA equal to the input FA 50 | * 51 | * @returns the determined tag or std::nullopt if none 52 | */ 53 | std::optional determineTag(const StateIdVec& q, std::map* overshadows) const; 54 | 55 | private: 56 | const NFA nfa_; 57 | }; 58 | 59 | } // namespace klex::regular 60 | -------------------------------------------------------------------------------- /src/klex/regular/RegExpr.h: -------------------------------------------------------------------------------- 1 | // This file is part of the "klex" project, http://github.com/christianparpart/klex> 2 | // (c) 2018 Christian Parpart 3 | // 4 | // Licensed under the MIT License (the "License"); you may not use this 5 | // file except in compliance with the License. You may obtain a copy of 6 | // the License at: http://opensource.org/licenses/MIT 7 | #pragma once 8 | 9 | #include 10 | 11 | #include 12 | #include 13 | #include 14 | #include 15 | #include 16 | #include 17 | 18 | #include 19 | 20 | namespace klex::regular { 21 | 22 | struct AlternationExpr; 23 | struct BeginOfLineExpr; 24 | struct CharacterClassExpr; 25 | struct CharacterExpr; 26 | struct ClosureExpr; 27 | struct ConcatenationExpr; 28 | struct DotExpr; 29 | struct EmptyExpr; 30 | struct EndOfFileExpr; 31 | struct EndOfLineExpr; 32 | struct LookAheadExpr; 33 | 34 | using RegExpr = std::variant; 35 | 36 | struct LookAheadExpr { 37 | std::unique_ptr left; 38 | std::unique_ptr right; 39 | }; 40 | 41 | struct AlternationExpr { 42 | std::unique_ptr left; 43 | std::unique_ptr right; 44 | }; 45 | 46 | struct ConcatenationExpr { 47 | std::unique_ptr left; 48 | std::unique_ptr right; 49 | }; 50 | 51 | struct ClosureExpr { 52 | std::unique_ptr subExpr; 53 | unsigned minimumOccurrences {0}; 54 | unsigned maximumOccurrences {std::numeric_limits::max()}; 55 | }; 56 | 57 | struct CharacterExpr { 58 | Symbol value; 59 | }; 60 | 61 | struct CharacterClassExpr { 62 | SymbolSet symbols; 63 | }; 64 
| 65 | struct DotExpr {}; 66 | struct BeginOfLineExpr {}; 67 | struct EndOfLineExpr {}; 68 | struct EndOfFileExpr {}; 69 | struct EmptyExpr {}; 70 | 71 | std::string to_string(const RegExpr& regex); 72 | int precedence(const RegExpr& regex); 73 | bool containsBeginOfLine(const RegExpr& regex); 74 | 75 | } // namespace klex::regular 76 | -------------------------------------------------------------------------------- /.circleci/config.yml: -------------------------------------------------------------------------------- 1 | # vim:ts=2:sw=2:et 2 | version: 2 3 | 4 | # ----------------------------------------------------------------------------------------------- 5 | defaults: 6 | 7 | - workflow_trigger_on_tags: &workflow_trigger_on_tags 8 | filters: 9 | tags: 10 | only: /.*/ 11 | 12 | - run_prepare: &run_prepare 13 | name: Prepare 14 | command: | 15 | set -ex 16 | apt-get -q update 17 | apt-get -qy install cmake clang++-8 18 | 19 | - run_build: &run_build 20 | name: Build 21 | command: | 22 | set -ex 23 | mkdir -p build 24 | cd build 25 | cmake .. 
-G "Unix Makefiles" \ 26 | -DCMAKE_CXX_COMPILER=$CMAKE_CXX_COMPILER \ 27 | -DCMAKE_C_COMPILER=$CMAKE_C_COMPILER \ 28 | -DCMAKE_BUILD_TYPE="$CMAKE_BUILD_TYPE" \ 29 | $CMAKE_OPTIONS 30 | make -j3 31 | 32 | - run_test: &run_test 33 | name: klex_test 34 | command: ./build/klex_test 35 | 36 | - run_git_sm_init: &run_git_sm_init 37 | name: git submodule init 38 | command: git submodule update --init 39 | 40 | # ----------------------------------------------------------------------------------------------- 41 | jobs: 42 | 43 | build_ubuntu1904_gcc: 44 | docker: 45 | - image: buildpack-deps:disco 46 | environment: 47 | CMAKE_BUILD_TYPE: "Release" 48 | CMAKE_C_COMPILER: "gcc-8" 49 | CMAKE_CXX_COMPILER: "g++-8" 50 | steps: 51 | - checkout 52 | - run: *run_git_sm_init 53 | - run: *run_prepare 54 | - run: *run_build 55 | - run: *run_test 56 | - persist_to_workspace: 57 | root: build 58 | paths: 59 | - "*" 60 | 61 | build_ubuntu1904_clang: 62 | docker: 63 | - image: buildpack-deps:disco 64 | environment: 65 | CMAKE_BUILD_TYPE: "Release" 66 | CMAKE_C_COMPILER: "clang-8" 67 | CMAKE_CXX_COMPILER: "clang++-8" 68 | steps: 69 | - checkout 70 | - run: *run_git_sm_init 71 | - run: *run_prepare 72 | - run: *run_build 73 | - run: *run_test 74 | - persist_to_workspace: 75 | root: build 76 | paths: 77 | - "*" 78 | 79 | workflows: 80 | version: 2 81 | 82 | build_and_test: 83 | jobs: 84 | - build_ubuntu1904_gcc: *workflow_trigger_on_tags 85 | - build_ubuntu1904_clang: *workflow_trigger_on_tags 86 | -------------------------------------------------------------------------------- /klex2flex.sh: -------------------------------------------------------------------------------- 1 | #! /bin/bash 2 | # This file is part of the "klex" project, http://github.com/christianparpart/klex> 3 | # (c) 2018 Christian Parpart 4 | # 5 | # Licensed under the MIT License (the "License"); you may not use this 6 | # file except in compliance with the License. 
You may obtain a copy of 7 | # the License at: http://opensource.org/licenses/MIT 8 | 9 | set -e 10 | 11 | klex_file="$1" 12 | lex_file="out.lex" 13 | table_file="table.cc" 14 | token_file="token.h" 15 | typeName="Token" 16 | 17 | echo klex file: ${klex_file} 18 | echo table file: ${table_file} 19 | echo token file: ${token_file} 20 | 21 | generate_token_file() { 22 | awk >${token_file} <"${klex_file}" -f <(echo ' 23 | BEGIN { 24 | rule_nr = 0; 25 | printf("#pragma once\n\n"); 26 | printf("#include // abort()\n"); 27 | printf("#include \n\n"); 28 | printf("enum class Token {\n"); 29 | } 30 | 31 | match($0, /^(\w+)\(ignore\)\s*::=\s*(.*)$/, rule) { 32 | } 33 | 34 | match($0, /^(\w+)\s*::=\s*(.*)$/, rule) { 35 | name = rule[1]; 36 | pattern = rule[2]; 37 | rule_nr++; 38 | printf(" %-20s = %4s, // %s\n", name, rule_nr, pattern); 39 | } 40 | 41 | END { 42 | printf("};\n\n"); # end enum 43 | } 44 | ') 45 | 46 | awk >>${token_file} <"${klex_file}" -f <(echo " 47 | BEGIN { 48 | printf(\"inline constexpr std::string_view to_string(${typeName} t) {\n\"); 49 | printf(\" switch (t) { \n\"); 50 | } 51 | match(\$0, /^(\w+)\s*::=\s*(.*)$/, rule) { 52 | name = rule[1]; 53 | printf(\" case ${typeName}::%s: return \\\"%s\\\";\n\", name, name); 54 | } 55 | END { 56 | printf(\" default: abort();\n\"); 57 | printf(\" }\n\"); 58 | printf(\"}\n\"); 59 | } 60 | ") 61 | } 62 | 63 | generate_table_file() { 64 | awk >${lex_file} <"${klex_file}" -f <(echo ' 65 | BEGIN { 66 | rule_nr = 0; 67 | printf("%%%%\n"); 68 | printf("%%option noyywrap\n"); 69 | } 70 | 71 | match($0, /^(\w+)\(ignore\)\s*::=\s*(.*)$/, rule) { 72 | name = rule[1]; 73 | pattern = rule[2]; 74 | printf("%-40s { /* %s */ }\n", pattern, name); 75 | } 76 | 77 | match($0, /^(\w+)\s*::=\s*(.*)$/, rule) { 78 | name = rule[1]; 79 | pattern = rule[2]; 80 | rule_nr++; 81 | printf("%-40s { return %d; /* %s */ }\n", pattern, rule_nr, name); 82 | }') 83 | } 84 | 85 | generate_table_file 86 | generate_token_file 87 | 88 | flex -t 
${lex_file} >${table_file} 89 | -------------------------------------------------------------------------------- /src/klex/regular/DotWriter.h: -------------------------------------------------------------------------------- 1 | // This file is part of the "klex" project, http://github.com/christianparpart/klex> 2 | // (c) 2018 Christian Parpart 3 | // 4 | // Licensed under the MIT License (the "License"); you may not use this 5 | // file except in compliance with the License. You may obtain a copy of 6 | // the License at: http://opensource.org/licenses/MIT 7 | #pragma once 8 | 9 | #include 10 | #include 11 | #include 12 | 13 | #include 14 | #include 15 | #include 16 | #include 17 | #include 18 | #include 19 | 20 | namespace klex::regular { 21 | 22 | class DotWriter : public DotVisitor { 23 | public: 24 | DotWriter(std::ostream& os, std::string stateLabelPrefix) 25 | : ownedStream_{}, 26 | stream_{os}, 27 | stateLabelPrefix_{stateLabelPrefix}, 28 | transitionGroups_{}, 29 | initialStates_{nullptr}, 30 | initialState_{0} 31 | { 32 | } 33 | 34 | DotWriter(const std::string& filename, std::string stateLabelPrefix) 35 | : ownedStream_{std::make_unique(filename)}, 36 | stream_{*ownedStream_.get()}, 37 | stateLabelPrefix_{stateLabelPrefix}, 38 | transitionGroups_{}, 39 | initialStates_{nullptr}, 40 | initialState_{0} 41 | { 42 | } 43 | 44 | DotWriter(std::ostream& os, std::string stateLabelPrefix, const MultiDFA::InitialStateMap& initialStates) 45 | : ownedStream_{}, 46 | stream_{os}, 47 | stateLabelPrefix_{stateLabelPrefix}, 48 | transitionGroups_{}, 49 | initialStates_{&initialStates}, 50 | initialState_{0} 51 | { 52 | } 53 | 54 | DotWriter(const std::string& filename, std::string stateLabelPrefix, 55 | const MultiDFA::InitialStateMap& initialStates) 56 | : ownedStream_{std::make_unique(filename)}, 57 | stream_{*ownedStream_.get()}, 58 | stateLabelPrefix_{stateLabelPrefix}, 59 | transitionGroups_{}, 60 | initialStates_{&initialStates}, 61 | initialState_{0} 62 | { 63 
| } 64 | 65 | public: 66 | void start(StateId initialState) override; 67 | void visitNode(StateId number, bool start, bool accept) override; 68 | void visitEdge(StateId from, StateId to, Symbol s) override; 69 | void endVisitEdge(StateId from, StateId to) override; 70 | void end() override; 71 | 72 | private: 73 | std::unique_ptr ownedStream_; 74 | std::ostream& stream_; 75 | std::string stateLabelPrefix_; 76 | std::map /*transition symbols*/> transitionGroups_; 77 | const MultiDFA::InitialStateMap* initialStates_; 78 | StateId initialState_; 79 | }; 80 | 81 | } // namespace klex::regular 82 | -------------------------------------------------------------------------------- /src/klex/regular/LexerDef.h: -------------------------------------------------------------------------------- 1 | // This file is part of the "klex" project, http://github.com/christianparpart/klex> 2 | // (c) 2018 Christian Parpart 3 | // 4 | // Licensed under the MIT License (the "License"); you may not use this 5 | // file except in compliance with the License. You may obtain a copy of 6 | // the License at: http://opensource.org/licenses/MIT 7 | #pragma once 8 | 9 | #include 10 | #include 11 | #include 12 | #include 13 | #include 14 | 15 | namespace klex::regular { 16 | 17 | // special tags 18 | constexpr Tag IgnoreTag = static_cast(-1); 19 | constexpr Tag FirstUserTag = 1; 20 | 21 | using AcceptStateMap = std::map; 22 | 23 | //! defines a mapping between accept state ID and another (prior) ID to track roll back the input stream to. 
24 | using BacktrackingMap = std::map; 25 | 26 | struct LexerDef { 27 | std::map initialStates; 28 | bool containsBeginOfLineStates; 29 | TransitionMap transitions; 30 | AcceptStateMap acceptStates; 31 | BacktrackingMap backtrackingStates; 32 | std::map tagNames; 33 | 34 | std::string to_string() const; 35 | 36 | bool isValidTag(Tag t) const noexcept { 37 | return tagNames.find(t) != tagNames.end(); 38 | } 39 | 40 | std::string tagName(Tag t) const { 41 | auto i = tagNames.find(t); 42 | assert(i != tagNames.end()); 43 | return i->second; 44 | } 45 | }; 46 | 47 | inline std::string LexerDef::to_string() const { 48 | std::stringstream sstr; 49 | 50 | sstr << fmt::format("initializerStates:\n"); 51 | for (const std::pair q0 : initialStates) 52 | sstr << fmt::format(" {}: {}\n", q0.first, q0.second); 53 | sstr << fmt::format("totalStates: {}\n", transitions.states().size()); 54 | 55 | sstr << "transitions:\n"; 56 | for (StateId inputState : transitions.states()) { 57 | std::map> T; 58 | for (const std::pair p : transitions.map(inputState)) { 59 | T[p.second].push_back(p.first); 60 | } 61 | for (auto& t : T) { 62 | sstr << fmt::format("- n{} --({})--> n{}\n", inputState, groupCharacterClassRanges(std::move(t.second)), t.first); 63 | } 64 | } 65 | 66 | sstr << "accepts:\n"; 67 | for (const std::pair a : acceptStates) 68 | sstr << fmt::format("- n{} to {} ({})\n", a.first, a.second, tagName(a.second)); 69 | 70 | if (!backtrackingStates.empty()) { 71 | sstr << "backtracking:\n"; 72 | for (const std::pair bt : backtrackingStates) 73 | sstr << fmt::format("- n{} to n{}\n", bt.first, bt.second); 74 | } 75 | 76 | return sstr.str(); 77 | } 78 | 79 | } // namespace klex::regular 80 | -------------------------------------------------------------------------------- /src/klex/cfg/GrammarLexer_test.cpp: -------------------------------------------------------------------------------- 1 | // This file is part of the "klex" project, http://github.com/christianparpart/klex> 2 | // (c) 
2018 Christian Parpart 3 | // 4 | // Licensed under the MIT License (the "License"); you may not use this 5 | // file except in compliance with the License. You may obtain a copy of 6 | // the License at: http://opensource.org/licenses/MIT 7 | 8 | #include 9 | #include 10 | #include 11 | 12 | using namespace std; 13 | using namespace klex; 14 | using namespace klex::util::literals; 15 | 16 | using cfg::Grammar; 17 | using cfg::GrammarLexer; 18 | 19 | TEST(cfg_GrammarLexer, literals) 20 | { 21 | GrammarLexer lexer(R"('1' '23' '456' "789")"); 22 | 23 | ASSERT_EQ(GrammarLexer::Token::Literal, lexer.recognize()); 24 | ASSERT_EQ("1", lexer.currentLiteral()); 25 | 26 | ASSERT_EQ(GrammarLexer::Token::Literal, lexer.recognize()); 27 | ASSERT_EQ("23", lexer.currentLiteral()); 28 | 29 | ASSERT_EQ(GrammarLexer::Token::Literal, lexer.recognize()); 30 | ASSERT_EQ("456", lexer.currentLiteral()); 31 | 32 | ASSERT_EQ(GrammarLexer::Token::Literal, lexer.recognize()); 33 | ASSERT_EQ("789", lexer.currentLiteral()); 34 | 35 | ASSERT_EQ(GrammarLexer::Token::Eof, lexer.recognize()); 36 | } 37 | 38 | TEST(cfg_GrammarLexer, tokenization) 39 | { 40 | GrammarLexer lexer(R"(: 41 | :Expr ::= Expr '+' Term {addExpr} 42 | : | Expr '-' Term {subExpr} 43 | : ; 44 | :)"_multiline); 45 | 46 | ASSERT_EQ(GrammarLexer::Token::Identifier, lexer.recognize()); 47 | ASSERT_EQ(GrammarLexer::Token::Assoc, lexer.recognize()); 48 | ASSERT_EQ(GrammarLexer::Token::Identifier, lexer.recognize()); 49 | ASSERT_EQ(GrammarLexer::Token::Literal, lexer.recognize()); 50 | ASSERT_EQ("+", lexer.currentLiteral()); 51 | ASSERT_EQ(GrammarLexer::Token::Identifier, lexer.recognize()); 52 | ASSERT_EQ(GrammarLexer::Token::SetOpen, lexer.recognize()); 53 | ASSERT_EQ(GrammarLexer::Token::Identifier, lexer.recognize()); 54 | ASSERT_EQ(GrammarLexer::Token::SetClose, lexer.recognize()); 55 | 56 | ASSERT_EQ(GrammarLexer::Token::Or, lexer.recognize()); 57 | ASSERT_EQ(GrammarLexer::Token::Identifier, lexer.recognize()); 58 | 
ASSERT_EQ(GrammarLexer::Token::Literal, lexer.recognize()); 59 | ASSERT_EQ("-", lexer.currentLiteral()); 60 | ASSERT_EQ(GrammarLexer::Token::Identifier, lexer.recognize()); 61 | ASSERT_EQ(GrammarLexer::Token::SetOpen, lexer.recognize()); 62 | ASSERT_EQ(GrammarLexer::Token::Identifier, lexer.recognize()); 63 | ASSERT_EQ(GrammarLexer::Token::SetClose, lexer.recognize()); 64 | 65 | ASSERT_EQ(GrammarLexer::Token::Semicolon, lexer.recognize()); 66 | ASSERT_EQ(GrammarLexer::Token::Eof, lexer.recognize()); 67 | } 68 | 69 | // vim:ts=4:sw=4:noet 70 | -------------------------------------------------------------------------------- /src/klex/regular/NFA_test.cpp: -------------------------------------------------------------------------------- 1 | // This file is part of the "klex" project, http://github.com/christianparpart/klex> 2 | // (c) 2018 Christian Parpart 3 | // 4 | // Licensed under the MIT License (the "License"); you may not use this 5 | // file except in compliance with the License. You may obtain a copy of 6 | // the License at: http://opensource.org/licenses/MIT 7 | 8 | #include 9 | #include 10 | #include 11 | #include 12 | 13 | using namespace std; 14 | using namespace klex::regular; 15 | 16 | TEST(regular_NFA, emptyCtor) 17 | { 18 | const NFA nfa; 19 | ASSERT_EQ(0, nfa.size()); 20 | ASSERT_TRUE(nfa.empty()); 21 | } 22 | 23 | TEST(regular_NFA, characterCtor) 24 | { 25 | const NFA nfa { 'a' }; 26 | ASSERT_EQ(2, nfa.size()); 27 | ASSERT_EQ(0, nfa.initialStateId()); 28 | ASSERT_EQ(1, nfa.acceptStateId()); 29 | ASSERT_EQ(StateIdVec { 1 }, nfa.delta(StateIdVec { 0 }, 'a')); 30 | } 31 | 32 | TEST(regular_NFA, concatenate) 33 | { 34 | const NFA ab = move(NFA { 'a' }.concatenate(NFA { 'b' })); 35 | ASSERT_EQ(4, ab.size()); 36 | ASSERT_EQ(0, ab.initialStateId()); 37 | ASSERT_EQ(3, ab.acceptStateId()); 38 | 39 | // TODO: check ab.initial == A.initial 40 | // TODO: check A.accept == B.initial 41 | // TODO: check ab.accept == B.accept 42 | } 43 | 44 | TEST(regular_NFA, 
alternate) 45 | { 46 | const NFA ab = move(NFA { 'a' }.alternate(NFA { 'b' })); 47 | ASSERT_EQ(6, ab.size()); 48 | ASSERT_EQ(2, ab.initialStateId()); 49 | ASSERT_EQ(3, ab.acceptStateId()); 50 | 51 | // TODO: check acceptState transitions to A and B 52 | // TODO: check A and B's outgoing edges to final acceptState 53 | } 54 | 55 | TEST(regular_NFA, epsilonClosure) 56 | { 57 | const NFA nfa { 'a' }; 58 | ASSERT_EQ(0, nfa.initialStateId()); 59 | ASSERT_EQ(1, nfa.acceptStateId()); 60 | ASSERT_EQ(StateIdVec { 0 }, nfa.epsilonClosure(StateIdVec { 0 })); 61 | 62 | const NFA abc = move(NFA { 'a' }.concatenate(move(NFA { 'b' }.alternate(NFA { 'c' }).recurring()))); 63 | ASSERT_EQ(StateIdVec { 0 }, abc.epsilonClosure(StateIdVec { 0 })); 64 | 65 | const StateIdVec e1 { 1, 2, 4, 6, 8, 9 }; 66 | ASSERT_EQ(e1, abc.epsilonClosure(StateIdVec { 1 })); 67 | } 68 | 69 | TEST(regular_NFA, delta) 70 | { 71 | const NFA nfa { 'a' }; 72 | ASSERT_EQ(0, nfa.initialStateId()); 73 | ASSERT_EQ(1, nfa.acceptStateId()); 74 | ASSERT_EQ(StateIdVec { 1 }, nfa.delta(StateIdVec { 0 }, 'a')); 75 | } 76 | 77 | TEST(regular_NFA, alphabet) 78 | { 79 | ASSERT_EQ("{}", NFA {}.alphabet().to_string()); 80 | ASSERT_EQ("{a}", NFA { 'a' }.alphabet().to_string()); 81 | ASSERT_EQ("{ab}", NFA { 'a' }.concatenate(NFA { 'b' }).alphabet().to_string()); 82 | ASSERT_EQ("{abc}", NFA { 'a' }.concatenate(NFA { 'b' }).alternate(NFA { 'c' }).alphabet().to_string()); 83 | } 84 | -------------------------------------------------------------------------------- /src/klex/regular/Symbols_test.cpp: -------------------------------------------------------------------------------- 1 | // This file is part of the "x0" project, http://github.com/christianparpart/x0> 2 | // (c) 2009-2018 Christian Parpart 3 | // 4 | // Licensed under the MIT License (the "License"); you may not use this 5 | // file except in compliance with the License. 
You may obtain a copy of 6 | // the License at: http://opensource.org/licenses/MIT 7 | 8 | #include 9 | #include 10 | 11 | using namespace std; 12 | using klex::regular::SymbolSet; 13 | 14 | TEST(regular_SymbolSet, s0) 15 | { 16 | SymbolSet s0; 17 | ASSERT_EQ(0, s0.size()); 18 | ASSERT_TRUE(s0.empty()); 19 | } 20 | 21 | TEST(regular_SymbolSet, s1) 22 | { 23 | SymbolSet s1; 24 | 25 | // first add 26 | s1.insert('a'); 27 | ASSERT_EQ(1, s1.size()); 28 | ASSERT_FALSE(s1.empty()); 29 | 30 | // overwrite 31 | s1.insert('a'); 32 | ASSERT_EQ(1, s1.size()); 33 | ASSERT_FALSE(s1.empty()); 34 | } 35 | 36 | TEST(regular_SymbolSet, initializer_list) 37 | { 38 | SymbolSet a { 'a' }; 39 | EXPECT_EQ(1, a.size()); 40 | EXPECT_TRUE(a.contains('a')); 41 | 42 | SymbolSet s2 { 'a', 'b', 'b', 'c' }; 43 | EXPECT_EQ(3, s2.size()); 44 | EXPECT_EQ("abc", s2.to_string()); 45 | } 46 | 47 | TEST(regular_SymbolSet, dot) 48 | { 49 | SymbolSet dot(SymbolSet::Dot); 50 | EXPECT_FALSE(dot.contains('\n')); 51 | EXPECT_TRUE(dot.contains('\0')); 52 | EXPECT_TRUE(dot.contains(' ')); 53 | EXPECT_TRUE(dot.isDot()); 54 | EXPECT_EQ(".", dot.to_string()); 55 | } 56 | 57 | TEST(regular_SymbolSet, complement) 58 | { 59 | SymbolSet s; 60 | s.insert('\n'); 61 | EXPECT_EQ("\\n", s.to_string()); 62 | s.complement(); 63 | EXPECT_EQ(".", s.to_string()); 64 | } 65 | 66 | TEST(regular_SymbolSet, range) 67 | { 68 | SymbolSet r; 69 | r.insert(make_pair('a', 'f')); 70 | 71 | EXPECT_EQ(6, r.size()); 72 | EXPECT_EQ("a-f", r.to_string()); 73 | 74 | r.insert(make_pair('0', '9')); 75 | EXPECT_EQ(16, r.size()); 76 | EXPECT_EQ("0-9a-f", r.to_string()); 77 | } 78 | 79 | TEST(regular_SymbolSet, fmt_format) 80 | { 81 | SymbolSet s; 82 | s.insert(make_pair('0', '9')); 83 | s.insert(make_pair('a', 'f')); 84 | 85 | EXPECT_EQ("0-9a-f", fmt::format("{}", s)); 86 | } 87 | 88 | TEST(regular_SymbolSet, hash_map) 89 | { 90 | SymbolSet s0; 91 | SymbolSet s1 { 'a' }; 92 | SymbolSet s2 { 'a', 'b' }; 93 | 94 | unordered_map map; 95 | map[s0] = 
0; 96 | map[s1] = 1; 97 | map[s2] = 2; 98 | 99 | EXPECT_EQ(0, map[s0]); 100 | EXPECT_EQ(1, map[s1]); 101 | EXPECT_EQ(2, map[s2]); 102 | } 103 | 104 | TEST(regular_SymbolSet, compare) 105 | { 106 | SymbolSet s1 { 'a', 'b' }; 107 | SymbolSet s2 { 'a', 'b' }; 108 | SymbolSet s3 { 'a', 'c' }; 109 | ASSERT_TRUE(s1 == s2); 110 | ASSERT_TRUE(s1 != s3); 111 | } 112 | -------------------------------------------------------------------------------- /src/klex/cfg/ll/README.md: -------------------------------------------------------------------------------- 1 | 2 | # LL(1) Syntax Analyzer 3 | 4 | ## Motivations 5 | 6 | - Have a convenience-first API for generating and analyzing context free grammars (of type LL(1) and LL(k)). 7 | - Rule rewriting to solve various conflicts or improve power & convenience of the input grammar: 8 | - Must solve left-recursion by rewriting into right-recursive rules. 9 | - Must rewrite iterations into set of right-recursive rules. 10 | - Must support epsilon rules. 11 | - Keep C++20's constexpr changes in mind to allow early adoption of compile-time table constructions. 12 | 13 | ## klax-Grammar File Format 14 | 15 | ``` 16 | Start ::= ExplicitTokenGroup? GrammarRule+ 17 | ExplicitTokenGroup ::= 'token' '{' KLEX_TOKEN_GRAMMAR* '}' 18 | GrammarRule ::= NonTerminal '::=' Handle ('|' Handle)* ';' 19 | NonTerminal ::= _*[A-Z][a-zA-Z0-9_]* 20 | Terminal ::= _*[a-z][A-Za-z0-9_]* 21 | | "'" ... 
"'" 22 | | '([^'\n]|\\\\')*'|\"([^\"\n]|\\\")*\" 23 | Handle ::= (Terminal | NonTerminal)* 24 | ``` 25 | 26 | ## klax example files 27 | 28 | ### Expression-Term-Factor 29 | 30 | ``` 31 | token { 32 | Spacing(ignore) ::= [\s\t\n]+ 33 | Number ::= 0|[1-9][0-9]* 34 | Ident ::= [a-z]+ 35 | Eof ::= <> 36 | } 37 | 38 | # NTS ::= HANDLES {ACTION_LABELS} 39 | 40 | Start ::= Expr Eof {expr} 41 | Expr ::= Expr '+' Term {addExpr} 42 | | Expr '-' Term {subExpr} 43 | | Term 44 | ; 45 | Term ::= Term '*' Factor {mulExpr} 46 | | Term '/' Factor {divExpr} 47 | | Factor 48 | ; 49 | Factor ::= Number {numberLiteral} 50 | | Ident {variable} 51 | | '(' Expr ')' 52 | ; 53 | ``` 54 | 55 | ```cpp 56 | using namespace std; 57 | 58 | klex::ll::Def pd = klex::ll::Compiler{ETF_RULES}.compile(); 59 | klex::ll::Analyzer parser{ pd, "2 + 3 * (10 - 6)" }; 60 | parser.action("numberLiteral", [](auto& args) { return stoi(args.literal(1)); }) 61 | .action("mulExpr", [](auto const& args) { return args(1) * args(2); }) 62 | .action("divExpr", [](auto const& args) { return args(1) / args(2); }) 63 | .action("addExpr", [](auto const& args) { return args(1) + args(2); }) 64 | .action("subExpr", [](auto const& args) { return args(1) - args(2); }); 65 | unique_ptr expr = parser.analyze(); 66 | ``` 67 | 68 | The parse-table generator needs to rewrite the left-recursion into right-recursion to make 69 | the grammar LL(1) compatible. 70 | 71 | # Random Brainstorming Thoughts 72 | 73 | ``` 74 | # should be supportable 75 | AddExpr ::= MulExpr ('+' MulExpr)* 76 | 77 | # and automatically rewritten into right-most derivative grammar 78 | AddExpr ::= MulExpr 79 | | MulExpr '+' AddExpr 80 | | MulExpr '-' AddExpr 81 | 82 | A -> aX*b 83 | into 84 | A -> ab 85 | | aX'b 86 | X' -> X X'? 
87 | ``` 88 | -------------------------------------------------------------------------------- /.clang-format: -------------------------------------------------------------------------------- 1 | --- 2 | BasedOnStyle: Microsoft 3 | AccessModifierOffset: '-2' 4 | AlignAfterOpenBracket: Align 5 | AlignConsecutiveMacros: 'true' 6 | AlignConsecutiveDeclarations: 'false' 7 | AlignEscapedNewlines: Left 8 | AlignOperands: 'true' 9 | AlignTrailingComments: 'true' 10 | AllowAllArgumentsOnNextLine: 'true' 11 | AllowAllConstructorInitializersOnNextLine: 'true' 12 | AllowAllParametersOfDeclarationOnNextLine: 'true' 13 | AllowShortBlocksOnASingleLine: 'false' 14 | AllowShortCaseLabelsOnASingleLine: 'true' 15 | AllowShortFunctionsOnASingleLine: InlineOnly 16 | AllowShortIfStatementsOnASingleLine: Never 17 | AllowShortLambdasOnASingleLine: Inline 18 | AllowShortLoopsOnASingleLine: 'false' 19 | AlwaysBreakAfterReturnType: None 20 | AlwaysBreakBeforeMultilineStrings: 'false' 21 | AlwaysBreakTemplateDeclarations: 'Yes' 22 | BinPackArguments: 'false' 23 | BinPackParameters: 'false' 24 | BreakBeforeBinaryOperators: NonAssignment 25 | BreakBeforeBraces: Custom 26 | BreakBeforeTernaryOperators: 'true' 27 | BreakConstructorInitializers: AfterColon 28 | BreakInheritanceList: AfterColon 29 | BreakStringLiterals: 'true' 30 | ColumnLimit: '110' 31 | CompactNamespaces: 'false' 32 | ConstructorInitializerAllOnOneLineOrOnePerLine: 'true' 33 | ConstructorInitializerIndentWidth: '4' 34 | ContinuationIndentWidth: '4' 35 | Cpp11BracedListStyle: 'false' 36 | DerivePointerAlignment: 'false' 37 | FixNamespaceComments: 'true' 38 | IncludeBlocks: Regroup 39 | IndentCaseLabels: true 40 | IndentPPDirectives: BeforeHash 41 | IndentWidth: '4' 42 | IndentWrappedFunctionNames: 'false' 43 | Language: Cpp 44 | MaxEmptyLinesToKeep: '1' 45 | NamespaceIndentation: Inner 46 | PenaltyBreakAssignment: '0' 47 | PointerAlignment: Left 48 | ReflowComments: 'true' 49 | SortIncludes: 'true' 50 | SortUsingDeclarations: 
'true' 51 | SpaceAfterCStyleCast: 'true' 52 | SpaceAfterLogicalNot: 'false' 53 | SpaceAfterTemplateKeyword: 'true' 54 | SpaceBeforeAssignmentOperators: 'true' 55 | SpaceBeforeCpp11BracedList: 'true' 56 | SpaceBeforeCtorInitializerColon: 'false' 57 | SpaceBeforeInheritanceColon: 'false' 58 | SpaceBeforeParens: ControlStatements 59 | SpaceBeforeRangeBasedForLoopColon: 'false' 60 | SpaceInEmptyParentheses: 'false' 61 | SpacesInAngles: 'false' 62 | SpacesInCStyleCastParentheses: 'false' 63 | SpacesInContainerLiterals: 'false' 64 | SpacesInParentheses: 'false' 65 | SpacesInSquareBrackets: 'false' 66 | Standard: Cpp11 67 | TabWidth: '4' 68 | UseTab: Never 69 | IncludeCategories: 70 | - Regex: '^<(klex)/' 71 | Priority: 0 72 | - Regex: '^<(crispy)/' 73 | Priority: 4 74 | - Regex: '^<(unicode)/' 75 | Priority: 5 76 | - Regex: '^<(fmt)/' 77 | Priority: 6 78 | - Regex: '^<(yaml-cpp)/' 79 | Priority: 7 80 | - Regex: '^<(range)/' 81 | Priority: 8 82 | - Regex: '^' 89 | Priority: 41 90 | - Regex: '<[[:alnum:]_]+\.h>' 91 | Priority: 42 92 | - Regex: '.*' 93 | Priority: 99 94 | -------------------------------------------------------------------------------- /src/klex/regular/RegExprParser.h: -------------------------------------------------------------------------------- 1 | // This file is part of the "klex" project, http://github.com/christianparpart/klex> 2 | // (c) 2018 Christian Parpart 3 | // 4 | // Licensed under the MIT License (the "License"); you may not use this 5 | // file except in compliance with the License. 
You may obtain a copy of 6 | // the License at: http://opensource.org/licenses/MIT 7 | #pragma once 8 | 9 | #include 10 | #include 11 | #include 12 | #include 13 | #include 14 | 15 | namespace klex::regular { 16 | 17 | class SymbolSet; 18 | 19 | class RegExprParser { 20 | public: 21 | RegExprParser(); 22 | 23 | RegExpr parse(std::string_view expr, int line, int column); 24 | 25 | RegExpr parse(std::string_view expr) { return parse(std::move(expr), 1, 1); } 26 | 27 | class UnexpectedToken : public std::runtime_error { 28 | public: 29 | UnexpectedToken(unsigned int line, unsigned int column, std::string actual, std::string expected) 30 | : std::runtime_error{fmt::format("[{}:{}] Unexpected token {}. Expected {} instead.", line, 31 | column, actual, expected)}, 32 | line_{line}, 33 | column_{column}, 34 | actual_{std::move(actual)}, 35 | expected_{std::move(expected)} 36 | { 37 | } 38 | 39 | UnexpectedToken(unsigned int line, unsigned int column, int actual, int expected) 40 | : UnexpectedToken{line, column, 41 | actual == -1 ? "EOF" : fmt::format("{}", static_cast(actual)), 42 | std::string(1, static_cast(expected))} 43 | { 44 | } 45 | 46 | unsigned int line() const noexcept { return line_; } 47 | unsigned int column() const noexcept { return column_; } 48 | const std::string& actual() const noexcept { return actual_; } 49 | const std::string& expected() const noexcept { return expected_; } 50 | 51 | private: 52 | unsigned int line_; 53 | unsigned int column_; 54 | std::string actual_; 55 | std::string expected_; 56 | }; 57 | 58 | private: 59 | int currentChar() const; 60 | bool eof() const noexcept { return currentChar() == -1; } 61 | bool consumeIf(int ch); 62 | void consume(int ch); 63 | int consume(); 64 | unsigned parseInt(); 65 | 66 | RegExpr parse(); // expr 67 | RegExpr parseExpr(); // lookahead 68 | RegExpr parseLookAheadExpr(); // alternation ('/' alternation)? 
69 | RegExpr parseAlternation(); // concatenation ('|' concatenation)* 70 | RegExpr parseConcatenation(); // closure (closure)* 71 | RegExpr parseClosure(); // atom ['*' | '?' | '{' NUM [',' NUM] '}'] 72 | RegExpr parseAtom(); // character | characterClass | '(' expr ')' 73 | RegExpr parseCharacterClass(); // '[' characterClassFragment+ ']' 74 | void parseCharacterClassFragment(SymbolSet& ss); // namedClass | character | character '-' character 75 | void parseNamedCharacterClass(SymbolSet& ss); // '[' ':' NAME ':' ']' 76 | Symbol parseSingleCharacter(); 77 | 78 | private: 79 | std::string_view input_; 80 | std::string_view::iterator currentChar_; 81 | unsigned int line_; 82 | unsigned int column_; 83 | }; 84 | 85 | } // namespace klex::regular 86 | -------------------------------------------------------------------------------- /src/klex/util/UnboxedRange.h: -------------------------------------------------------------------------------- 1 | // This file is part of the "x0" project, http://github.com/christianparpart/x0> 2 | // (c) 2009-2018 Christian Parpart 3 | // 4 | // Licensed under the MIT License (the "License"); you may not use this 5 | // file except in compliance with the License. 
You may obtain a copy of 6 | // the License at: http://opensource.org/licenses/MIT 7 | #pragma once 8 | 9 | #include 10 | 11 | namespace klex::util { 12 | 13 | template 14 | class UnboxedRange { 15 | public: 16 | using BoxedContainer = T; 17 | using BoxedIterator = typename BoxedContainer::iterator; 18 | using element_type = typename BoxedContainer::value_type::element_type; 19 | 20 | class iterator { // {{{ 21 | public: 22 | typedef typename BoxedContainer::iterator::difference_type difference_type; 23 | typedef typename BoxedContainer::iterator::value_type::element_type value_type; 24 | typedef typename BoxedContainer::iterator::value_type::element_type* pointer; 25 | typedef typename BoxedContainer::iterator::value_type::element_type& reference; 26 | typedef typename BoxedContainer::iterator::iterator_category iterator_category; 27 | 28 | explicit iterator(BoxedIterator boxed) : it_(boxed) {} 29 | 30 | const element_type& operator->() const { return **it_; } 31 | element_type& operator->() { return **it_; } 32 | 33 | const element_type* operator*() const { return (*it_).get(); } 34 | element_type* operator*() { return (*it_).get(); } 35 | 36 | iterator& operator++() 37 | { 38 | ++it_; 39 | return *this; 40 | } 41 | iterator& operator++(int) 42 | { 43 | ++it_; 44 | return *this; 45 | } 46 | 47 | bool operator==(const iterator& other) const { return it_ == other.it_; } 48 | bool operator!=(const iterator& other) const { return it_ != other.it_; } 49 | 50 | private: 51 | BoxedIterator it_; 52 | }; // }}} 53 | 54 | UnboxedRange(BoxedIterator begin, BoxedIterator end) : begin_(begin), end_(end) {} 55 | explicit UnboxedRange(BoxedContainer& c) : begin_(c.begin()), end_(c.end()) {} 56 | explicit UnboxedRange(const BoxedContainer& c) : UnboxedRange{const_cast(c)} {} 57 | 58 | iterator begin() const { return begin_; } 59 | iterator end() const { return end_; } 60 | iterator cbegin() const { return begin_; } 61 | iterator cend() const { return end_; } 62 | size_t size() 
const { return std::distance(begin_, end_); } 63 | 64 | private: 65 | iterator begin_; 66 | iterator end_; 67 | }; 68 | 69 | /** 70 | * Unboxes boxed element types in containers. 71 | * 72 | * Good examples are: 73 | * 74 | * \code 75 | * std::vector> numbers; 76 | * // ... 77 | * for (int number: unbox(numbers)) { 78 | * // ... just use number here, instead of number.get() or *number. 79 | * }; 80 | * \endcode 81 | */ 82 | template 83 | UnboxedRange unbox(BoxedContainer& boxedContainer) 84 | { 85 | return UnboxedRange(boxedContainer); 86 | } 87 | 88 | template 89 | UnboxedRange unbox(const BoxedContainer& boxedContainer) 90 | { 91 | return UnboxedRange(boxedContainer); 92 | } 93 | 94 | } // namespace klex::util 95 | -------------------------------------------------------------------------------- /src/klex/Report.cpp: -------------------------------------------------------------------------------- 1 | // This file is part of the "klex" project, http://github.com/christianparpart/klex> 2 | // (c) 2018 Christian Parpart 3 | // 4 | // Licensed under the MIT License (the "License"); you may not use this 5 | // file except in compliance with the License. 
You may obtain a copy of 6 | // the License at: http://opensource.org/licenses/MIT 7 | 8 | #include 9 | 10 | #include 11 | #include 12 | 13 | using namespace std; 14 | using namespace klex; 15 | 16 | // {{{ Message 17 | string Report::Message::to_string() const 18 | { 19 | switch (type) 20 | { 21 | case Type::Warning: return fmt::format("[{}] {}", sourceLocation, text); 22 | case Type::LinkError: return fmt::format("{}: {}", type, text); 23 | default: return fmt::format("[{}] {}: {}", sourceLocation, type, text); 24 | } 25 | } 26 | 27 | bool Report::Message::operator==(const Message& other) const noexcept 28 | { 29 | // XXX ignore SourceLocation's filename & end 30 | return type == other.type && sourceLocation.offset == other.sourceLocation.offset && text == other.text; 31 | } 32 | // }}} 33 | // {{{ ConsoleReport 34 | void ConsoleReport::onMessage(Message&& message) 35 | { 36 | switch (message.type) 37 | { 38 | case Type::Warning: cerr << fmt::format("Warning: {}\n", message); break; 39 | default: cerr << fmt::format("Error: {}\n", message); break; 40 | } 41 | } 42 | // }}} 43 | // {{{ BufferedReport 44 | void BufferedReport::onMessage(Message&& msg) 45 | { 46 | messages_.emplace_back(move(msg)); 47 | } 48 | 49 | void BufferedReport::clear() 50 | { 51 | messages_.clear(); 52 | } 53 | 54 | string BufferedReport::to_string() const 55 | { 56 | stringstream sstr; 57 | for (const Message& message: messages_) 58 | { 59 | switch (message.type) 60 | { 61 | case Type::Warning: sstr << "Warning: " << message.to_string() << "\n"; break; 62 | default: sstr << "Error: " << message.to_string() << "\n"; break; 63 | } 64 | } 65 | return sstr.str(); 66 | } 67 | 68 | bool BufferedReport::operator==(const BufferedReport& other) const noexcept 69 | { 70 | if (size() != other.size()) 71 | return false; 72 | 73 | for (size_t i = 0, e = size(); i != e; ++i) 74 | if (messages_[i] != other.messages_[i]) 75 | return false; 76 | 77 | return true; 78 | } 79 | 80 | bool 
BufferedReport::contains(const Message& message) const noexcept 81 | { 82 | for (const Message& m: messages_) 83 | if (m == message) 84 | return true; 85 | 86 | return false; 87 | } 88 | 89 | DifferenceReport difference(const BufferedReport& first, const BufferedReport& second) 90 | { 91 | DifferenceReport diff; 92 | 93 | for (const Report::Message& m: first) 94 | if (!second.contains(m)) 95 | diff.first.push_back(m); 96 | 97 | for (const Report::Message& m: second) 98 | if (!first.contains(m)) 99 | diff.second.push_back(m); 100 | 101 | return diff; 102 | } 103 | 104 | ostream& operator<<(ostream& os, const BufferedReport& report) 105 | { 106 | os << report.to_string(); 107 | return os; 108 | } 109 | // }}} 110 | -------------------------------------------------------------------------------- /src/klex/util/iterator.h: -------------------------------------------------------------------------------- 1 | // This file is part of the "klex" project, http://github.com/christianparpart/klex> 2 | // (c) 2018 Christian Parpart 3 | // 4 | // Licensed under the MIT License (the "License"); you may not use this 5 | // file except in compliance with the License. 
You may obtain a copy of 6 | // the License at: http://opensource.org/licenses/MIT 7 | 8 | #pragma once 9 | 10 | #include 11 | #include 12 | #include 13 | #include 14 | #include 15 | #include 16 | #include 17 | 18 | namespace klex::util { 19 | 20 | template 21 | inline auto reversed(Container&& c) 22 | { 23 | if constexpr (std::is_reference::value) 24 | return detail::reversed{std::forward(c)}; 25 | else 26 | return detail::reversed{std::forward(c)}; 27 | } 28 | 29 | template 30 | inline auto indexed(const Container& c) 31 | { 32 | return typename std::add_const>::type{c}; 33 | } 34 | 35 | template 36 | inline auto indexed(Container& c) 37 | { 38 | return detail::indexed{c}; 39 | } 40 | 41 | template 42 | inline auto translate(const Container& container, Lambda mapfn) { 43 | using namespace std; 44 | using T = decltype(mapfn(*begin(container))); 45 | 46 | vector out; 47 | out.reserve(distance(begin(container), end(container))); 48 | transform(begin(container), end(container), back_inserter(out), move(mapfn)); 49 | 50 | return out; 51 | } 52 | 53 | template 54 | inline std::string join(const Container& container, const std::string& separator = ", ") 55 | { 56 | std::stringstream out; 57 | 58 | for (const auto&& [i, v] : indexed(container)) 59 | if (i) 60 | out << separator << v; 61 | else 62 | out << v; 63 | 64 | return out.str(); 65 | } 66 | 67 | template 68 | inline auto filter(std::initializer_list&& c, Lambda proc) 69 | { 70 | return typename std::add_const, Lambda>>::type{c, proc}; 71 | } 72 | 73 | template 74 | inline auto filter(const Container& c, Lambda proc) 75 | { 76 | return typename std::add_const>::type{c, proc}; 77 | } 78 | 79 | template 80 | inline auto filter(Container& c, Lambda proc) 81 | { 82 | return detail::filter{c, proc}; 83 | } 84 | 85 | /** 86 | * Finds the last occurrence of a given element satisfying @p test. 87 | * 88 | * @returns the iterator representing the last item satisfying @p test or @p end if none found. 
89 | */ 90 | template 91 | auto find_last(const Container& container, Test test) -> decltype(std::cbegin(container)) 92 | { 93 | auto begin = std::cbegin(container); 94 | auto end = std::cend(container); 95 | 96 | for (auto i = std::prev(end); i != begin; --i) 97 | if (test(*i)) 98 | return i; 99 | 100 | if (test(*begin)) 101 | return begin; 102 | else 103 | return end; 104 | } 105 | 106 | } // namespace klex::util 107 | -------------------------------------------------------------------------------- /src/klex/cfg/ll/SyntaxTable.h: -------------------------------------------------------------------------------- 1 | // This file is part of the "klex" project, http://github.com/christianparpart/klex> 2 | // (c) 2018 Christian Parpart 3 | // 4 | // Licensed under the MIT License (the "License"); you may not use this 5 | // file except in compliance with the License. You may obtain a copy of 6 | // the License at: http://opensource.org/licenses/MIT 7 | 8 | #pragma once 9 | 10 | #include 11 | 12 | #include 13 | #include 14 | #include 15 | #include 16 | #include 17 | #include 18 | 19 | namespace klex::cfg { 20 | struct Grammar; 21 | } 22 | 23 | namespace klex::cfg::ll { 24 | 25 | // using Symbol = int; 26 | // using Handle = std::vector; 27 | 28 | /** LL(1)-compatible syntax table. 
29 | */ 30 | struct SyntaxTable { 31 | using Expression = std::vector; // non-terminals & terminals 32 | using LookAheadMap = std::unordered_map; 33 | using NonTerminalMap = std::unordered_map; 34 | using ProductionVec = std::vector; 35 | 36 | std::vector names; 37 | std::vector terminalNames; 38 | std::vector nonterminalNames; 39 | std::vector actionNames; 40 | std::vector productionNames; 41 | ProductionVec productions; 42 | NonTerminalMap table; 43 | int startSymbol; 44 | regular::LexerDef lexerDef; 45 | 46 | int actionId(const std::string& name) const 47 | { 48 | return actionMin() + std::distance(std::begin(actionNames), 49 | std::find_if(std::begin(actionNames), std::end(actionNames), 50 | [&](const std::string& n) { return n == name; })); 51 | } 52 | 53 | std::optional lookup(int nonterminal, int lookahead) const; 54 | 55 | size_t nonterminalCount() const noexcept { return nonterminalNames.size(); } 56 | size_t terminalCount() const noexcept { return terminalNames.size(); } 57 | 58 | int nonterminalMin() const noexcept { return 0; } 59 | int nonterminalMax() const noexcept 60 | { 61 | return nonterminalMin() + static_cast(nonterminalNames.size()) - 1; 62 | } 63 | 64 | int terminalMin() const noexcept { return nonterminalMax() + 1; } 65 | int terminalMax() const noexcept { return terminalMin() + static_cast(terminalNames.size()) - 1; } 66 | 67 | int actionMin() const noexcept { return terminalMax() + 1; } 68 | int actionMax() const noexcept { return actionMin() + static_cast(actionNames.size()) - 1; } 69 | 70 | bool isNonTerminal(int id) const noexcept { return id >= nonterminalMin() && id <= nonterminalMax(); } 71 | bool isTerminal(int id) const noexcept { return id >= terminalMin() && id <= terminalMax(); } 72 | bool isAction(int id) const noexcept { return id >= actionMin() && id <= actionMax(); } 73 | 74 | const std::string& terminalName(int s) const noexcept { return terminalNames[s - terminalMin()]; } 75 | 76 | const std::string& nonterminalName(int s) 
const noexcept 77 | { 78 | return nonterminalNames[s - nonterminalMin()]; 79 | } 80 | 81 | const std::string& actionName(int s) const noexcept { return actionNames[s - actionMin()]; } 82 | 83 | std::string dump(const Grammar& grammar) const; 84 | 85 | static SyntaxTable construct(const Grammar& grammar); 86 | }; 87 | 88 | } // namespace klex::cfg::ll 89 | 90 | // vim:ts=4:sw=4:noet 91 | -------------------------------------------------------------------------------- /src/klex/cfg/GrammarLexer.h: -------------------------------------------------------------------------------- 1 | // This file is part of the "klex" project, http://github.com/christianparpart/klex> 2 | // (c) 2018 Christian Parpart 3 | // 4 | // Licensed under the MIT License (the "License"); you may not use this 5 | // file except in compliance with the License. You may obtain a copy of 6 | // the License at: http://opensource.org/licenses/MIT 7 | 8 | #pragma once 9 | 10 | #include 11 | #include 12 | 13 | namespace klex::cfg { 14 | 15 | class GrammarLexer 16 | { 17 | public: 18 | explicit GrammarLexer(std::string content); 19 | 20 | enum class Token { 21 | Illegal, 22 | Spacing, // [\s\t\n]+ 23 | Identifier, // [a-z][a-z0-9]* 24 | Token, // 'token' 25 | Literal, // '[^']*'|"[^"]*" 26 | Or, // '|' 27 | Semicolon, // ';' 28 | Assoc, // '::=' 29 | SetOpen, // '{' 30 | SetClose, // '}' 31 | Eof, // <> 32 | }; 33 | 34 | [[nodiscard]] bool eof() const noexcept { return offset_ >= content_.size(); } 35 | [[nodiscard]] size_t currentOffset() const { return offset_; } 36 | [[nodiscard]] Token currentToken() const { return currentToken_; } 37 | [[nodiscard]] const std::string& currentLiteral() const noexcept { return currentLiteral_; } 38 | 39 | [[nodiscard]] Token recognize(); 40 | 41 | [[nodiscard]] std::string consumeLiteralUntilLF(); // NB. 
only used for sub-language (klex) 42 | 43 | private: 44 | Token recognizeOne(); 45 | Token consumeIdentifier(); 46 | Token consumeLiteral(); 47 | [[nodiscard]] int currentChar() const; 48 | [[nodiscard]] int peekChar(size_t offset) const; 49 | int consumeChar(size_t count = 1); 50 | 51 | private: 52 | std::string content_; 53 | size_t offset_; 54 | std::string currentLiteral_; 55 | Token currentToken_; 56 | }; 57 | 58 | inline std::string to_string(klex::cfg::GrammarLexer::Token v) 59 | { 60 | switch (v) 61 | { 62 | case klex::cfg::GrammarLexer::Token::Spacing: 63 | return "Spacing"; 64 | case klex::cfg::GrammarLexer::Token::Identifier: 65 | return "Identifier"; 66 | case klex::cfg::GrammarLexer::Token::Token: 67 | return "Token"; 68 | case klex::cfg::GrammarLexer::Token::Literal: 69 | return "Literal"; 70 | case klex::cfg::GrammarLexer::Token::Or: 71 | return "'|'"; 72 | case klex::cfg::GrammarLexer::Token::Semicolon: 73 | return "';'"; 74 | case klex::cfg::GrammarLexer::Token::Assoc: 75 | return "'::='"; 76 | case klex::cfg::GrammarLexer::Token::SetOpen: 77 | return "'{'"; 78 | case klex::cfg::GrammarLexer::Token::SetClose: 79 | return "'}'"; 80 | case klex::cfg::GrammarLexer::Token::Eof: 81 | return "<>"; 82 | // case klex::cfg::GrammarLexer::Illegal: 83 | default: 84 | return "Illegal"; 85 | } 86 | } 87 | 88 | } // namespace klex::cfg 89 | 90 | namespace fmt { 91 | template <> 92 | struct formatter { 93 | template 94 | constexpr auto parse(ParseContext& ctx) 95 | { 96 | return ctx.begin(); 97 | } 98 | 99 | template 100 | constexpr auto format(const klex::cfg::GrammarLexer::Token& v, FormatContext& ctx) 101 | { 102 | return format_to(ctx.out(), "{}", to_string(v)); 103 | } 104 | }; 105 | } // namespace fmt 106 | 107 | // vim:ts=4:sw=4:noet 108 | -------------------------------------------------------------------------------- /src/klex/cfg/LeftRecursion_test.cpp: -------------------------------------------------------------------------------- 1 | // This file is 
part of the "klex" project, http://github.com/christianparpart/klex> 2 | // (c) 2018 Christian Parpart 3 | // 4 | // Licensed under the MIT License (the "License"); you may not use this 5 | // file except in compliance with the License. You may obtain a copy of 6 | // the License at: http://opensource.org/licenses/MIT 7 | 8 | #include 9 | #include 10 | #include 11 | #include 12 | #include 13 | 14 | using namespace std; 15 | using namespace klex; 16 | using namespace klex::cfg; 17 | using namespace klex::util::literals; 18 | 19 | Grammar makeGrammar(string G) 20 | { 21 | BufferedReport report; 22 | Grammar grammar = GrammarParser(move(G), &report).parse(); 23 | ASSERT_FALSE(report.containsFailures()); 24 | return grammar; 25 | } 26 | 27 | TEST(cfg_LeftRecursion, isLeftRecursive) 28 | { 29 | BufferedReport report; 30 | 31 | // direct left-recursive 32 | const Grammar grammar = GrammarParser("A ::= A 'b' | 'a';", &report).parse(); 33 | ASSERT_FALSE(report.containsFailures()); 34 | ASSERT_TRUE(isLeftRecursive(grammar)); 35 | 36 | // direct right recursive 37 | const Grammar right = GrammarParser("A ::= 'b' A | 'a';", &report).parse(); 38 | ASSERT_FALSE(report.containsFailures()); 39 | ASSERT_FALSE(isLeftRecursive(right)); 40 | 41 | // neither left nor right 42 | const Grammar neinor = GrammarParser("A ::= 'b' | 'a';", &report).parse(); 43 | ASSERT_FALSE(report.containsFailures()); 44 | ASSERT_FALSE(isLeftRecursive(neinor)); 45 | } 46 | 47 | TEST(cfg_LeftRecursion, simple) 48 | { 49 | ConsoleReport report; 50 | Grammar grammar = GrammarParser(R"(`S ::= A; 51 | `A ::= A 'b' 52 | ` | 'a'; 53 | `)"_multiline, 54 | &report) 55 | .parse(); 56 | 57 | ASSERT_FALSE(report.containsFailures()); 58 | ASSERT_TRUE(isLeftRecursive(grammar)); 59 | 60 | LeftRecursion { grammar }.direct(); 61 | 62 | grammar.finalize(); 63 | logf("grammar: {}", grammar.dump()); 64 | 65 | ASSERT_FALSE(isLeftRecursive(grammar)); 66 | } 67 | 68 | TEST(cfg_LeftRecursion, ETF) 69 | { 70 | BufferedReport 
report; 71 | Grammar grammar = GrammarParser(R"(`token { 72 | ` Spacing(ignore) ::= [\s\t]+ 73 | ` Number ::= [0-9]+ 74 | `} 75 | ` 76 | `Start ::= Expr; 77 | `Expr ::= Expr '+' Term 78 | ` | Expr '-' Term 79 | ` | Term ; 80 | `Term ::= Term '*' Factor 81 | ` | Term '/' Factor 82 | ` | Factor ; 83 | `Factor ::= '(' Expr ')' 84 | ` | Number 85 | ` ; 86 | `)"_multiline, 87 | &report) 88 | .parse(); 89 | 90 | ASSERT_FALSE(report.containsFailures()); 91 | ASSERT_TRUE(isLeftRecursive(grammar)); 92 | 93 | LeftRecursion { grammar }.direct(); 94 | 95 | grammar.finalize(); 96 | logf("grammar: {}", grammar.dump()); 97 | 98 | ASSERT_FALSE(isLeftRecursive(grammar)); 99 | } 100 | -------------------------------------------------------------------------------- /src/klex/regular/Compiler.h: -------------------------------------------------------------------------------- 1 | // This file is part of the "klex" project, http://github.com/christianparpart/klex> 2 | // (c) 2018 Christian Parpart 3 | // 4 | // Licensed under the MIT License (the "License"); you may not use this 5 | // file except in compliance with the License. You may obtain a copy of 6 | // the License at: http://opensource.org/licenses/MIT 7 | #pragma once 8 | 9 | #include 10 | #include 11 | #include 12 | #include 13 | #include 14 | 15 | #include 16 | #include 17 | #include 18 | #include 19 | #include 20 | 21 | namespace klex::regular { 22 | 23 | struct MultiDFA; 24 | 25 | /** 26 | * Top-Level API for compiling lexical patterns into table definitions for Lexer. 27 | * 28 | * @see Lexer 29 | */ 30 | class Compiler { 31 | public: 32 | using TagNameMap = std::map; 33 | using OvershadowMap = DFABuilder::OvershadowMap; 34 | using AutomataMap = std::map; 35 | 36 | Compiler() : rules_{}, containsBeginOfLine_{false}, fa_{}, names_{} {} 37 | 38 | /** 39 | * Parses a @p stream of textual rule definitions to construct their internal data structures. 
40 | */ 41 | void parse(std::unique_ptr stream); 42 | void parse(std::string text); 43 | 44 | /** 45 | * Parses a list of @p rules to construct their internal data structures. 46 | */ 47 | void declareAll(RuleList rules); 48 | 49 | const RuleList& rules() const noexcept { return rules_; } 50 | const TagNameMap& names() const noexcept { return names_; } 51 | size_t size() const; 52 | 53 | /** 54 | * Compiles all previously parsed rules into a DFA. 55 | */ 56 | DFA compileDFA(OvershadowMap* overshadows = nullptr); 57 | MultiDFA compileMultiDFA(OvershadowMap* overshadows = nullptr); 58 | 59 | /** 60 | * Compiles all previously parsed rules into a minimal DFA. 61 | */ 62 | DFA compileMinimalDFA(); 63 | 64 | /** 65 | * Compiles all previously parsed rules into a suitable data structure for Lexer. 66 | * 67 | * @see Lexer 68 | */ 69 | LexerDef compile(); 70 | 71 | /** 72 | * Compiles all previously parsed rules into a suitable data structure for Lexer, taking care of 73 | * multiple conditions as well as begin-of-line. 74 | */ 75 | LexerDef compileMulti(OvershadowMap* overshadows = nullptr); 76 | 77 | /** 78 | * Translates the given DFA @p dfa with a given TagNameMap @p names into trivial table mappings. 79 | * 80 | * @see Lexer 81 | */ 82 | static LexerDef generateTables(const DFA& dfa, bool requiresBeginOfLine, const TagNameMap& names); 83 | static LexerDef generateTables(const MultiDFA& dfa, bool requiresBeginOfLine, const TagNameMap& names); 84 | 85 | const std::map& automata() const { return fa_; } 86 | 87 | bool containsBeginOfLine() const noexcept { return containsBeginOfLine_; } 88 | 89 | private: 90 | /** 91 | * Parses a single @p rule to construct its internal data structures. 
92 | */ 93 | void declare(const Rule& rule, const std::string& conditionSuffix = ""); 94 | 95 | private: 96 | RuleList rules_; 97 | bool containsBeginOfLine_; 98 | AutomataMap fa_; 99 | TagNameMap names_; 100 | }; 101 | 102 | } // namespace klex::regular 103 | -------------------------------------------------------------------------------- /src/klex/cfg/ll/Analyzer.h: -------------------------------------------------------------------------------- 1 | // This file is part of the "klex" project, http://github.com/christianparpart/klex> 2 | // (c) 2018 Christian Parpart 3 | // 4 | // Licensed under the MIT License (the "License"); you may not use this 5 | // file except in compliance with the License. You may obtain a copy of 6 | // the License at: http://opensource.org/licenses/MIT 7 | 8 | #pragma once 9 | 10 | #include 11 | #include 12 | #include 13 | 14 | #include 15 | #include 16 | #include 17 | #include 18 | #include 19 | 20 | namespace klex::cfg::ll { 21 | 22 | template 23 | class Analyzer { 24 | public: 25 | using Terminal = regular::Tag; // typename regular::Lexer::value_type; 26 | using NonTerminal = int; 27 | using Action = int; 28 | using Lexer = regular::Lexer; 29 | using ActionHandler = std::function&)>; 30 | 31 | struct StateValue { 32 | int value; 33 | operator int() const noexcept { return value; } 34 | StateValue(int _value) : value{_value} {} 35 | }; 36 | 37 | Analyzer(const SyntaxTable& table, Report* report, std::string input, 38 | ActionHandler actionHandler = ActionHandler()); 39 | 40 | [[nodiscard]] const Lexer& lexer() const noexcept { return lexer_; } 41 | [[nodiscard]] const std::string& lastLiteral() const noexcept { return lastLiteral_; } 42 | 43 | [[nodiscard]] const std::string& actionName(int id) const noexcept { return def_.actionNames[id - def_.actionMin()]; } 44 | 45 | [[nodiscard]] const SemanticValue& semanticValue(int offset) const { 46 | if (offset < 0) 47 | return valueStack_[valueStack_.size() + offset]; 48 | else 49 | return 
valueStack_[valueStackBase_ + offset]; 50 | } 51 | 52 | [[nodiscard]] std::optional analyze(); 53 | 54 | private: 55 | [[nodiscard]] std::optional getHandleFor(StateValue nonterminal, 56 | Terminal currentTerminal) const; 57 | 58 | [[nodiscard]] bool isAction(StateValue v) const noexcept; 59 | [[nodiscard]] bool isTerminal(StateValue v) const noexcept; 60 | [[nodiscard]] bool isNonTerminal(StateValue v) const noexcept; 61 | 62 | void log(const std::string& msg); 63 | 64 | [[nodiscard]] std::string dumpStateStack() const; 65 | [[nodiscard]] std::string dumpSemanticStack() const; 66 | [[nodiscard]] std::string stateValue(StateValue sv) const; 67 | [[nodiscard]] std::string handleString(const SyntaxTable::Expression& handle) const; 68 | 69 | private: 70 | const SyntaxTable& def_; 71 | Lexer lexer_; 72 | std::string lastLiteral_; 73 | Report* report_; 74 | std::deque stack_; 75 | std::deque valueStack_; 76 | size_t valueStackBase_; 77 | ActionHandler actionHandler_; 78 | }; 79 | 80 | } // namespace klex::cfg::ll 81 | 82 | namespace fmt { 83 | template <> 84 | struct formatter::StateValue> { 85 | template 86 | constexpr auto parse(ParseContext& ctx) 87 | { 88 | return ctx.begin(); 89 | } 90 | 91 | template 92 | constexpr auto format(const klex::cfg::ll::Analyzer::StateValue& v, FormatContext& ctx) 93 | { 94 | return format_to(ctx.out(), "{}", "hello"); 95 | } 96 | }; 97 | } // namespace fmt 98 | 99 | #include 100 | 101 | // vim:ts=4:sw=4:noet 102 | -------------------------------------------------------------------------------- /examples/flow.klex: -------------------------------------------------------------------------------- 1 | # vim:syntax=klex 2 | # Lexical Grammar for the Flow Language 3 | 4 | # be case insensitive in pattern matching? 
5 | # %pragma ignorecase 6 | 7 | # NUMBER ::= 0|[1-9][0-9]* 8 | # IDENT ::= [a-zA-Z_][a-zA-Z_0-9]* 9 | # IP4 ::= {IP4Oct}(\.{IP4Oct}){3} 10 | # IP4Cidr ::= {IP4}/{CidrMask} 11 | # 12 | # %% 13 | 14 | # symbols 15 | Assign ::= "=" 16 | OrAssign ::= "|=" 17 | AndAssign ::= "&=" 18 | PlusAssign ::= "+=" 19 | MinusAssign ::= "-=" 20 | MulAssign ::= "*=" 21 | DivAssign ::= "/=" 22 | Semicolon ::= ";" 23 | Question ::= "?" 24 | Colon ::= ":" 25 | And ::= "and" 26 | Or ::= "or" 27 | Xor ::= "xor" 28 | Equal ::= "==" 29 | UnEqual ::= "!=" 30 | 31 | Less ::= "<" 32 | Greater ::= ">" 33 | LessOrEqual ::= "<=" 34 | GreaterOrEqual ::= ">=" 35 | PrefixMatch ::= "=^" 36 | SuffixMatch ::= "=$" 37 | RegexMatch ::= "=~" 38 | In ::= "in" 39 | HashRocket ::= "=>" 40 | Plus ::= "+" 41 | Minus ::= "-" 42 | Mul ::= "/*" 43 | Div ::= "/" 44 | Mod ::= "%" 45 | Shl ::= "shl" 46 | Shr ::= "shr" 47 | Comma ::= "," 48 | Pow ::= "**" 49 | Not ::= "not" 50 | BitNot ::= "~" 51 | BitOr ::= "|" 52 | BitAnd ::= "&" 53 | BitXor ::= "^" 54 | BrOpen ::= "[" 55 | BrClose ::= "]" 56 | RndOpen ::= "(" 57 | RndClose ::= ")" 58 | Begin ::= "{" 59 | End ::= "}" 60 | 61 | # keywords 62 | Handler ::= handler 63 | If ::= if 64 | Then ::= then 65 | Else ::= else 66 | Unless ::= unless 67 | Match ::= match 68 | On ::= on 69 | While ::= while 70 | For ::= for 71 | Import ::= import 72 | From ::= from 73 | Var ::= var 74 | 75 | # data types 76 | VoidType ::= void 77 | BoolType ::= bool 78 | NumberType ::= int 79 | StringType ::= string 80 | 81 | # literals 82 | TrueLiteral ::= true 83 | FalseLiteral ::= false 84 | StringLiteral ::= '([^'\n]|\\\\')*'|\"([^\"\n]|\\\")*\" 85 | NumberLiteral ::= [0-9]+|[0-9]{1,3}(_[0-9]{3})* 86 | 87 | IPv4Octet(ref) ::= [0-9]|[1-9][0-9]|1[0-9][0-9]|2[0-4][0-9]|25[0-5] 88 | IPv4(ref) ::= {IPv4Octet}(\.{IPv4Octet}){3} 89 | IPv4Literal ::= {IPv4} 90 | 91 | CidrPart(ref) ::= [0-9]|[1-2][0-9]|3[012] 92 | Cidr ::= {IPv4}\/{CidrPart} 93 | 94 | ipv6Part(ref) ::= [[:xdigit:]]{1,4} 95 | IPv6 
::= {ipv6Part}(:{ipv6Part}){7,7} 96 | | ({ipv6Part}:){1,7}: 97 | | :(:{ipv6Part}){1,7} 98 | | :: 99 | | ({ipv6Part}:){1}(:{ipv6Part}){0,6} 100 | | ({ipv6Part}:){2}(:{ipv6Part}){0,5} 101 | | ({ipv6Part}:){3}(:{ipv6Part}){0,4} 102 | | ({ipv6Part}:){4}(:{ipv6Part}){0,3} 103 | | ({ipv6Part}:){5}(:{ipv6Part}){0,2} 104 | | ({ipv6Part}:){6}(:{ipv6Part}){0,1} 105 | | ::[fF]{4}:{IPv4} 106 | 107 | # misc 108 | Ident ::= [a-zA-Z_][a-zA-Z_0-9]* 109 | RegExpGroup ::= \$[0-9]+ 110 | 111 | RegExp ::= [^/]*/ 112 | 113 | # specials 114 | Comment(ignore) ::= "#.*" 115 | Spacing(ignore) ::= [\s\t\n]+ 116 | <*>Eof ::= <> 117 | -------------------------------------------------------------------------------- /.clang-tidy: -------------------------------------------------------------------------------- 1 | --- 2 | Checks: >- 3 | -*, 4 | bugprone-*, 5 | -bugprone-easily-swappable-parameters, 6 | -bugprone-suspicious-include, 7 | -bugprone-unchecked-optional-access, 8 | cppcoreguidelines-*, 9 | -cppcoreguidelines-avoid-c-arrays, 10 | -cppcoreguidelines-avoid-magic-numbers, 11 | -cppcoreguidelines-macro-usage, 12 | -cppcoreguidelines-no-malloc, 13 | -cppcoreguidelines-non-private-member-variables-in-classes, 14 | -cppcoreguidelines-pro-bounds-constant-array-index, 15 | -cppcoreguidelines-owning-memory, 16 | -cppcoreguidelines-pro-bounds-array-to-pointer-decay, 17 | -cppcoreguidelines-pro-bounds-pointer-arithmetic, 18 | -cppcoreguidelines-pro-type-const-cast, 19 | -cppcoreguidelines-pro-type-cstyle-cast, 20 | -cppcoreguidelines-pro-type-static-cast-downcast, 21 | -cppcoreguidelines-pro-type-vararg, 22 | -cppcoreguidelines-special-member-functions, 23 | modernize-*, 24 | -modernize-avoid-bind, 25 | -modernize-avoid-c-arrays, 26 | -modernize-return-braced-init-list, 27 | -modernize-use-bool-literals, 28 | -modernize-use-nullptr, 29 | -modernize-use-trailing-return-type, 30 | readability-non-const-parameter, 31 | readability-redundant-*, 32 | -readability-redundant-access-specifiers, 
WarningsAsErrors: >- 34 | clang-analyzer-*, 35 | clang-diagnostic-*, 36 | performance-*, 37 | -performance-no-int-to-ptr, 38 | readability-identifier-naming 39 | UseColor: true 40 | HeaderFilterRegex: '^src/(terminal.*)/.*\.(h|cpp)$' 41 | AnalyzeTemporaryDtors: false 42 | FormatStyle: none 43 | CheckOptions: 44 | - key: bugprone-easily-swappable-parameters.MinimumLength 45 | value: '3' 46 | - key: cert-dcl16-c.NewSuffixes 47 | value: 'L;LL;LU;LLU' 48 | - key: cert-oop54-cpp.WarnOnlyIfThisHasSuspiciousField 49 | value: '0' 50 | - key: cppcoreguidelines-explicit-virtual-functions.IgnoreDestructors 51 | value: '1' 52 | - key: cppcoreguidelines-non-private-member-variables-in-classes.IgnoreClassesWithAllMemberVariablesBeingPublic 53 | value: '1' 54 | - key: google-readability-braces-around-statements.ShortStatementLines 55 | value: '1' 56 | - key: google-readability-function-size.StatementThreshold 57 | value: '800' 58 | - key: google-readability-namespace-comments.ShortNamespaceLines 59 | value: '10' 60 | - key: google-readability-namespace-comments.SpacesBeforeComments 61 | value: '2' 62 | - key: modernize-loop-convert.MaxCopySize 63 | value: '16' 64 | - key: modernize-loop-convert.MinConfidence 65 | value: reasonable 66 | - key: modernize-loop-convert.NamingStyle 67 | value: CamelCase 68 | - key: modernize-pass-by-value.IncludeStyle 69 | value: llvm 70 | - key: modernize-replace-auto-ptr.IncludeStyle 71 | value: llvm 72 | - key: modernize-use-nullptr.NullMacros 73 | value: 'NULL' 74 | - key: modernize-use-default-member-init.UseAssignment 75 | value: '1' 76 | # - key: readability-identifier-naming.EnumCase 77 | # value: CamelCase 78 | # - key: readability-identifier-naming.ClassCase 79 | # value: CamelCase 80 | # - key: readability-identifier-naming.ClassMemberCase 81 | # value: camelBack 82 | # - key: readability-identifier-naming.ClassMethodCase 83 | # value: camelBack 84 | # - key: readability-identifier-naming.ParameterCase 85 | # value: camelBack 86 | # - key: 
readability-identifier-naming.ParameterPrefix 87 | # value: '' 88 | # - key: readability-identifier-naming.ScopedEnumConstantCase 89 | # value: CamelCase 90 | -------------------------------------------------------------------------------- /src/klex/regular/DotWriter.cpp: -------------------------------------------------------------------------------- 1 | // This file is part of the "klex" project, http://github.com/christianparpart/klex> 2 | // (c) 2018 Christian Parpart 3 | // 4 | // Licensed under the MIT License (the "License"); you may not use this 5 | // file except in compliance with the License. You may obtain a copy of 6 | // the License at: http://opensource.org/licenses/MIT 7 | 8 | #include 9 | #include 10 | 11 | #include 12 | 13 | #include 14 | #include 15 | #include 16 | 17 | using namespace std; 18 | 19 | namespace klex::regular 20 | { 21 | 22 | template 23 | static string escapeString(const StringType& str) 24 | { 25 | stringstream stream_; 26 | for (char ch: str) 27 | { 28 | // \t\n\r is already converted to escape sequence 29 | switch (ch) 30 | { 31 | case '\\': stream_ << "\\\\"; break; 32 | case '"': stream_ << "\\\""; break; 33 | default: stream_ << ch; break; 34 | } 35 | } 36 | return stream_.str(); 37 | } 38 | 39 | void DotWriter::start(StateId initialState) 40 | { 41 | initialState_ = initialState; 42 | stream_ << "digraph {\n"; 43 | stream_ << " rankdir=LR;\n"; 44 | // stream_ << " label=\"" << escapeString("FA" /*TODO*/) << "\";\n"; 45 | } 46 | 47 | void DotWriter::visitNode(StateId number, bool start, bool accept) 48 | { 49 | if (start) 50 | { 51 | const string_view shape = accept ? 
"doublecircle" : "circle"; 52 | stream_ << " \"\" [shape=plaintext];\n"; 53 | stream_ << " node [shape=" << shape << ",color=red];\n"; 54 | stream_ << " \"\" -> " << stateLabelPrefix_ << number << ";\n"; 55 | stream_ << " node [color=black];\n"; 56 | } 57 | else if (accept) 58 | { 59 | stream_ << " node [shape=doublecircle]; " << stateLabelPrefix_ << number << ";\n"; 60 | stream_ << " node [shape=circle,color=black];\n"; 61 | } 62 | else 63 | { 64 | // stream_ << stateLabelPrefix_ << number << ";\n"; 65 | } 66 | } 67 | 68 | void DotWriter::visitEdge(StateId from, StateId to, Symbol s) 69 | { 70 | transitionGroups_[to].push_back(s); 71 | } 72 | 73 | void DotWriter::endVisitEdge(StateId from, StateId to) 74 | { 75 | auto& tgroup = transitionGroups_[to]; 76 | if (!tgroup.empty()) 77 | { 78 | if (from == initialState_ && initialStates_ != nullptr) 79 | { 80 | for (Symbol s: tgroup) 81 | { 82 | const string label = [this, s]() { 83 | for (const auto& p: *initialStates_) 84 | if (p.second == static_cast(s)) 85 | return fmt::format("<{}>", p.first); 86 | return prettySymbol(s); 87 | }(); 88 | stream_ << fmt::format(" {}{} -> {}{} [label=\"{}\"];\n", 89 | stateLabelPrefix_, 90 | from, 91 | stateLabelPrefix_, 92 | to, 93 | escapeString(label)); 94 | } 95 | } 96 | else 97 | { 98 | string label = groupCharacterClassRanges(move(tgroup)); 99 | stream_ << fmt::format(" {}{} -> {}{} [label=\"{}\"];\n", 100 | stateLabelPrefix_, 101 | from, 102 | stateLabelPrefix_, 103 | to, 104 | escapeString(label)); 105 | } 106 | tgroup.clear(); 107 | } 108 | } 109 | 110 | void DotWriter::end() 111 | { 112 | stream_ << "}\n"; 113 | } 114 | 115 | } // namespace klex::regular 116 | -------------------------------------------------------------------------------- /src/klex/regular/NFABuilder.cpp: -------------------------------------------------------------------------------- 1 | // This file is part of the "klex" project, http://github.com/christianparpart/klex> 2 | // (c) 2018 Christian Parpart 3 
| // 4 | // Licensed under the MIT License (the "License"); you may not use this 5 | // file except in compliance with the License. You may obtain a copy of 6 | // the License at: http://opensource.org/licenses/MIT 7 | 8 | #include 9 | #include 10 | 11 | using namespace std; 12 | 13 | namespace klex::regular 14 | { 15 | 16 | NFA NFABuilder::construct(const RegExpr& re, Tag tag) 17 | { 18 | visit(*this, re); 19 | 20 | // fa_.setAccept(acceptState_.value_or(fa_.acceptStateId()), tag); 21 | if (acceptState_) 22 | fa_.setAccept(acceptState_.value(), tag); 23 | else 24 | fa_.setAccept(tag); 25 | 26 | return move(fa_); 27 | } 28 | 29 | NFA NFABuilder::construct(const RegExpr& re) 30 | { 31 | visit(*this, re); 32 | return move(fa_); 33 | } 34 | 35 | void NFABuilder::operator()(const LookAheadExpr& lookaheadExpr) 36 | { 37 | // fa_ = move(construct(lookaheadExpr.leftExpr()).lookahead(construct(lookaheadExpr.rightExpr()))); 38 | NFA lhs = construct(*lookaheadExpr.left); 39 | NFA rhs = construct(*lookaheadExpr.right); 40 | lhs.lookahead(move(rhs)); 41 | fa_ = move(lhs); 42 | } 43 | 44 | void NFABuilder::operator()(const AlternationExpr& alternationExpr) 45 | { 46 | NFA lhs = construct(*alternationExpr.left); 47 | NFA rhs = construct(*alternationExpr.right); 48 | lhs.alternate(move(rhs)); 49 | fa_ = move(lhs); 50 | } 51 | 52 | void NFABuilder::operator()(const ConcatenationExpr& concatenationExpr) 53 | { 54 | NFA lhs = construct(*concatenationExpr.left); 55 | NFA rhs = construct(*concatenationExpr.right); 56 | lhs.concatenate(move(rhs)); 57 | fa_ = move(lhs); 58 | } 59 | 60 | void NFABuilder::operator()(const CharacterExpr& characterExpr) 61 | { 62 | fa_ = NFA { characterExpr.value }; 63 | } 64 | 65 | void NFABuilder::operator()(const CharacterClassExpr& characterClassExpr) 66 | { 67 | fa_ = NFA { characterClassExpr.symbols }; 68 | } 69 | 70 | void NFABuilder::operator()(const ClosureExpr& closureExpr) 71 | { 72 | const unsigned xmin = closureExpr.minimumOccurrences; 73 | 
const unsigned xmax = closureExpr.maximumOccurrences; 74 | constexpr unsigned Infinity = numeric_limits::max(); 75 | 76 | if (xmin == 0 && xmax == 1) 77 | fa_ = move(construct(*closureExpr.subExpr).optional()); 78 | else if (xmin == 0 && xmax == Infinity) 79 | fa_ = move(construct(*closureExpr.subExpr).recurring()); 80 | else if (xmin == 1 && xmax == Infinity) 81 | fa_ = move(construct(*closureExpr.subExpr).positive()); 82 | else if (xmin < xmax) 83 | fa_ = move(construct(*closureExpr.subExpr).repeat(xmin, xmax)); 84 | else if (xmin == xmax) 85 | fa_ = move(construct(*closureExpr.subExpr).times(xmin)); 86 | else 87 | throw invalid_argument { "closureExpr" }; 88 | } 89 | 90 | void NFABuilder::operator()(const BeginOfLineExpr&) 91 | { 92 | fa_ = NFA { Symbols::Epsilon }; 93 | } 94 | 95 | void NFABuilder::operator()(const EndOfLineExpr& eolExpr) 96 | { 97 | // NFA lhs; 98 | // NFA rhs{'\n'}; 99 | // lhs.lookahead(move(rhs)); 100 | // fa_ = move(lhs); 101 | fa_ = move(NFA {}.lookahead(NFA { '\n' })); 102 | } 103 | 104 | void NFABuilder::operator()(const EndOfFileExpr& eofExpr) 105 | { 106 | fa_ = NFA { Symbols::EndOfFile }; 107 | } 108 | 109 | void NFABuilder::operator()(const DotExpr& dotExpr) 110 | { 111 | // any character except LF 112 | fa_ = NFA { '\t' }; 113 | for (int ch = 32; ch < 127; ++ch) 114 | { 115 | fa_.addTransition(fa_.initialStateId(), ch, fa_.acceptStateId()); 116 | } 117 | } 118 | 119 | void NFABuilder::operator()(const EmptyExpr& emptyExpr) 120 | { 121 | fa_ = NFA { Symbols::Epsilon }; 122 | } 123 | 124 | } // namespace klex::regular 125 | -------------------------------------------------------------------------------- /src/klex/regular/Rule.h: -------------------------------------------------------------------------------- 1 | // This file is part of the "klex" project, http://github.com/christianparpart/klex> 2 | // (c) 2018 Christian Parpart 3 | // 4 | // Licensed under the MIT License (the "License"); you may not use this 5 | // file except in 
compliance with the License. You may obtain a copy of 6 | // the License at: http://opensource.org/licenses/MIT 7 | #pragma once 8 | 9 | #include // IgnoreTag 10 | #include 11 | #include 12 | #include // Tag 13 | #include 14 | #include 15 | #include 16 | #include 17 | 18 | namespace klex::regular { 19 | 20 | struct Rule { 21 | unsigned int line; 22 | unsigned int column; 23 | Tag tag; 24 | std::vector conditions; 25 | std::string name; 26 | std::string pattern; 27 | std::unique_ptr regexpr = nullptr; 28 | 29 | bool isIgnored() const noexcept { return tag == IgnoreTag; } 30 | 31 | Rule clone() const 32 | { 33 | return regexpr ? Rule{line, 34 | column, 35 | tag, 36 | conditions, 37 | name, 38 | pattern, 39 | std::make_unique(RegExprParser{}.parse(pattern, line, column))} 40 | : Rule{line, column, tag, conditions, name, pattern, nullptr}; 41 | } 42 | 43 | Rule() = default; 44 | 45 | Rule(unsigned _line, unsigned _column, Tag _tag, std::vector _conditions, std::string _name, 46 | std::string _pattern, std::unique_ptr _regexpr = nullptr) 47 | : line{_line}, 48 | column{_column}, 49 | tag{_tag}, 50 | conditions{_conditions}, 51 | name{_name}, 52 | pattern{_pattern}, 53 | regexpr{std::move(_regexpr)} 54 | { 55 | } 56 | 57 | Rule(const Rule& v) 58 | : line{v.line}, 59 | column{v.column}, 60 | tag{v.tag}, 61 | conditions{v.conditions}, 62 | name{v.name}, 63 | pattern{v.pattern}, 64 | regexpr{v.regexpr ? std::make_unique(RegExprParser{}.parse(pattern, line, column)) : nullptr} 65 | { 66 | } 67 | 68 | Rule& operator=(const Rule& v) 69 | { 70 | line = v.line; 71 | column = v.column; 72 | tag = v.tag; 73 | conditions = v.conditions; 74 | name = v.name; 75 | pattern = v.pattern; 76 | regexpr = v.regexpr ? 
std::make_unique(RegExprParser{}.parse(pattern, line, column)) : nullptr; 77 | return *this; 78 | } 79 | 80 | bool operator<(const Rule& rhs) const noexcept { return tag < rhs.tag; } 81 | bool operator<=(const Rule& rhs) const noexcept { return tag <= rhs.tag; } 82 | bool operator==(const Rule& rhs) const noexcept { return tag == rhs.tag; } 83 | bool operator!=(const Rule& rhs) const noexcept { return tag != rhs.tag; } 84 | bool operator>=(const Rule& rhs) const noexcept { return tag >= rhs.tag; } 85 | bool operator>(const Rule& rhs) const noexcept { return tag > rhs.tag; } 86 | }; 87 | 88 | using RuleList = std::vector; 89 | 90 | inline bool ruleContainsBeginOfLine(const Rule& r) 91 | { 92 | return containsBeginOfLine(*r.regexpr); 93 | } 94 | 95 | } // namespace klex::regular 96 | 97 | namespace fmt { 98 | template <> 99 | struct formatter { 100 | template 101 | constexpr auto parse(ParseContext& ctx) 102 | { 103 | return ctx.begin(); 104 | } 105 | 106 | template 107 | constexpr auto format(const klex::regular::Rule& v, FormatContext& ctx) 108 | { 109 | if (!v.conditions.empty()) 110 | { 111 | format_to(ctx.out(), "<"); 112 | for (size_t i = 0; i < v.conditions.size(); ++i) 113 | if (i != 0) 114 | format_to(ctx.out(), ", {}", v.conditions[i]); 115 | else 116 | format_to(ctx.out(), "{}", v.conditions[i]); 117 | format_to(ctx.out(), ">"); 118 | } 119 | if (v.tag == klex::regular::IgnoreTag) 120 | return format_to(ctx.out(), "{}({}) ::= {}", v.name, "ignore", v.pattern); 121 | else 122 | return format_to(ctx.out(), "{}({}) ::= {}", v.name, v.tag, v.pattern); 123 | } 124 | }; 125 | } // namespace fmt 126 | -------------------------------------------------------------------------------- /src/klex/util/AnsiColor.h: -------------------------------------------------------------------------------- 1 | // This file is part of the "x0" project, http://github.com/christianparpart/x0> 2 | // (c) 2009-2019 Christian Parpart 3 | // 4 | // Licensed under the MIT License (the 
namespace AnsiColor {

/// Bit-encoded ANSI text attributes: the low byte holds independent attribute
/// flags, bits 8-11 hold the foreground color and bits 12-15 the background color.
enum Code : unsigned {
	Clear = 0,
	Reset = Clear,
	Bold = 0x0001,       // 1
	Dark = 0x0002,       // 2
	Undef1 = 0x0004,
	Underline = 0x0008,  // 4
	Blink = 0x0010,      // 5
	Undef2 = 0x0020,
	Reverse = 0x0040,    // 7
	Concealed = 0x0080,  // 8
	AllFlags = 0x00FF,
	Black = 0x0100,
	Red = 0x0200,
	Green = 0x0300,
	Yellow = 0x0400,
	Blue = 0x0500,
	Magenta = 0x0600,
	Cyan = 0x0700,
	White = 0x0800,
	AnyFg = 0x0F00,
	OnBlack = 0x1000,
	OnRed = 0x2000,
	OnGreen = 0x3000,
	OnYellow = 0x4000,
	OnBlue = 0x5000,
	OnMagenta = 0x6000,
	OnCyan = 0x7000,
	OnWhite = 0x8000,
	AnyBg = 0xF000
};

/// Combines two ANSI escape sequences into one Code.
constexpr inline Code operator|(Code a, Code b)
{
	return static_cast<Code>(static_cast<unsigned>(a) | static_cast<unsigned>(b));
}

namespace detail {
	/// Number of attribute flags (bits 0-7) set in @p codes.
	constexpr unsigned flagCount(Code codes)
	{
		unsigned n = 0;
		for (unsigned bit = 0; bit < 8; ++bit)
			if (static_cast<unsigned>(codes) & (1u << bit))
				++n;
		return n;
	}
} // namespace detail

/**
 * Counts the number of ANSI escape sequences in @p codes.
 */
constexpr unsigned count(Code codes)
{
	if (codes == Clear)
		return 1;

	unsigned n = detail::flagCount(codes);

	if (codes & AnyFg)
		++n;

	if (codes & AnyBg)
		++n;

	return n;
}

/**
 * Retrieves the number of bytes required to store the ANSI escape sequences of @p codes
 * without prefix/suffix notation.
 */
constexpr unsigned capacity(Code codes)
{
	if (codes == Clear)
		return 1;

	// One digit per attribute flag, two digits per color.
	unsigned bytes = detail::flagCount(codes);

	if (codes & AnyFg)
		bytes += 2;

	if (codes & AnyBg)
		bytes += 2;

	// Sequences are separated by ';', hence one separator fewer than sequences.
	return bytes + (count(codes) - 1);
}

/// Constructs a sequence of ANSI codes for the colors in this @p codes.
///
/// The result is "ESC [ <codes> m" as a character array; it is NOT
/// NUL-terminated, so consumers must use its size().
template <const Code value>
constexpr auto codes()
{
	std::array<char, capacity(value) + 3> result {};

	std::size_t n = 0; // number of escape sequences emitted so far
	std::size_t i = 0; // next byte to write in the output array

	result[i++] = '\x1B';
	result[i++] = '[';

	if constexpr (value != 0)
	{
		if (value & AllFlags)
		{
			for (unsigned k = 0; k < 8; ++k)
			{
				if (static_cast<unsigned>(value) & (1u << k))
				{
					if (n++)
						result[i++] = ';';
					result[i++] = static_cast<char>('1' + k); // bit k -> SGR code k+1
				}
			}
		}

		if (value & AnyFg)
		{
			if (n++)
				result[i++] = ';';
			unsigned const fg = ((value >> 8) & 0x0F) + 29; // 36 -> {'3', '6'}
			result[i++] = static_cast<char>('0' + fg / 10);
			result[i++] = static_cast<char>('0' + fg % 10);
		}

		if (value & AnyBg)
		{
			if (n++)
				result[i++] = ';';
			unsigned const bg = ((value >> 12) & 0x0F) + 39;
			result[i++] = static_cast<char>('0' + bg / 10);
			result[i++] = static_cast<char>('0' + bg % 10);
		}
	}
	else
		result[i++] = '0'; // reset/clear

	result[i++] = 'm';

	return result;
}

} // namespace AnsiColor
# Generates table and token definitions with "-" as output target and captures
# what mklex writes to stderr into a single include file.
test_cxx_output_stderr() {
  # Label fixed: it previously announced "test_cxx_with_namespaces".
  einfo "test_cxx_output_stderr"
  $MKLEX -f "${TESTDIR}/good.klex" \
         --output-table=- \
         --output-token=- \
         --table-name="lexerDef" \
         --token-name="Token" \
         2>"${WORKDIR}/output.inc"

  test -f "${WORKDIR}/output.inc"
}
# Requests the DFA dot graph on stdout ("-") and verifies that something was
# actually written.
test_debug_dfa_stdout() {
  einfo "test_debug_dfa_stdout"
  $MKLEX -f "${TESTDIR}/good.klex" \
         --output-table="${WORKDIR}/table.cc" \
         --output-token="${WORKDIR}/token.h" \
         --table-name="myns::lexerDef" \
         --token-name="myns::Token" \
         --debug-dfa=- >"${WORKDIR}/dfa.dot"
  # The redirection creates the file unconditionally, so "-f" could never
  # fail; "-s" additionally requires a non-empty graph.
  test -s "${WORKDIR}/dfa.dot"
}
# Runs all command line tests in order; the first failing test aborts the run
# (the script uses "set -e").
main() {
  einfo "WORKDIR: ${WORKDIR}"
  einfo "TESTDIR: ${TESTDIR}"
  einfo "mklex: ${MKLEX}"

  # EXIT covers normal completion and "set -e"/fail aborts, so the temporary
  # work directory is removed in every case, not only on INT/TERM.
  trap cleanup EXIT INT TERM

  test_invalid_arguments
  test_help
  test_cxx_without_namespaces
  test_cxx_with_namespaces
  test_cxx_output_stderr
  test_debug_nfa
  test_debug_nfa_multi
  test_debug_dfa
  test_debug_dfa_stdout
  test_overshadowed
}
Token::Spacing; 51 | case '{': consumeChar(); return Token::SetOpen; 52 | case '}': consumeChar(); return Token::SetClose; 53 | case '|': consumeChar(); return Token::Or; 54 | case ';': consumeChar(); return Token::Semicolon; 55 | case ':': 56 | if (peekChar(1) == ':' && peekChar(2) == '=') 57 | { 58 | consumeChar(3); 59 | return Token::Assoc; 60 | } 61 | return Token::Illegal; 62 | case '\'': 63 | case '"': return consumeLiteral(); 64 | default: 65 | if (isalpha(currentChar()) || currentChar() == '_') 66 | { 67 | return consumeIdentifier(); 68 | } 69 | consumeChar(); 70 | return Token::Illegal; 71 | } 72 | } 73 | 74 | string GrammarLexer::consumeLiteralUntilLF() 75 | { 76 | currentLiteral_.clear(); 77 | 78 | while (!eof() && currentChar() != '\n') 79 | { 80 | currentLiteral_ += static_cast(currentChar()); 81 | consumeChar(); 82 | } 83 | 84 | if (!eof()) 85 | { 86 | currentLiteral_ += static_cast(currentChar()); 87 | consumeChar(); 88 | } 89 | 90 | return currentLiteral_; 91 | } 92 | 93 | GrammarLexer::Token GrammarLexer::consumeIdentifier() 94 | { 95 | assert(!eof() && (isalpha(currentChar()) || currentChar() == '_')); 96 | 97 | do 98 | { 99 | currentLiteral_ += static_cast(currentChar()); 100 | consumeChar(); 101 | } while (!eof() && (isalnum(currentChar()) || currentChar() == '_')); 102 | 103 | if (currentLiteral_ == "token") 104 | return Token::Token; 105 | 106 | return Token::Identifier; 107 | } 108 | 109 | // ' ... ' | " ... 
" 110 | GrammarLexer::Token GrammarLexer::consumeLiteral() 111 | { 112 | assert(!eof() && (currentChar() == '"' || currentChar() == '\'')); 113 | const int delimiter = currentChar(); 114 | consumeChar(); 115 | 116 | while (!eof() && currentChar() != delimiter) 117 | { 118 | currentLiteral_ += static_cast(currentChar()); 119 | consumeChar(); 120 | } 121 | 122 | if (eof()) 123 | return Token::Illegal; // Unexpected EOF 124 | 125 | consumeChar(); // delimiter 126 | 127 | return Token::Literal; 128 | } 129 | 130 | int GrammarLexer::currentChar() const 131 | { 132 | if (offset_ < content_.size()) 133 | return content_[offset_]; 134 | else 135 | return -1; // EOF 136 | } 137 | 138 | int GrammarLexer::peekChar(size_t offset) const 139 | { 140 | if (offset_ + offset < content_.size()) 141 | return content_[offset_ + offset]; 142 | else 143 | return -1; // EOF 144 | } 145 | 146 | int GrammarLexer::consumeChar(size_t count) 147 | { 148 | offset_ += min(count, content_.size() - offset_); 149 | return currentChar(); 150 | } 151 | 152 | // vim:ts=4:sw=4:noet 153 | -------------------------------------------------------------------------------- /src/klex/util/iterator-detail.h: -------------------------------------------------------------------------------- 1 | // This file is part of the "klex" project, http://github.com/christianparpart/klex> 2 | // (c) 2018 Christian Parpart 3 | // 4 | // Licensed under the MIT License (the "License"); you may not use this 5 | // file except in compliance with the License. 
namespace klex::util::detail {

/// Range adaptor iterating the held container back to front.
template <typename Container>
struct reversed {
	const Container container;

	// const-qualified so that a const adaptor object can be iterated as well.
	auto begin() const { return container.crbegin(); }
	auto end() const { return container.crend(); }
};

/// Range adaptor yielding (index, element) pairs for a referenced container.
/// A const-qualified Container type selects the const_iterator flavour.
template <typename Container>
struct indexed {
	Container& container;

	struct iterator {
		typename Container::iterator iter;
		std::size_t index = 0;

		iterator& operator++()
		{
			++iter;
			++index;
			return *this;
		}

		/// Postfix increment: advances and returns the previous position.
		iterator operator++(int)
		{
			iterator previous = *this;
			++*this;
			return previous;
		}

		/// Yields the index together with a copy of the element.
		auto operator*() const { return std::make_pair(index, *iter); }

		bool operator==(const iterator& rhs) const noexcept { return iter == rhs.iter; }
		bool operator!=(const iterator& rhs) const noexcept { return iter != rhs.iter; }
	};

	struct const_iterator {
		typename Container::const_iterator iter;
		std::size_t index = 0;

		const_iterator& operator++()
		{
			++iter;
			++index;
			return *this;
		}

		/// Postfix increment: advances and returns the previous position.
		const_iterator operator++(int)
		{
			const_iterator previous = *this;
			++*this;
			return previous;
		}

		/// Yields the index together with a copy of the element.
		auto operator*() const { return std::make_pair(index, *iter); }

		bool operator==(const const_iterator& rhs) const noexcept { return iter == rhs.iter; }
		bool operator!=(const const_iterator& rhs) const noexcept { return iter != rhs.iter; }
	};

	auto begin() const
	{
		if constexpr (std::is_const<Container>::value)
			return const_iterator { container.cbegin() };
		else
			return iterator { container.begin() };
	}

	auto end() const
	{
		if constexpr (std::is_const<Container>::value)
			return const_iterator { container.cend() };
		else
			return iterator { container.end() };
	}
};

/// Range adaptor visiting only those elements of a referenced container for
/// which the predicate @c proc returns true.
template <typename Container, typename Lambda>
struct filter {
	Container& container;
	Lambda proc;

	struct iterator {
		typename Container::iterator i;
		typename Container::iterator e;
		Lambda filter;

		auto operator*() const { return *i; }

		/// Advances to the next element accepted by the predicate (or to the end).
		iterator& operator++()
		{
			++i;
			while (i != e && !filter(*i))
				++i;
			return *this;
		}

		/// Postfix increment: advances and returns the previous position.
		iterator operator++(int)
		{
			iterator previous = *this;
			++*this;
			return previous;
		}

		bool operator==(const iterator& rhs) const noexcept { return i == rhs.i; }
		bool operator!=(const iterator& rhs) const noexcept { return !(*this == rhs); }
	};

	struct const_iterator {
		typename Container::const_iterator i;
		typename Container::const_iterator e;
		Lambda filter;

		auto operator*() const { return *i; }

		/// Advances to the next element accepted by the predicate (or to the end).
		const_iterator& operator++()
		{
			++i;
			while (i != e && !filter(*i))
				++i;
			return *this;
		}

		/// Postfix increment: advances and returns the previous position.
		const_iterator operator++(int)
		{
			const_iterator previous = *this;
			++*this;
			return previous;
		}

		bool operator==(const const_iterator& rhs) const noexcept { return i == rhs.i; }
		bool operator!=(const const_iterator& rhs) const noexcept { return !(*this == rhs); }
	};

	auto begin() const
	{
		// A single pre-increment already skips every rejected element, so it
		// is enough to test whether the very first element is rejected.
		if constexpr (std::is_const<Container>::value)
		{
			auto first = const_iterator { std::cbegin(container), std::cend(container), proc };
			if (first.i != first.e && !proc(*first))
				++first;
			return first;
		}
		else
		{
			auto first = iterator { std::begin(container), std::end(container), proc };
			if (first.i != first.e && !proc(*first))
				++first;
			return first;
		}
	}

	auto end() const
	{
		if constexpr (std::is_const<Container>::value)
			return const_iterator { std::cend(container), std::cend(container), proc };
		else
			return iterator { std::end(container), std::end(container), proc };
	}
};

} // namespace klex::util::detail
-------------------------------------------------------------------------------- 1 | // This file is part of the "klex" project, http://github.com/christianparpart/klex> 2 | // (c) 2018 Christian Parpart 3 | // 4 | // Licensed under the MIT License (the "License"); you may not use this 5 | // file except in compliance with the License. You may obtain a copy of 6 | // the License at: http://opensource.org/licenses/MIT 7 | 8 | #include 9 | #include 10 | 11 | #include 12 | 13 | using namespace std; 14 | 15 | namespace klex::cfg 16 | { 17 | 18 | LeftRecursion::LeftRecursion(Grammar& _grammar): grammar_ { _grammar } 19 | { 20 | } 21 | 22 | bool LeftRecursion::isLeftRecursive(const Grammar& grammar) 23 | { 24 | const vector nonterminals = cfg::nonterminals(grammar); 25 | 26 | return any_of(begin(nonterminals), end(nonterminals), [&](const NonTerminal& nt) { 27 | const vector productions = grammar.getProductions(nt); 28 | 29 | return any_of(begin(productions), end(productions), [](const Production* p) { 30 | auto syms = symbols(p->handle); 31 | 32 | return !syms.empty() && holds_alternative(syms[0]) 33 | && get(syms[0]) == p->name && syms.size() > 1; 34 | }); 35 | }); 36 | } 37 | 38 | void LeftRecursion::direct() 39 | { 40 | for (const NonTerminal& nt: cfg::nonterminals(grammar_)) 41 | eliminateDirect(nt); 42 | } 43 | 44 | void LeftRecursion::indirect() 45 | { 46 | const vector nonterminals = cfg::nonterminals(grammar_); 47 | 48 | for (size_t i = 0; i < nonterminals.size(); ++i) 49 | { 50 | for (size_t k = 0; k < i; ++k) 51 | { 52 | for (Production* p: select(nonterminals[i], nonterminals[k])) 53 | { 54 | (void) p; // TODO 55 | for (Production* q: grammar_.getProductions(nonterminals[k])) 56 | { 57 | (void) q; // TODO 58 | // replace first non-terminal 59 | ; // p->replaceSymbolAt(0, NonTerminal{q->name}); 60 | } 61 | } 62 | } 63 | 64 | eliminateDirect(nonterminals[i]); 65 | } 66 | } 67 | 68 | list LeftRecursion::select(const NonTerminal& lhs, const NonTerminal& first) 69 
| { 70 | list out; 71 | 72 | for (Production* p: grammar_.getProductions(lhs)) 73 | if (const optional nt = firstNonTerminal(p->handle); nt.has_value() && *nt == first) 74 | out.emplace_back(p); 75 | 76 | return out; 77 | } 78 | 79 | void LeftRecursion::eliminateDirect(const NonTerminal& nt) 80 | { 81 | if (auto [head, tail] = split(grammar_.getProductions(nt)); !tail.empty()) 82 | { 83 | const NonTerminal tailSymbol = createRelatedNonTerminal(nt); 84 | for (Production* p: head) // b -> b A' 85 | p->handle.emplace_back(tailSymbol); 86 | 87 | for (Production* p: tail) 88 | { 89 | p->name = tailSymbol.name; 90 | p->handle.emplace_back(tailSymbol); 91 | p->handle.erase(p->handle.begin()); 92 | } 93 | 94 | // inject new epsilon-production. 95 | grammar_.productions.emplace_back(Production { tailSymbol.name, {} }); 96 | // TODO: don't emplace at the back of all but at the back of the last NT's tail symbol. 97 | // TODO: fix injected EOF rule, omfg 98 | } 99 | } 100 | 101 | NonTerminal LeftRecursion::createRelatedNonTerminal(const NonTerminal& nt) const 102 | { 103 | string tail = nt.name + "_"; 104 | 105 | while (any_of(begin(grammar_.productions), end(grammar_.productions), [&](const Production& p) { 106 | return p.name == tail; 107 | })) 108 | tail += "_"; 109 | 110 | return NonTerminal { tail }; 111 | } 112 | 113 | pair, vector> LeftRecursion::split(vector productions) const 114 | { 115 | vector head; 116 | vector tail; 117 | 118 | for (Production* p: productions) 119 | { 120 | const optional nt = firstNonTerminal(p->handle); 121 | if (nt.has_value() && *nt == p->name && symbols(p->handle).size() > 1) 122 | tail.emplace_back(p); 123 | else 124 | head.emplace_back(p); 125 | } 126 | 127 | return make_pair(std::move(head), std::move(tail)); 128 | } 129 | 130 | } // namespace klex::cfg 131 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # 
klex - A Scanner Generator 2 | [![Build Status](https://travis-ci.org/christianparpart/klex.svg?branch=master)](https://travis-ci.org/christianparpart/klex) [![Build Status](https://ci.appveyor.com/api/projects/status/l8isxx0k38kdnatq?svg=true)](https://ci.appveyor.com/project/christianparpart/klex) [![codecov](https://codecov.io/gh/christianparpart/klex/branch/master/graph/badge.svg)](https://codecov.io/gh/christianparpart/klex) [![Language grade: C/C++](https://img.shields.io/lgtm/grade/cpp/g/christianparpart/klex.svg?logo=lgtm&logoWidth=18)](https://lgtm.com/projects/g/christianparpart/klex/context:cpp) 3 | 4 | 5 | 6 | - mklex: CLI tool for compiling regular expressions into state transition tables 7 | - libklex: C++ library for lexing 8 | 9 | ### mklex CLI 10 | ``` 11 | mklex - klex lexer generator 12 | (c) 2018 Christian Parpart 13 | 14 | -v, --verbose Prints some more verbose output 15 | -h, --help Prints this help and exits 16 | -f, --file=PATTERN_FILE Input file with lexer rules 17 | -t, --output-table=FILE Output file that will contain the compiled tables (use - to represent stderr) 18 | -T, --output-token=FILE Output file that will contain the compiled tables (use - to represent stderr) 19 | -n, --table-name=IDENTIFIER Symbol name for generated table (may include namespace). [lexerDef] 20 | -N, --token-name=IDENTIFIER Symbol name for generated token enum type (may include namespace). [Token] 21 | -M, --machine-name=IDENTIFIER 22 | Symbol name for generated machine enum type (must not include namespace). [Machine] 23 | -x, --debug-dfa=DOT_FILE Writes dot graph of final finite automaton. Use - to represent stdout. [] 24 | -d, --debug-nfa Writes dot graph of non-deterministic finite automaton to stdout and exits. 25 | --no-dfa-minimize Do not minimize the DFA 26 | -p, --perf Print performance counters to stderr. 
27 | ``` 28 | 29 | ### Example klex Grammar 30 | 31 | ``` 32 | # specials 33 | Spacing(ignore) ::= "[\t\s]+" 34 | Eof ::= <> 35 | 36 | # symbols 37 | Plus ::= \+ 38 | RndOpen ::= \( 39 | RndClose ::= \) 40 | 41 | # keywords 42 | If ::= if 43 | Then ::= then 44 | Else ::= else 45 | 46 | # literals & identifiers 47 | NumberLiteral ::= 0|[1-9][0-9]* 48 | Identifier ::= [a-zA-Z_][a-zA-Z0-9_]* 49 | ``` 50 | 51 | ### klex Lexer API 52 | 53 | The great thing about the Lexer API is that it is header-only, because the most complex work is already done 54 | when the rules are compiled. 55 | 56 | You can compile the above grammar with `mklex -f rules.klex -t myrules.h -T mytokens.h` 57 | and then compile the code below: 58 | 59 | ```cpp 60 | #include 61 | #include 62 | #include 63 | #include "myrules.h" 64 | #include "mytokens.h" 65 | 66 | int main(int argc, const char* argv[]) { 67 | klex::Lexer lexer {lexerDef, std::make_unique(argv[1])}; 68 | 69 | for (Token t = lexer.recognize(); t != Token::Eof; t = lexer.recognize()) { 70 | std::cerr << fmt::format("[{}-{}]: token {} (\"{}\")\n", 71 | lexer.offset().first, 72 | lexer.offset().second, 73 | to_string(t), lexer.word()); 74 | } 75 | 76 | return EXIT_SUCCESS; 77 | } 78 | ``` 79 | 80 | ### klex lexer generator API 81 | 82 | See [examples/mathexpr.cpp](https://github.com/christianparpart/klex/blob/master/examples/mathexpr.cpp) 83 | for a complete example. Here's a snippet: 84 | 85 | ```cpp 86 | enum class Token { Eof = 1, Plus, Minus, Mul, Div, RndOpen, RndClose, Number, INVALID }; 87 | std::string RULES = R"( 88 | Space(ignore) ::= [\s\t]+ 89 | Eof ::= <> 90 | Plus ::= "+" 91 | Minus ::= "-" 92 | Mul ::= "*" 93 | Div ::= "/" 94 | RndOpen ::= "(" 95 | RndClose ::= \) 96 | Number ::= -?([0-9]+|[0-9]{1,3}(_[0-9]{3})*) 97 | INVALID ::= . 98 | )"; 99 | 100 | using Number = long long int; 101 | Number expr(Lexer& lexer) { 102 | // [... consume lexer tokens here ...] 
103 | return 42; 104 | } 105 | 106 | int main(int argc, const char* argv[]) { 107 | klex::Compiler cc; 108 | cc.declareAll(std::make_unique(RULES)); 109 | 110 | Lexer lexer { cc.compile(), std::make_unique("2 + 3 * (5 - 1)") }; 111 | 112 | lexer.recognize(); // recognize first token 113 | Number y = expr(lexer); 114 | 115 | std::cerr << fmt::format("{} = {}\n", input, y); 116 | 117 | return EXIT_SUCCESS; 118 | } 119 | ``` 120 | 121 | ### References 122 | 123 | - https://swtch.com/~rsc/regexp/ 124 | -------------------------------------------------------------------------------- /src/klex/cfg/GrammarParser_test.cpp: -------------------------------------------------------------------------------- 1 | // This file is part of the "klex" project, http://github.com/christianparpart/klex> 2 | // (c) 2018 Christian Parpart 3 | // 4 | // Licensed under the MIT License (the "License"); you may not use this 5 | // file except in compliance with the License. You may obtain a copy of 6 | // the License at: http://opensource.org/licenses/MIT 7 | 8 | #include 9 | #include 10 | #include 11 | #include 12 | #include 13 | 14 | #include 15 | 16 | using namespace std; 17 | using namespace klex; 18 | using namespace klex::cfg; 19 | using namespace klex::util::literals; 20 | 21 | const static std::string simpleGrammarSpec = 22 | R"(`Start ::= A | B; 23 | `A ::= 'a'; 24 | `B ::= 'b' {b1} 25 | ` | 'b' B {b2}; 26 | `)"_multiline; 27 | 28 | TEST(cfg_GrammarParser, parserSimple) 29 | { 30 | ConsoleReport report; 31 | GrammarParser parser(GrammarLexer { simpleGrammarSpec }, &report); 32 | Grammar grammar = parser.parse(); 33 | ASSERT_EQ(5, grammar.productions.size()); 34 | 35 | ASSERT_EQ("Start", grammar.productions[0].name); 36 | ASSERT_EQ("A", to_string(grammar.productions[0].handle)); 37 | ASSERT_EQ("Start", grammar.productions[1].name); 38 | ASSERT_EQ("B", to_string(grammar.productions[1].handle)); 39 | 40 | ASSERT_EQ("A", grammar.productions[2].name); 41 | ASSERT_EQ("\"a\"", 
to_string(grammar.productions[2].handle)); 42 | 43 | ASSERT_EQ("B", grammar.productions[3].name); 44 | ASSERT_EQ("\"b\" {b1}", to_string(grammar.productions[3].handle)); 45 | 46 | ASSERT_EQ("B", grammar.productions[4].name); 47 | ASSERT_EQ("\"b\" B {b2}", to_string(grammar.productions[4].handle)); 48 | } 49 | 50 | TEST(cfg_GrammarParser, unresolved_nonterminals) 51 | { 52 | BufferedReport report; 53 | Grammar grammar = GrammarParser(GrammarLexer { "Start ::= Another" }, &report).parse(); 54 | ASSERT_TRUE(report.containsFailures()); 55 | 56 | // TODO: make sure the failure reported is the unresolved-nonterminals case. 57 | } 58 | 59 | TEST(cfg_GrammarParser, action) 60 | { 61 | ConsoleReport report; 62 | GrammarParser parser = GrammarParser("E ::= 'a' {a};", &report); 63 | Grammar grammar = parser.parse(); 64 | ASSERT_FALSE(report.containsFailures()); 65 | } 66 | 67 | TEST(cfg_GrammarParser, action_on_epsilon) 68 | { 69 | ConsoleReport report; 70 | GrammarParser parser = GrammarParser("Rule ::= {action};", &report); 71 | Grammar grammar = parser.parse(); 72 | ASSERT_FALSE(report.containsFailures()); 73 | } 74 | 75 | struct CheckTerminalPattern 76 | { 77 | string pattern; 78 | bool operator()(const Terminal& w) const { return pattern == w.pattern(); } 79 | }; 80 | 81 | TEST(cfg_GrammarParser, customTokens) 82 | { 83 | BufferedReport report; 84 | Grammar grammar = GrammarParser( 85 | GrammarLexer { 86 | R"(`token { 87 | ` Spacing(ignore) ::= [\s\t]+ 88 | ` Number ::= [0-9]+ 89 | `} 90 | ` 91 | `Start ::= '(' Number ')'; 92 | `)"_multiline }, 93 | &report) 94 | .parse(); 95 | 96 | ASSERT_FALSE(report.containsFailures()); 97 | grammar.finalize(); 98 | 99 | log(grammar.dump()); 100 | 101 | for (const Terminal& w: grammar.terminals) 102 | logf("Terminal: {}", w); 103 | 104 | // verify presense of all terminals in the grammar 105 | ASSERT_EQ(5, grammar.terminals.size()); 106 | ASSERT_TRUE(any_of(begin(grammar.terminals), end(grammar.terminals), CheckTerminalPattern { 
"[0-9]+" })); 107 | ASSERT_TRUE( 108 | any_of(begin(grammar.terminals), end(grammar.terminals), CheckTerminalPattern { "[\\s\\t]+" })); 109 | ASSERT_TRUE(any_of(begin(grammar.terminals), end(grammar.terminals), CheckTerminalPattern { "(" })); 110 | ASSERT_TRUE(any_of(begin(grammar.terminals), end(grammar.terminals), CheckTerminalPattern { ")" })); 111 | 112 | // verify production rule to be in the form as the input mandates 113 | const auto symbols = klex::cfg::symbols(grammar.productions[0].handle); 114 | ASSERT_EQ(4, symbols.size()); 115 | 116 | ASSERT_TRUE(holds_alternative(symbols[0])); 117 | ASSERT_TRUE(holds_alternative(symbols[1])); 118 | ASSERT_TRUE(holds_alternative(symbols[2])); 119 | ASSERT_TRUE(holds_alternative(symbols[3])); 120 | 121 | ASSERT_EQ("(", get(symbols[0]).pattern()); 122 | 123 | ASSERT_EQ("Number", get(symbols[1]).name); 124 | ASSERT_EQ("[0-9]+", get(symbols[1]).pattern()); 125 | 126 | ASSERT_EQ(")", get(symbols[2]).pattern()); 127 | 128 | ASSERT_EQ("EOF", get(symbols[3]).name); 129 | ASSERT_EQ("<>", get(symbols[3]).pattern()); 130 | } 131 | 132 | // vim:ts=4:sw=4:noet 133 | -------------------------------------------------------------------------------- /src/klex/util/iterator_test.cpp: -------------------------------------------------------------------------------- 1 | // This file is part of the "klex" project, http://github.com/christianparpart/klex> 2 | // (c) 2018 Christian Parpart 3 | // 4 | // Licensed under the MIT License (the "License"); you may not use this 5 | // file except in compliance with the License. 
You may obtain a copy of 6 | // the License at: http://opensource.org/licenses/MIT 7 | 8 | #include 9 | #include 10 | 11 | #include 12 | #include 13 | #include 14 | #include 15 | 16 | using namespace std; 17 | using namespace klex::util; 18 | 19 | TEST(util_iterator_reversed, empty) 20 | { 21 | const vector v; 22 | auto x = reversed(v); 23 | auto i = begin(x); 24 | ASSERT_TRUE(i == end(x)); 25 | } 26 | 27 | TEST(util_iterator_reversed, one) 28 | { 29 | const vector v { 1 }; 30 | auto x = reversed(v); 31 | auto i = begin(x); 32 | ASSERT_EQ(1, *i); 33 | i++; 34 | ASSERT_TRUE(i == end(x)); 35 | } 36 | 37 | TEST(util_iterator_reversed, many) 38 | { 39 | const vector v { 1, 2, 3 }; 40 | auto x = reversed(v); 41 | auto i = begin(x); 42 | ASSERT_EQ(3, *i); 43 | i++; 44 | ASSERT_EQ(2, *i); 45 | i++; 46 | ASSERT_EQ(1, *i); 47 | i++; 48 | ASSERT_TRUE(i == end(x)); 49 | } 50 | 51 | TEST(util_iterator_indexed, many_const) 52 | { 53 | const vector v { 10, 20, 30 }; 54 | const auto x = indexed(v); 55 | static_assert(is_const::value); 56 | auto i = begin(x); 57 | 58 | ASSERT_EQ(0, (*i).first); 59 | ASSERT_EQ(10, (*i).second); 60 | i++; 61 | 62 | ASSERT_EQ(1, (*i).first); 63 | ASSERT_EQ(20, (*i).second); 64 | i++; 65 | 66 | ASSERT_EQ(2, (*i).first); 67 | ASSERT_EQ(30, (*i).second); 68 | i++; 69 | 70 | ASSERT_TRUE(i == end(x)); 71 | } 72 | 73 | TEST(util_iterator_indexed, many) 74 | { 75 | vector v { "zero", "one", "two" }; 76 | auto x = indexed(v); 77 | auto i = begin(x); 78 | 79 | ASSERT_EQ(0, (*i).first); 80 | ASSERT_EQ("zero", (*i).second); 81 | i++; 82 | 83 | ASSERT_EQ(1, (*i).first); 84 | ASSERT_EQ("one", (*i).second); 85 | i++; 86 | 87 | ASSERT_EQ(2, (*i).first); 88 | ASSERT_EQ("two", (*i).second); 89 | i++; 90 | 91 | ASSERT_TRUE(i == end(x)); 92 | } 93 | 94 | TEST(util_iterator_indexed, range_based_for_loop) 95 | { 96 | log("const:"); 97 | const vector v1 { 10, 20, 30 }; 98 | for (const auto&& [index, value]: indexed(v1)) 99 | logf("index {}, value {}", index, value); 100 | 
101 | log("non-const:"); 102 | vector v2 { 10, 20, 30 }; 103 | for (const auto&& [index, value]: indexed(v2)) 104 | logf("index {}, value {}", index, value); 105 | } 106 | 107 | TEST(util_iterator_filter, for_range) 108 | { 109 | const vector nums = { 1, 2, 3, 4 }; 110 | vector odds; 111 | for (const int i: filter(nums, [](int x) { return x % 2 != 0; })) 112 | odds.push_back(i); 113 | 114 | ASSERT_EQ(2, odds.size()); 115 | EXPECT_EQ(1, odds[0]); 116 | EXPECT_EQ(3, odds[1]); 117 | } 118 | 119 | TEST(util_iterator_filter, count_proc_invocations) 120 | { 121 | static const array numbers = { 1, 2, 3, 4 }; 122 | int count = 0; 123 | auto counter = [&](int) { 124 | ++count; 125 | return true; 126 | }; 127 | const auto f = filter(numbers, counter); 128 | for_each(begin(f), end(f), [](int) {}); 129 | ASSERT_EQ(4, count); 130 | } 131 | 132 | TEST(util_iterator_filter, for_range_initializer_list) 133 | { 134 | static const array numbers = { 1, 2, 3, 4 }; 135 | vector odds; 136 | auto f_odd = [&](int x) { 137 | logf("f_odd: x={0}", x); 138 | return x % 2 != 0; 139 | }; 140 | for (const int i: filter(numbers, f_odd)) 141 | odds.push_back(i); 142 | 143 | ASSERT_EQ(2, odds.size()); 144 | EXPECT_EQ(1, odds[0]); 145 | EXPECT_EQ(3, odds[1]); 146 | } 147 | 148 | TEST(util_iterator_translate, vector) 149 | { 150 | const vector in { 1, 2, 3, 4 }; 151 | const vector out = translate(in, [](int i) -> int { return i * 2; }); 152 | 153 | for (const auto&& [i, v]: indexed(out)) 154 | logf("out[{}] = {}", i, v); 155 | 156 | ASSERT_EQ(4, out.size()); 157 | 158 | EXPECT_EQ(2, out[0]); 159 | EXPECT_EQ(4, out[1]); 160 | EXPECT_EQ(6, out[2]); 161 | EXPECT_EQ(8, out[3]); 162 | } 163 | 164 | TEST(util_iterator_translate, chain_translate_join) 165 | { 166 | const vector in { 1, 2, 3, 4 }; 167 | const string out { join(translate(in, [](int i) -> string { return to_string(i); }), ", ") }; 168 | 169 | ASSERT_EQ("1, 2, 3, 4", out); 170 | } 171 | 172 | TEST(util_iterator, find_last) 173 | { 174 | const 
vector v { 1, 2, 3, 4 }; 175 | const auto i = find_last(v, [](int i) { return i % 2 != 0; }); // find last odd value -> 3 176 | 177 | ASSERT_TRUE(i != end(v)); 178 | ASSERT_EQ(3, *i); 179 | } 180 | -------------------------------------------------------------------------------- /src/klex/regular/Symbols.cpp: -------------------------------------------------------------------------------- 1 | // This file is part of the "klex" project, http://github.com/christianparpart/klex> 2 | // (c) 2018 Christian Parpart 3 | // 4 | // Licensed under the MIT License (the "License"); you may not use this 5 | // file except in compliance with the License. You may obtain a copy of 6 | // the License at: http://opensource.org/licenses/MIT 7 | 8 | #include 9 | 10 | #include 11 | 12 | using namespace std; 13 | 14 | namespace klex::regular 15 | { 16 | 17 | string prettySymbol(Symbol input) 18 | { 19 | switch (input) 20 | { 21 | case Symbols::Error: return "<>"; 22 | case Symbols::BeginOfLine: return "<>"; 23 | case Symbols::EndOfLine: return "<>"; 24 | case Symbols::EndOfFile: return "<>"; 25 | case Symbols::Epsilon: return "ε"; 26 | case '\a': return "\\a"; 27 | case '\b': return "\\b"; 28 | case '\f': return "\\f"; 29 | case '\n': return "\\n"; 30 | case '\r': return "\\r"; 31 | case ' ': return "\\s"; 32 | case '\t': return "\\t"; 33 | case '\v': return "\\v"; 34 | case '\0': return "\\0"; 35 | case '.': return "\\."; // so we can distinguish from dot-operator 36 | default: 37 | if (isprint(input)) 38 | { 39 | return fmt::format("{}", (char) input); 40 | } 41 | else 42 | { 43 | return fmt::format("\\x{:02x}", input); 44 | } 45 | } 46 | } 47 | 48 | string prettyCharRange(Symbol ymin, Symbol ymax) 49 | { 50 | assert(ymin <= ymax); 51 | 52 | stringstream sstr; 53 | switch (ymax - ymin) 54 | { 55 | case 0: sstr << prettySymbol(ymin); break; 56 | case 1: sstr << prettySymbol(ymin) << prettySymbol(ymin + 1); break; 57 | case 2: sstr << prettySymbol(ymin) << prettySymbol(ymin + 1) << 
prettySymbol(ymax); break; 58 | default: sstr << prettySymbol(ymin) << '-' << prettySymbol(ymax); break; 59 | } 60 | return sstr.str(); 61 | } 62 | 63 | string groupCharacterClassRanges(const vector& syms) 64 | { 65 | // {1,3,5,a,b,c,d,e,f,z} 66 | // -> 67 | // "135a-fz" (an empty set yields an empty string) 68 | 69 | stringstream sstr; 70 | Symbol ymin = '\0'; 71 | Symbol ymax = ymin; 72 | int k = 0; // number of member symbols seen so far 73 | 74 | for (size_t i = 0, e = syms.size(); i != e; ++i) 75 | { 76 | if (!syms[i]) 77 | continue; 78 | 79 | const Symbol c = (Symbol) i; 80 | if (k && c == ymax + 1) 81 | { // range growing (only once a real symbol has opened the range) 82 | ymax = c; 83 | } 84 | else 85 | { // gap found (or very first symbol) 86 | if (k) 87 | { 88 | sstr << prettyCharRange(ymin, ymax); 89 | } 90 | ymin = ymax = c; 91 | } 92 | k++; 93 | } 94 | if (k) sstr << prettyCharRange(ymin, ymax); // flush the last open range, if any 95 | 96 | return sstr.str(); 97 | } 98 | 99 | string groupCharacterClassRanges(vector chars) 100 | { 101 | // chars is taken by value, so it can be sorted in place here 102 | sort(chars.begin(), chars.end()); 103 | 104 | if (chars.size() == 1) 105 | return prettySymbol(chars.front()); 106 | 107 | // {1,3,5,a,b,c,d,e,f,z} 108 | // -> 109 | // "135a-fz" 110 | 111 | stringstream sstr; 112 | Symbol ymin = 0; 113 | Symbol ymax = ymin; 114 | int i = 0; // number of symbols consumed so far 115 | 116 | for (Symbol c: chars) 117 | { 118 | if (i && c == ymax + 1) 119 | { // range growing (only once a real symbol has opened the range) 120 | ymax = c; 121 | } 122 | else 123 | { // gap found (or very first symbol) 124 | if (i) 125 | { 126 | sstr << prettyCharRange(ymin, ymax); 127 | } 128 | ymin = ymax = c; 129 | } 130 | i++; 131 | } 132 | if (i) sstr << prettyCharRange(ymin, ymax); // flush the last open range, if any 133 | 134 | return sstr.str(); 135 | } 136 | 137 | SymbolSet::SymbolSet(DotMode): set_(256, true), size_ { 255 }, hash_ { 2166136261 } 138 | { 139 | set_[(size_t) '\n'] = false; 140 | for (Symbol s: *this) 141 | { 142 | hash_ = (hash_ * 16777619) ^ s; 143 | } 144 | } 145 | 146 | bool SymbolSet::isDot() const noexcept 147 | { 148 | static SymbolSet dot(SymbolSet::Dot); 149 | return *this == dot; 150 | } 151 | 152 | string SymbolSet::to_string() const 153 | { 
154 | if (isDot()) 155 | return "."; 156 | 157 | return groupCharacterClassRanges(set_); 158 | } 159 | 160 | void SymbolSet::complement() 161 | { 162 | // flip bits 163 | for (size_t i = 0, e = set_.size(); i != e; ++i) 164 | { 165 | set_[i] = !set_[i]; 166 | } 167 | 168 | // flip size 169 | size_ = set_.size() - size_; 170 | 171 | recalculateHash(); 172 | } 173 | 174 | void SymbolSet::recalculateHash() 175 | { 176 | // recalculate hash 177 | hash_ = 2166136261; 178 | for (Symbol s: *this) 179 | { 180 | hash_ = (hash_ * 16777619) ^ s; 181 | } 182 | } 183 | 184 | } // namespace klex::regular 185 | -------------------------------------------------------------------------------- /src/klex/regular/DFA.h: -------------------------------------------------------------------------------- 1 | // This file is part of the "klex" project, http://github.com/christianparpart/klex> 2 | // (c) 2018 Christian Parpart 3 | // 4 | // Licensed under the MIT License (the "License"); you may not use this 5 | // file except in compliance with the License. You may obtain a copy of 6 | // the License at: http://opensource.org/licenses/MIT 7 | #pragma once 8 | 9 | #include 10 | #include 11 | #include 12 | #include 13 | #include 14 | #include 15 | 16 | namespace klex::regular { 17 | 18 | class NFA; 19 | class DFABuilder; 20 | class DotVisitor; 21 | 22 | /** 23 | * Represents a deterministic finite automaton. 24 | */ 25 | class DFA { 26 | public: 27 | using TransitionMap = std::map; 28 | struct State { 29 | // std::vector states; 30 | TransitionMap transitions; 31 | }; 32 | using StateVec = std::vector; 33 | 34 | //! defines a mapping between accept state ID and another (prior) ID to track roll back the input stream 35 | //! to. 
36 | using BacktrackingMap = std::map; 37 | 38 | DFA(const DFA& other) = delete; 39 | DFA& operator=(const DFA& other) = delete; 40 | DFA(DFA&&) = default; 41 | DFA& operator=(DFA&&) = default; 42 | ~DFA() = default; 43 | 44 | DFA() : states_{}, initialState_{0}, backtrackStates_{}, acceptTags_{} {} 45 | 46 | [[nodiscard]] bool empty() const noexcept { return states_.empty(); } 47 | [[nodiscard]] size_t size() const noexcept { return states_.size(); } 48 | 49 | [[nodiscard]] StateId lastState() const noexcept 50 | { 51 | assert(!empty()); 52 | return states_.size() - 1; 53 | } 54 | 55 | //! Retrieves the alphabet of this finite automaton. 56 | Alphabet alphabet() const; 57 | 58 | //! Retrieves the initial state. 59 | StateId initialState() const { return initialState_; } 60 | 61 | //! Retrieves the list of available states. 62 | const StateVec& states() const { return states_; } 63 | StateVec& states() { return states_; } 64 | 65 | StateIdVec stateIds() const 66 | { 67 | StateIdVec v; 68 | v.reserve(states_.size()); 69 | for (size_t i = 0, e = states_.size(); i != e; ++i) 70 | v.push_back(i); // funny, I know 71 | return v; 72 | } 73 | 74 | //! Retrieves the list of accepting states. 75 | std::vector acceptStates() const; 76 | 77 | /** 78 | * Traverses all states and edges in this NFA and calls @p visitor for each state & edge. 79 | * 80 | * Use this function to e.g. get a GraphViz dot-file drawn. 
81 | */ 82 | void visit(DotVisitor& visitor) const; 83 | 84 | void createStates(size_t count); 85 | 86 | void setInitialState(StateId state); 87 | 88 | const TransitionMap& stateTransitions(StateId id) const 89 | { 90 | return states_[static_cast(id)].transitions; 91 | } 92 | 93 | // {{{ backtracking (for lookahead) 94 | void setBacktrack(StateId from, StateId to) { backtrackStates_[from] = to; } 95 | 96 | std::optional backtrack(StateId acceptState) const 97 | { 98 | if (auto i = backtrackStates_.find(acceptState); i != backtrackStates_.end()) 99 | return i->second; 100 | 101 | return std::nullopt; 102 | } 103 | 104 | const BacktrackingMap& backtracking() const noexcept { return backtrackStates_; } 105 | // }}} 106 | 107 | //! Flags given state as accepting-state with given Tag @p acceptTag. 108 | void setAccept(StateId state, Tag acceptTag) { acceptTags_[state] = acceptTag; } 109 | 110 | bool isAccepting(StateId s) const { return acceptTags_.find(s) != acceptTags_.end(); } 111 | 112 | std::optional acceptTag(StateId s) const 113 | { 114 | if (auto i = acceptTags_.find(s); i != acceptTags_.end()) 115 | return i->second; 116 | 117 | return std::nullopt; 118 | } 119 | 120 | std::optional delta(StateId state, Symbol symbol) const 121 | { 122 | const auto& T = states_[state].transitions; 123 | if (auto i = T.find(symbol); i != T.end()) 124 | return i->second; 125 | 126 | return std::nullopt; 127 | } 128 | 129 | void setTransition(StateId from, Symbol symbol, StateId to); 130 | void removeTransition(StateId from, Symbol symbol); 131 | 132 | StateIdVec nonAcceptStates() const 133 | { 134 | StateIdVec result; 135 | result.reserve( 136 | std::abs(static_cast(states_.size()) - static_cast(acceptTags_.size()))); 137 | 138 | for (StateId s = 0, sE = size(); s != sE; ++s) 139 | if (!isAccepting(s)) 140 | result.push_back(s); 141 | 142 | return result; 143 | } 144 | 145 | bool isAcceptor(Tag t) const 146 | { 147 | for (const std::pair& p : acceptTags_) 148 | if (p.second == t) 
149 | return true; 150 | 151 | return false; 152 | } 153 | 154 | StateId append(DFA&& other, StateId q0); 155 | 156 | private: 157 | void prepareStateIds(StateId baseId, StateId q0); 158 | 159 | private: 160 | StateVec states_; 161 | StateId initialState_; 162 | BacktrackingMap backtrackStates_; 163 | AcceptMap acceptTags_; 164 | }; 165 | 166 | } // namespace klex::regular 167 | -------------------------------------------------------------------------------- /src/klex/regular/RegExpr.cpp: -------------------------------------------------------------------------------- 1 | // This file is part of the "klex" project, http://github.com/christianparpart/klex> 2 | // (c) 2018 Christian Parpart 3 | // 4 | // Licensed under the MIT License (the "License"); you may not use this 5 | // file except in compliance with the License. You may obtain a copy of 6 | // the License at: http://opensource.org/licenses/MIT 7 | 8 | #include 9 | #include 10 | 11 | #include 12 | 13 | #include 14 | #include 15 | #include 16 | 17 | using namespace std; 18 | 19 | /* 20 | REGULAR EXPRESSION SYNTAX: 21 | -------------------------- 22 | 23 | expr := alternation 24 | alternation := concatenation ('|' concatenation)* 25 | concatenation := closure (closure)* 26 | closure := atom ['*' | '?' 
| '{' NUM [',' NUM] '}'] 27 | atom := character | characterClass | '(' expr ')' 28 | characterClass := '[' ['^'] characterClassFragment+ ']' 29 | characterClassFragment := character | character '-' character 30 | */ 31 | 32 | namespace klex::regular 33 | { 34 | 35 | auto embrace(const RegExpr& outer, const RegExpr& inner) 36 | { 37 | if (precedence(outer) > precedence(inner)) 38 | return "(" + to_string(inner) + ")"; 39 | else 40 | return to_string(inner); 41 | } 42 | 43 | std::string to_string(const RegExpr& re) 44 | { 45 | return visit( 46 | overloaded { 47 | [&](const ClosureExpr& e) { 48 | stringstream sstr; 49 | sstr << embrace(re, *e.subExpr); 50 | if (e.minimumOccurrences == 0 && e.maximumOccurrences == 1) 51 | sstr << '?'; 52 | else if (e.minimumOccurrences == 0 && e.maximumOccurrences == numeric_limits::max()) 53 | sstr << '*'; 54 | else if (e.minimumOccurrences == 1 && e.maximumOccurrences == numeric_limits::max()) 55 | sstr << '+'; 56 | else 57 | sstr << '{' << e.minimumOccurrences << ',' << e.maximumOccurrences << '}'; 58 | return sstr.str(); 59 | }, 60 | [&](const AlternationExpr& e) { return embrace(re, *e.left) + "|" + embrace(re, *e.right); }, 61 | [&](const ConcatenationExpr& e) { return embrace(re, *e.left) + embrace(re, *e.right); }, 62 | [&](const LookAheadExpr& e) { return embrace(re, *e.left) + "/" + embrace(re, *e.right); }, 63 | [](const CharacterExpr& e) { return string(1, e.value); }, 64 | [](const EndOfFileExpr& e) { return string { "<>" }; }, 65 | [](const BeginOfLineExpr& e) { return string { "^" }; }, 66 | [](const EndOfLineExpr& e) { return string { "$" }; }, 67 | [](const CharacterClassExpr& e) { return e.symbols.to_string(); }, 68 | [](const DotExpr& e) { return string { "." 
}; }, 69 | [](const EmptyExpr& e) { return string {}; }, 70 | }, 71 | re); 72 | } 73 | 74 | int precedence(const RegExpr& regex) 75 | { 76 | return visit(overloaded { 77 | [](const AlternationExpr& e) { return 1; }, 78 | [](const BeginOfLineExpr& e) { return 4; }, 79 | [](const CharacterClassExpr& e) { return 4; }, 80 | [](const CharacterExpr& e) { return 4; }, 81 | [](const ClosureExpr& e) { return 3; }, 82 | [](const ConcatenationExpr& e) { return 2; }, 83 | [](const DotExpr& e) { return 4; }, 84 | [](const EmptyExpr& e) { return 4; }, 85 | [](const EndOfFileExpr& e) { return 4; }, 86 | [](const EndOfLineExpr& e) { return 4; }, 87 | [](const LookAheadExpr& e) { return 0; }, 88 | }, 89 | regex); 90 | } 91 | 92 | bool containsBeginOfLine(const RegExpr& regex) 93 | { 94 | return visit(overloaded { 95 | [](const AlternationExpr& e) { 96 | return containsBeginOfLine(*e.left) || containsBeginOfLine(*e.right); 97 | }, 98 | [](const BeginOfLineExpr& e) { return true; }, 99 | [](const CharacterClassExpr& e) { return false; }, 100 | [](const CharacterExpr& e) { return false; }, 101 | [](const ClosureExpr& e) { return containsBeginOfLine(*e.subExpr); }, 102 | [](const ConcatenationExpr& e) { 103 | return containsBeginOfLine(*e.left) || containsBeginOfLine(*e.right); 104 | }, 105 | [](const DotExpr& e) { return false; }, 106 | [](const EmptyExpr& e) { return false; }, 107 | [](const EndOfFileExpr& e) { return false; }, 108 | [](const EndOfLineExpr& e) { return false; }, 109 | [](const LookAheadExpr& e) { 110 | return containsBeginOfLine(*e.left) || containsBeginOfLine(*e.right); 111 | }, 112 | }, 113 | regex); 114 | } 115 | 116 | } // namespace klex::regular 117 | -------------------------------------------------------------------------------- /src/klex/regular/RuleParser.h: -------------------------------------------------------------------------------- 1 | // This file is part of the "klex" project, http://github.com/christianparpart/klex> 2 | // (c) 2018 Christian Parpart 
3 | // 4 | // Licensed under the MIT License (the "License"); you may not use this 5 | // file except in compliance with the License. You may obtain a copy of 6 | // the License at: http://opensource.org/licenses/MIT 7 | #pragma once 8 | 9 | #include 10 | 11 | #include 12 | #include 13 | #include 14 | #include 15 | #include 16 | #include 17 | #include 18 | 19 | namespace klex::regular { 20 | 21 | class RuleParser { 22 | public: 23 | explicit RuleParser(std::unique_ptr input, int firstTerminalId = FirstUserTag); 24 | explicit RuleParser(std::string input, int firstTerminalId = FirstUserTag); 25 | 26 | RuleList parseRules(); 27 | 28 | class UnexpectedChar; 29 | class UnexpectedToken; 30 | class InvalidRuleOption; 31 | class InvalidRefRuleWithConditions; 32 | class DuplicateRule; 33 | 34 | private: 35 | void parseRule(RuleList& rules); 36 | std::vector parseRuleConditions(); 37 | void parseBasicRule(RuleList& rules, std::vector&& conditions); 38 | std::string parseExpression(); 39 | 40 | private: 41 | std::string consumeToken(); 42 | void consumeAnySP(); 43 | void consumeSP(); 44 | void consumeAssoc(); 45 | void consumeSpace(); 46 | char currentChar() const noexcept; 47 | char consumeChar(char ch); 48 | char consumeChar(); 49 | bool eof() const noexcept; 50 | std::string replaceRefs(const std::string& pattern); 51 | 52 | private: 53 | std::unique_ptr stream_; 54 | std::map refRules_; 55 | Rule* lastParsedRule_; 56 | bool lastParsedRuleIsRef_; 57 | char currentChar_; 58 | unsigned int line_; 59 | unsigned int column_; 60 | unsigned int offset_; 61 | int nextTag_; 62 | }; 63 | 64 | class RuleParser::InvalidRefRuleWithConditions : public std::runtime_error { 65 | public: 66 | InvalidRefRuleWithConditions(unsigned line, unsigned column, Rule&& rule) 67 | : std::runtime_error{fmt::format( 68 | "{}:{}: Invalid rule \"{}\". 
Reference rules must not be labelled with conditions.", line, 69 | column, rule.name)}, 70 | rule_{std::move(rule)} 71 | { 72 | } 73 | 74 | const Rule& rule() const noexcept { return rule_; } 75 | 76 | private: 77 | const Rule rule_; 78 | }; 79 | 80 | class RuleParser::DuplicateRule : public std::runtime_error { 81 | public: 82 | DuplicateRule(Rule&& duplicate, const Rule& other) 83 | : std::runtime_error{fmt::format( 84 | "{}:{}: Duplicated rule definition with name \"{}\", previously defined in {}:{}.", 85 | duplicate.line, duplicate.column, duplicate.name, other.line, other.column)}, 86 | duplicate_{std::move(duplicate)}, 87 | other_{other} 88 | { 89 | } 90 | 91 | const Rule& duplicate() const noexcept { return duplicate_; } 92 | const Rule& other() const noexcept { return other_; } 93 | 94 | private: 95 | const Rule duplicate_; 96 | const Rule& other_; 97 | }; 98 | 99 | class RuleParser::UnexpectedToken : public std::runtime_error { 100 | public: 101 | UnexpectedToken(unsigned offset, char actual, std::string expected) 102 | : std::runtime_error{fmt::format("{}: Unexpected token {}, expected <{}> instead.", offset, actual, 103 | expected)}, 104 | offset_{offset}, 105 | actual_{std::move(actual)}, 106 | expected_{std::move(expected)} 107 | { 108 | } 109 | 110 | unsigned offset() const noexcept { return offset_; } 111 | char actual() const noexcept { return actual_; } 112 | const std::string& expected() const noexcept { return expected_; } 113 | 114 | private: 115 | unsigned offset_; 116 | char actual_; 117 | std::string expected_; 118 | }; 119 | 120 | class RuleParser::UnexpectedChar : public std::runtime_error { 121 | public: 122 | UnexpectedChar(unsigned int line, unsigned int column, char actual, char expected) 123 | : std::runtime_error{fmt::format("[{}:{}] Unexpected char {}, expected {} instead.", line, column, 124 | quoted(actual), quoted(expected))}, 125 | line_{line}, 126 | column_{column}, 127 | actual_{actual}, 128 | expected_{expected} 129 | { 130 | 
} 131 | 132 | unsigned int line() const noexcept { return line_; } 133 | unsigned int column() const noexcept { return column_; } 134 | char actual() const noexcept { return actual_; } 135 | char expected() const noexcept { return expected_; } 136 | 137 | private: 138 | static std::string quoted(char ch) 139 | { 140 | if (ch < 0) 141 | return "<>"; 142 | else 143 | return fmt::format("'{}'", ch); 144 | } 145 | 146 | private: 147 | unsigned int line_; 148 | unsigned int column_; 149 | char actual_; 150 | char expected_; 151 | }; 152 | 153 | class RuleParser::InvalidRuleOption : public std::runtime_error { 154 | public: 155 | InvalidRuleOption(unsigned offset, std::string option) 156 | : std::runtime_error{fmt::format("{}: Invalid rule option \"{}\".", offset, option)}, 157 | offset_{offset}, 158 | option_{option} 159 | { 160 | } 161 | 162 | unsigned offset() const noexcept { return offset_; } 163 | const std::string& option() const noexcept { return option_; } 164 | 165 | private: 166 | unsigned offset_; 167 | std::string option_; 168 | }; 169 | 170 | } // namespace klex::regular 171 | -------------------------------------------------------------------------------- /src/klex/regular/DFA.cpp: -------------------------------------------------------------------------------- 1 | // This file is part of the "klex" project, http://github.com/christianparpart/klex> 2 | // (c) 2018 Christian Parpart 3 | // 4 | // Licensed under the MIT License (the "License"); you may not use this 5 | // file except in compliance with the License. You may obtain a copy of 6 | // the License at: http://opensource.org/licenses/MIT 7 | 8 | #include 9 | #include 10 | #include 11 | 12 | #include 13 | #include 14 | #include 15 | #include 16 | #include 17 | 18 | #if 0 19 | #define DEBUG(msg, ...) \ 20 | do \ 21 | { \ 22 | cerr << fmt::format(msg, __VA_ARGS__) << "\n"; \ 23 | } while (0) 24 | #else 25 | #define DEBUG(msg, ...) 
\ 26 | do \ 27 | { \ 28 | } while (0) 29 | #endif 30 | 31 | using namespace std; 32 | 33 | namespace klex::regular 34 | { 35 | 36 | Alphabet DFA::alphabet() const 37 | { 38 | Alphabet alphabet; 39 | for (const State& state: states_) 40 | for (const pair& t: state.transitions) 41 | alphabet.insert(t.first); 42 | 43 | return alphabet; 44 | } 45 | 46 | vector DFA::acceptStates() const 47 | { 48 | vector states; 49 | states.reserve(acceptTags_.size()); 50 | for_each(begin(acceptTags_), end(acceptTags_), [&](const pair& s) { 51 | states.push_back(s.first); 52 | }); 53 | return states; 54 | } 55 | 56 | // -------------------------------------------------------------------------- 57 | 58 | void DFA::createStates(size_t count) 59 | { 60 | states_.resize(states_.size() + count); 61 | } 62 | 63 | void DFA::setInitialState(StateId s) 64 | { 65 | // TODO: assert (s is having no predecessors) 66 | initialState_ = s; 67 | } 68 | 69 | void DFA::setTransition(StateId from, Symbol symbol, StateId to) 70 | { 71 | // if (auto i = states_[from].transitions.find(symbol); i != states_[from].transitions.end()) 72 | // fmt::print("overwriting transition! 
{} --({})--> {} (new: {})\n", from, prettySymbol(symbol), 73 | // i->second, to); 74 | 75 | // XXX assert(s.transitions.find(symbol) == s.transitions.end()); 76 | states_[from].transitions[symbol] = to; 77 | } 78 | 79 | void DFA::removeTransition(StateId from, Symbol symbol) 80 | { 81 | State& s = states_[from]; 82 | if (auto i = s.transitions.find(symbol); i != s.transitions.end()) 83 | s.transitions.erase(i); 84 | } 85 | 86 | StateId DFA::append(DFA&& other, StateId q0) 87 | { 88 | assert(other.initialState() == 0); 89 | 90 | other.prepareStateIds(states_.size(), q0); 91 | 92 | states_.reserve(size() + other.size() - 1); 93 | states_[q0] = other.states_[0]; 94 | states_.insert(states_.end(), next(other.states_.begin()), other.states_.end()); 95 | backtrackStates_.insert(other.backtrackStates_.begin(), other.backtrackStates_.end()); 96 | acceptTags_.insert(other.acceptTags_.begin(), other.acceptTags_.end()); 97 | 98 | return other.initialState(); 99 | } 100 | 101 | void DFA::prepareStateIds(StateId baseId, StateId q0) 102 | { 103 | // adjust transition state IDs 104 | // traverse through each state's transition set 105 | // traverse through each transition in the transition set 106 | // traverse through each element and add BASE_ID 107 | 108 | auto transformId = [baseId, q0, this](StateId s) -> StateId { 109 | // we subtract 1, because we already have a slot for q0 elsewhere (pre-allocated) 110 | return s != initialState_ ? 
baseId + s - 1 : q0; 111 | }; 112 | 113 | // for each state's transitions 114 | for (State& state: states_) 115 | for (pair& t: state.transitions) 116 | t.second = transformId(t.second); 117 | 118 | AcceptMap remapped; 119 | for (auto& a: acceptTags_) 120 | remapped[transformId(a.first)] = a.second; 121 | acceptTags_ = move(remapped); 122 | 123 | BacktrackingMap backtracking; 124 | for (const auto& bt: backtrackStates_) 125 | backtracking[transformId(bt.first)] = transformId(bt.second); 126 | backtrackStates_ = move(backtracking); 127 | 128 | initialState_ = q0; 129 | } 130 | 131 | void DFA::visit(DotVisitor& v) const 132 | { 133 | v.start(initialState_); 134 | 135 | // STATE: initial 136 | v.visitNode(initialState_, true, isAccepting(initialState_)); 137 | 138 | // STATE: accepting 139 | for (StateId s: acceptStates()) 140 | if (s != initialState_) 141 | v.visitNode(s, false, true); 142 | 143 | // STATE: any other (bound is size(), so the last state is visited too and an empty DFA is fine) 144 | for (StateId s = 0, sE = size(); s != sE; ++s) 145 | if (s != initialState_ && !isAccepting(s)) 146 | v.visitNode(s, false, false); 147 | 148 | // TRANSITIONS 149 | for (StateId s = 0, sE = size(); s != sE; ++s) 150 | { 151 | const TransitionMap& T = states_[s].transitions; 152 | for_each(T.begin(), T.end(), [&](const auto& t) { v.visitEdge(s, t.second, t.first); }); 153 | for_each(T.begin(), T.end(), [&](const auto& t) { v.endVisitEdge(s, t.second); }); 154 | } 155 | v.end(); 156 | } 157 | 158 | } // namespace klex::regular 159 | -------------------------------------------------------------------------------- /examples/mathexpr.cpp: -------------------------------------------------------------------------------- 1 | // This file is part of the "klex" project, http://github.com/christianparpart/klex> 2 | // (c) 2018 Christian Parpart 3 | // 4 | // Licensed under the MIT License (the "License"); you may not use this 5 | // file except in compliance with the License. 
You may obtain a copy of 6 | // the License at: http://opensource.org/licenses/MIT 7 | 8 | #include 9 | #include 10 | #include 11 | #include 12 | #include 13 | 14 | #include 15 | 16 | #include 17 | #include 18 | #include 19 | #include 20 | #include 21 | #include 22 | 23 | enum class Token 24 | { 25 | Eof = 1, 26 | Plus, 27 | Minus, 28 | Mul, 29 | Div, 30 | RndOpen, 31 | RndClose, 32 | Number, 33 | INVALID 34 | }; 35 | std::string RULES = R"( 36 | Space(ignore) ::= [\s\t]+ 37 | Eof ::= <> 38 | Plus ::= "+" 39 | Minus ::= "-" 40 | Mul ::= "*" 41 | Div ::= "/" 42 | RndOpen ::= "(" 43 | RndClose ::= \) 44 | Number ::= ([0-9]+|[0-9]{1,3}(_[0-9]{3})*) 45 | INVALID ::= . 46 | )"; 47 | 48 | using Lexable = klex::regular::Lexable; 49 | using Lexer = Lexable::iterator; 50 | using Number = long long int; 51 | 52 | std::string_view to_string(Token t) 53 | { 54 | switch (t) 55 | { 56 | case Token::INVALID: return "<>"; 57 | case Token::Eof: return "<>"; 58 | case Token::RndOpen: return "'('"; 59 | case Token::RndClose: return "')'"; 60 | case Token::Plus: return "'+'"; 61 | case Token::Minus: return "'-'"; 62 | case Token::Mul: return "'*'"; 63 | case Token::Div: return "'/'"; 64 | case Token::Number: return "<>"; 65 | default: abort(); 66 | } 67 | } 68 | 69 | namespace fmt 70 | { 71 | template <> 72 | struct formatter: formatter 73 | { 74 | template 75 | auto format(Token v, FormatContext& ctx) 76 | { 77 | return formatter::format(to_string(v), ctx); 78 | } 79 | }; 80 | } // namespace fmt 81 | 82 | Number expr(Lexer&); 83 | 84 | void consume(Lexer& lexer, Token t) 85 | { 86 | if (lexer.token() != t) 87 | throw std::runtime_error { fmt::format( 88 | "Unexpected token {}. 
Expected {} instead.", lexer.token(), t) }; 89 | ++lexer; 90 | } 91 | 92 | auto primaryExpr(Lexer& lexer) 93 | { 94 | switch (lexer.token()) 95 | { 96 | case Token::Number: { 97 | std::string s; 98 | std::for_each(begin(literal(lexer)), end(literal(lexer)), [&](char ch) { 99 | if (ch != '_') 100 | s += ch; 101 | }); 102 | auto y = Number { std::stoll(s) }; // stoll: Number is long long; stoi would throw for literals beyond int 103 | ++lexer; 104 | return y; 105 | } 106 | case Token::Minus: return -1 * primaryExpr(++lexer); 107 | case Token::RndOpen: { 108 | auto y = expr(++lexer); 109 | consume(lexer, Token::RndClose); 110 | return y; 111 | } 112 | default: 113 | throw std::runtime_error { fmt::format( 114 | "Unexpected token {}. Expected primary expression instead.", lexer.token()) }; 115 | } 116 | } 117 | 118 | auto mulExpr(Lexer& lexer) 119 | { 120 | auto lhs = primaryExpr(lexer); 121 | for (;;) 122 | { 123 | switch (lexer.token()) 124 | { 125 | case Token::Mul: lhs = lhs * primaryExpr(++lexer); break; 126 | case Token::Div: lhs = lhs / primaryExpr(++lexer); break; 127 | default: return lhs; 128 | } 129 | } 130 | } 131 | 132 | auto addExpr(Lexer& lexer) 133 | { 134 | auto lhs = mulExpr(lexer); 135 | for (;;) 136 | { 137 | switch (lexer.token()) 138 | { 139 | case Token::Plus: lhs = lhs + mulExpr(++lexer); break; 140 | case Token::Minus: lhs = lhs - mulExpr(++lexer); break; 141 | default: return lhs; 142 | } 143 | } 144 | } 145 | 146 | Number expr(Lexer& lexer) 147 | { 148 | return addExpr(lexer); 149 | } 150 | 151 | Number parseExpr(Lexable&& lexer) 152 | { 153 | auto it = begin(lexer); 154 | auto n = expr(it); 155 | consume(it, Token::Eof); 156 | return n; 157 | } 158 | 159 | int main(int argc, const char* argv[]) 160 | { 161 | auto flags = klex::util::Flags {}; 162 | flags.defineBool("dfa", 'x', "Dumps DFA dotfile and exits."); 163 | flags.enableParameters("EXPRESSION", "Mathematical expression to calculate"); 164 | flags.parse(argc, argv); 165 | 166 | auto cc = klex::regular::Compiler {}; 167 | cc.parse(std::make_unique(RULES)); 
168 | 169 | if (flags.getBool("dfa")) 170 | { 171 | auto writer = klex::regular::DotWriter { std::cout, "n" }; 172 | auto dfa = cc.compileMinimalDFA(); 173 | dfa.visit(writer); 174 | return EXIT_SUCCESS; 175 | } 176 | 177 | auto input = std::string { argc == 1 ? std::string("2+3*4") : flags.parameters()[0] }; 178 | auto ld = cc.compile(); 179 | 180 | auto n = parseExpr(Lexable { ld, std::make_unique(input) }); 181 | std::cerr << fmt::format("{} = {}\n", input, n); 182 | 183 | return EXIT_SUCCESS; 184 | } 185 | -------------------------------------------------------------------------------- /src/klex/regular/Symbols.h: -------------------------------------------------------------------------------- 1 | // This file is part of the "klex" project, http://github.com/christianparpart/klex> 2 | // (c) 2018 Christian Parpart 3 | // 4 | // Licensed under the MIT License (the "License"); you may not use this 5 | // file except in compliance with the License. You may obtain a copy of 6 | // the License at: http://opensource.org/licenses/MIT 7 | #pragma once 8 | 9 | #include 10 | 11 | #include 12 | #include 13 | #include 14 | #include 15 | #include 16 | #include 17 | #include 18 | #include 19 | #include 20 | 21 | namespace klex::regular { 22 | 23 | //! 
input symbol as used for transitions 24 | using Symbol = int; 25 | 26 | std::string prettySymbol(Symbol input); 27 | std::string prettyCharRange(Symbol ymin, Symbol ymax); 28 | std::string groupCharacterClassRanges(const std::vector& syms); 29 | std::string groupCharacterClassRanges(std::vector syms); 30 | 31 | // new way of wrapping up Symbols 32 | struct Symbols { 33 | constexpr static Symbol Epsilon = -1; 34 | constexpr static Symbol Error = -2; 35 | constexpr static Symbol BeginOfLine = -3; 36 | constexpr static Symbol EndOfLine = -4; 37 | constexpr static Symbol EndOfFile = -5; 38 | constexpr static Symbol Character(char ch) { return Symbol(ch); } 39 | 40 | constexpr static bool isSpecial(Symbol s) 41 | { 42 | switch (s) 43 | { 44 | case Symbols::EndOfFile: 45 | case Symbols::EndOfLine: 46 | case Symbols::BeginOfLine: 47 | case Symbols::Epsilon: 48 | case Symbols::Error: 49 | return true; 50 | default: 51 | return false; 52 | } 53 | } 54 | }; 55 | 56 | /** 57 | * Represents a set of symbols. 58 | */ 59 | class SymbolSet { 60 | public: 61 | enum DotMode { Dot }; 62 | 63 | explicit SymbolSet(DotMode); 64 | SymbolSet() : set_(256, false), size_{0}, hash_{2166136261} {} 65 | 66 | explicit SymbolSet(std::initializer_list list) : SymbolSet() 67 | { 68 | std::for_each(list.begin(), list.end(), [this](Symbol s) { insert(s); }); 69 | } 70 | 71 | bool empty() const noexcept { return size_ == 0; } 72 | size_t size() const noexcept { return size_; } 73 | 74 | //! Transforms into the complement set. 75 | void complement(); 76 | 77 | //! Inserts given Symbol @p s into this set. 78 | void insert(Symbol s) 79 | { 80 | if (!contains(s)) 81 | { 82 | set_[s] = true; 83 | hash_ = (hash_ * 16777619) ^ s; 84 | size_++; 85 | } 86 | } 87 | 88 | //! Inserts a range of Simples between [a, b]. 89 | void insert(const std::pair& range) 90 | { 91 | for (Symbol s = range.first; s <= range.second; ++s) 92 | { 93 | insert(s); 94 | } 95 | } 96 | 97 | //! 
@returns whether or not given Symbol @p s is in this set. 98 | bool contains(Symbol s) const 99 | { 100 | assert(s >= 0 && s <= 255 && "Only ASCII allowed."); 101 | return set_[(size_t) s]; 102 | } 103 | 104 | //! Tests whether or not this SymbolSet can be represented as dot (.), i.e. all but \n. 105 | bool isDot() const noexcept; 106 | 107 | //! @returns a human readable representation of this set 108 | std::string to_string() const; 109 | 110 | bool operator==(const SymbolSet& rhs) const noexcept { return hash_ == rhs.hash_ && set_ == rhs.set_; } 111 | bool operator!=(const SymbolSet& rhs) const noexcept { return !(*this == rhs); } 112 | 113 | class const_iterator { // {{{ 114 | public: 115 | const_iterator(std::vector::const_iterator beg, std::vector::const_iterator end, size_t n) 116 | : beg_{std::move(beg)}, end_{std::move(end)}, offset_{n} 117 | { 118 | while (beg_ != end_ && !*beg_) 119 | { 120 | ++beg_; 121 | ++offset_; 122 | } 123 | } 124 | 125 | Symbol operator*() const { return static_cast(offset_); } 126 | 127 | const_iterator& operator++(int) 128 | { 129 | do 130 | { 131 | ++beg_; 132 | ++offset_; 133 | } while (beg_ != end_ && !*beg_); 134 | return *this; 135 | } 136 | 137 | const_iterator& operator++() 138 | { 139 | do 140 | { 141 | beg_++; 142 | offset_++; 143 | } while (beg_ != end_ && !*beg_); 144 | return *this; 145 | } 146 | 147 | bool operator==(const const_iterator& rhs) const noexcept { return beg_ == rhs.beg_; } 148 | bool operator!=(const const_iterator& rhs) const noexcept { return beg_ != rhs.beg_; } 149 | 150 | private: 151 | std::vector::const_iterator beg_; 152 | std::vector::const_iterator end_; 153 | size_t offset_; 154 | }; // }}} 155 | 156 | const_iterator begin() const { return const_iterator(set_.begin(), set_.end(), 0); } 157 | const_iterator end() const { return const_iterator(set_.end(), set_.end(), set_.size()); } 158 | 159 | size_t hash() const noexcept { return hash_; } 160 | 161 | private: 162 | void recalculateHash(); 163 
| 164 | private: 165 | // XXX we chose vector as it is an optimized bit vector 166 | std::vector set_; 167 | size_t size_; 168 | size_t hash_; 169 | }; 170 | 171 | } // namespace klex::regular 172 | 173 | namespace fmt { 174 | template <> 175 | struct formatter { 176 | template 177 | constexpr auto parse(ParseContext& ctx) 178 | { 179 | return ctx.begin(); 180 | } 181 | 182 | template 183 | constexpr auto format(const klex::regular::SymbolSet& v, FormatContext& ctx) 184 | { 185 | return format_to(ctx.out(), "{}", v.to_string()); 186 | } 187 | }; 188 | } // namespace fmt 189 | 190 | namespace std { 191 | template <> 192 | struct hash { 193 | size_t operator()(const klex::regular::SymbolSet& set) const { return set.hash(); } 194 | }; 195 | } // namespace std 196 | -------------------------------------------------------------------------------- /src/klex/cfg/ll/Analyzer_test.cpp: -------------------------------------------------------------------------------- 1 | // This file is part of the "klex" project, http://github.com/christianparpart/klex> 2 | // (c) 2018 Christian Parpart 3 | // 4 | // Licensed under the MIT License (the "License"); you may not use this 5 | // file except in compliance with the License. 
You may obtain a copy of 6 | // the License at: http://opensource.org/licenses/MIT 7 | 8 | #include 9 | #include 10 | #include 11 | #include 12 | #include 13 | #include 14 | #include 15 | #include 16 | #include 17 | #include 18 | 19 | #include 20 | 21 | using namespace std; 22 | using namespace klex; 23 | using namespace klex::cfg; 24 | using namespace klex::cfg::ll; 25 | using namespace klex::util::literals; 26 | 27 | /* NOTE(review): balancedParentheses is not referenced by any test in this file. */ const string balancedParentheses = "A ::= '(' A ')' | '(' ')'"; 28 | 29 | /* Parses an expression/term/factor grammar, builds its syntax table and checks that analyzing "2 + 3" reports no failures and yields a value. */ TEST(cfg_ll_Analyzer, ETF) 30 | { 31 | ConsoleReport report; 32 | Grammar grammar = GrammarParser(R"(`token { 33 | ` Spacing(ignore) ::= [\s\t\n]+ 34 | ` Number ::= [0-9]+ 35 | `} 36 | `Start ::= Expr; 37 | `Expr ::= Term Expr_; 38 | `Expr_ ::= '+' Term Expr_ 39 | ` | ; 40 | `Term ::= Factor Term_; 41 | `Term_ ::= '*' Factor Term_ 42 | ` | ; 43 | `Factor ::= Number 44 | ` | '(' Expr ')' 45 | ` ; 46 | `)"_multiline, 47 | &report) 48 | .parse(); 49 | 50 | ASSERT_FALSE(report.containsFailures()); 51 | grammar.finalize(); 52 | log("GRAMMAR:"); 53 | log(grammar.dump()); 54 | 55 | SyntaxTable st = SyntaxTable::construct(grammar); 56 | 57 | log("SYNTAX TABLE:"); 58 | log(st.dump(grammar)); 59 | 60 | Analyzer parser(st, &report, "2 + 3"); 61 | 62 | const optional result = parser.analyze(); 63 | 64 | ASSERT_FALSE(report.containsFailures()); 65 | ASSERT_TRUE(result.has_value()); 66 | } 67 | 68 | /* Grammar with {add} and {num} actions: the handler selects the computation by action name, and analyzing "2 + 3" must yield 5. */ TEST(cfg_ll_Analyzer, action1) 69 | { 70 | BufferedReport report; 71 | Grammar grammar = GrammarParser(R"(` 72 | `token { 73 | ` Spacing(ignore) ::= [\s\t\n]+ 74 | ` Number ::= [0-9]+ 75 | `} 76 | `Start ::= F '+' F {add}; 77 | `F ::= Number {num}; 78 | `)"_multiline, 79 | &report) 80 | .parse(); 81 | ASSERT_FALSE(report.containsFailures()); 82 | grammar.finalize(); 83 | 84 | log("GRAMMAR:"); 85 | log(grammar.dump()); 86 | 87 | SyntaxTable st = SyntaxTable::construct(grammar); 88 | 89 | log("SYNTAX TABLE:"); 90 | log(st.dump(grammar)); 91 | 92 | deque> valueStack; 93 |
valueStack.emplace_back(vector()); /* NOTE(review): valueStack is never read by the handler below, which takes its operands from analyzer.semanticValue(). */ 94 | const auto actionHandler = [&](int id, const Analyzer& analyzer) -> int { 95 | log(fmt::format("-> run action({}): {}", id, analyzer.actionName(id))); 96 | if (analyzer.actionName(id) == "add") 97 | // S = F '+' F <> {add} 98 | return analyzer.semanticValue(-2) + analyzer.semanticValue(-4); 99 | else if (analyzer.actionName(id) == "num") 100 | return stoi(analyzer.lastLiteral()); // return valueStack[-1] 101 | else 102 | { 103 | log("!!! UNKNOWN ACTION !!!"); 104 | return -1; 105 | } 106 | }; 107 | 108 | Analyzer parser(st, &report, "2 + 3", actionHandler); 109 | optional result = parser.analyze(); 110 | 111 | ASSERT_TRUE(result.has_value()); 112 | ASSERT_EQ(5, *result); 113 | } 114 | 115 | /* ETF grammar with {add}, {mul} and {num} actions dispatched through a map keyed by action id. Only checks that "2 + 3 * 4" analyzes without failures and yields a value; the check of the value itself is still a TODO. */ TEST(cfg_ll_Analyzer, ETF_with_actions) 116 | { 117 | ConsoleReport report; 118 | Grammar grammar = GrammarParser( 119 | R"(`token { 120 | ` Spacing(ignore) ::= [\s\t\n]+ 121 | ` Number ::= [0-9]+ 122 | `} 123 | `Start ::= Expr; 124 | `Expr ::= Term Expr_ 125 | ` ; 126 | `Expr_ ::= '+' Term Expr_ {add} 127 | ` | 128 | ` ; 129 | `Term ::= Factor Term_ 130 | ` ; 131 | `Term_ ::= '*' Factor Term_ {mul} 132 | ` | 133 | ` ; 134 | `Factor ::= Number {num} 135 | ` | '(' Expr ')' 136 | ` ; 137 | `)"_multiline, 138 | &report) 139 | .parse(); 140 | 141 | ASSERT_FALSE(report.containsFailures()); 142 | grammar.finalize(); 143 | log("GRAMMAR:"); 144 | log(grammar.dump()); 145 | 146 | SyntaxTable st = SyntaxTable::construct(grammar); 147 | log("SYNTAX TABLE:"); 148 | log(st.dump(grammar)); 149 | 150 | /* NOTE(review): this local is not used anywhere in the test. */ stack stack; 151 | /* Maps each action id (looked up by name in the syntax table) to the callback computing its value. */ const map&)>> actionMap { 152 | { st.actionId("num"), 153 | [&](const Analyzer& analyzer) -> int { 154 | return stoi(analyzer.lastLiteral()); 155 | } }, 156 | { st.actionId("add"), 157 | [&](const Analyzer& analyzer) -> int { 158 | return analyzer.semanticValue(-2) + analyzer.semanticValue(-4); 159 | } }, 160 | { st.actionId("mul"), 161 | [&](const Analyzer& analyzer) -> int { 162 | return analyzer.semanticValue(-2) *
analyzer.semanticValue(-4); 163 | } }, 164 | }; 165 | 166 | /* Runs the callback registered for the given action id; an id without a callback trips the assert (and yields 0 when asserts are compiled out). */ const auto actionHandler = [&](int id, const Analyzer& analyzer) -> int { 167 | if (const auto x = actionMap.find(id); x != actionMap.end()) 168 | { 169 | log(fmt::format("-> run action({}): {}", id, analyzer.actionName(id))); 170 | return x->second(analyzer); 171 | } 172 | assert(!"woot"); 173 | return 0; 174 | }; 175 | 176 | ASSERT_FALSE(report.containsFailures()); 177 | Analyzer parser(st, &report, "2 + 3 * 4", actionHandler); 178 | optional result = parser.analyze(); 179 | 180 | EXPECT_FALSE(report.containsFailures()); 181 | ASSERT_TRUE(result.has_value()); 182 | // TODO EXPECT_EQ(14, *result); 183 | } 184 | 185 | // vim:ts=4:sw=4:noet 186 | -------------------------------------------------------------------------------- /src/klex/util/Flags.h: -------------------------------------------------------------------------------- 1 | // This file is part of the "x0" project, // http://github.com/christianparpart/x0> 2 | // (c) 2009-2018 Christian Parpart 3 | // 4 | // Licensed under the MIT License (the "License"); you may not use this 5 | // file except in compliance with the License.
You may obtain a copy of 6 | // the License at: http://opensource.org/licenses/MIT 7 | #pragma once 8 | 9 | #include 10 | #include 11 | #include 12 | #include 13 | #include 14 | #include 15 | #include 16 | #include 17 | #include 18 | 19 | namespace klex::util { 20 | 21 | /* Command line flags: flags are declared with the defineString, defineNumber, defineFloat and defineBool methods, read from the command line with parse() or tryParse(), and queried with getString, getNumber, getFloat and getBool. */ class Flags { 22 | public: 23 | /* Value type a flag is declared with. */ enum class FlagType { 24 | String, 25 | Number, 26 | Float, 27 | Bool, 28 | }; 29 | 30 | // FlagPassingStyle 31 | enum FlagStyle { ShortSwitch, LongSwitch, ShortWithValue, LongWithValue, UnnamedParameter }; 32 | 33 | /* Error kinds; also exposed as std::error_code through make_error_code() declared at the end of this header. */ enum class ErrorCode { 34 | TypeMismatch, 35 | UnknownOption, 36 | MissingOption, 37 | MissingOptionValue, 38 | NotFound, 39 | }; 40 | 41 | /* Exception type carrying an ErrorCode together with an argument string. */ class Error : public std::runtime_error { 42 | public: 43 | Error(ErrorCode code, std::string arg); 44 | 45 | ErrorCode code() const noexcept { return code_; } 46 | const std::string& arg() const noexcept { return arg_; } 47 | 48 | private: 49 | ErrorCode code_; 50 | std::string arg_; 51 | }; 52 | 53 | struct FlagDef; 54 | class Flag; 55 | 56 | Flags(); 57 | 58 | /* Typed accessors for a flag's value, addressed by flag name. */ std::string getString(const std::string& flag) const; 59 | std::string asString(const std::string& flag) const; 60 | long int getNumber(const std::string& flag) const; 61 | float getFloat(const std::string& flag) const; 62 | bool getBool(const std::string& flag) const; 63 | 64 | /* NOTE(review): presumably the non-option arguments accepted after enableParameters() was called; confirm against the implementation. */ const std::vector& parameters() const; 65 | void setParameters(const std::vector& v); 66 | 67 | /* Number of entries in set_. */ size_t size() const { return set_.size(); } 68 | 69 | std::string to_s() const; 70 | 71 | void set(const Flag& flag); 72 | void set(const std::string& opt, const std::string& val, FlagStyle fs, FlagType ft); 73 | bool isSet(const std::string& flag) const; 74 | 75 | /* The define methods return Flags&, so declarations can be chained. The default value and the callback are optional. */ Flags& defineString(const std::string& longOpt, char shortOpt, const std::string& valuePlaceholder, 76 | const std::string& helpText, std::optional defaultValue = std::nullopt, 77 | std::function callback = nullptr); 78 | 79 | Flags& defineNumber(const std::string& longOpt, char shortOpt, const std::string& valuePlaceholder, 80
| const std::string& helpText, std::optional defaultValue = std::nullopt, 81 | std::function callback = nullptr); 82 | 83 | Flags& defineFloat(const std::string& longOpt, char shortOpt, const std::string& valuePlaceholder, 84 | const std::string& helpText, std::optional defaultValue = std::nullopt, 85 | std::function callback = nullptr); 86 | 87 | Flags& defineBool(const std::string& longOpt, char shortOpt, const std::string& helpText, 88 | std::function callback = nullptr); 89 | 90 | Flags& enableParameters(const std::string& valuePlaceholder, const std::string& helpText); 91 | 92 | /* Convenience overload: forwards to the three-argument helpText() with width 78 and help text offset 30. */ std::string helpText(std::string_view const& header = "") const { return helpText(header, 78, 30); } 93 | std::string helpText(std::string_view const& header, size_t width, size_t helpTextOffset) const; 94 | 95 | /* Look up a flag definition by long or by short option; the result is a pointer, so callers should expect nullptr (NOTE(review): presumably returned when nothing matches; confirm against the implementation). */ const FlagDef* findDef(const std::string& longOption) const; 96 | const FlagDef* findDef(char shortOption) const; 97 | 98 | void parse(int argc, const char* argv[]); 99 | void parse(const std::vector& args); 100 | 101 | // Attempts to parse given arguments and returns an error code in case of parsing errors instead 102 | // of throwing. 103 | std::error_code tryParse(const std::vector& args); 104 | 105 | private: 106 | Flags& define(const std::string& longOpt, char shortOpt, bool required, FlagType type, 107 | const std::string& helpText, const std::string& valuePlaceholder, 108 | const std::optional& defaultValue, 109 | std::function callback); 110 | 111 | private: 112 | std::list flagDefs_; 113 | bool parametersEnabled_; // non-option parameters enabled?
114 | std::string parametersPlaceholder_; 115 | std::string parametersHelpText_; 116 | 117 | typedef std::pair FlagValue; 118 | std::unordered_map set_; 119 | std::vector raw_; 120 | }; 121 | 122 | /* Definition record of one flag: its type, long and short option, whether it is required, placeholder and help text, optional default value and callback. */ struct Flags::FlagDef { 123 | FlagType type; 124 | std::string longOption; 125 | char shortOption; 126 | bool required; 127 | std::string valuePlaceholder; 128 | std::string helpText; 129 | std::optional defaultValue; 130 | std::function callback; 131 | 132 | std::string makeHelpText(size_t width, size_t helpTextOffset) const; 133 | }; 134 | 135 | /* A single flag value: its name and value string together with the flag type and the passing style. NOTE(review): the constructors taking a long option name are not explicit, unlike Flag(char), so a std::string converts implicitly. */ class Flags::Flag { 136 | public: 137 | Flag(const std::string& opt, const std::string& val, FlagStyle fs, FlagType ft); 138 | 139 | explicit Flag(char shortOpt); 140 | Flag(char shortOpt, const std::string& val); 141 | Flag(const std::string& longOpt); 142 | Flag(const std::string& longOpt, const std::string& val); 143 | 144 | FlagType type() const { return type_; } 145 | const std::string& name() const { return name_; } 146 | const std::string& value() const { return value_; } 147 | 148 | private: 149 | FlagType type_; 150 | FlagStyle style_; 151 | std::string name_; 152 | std::string value_; 153 | }; 154 | 155 | /* std::error_category implementation with a singleton-style accessor get(); NOTE(review): presumably the category used by make_error_code() below; confirm in the implementation file. */ class FlagsErrorCategory : public std::error_category { 156 | public: 157 | static FlagsErrorCategory& get(); 158 | 159 | const char* name() const noexcept override; 160 | std::string message(int ec) const override; 161 | }; 162 | 163 | std::error_code make_error_code(Flags::ErrorCode errc); 164 | 165 | } // namespace klex::util 166 | 167 | namespace std { 168 | template <> 169 | struct is_error_code_enum : public std::true_type { 170 | }; 171 | } // namespace std 172 | --------------------------------------------------------------------------------