├── .dockerignore
├── examples
    ├── demo.flow
    ├── flowlexer.cpp
    ├── cxx.klex
    ├── wordcount.cpp
    ├── flow.klax
    ├── flow.klex
    └── mathexpr.cpp
├── test
    ├── good.klex
    ├── overshadowed.klex
    └── multiple_conditions.klex
├── .gitignore
├── .gitmodules
├── .github
    └── FUNDING.yml
├── klex.pc.cmake
├── src
    └── klex
    │   ├── sysconfig.h.cmake
    │   ├── klex_test.cpp
    │   ├── regular
    │       ├── State_test.cpp
    │       ├── MultiDFA.h
    │       ├── DotVisitor.h
    │       ├── State.cpp
    │       ├── MultiDFA.cpp
    │       ├── DFABuilder_test.cpp
    │       ├── State.h
    │       ├── TransitionMap-inl.h
    │       ├── Alphabet.cpp
    │       ├── Alphabet.h
    │       ├── NFABuilder.h
    │       ├── DotWriter_test.cpp
    │       ├── DFAMinimizer.h
    │       ├── TransitionMap.h
    │       ├── DFABuilder.h
    │       ├── RegExpr.h
    │       ├── DotWriter.h
    │       ├── LexerDef.h
    │       ├── NFA_test.cpp
    │       ├── Symbols_test.cpp
    │       ├── RegExprParser.h
    │       ├── Compiler.h
    │       ├── DotWriter.cpp
    │       ├── NFABuilder.cpp
    │       ├── Rule.h
    │       ├── Symbols.cpp
    │       ├── DFA.h
    │       ├── RegExpr.cpp
    │       ├── RuleParser.h
    │       ├── DFA.cpp
    │       └── Symbols.h
    │   ├── cfg
    │       ├── Grammar-inl.h
    │       ├── GrammarValidator.h
    │       ├── GrammarValidator.cpp
    │       ├── GrammarParser.h
    │       ├── LeftRecursion.h
    │       ├── ll
    │       │   ├── SyntaxTable_test.cpp
    │       │   ├── README.md
    │       │   ├── SyntaxTable.h
    │       │   ├── Analyzer.h
    │       │   └── Analyzer_test.cpp
    │       ├── GrammarLexer_test.cpp
    │       ├── GrammarLexer.h
    │       ├── LeftRecursion_test.cpp
    │       ├── GrammarLexer.cpp
    │       ├── LeftRecursion.cpp
    │       └── GrammarParser_test.cpp
    │   ├── util
    │       ├── overloaded.h
    │       ├── IntVector.h
    │       ├── literals.h
    │       ├── UnboxedRange.h
    │       ├── iterator.h
    │       ├── AnsiColor.h
    │       ├── iterator-detail.h
    │       ├── iterator_test.cpp
    │       └── Flags.h
    │   ├── SourceLocation.cpp
    │   ├── SourceLocation.h
    │   ├── CharStream.h
    │   └── Report.cpp
├── .editorconfig
├── cmake
    ├── ClangTidy.cmake
    ├── EnableCcache.cmake
    └── mklex.cmake
├── .travis.yml
├── klex.vim
├── TODO.md
├── appveyor.yml
├── klax.vim
├── Dockerfile
├── autogen.sh
├── .circleci
    └── config.yml
├── klex2flex.sh
├── .clang-format
├── .clang-tidy
├── cmdlineTests.sh
└── README.md


/.dockerignore:
--------------------------------------------------------------------------------
1 | /build
2 | 


--------------------------------------------------------------------------------
/examples/demo.flow:
--------------------------------------------------------------------------------
1 | handler main {
2 |   echo "hello";
3 | }
4 | 


--------------------------------------------------------------------------------
/test/good.klex:
--------------------------------------------------------------------------------
1 | # vim:syntax=klex
2 | 
3 | A ::= a
4 | B ::= b
5 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | /build
2 | /.vscode
3 | /.vs
4 | /.cache
5 | compile_commands.json
6 | 


--------------------------------------------------------------------------------
/test/overshadowed.klex:
--------------------------------------------------------------------------------
1 | # vim:syntax=klex
2 | 
3 | Ident ::= [a-z]+
4 | If    ::= aa
5 | 


--------------------------------------------------------------------------------
/.gitmodules:
--------------------------------------------------------------------------------
1 | [submodule "fmtlib"]
2 | 	path = 3rdparty/fmt
3 | 	url = https://github.com/fmtlib/fmt.git
4 | 


--------------------------------------------------------------------------------
/.github/FUNDING.yml:
--------------------------------------------------------------------------------
1 | # These are supported funding model platforms
2 | 
3 | github: [christianparpart]
4 | custom: ['https://paypal.me/ChristianParpart']
5 | 


--------------------------------------------------------------------------------
/test/multiple_conditions.klex:
--------------------------------------------------------------------------------
1 | # vim:syntax=klex
2 | 
3 | Main               ::= main
4 | <Cond>Cond         ::= cond
5 | <*>Spacing(ignore) ::= [\t\n\s]+
6 | 


--------------------------------------------------------------------------------
/klex.pc.cmake:
--------------------------------------------------------------------------------
1 | # klex library
2 | Name: klex
3 | Description: klex compiler frontend library
4 | Version: @klex_VERSION@
5 | # Requires:
6 | # Conflicts: 
7 | Libs: -L@CMAKE_INSTALL_PREFIX@/lib -lklex @LDFLAGS@
8 | Cflags: -I@CMAKE_INSTALL_PREFIX@/include @CXXFLAGS@
9 | 


--------------------------------------------------------------------------------
/src/klex/sysconfig.h.cmake:
--------------------------------------------------------------------------------
1 | // This file is part of the "klex" project, http://github.com/christianparpart/klex>
2 | //   (c) 2018 Christian Parpart <christian@parpart.family>
3 | //
4 | // Licensed under the MIT License (the "License"); you may not use this
5 | // file except in compliance with the License. You may obtain a copy of
6 | // the License at: http://opensource.org/licenses/MIT
7 | 
8 | #pragma once
9 | 


--------------------------------------------------------------------------------
/.editorconfig:
--------------------------------------------------------------------------------
 1 | root = true
 2 | 
 3 | [*]
 4 | indent_style = space
 5 | indent_size = 4
 6 | insert_final_newline = true
 7 | end_of_line = lf
 8 | charset = utf-8
 9 | trim_trailing_whitespace = true
10 | 
11 | [*.md]
12 | indent_style = space
13 | indent_size = 2
14 | 
15 | [*.xml]
16 | indent_style = space
17 | indent_size = 2
18 | 
19 | [*.yml]
20 | indent_style = space
21 | indent_size = 4
22 | 
23 | [.github/**/*.yml]
24 | indent_style = space
25 | indent_size = 2
26 | 


--------------------------------------------------------------------------------
/cmake/ClangTidy.cmake:
--------------------------------------------------------------------------------
 1 | 
 2 | option(ENABLE_TIDY "Enable clang-tidy [default: OFF]" OFF)  # opt-in switch; nothing below runs unless it is ON
 3 | if(ENABLE_TIDY)
 4 |     find_program(CLANG_TIDY_EXE
 5 |         NAMES clang-tidy-8 clang-tidy-7 clang-tidy-6.0 clang-tidy
 6 |         DOC "Path to clang-tidy executable")
 7 |     if(NOT CLANG_TIDY_EXE)
 8 |         message(STATUS "clang-tidy not found.")  # only a status message; configuration continues
 9 |     else()
10 |         message(STATUS "clang-tidy found: ${CLANG_TIDY_EXE}")
11 |         set(DO_CLANG_TIDY "${CLANG_TIDY_EXE}")  # set only when the tool was found; presumably read by the build scripts
12 |     endif()
13 | endif()
14 | 


--------------------------------------------------------------------------------
/src/klex/klex_test.cpp:
--------------------------------------------------------------------------------
 1 | // This file is part of the "klex" project, http://github.com/christianparpart/klex>
 2 | //   (c) 2009-2018 Christian Parpart <christian@parpart.family>
 3 | //
 4 | // Licensed under the MIT License (the "License"); you may not use this
 5 | // file except in compliance with the License. You may obtain a copy of
 6 | // the License at: http://opensource.org/licenses/MIT
 7 | 
 8 | #include <klex/util/testing.h>
 9 | 
10 | auto main(int argc, const char* argv[]) -> int
11 | {
12 |     return ::klex::util::testing::main(argc, argv);  // the test framework's entry point decides the exit code
13 | }
14 | 


--------------------------------------------------------------------------------
/cmake/EnableCcache.cmake:
--------------------------------------------------------------------------------
 1 | # Setup ccache.
 2 | #
 3 | # The ccache is auto-enabled if the tool is found.
 4 | # To disable set -DCCACHE=OFF option.
 5 | if(NOT DEFINED CMAKE_CXX_COMPILER_LAUNCHER)  # leave an already chosen launcher untouched
 6 |     find_program(CCACHE ccache DOC "ccache tool path; set to OFF to disable")
 7 |     if(CCACHE)
 8 |         set(CMAKE_CXX_COMPILER_LAUNCHER ${CCACHE})  # prefix every C++ compile with ccache
 9 |         if(COMMAND cotire)  # true only when a cotire command has been defined
10 |             # Change ccache config to meet cotire requirements.
11 |             set(ENV{CCACHE_SLOPPINESS} pch_defines,time_macros)
12 |         endif()
13 |         message(STATUS "[ccache] Enabled: ${CCACHE}")
14 |     endif()
15 | endif()
16 | 


--------------------------------------------------------------------------------
/src/klex/regular/State_test.cpp:
--------------------------------------------------------------------------------
 1 | // This file is part of the "klex" project, http://github.com/christianparpart/klex>
 2 | //   (c) 2018 Christian Parpart <christian@parpart.family>
 3 | //
 4 | // Licensed under the MIT License (the "License"); you may not use this
 5 | // file except in compliance with the License. You may obtain a copy of
 6 | // the License at: http://opensource.org/licenses/MIT
 7 | 
 8 | #include <klex/regular/State.h>
 9 | #include <klex/util/testing.h>
10 | 
11 | #include <fmt/format.h>
12 | 
13 | TEST(regular_State, to_string)
14 | {
15 |     const auto ids = klex::regular::StateIdVec { 1, 2, 3 };
16 |     EXPECT_EQ("{n1, n2, n3}", fmt::format("{}", ids));  // formatted form of a state-id vector
17 | }
18 | 


--------------------------------------------------------------------------------
/src/klex/cfg/Grammar-inl.h:
--------------------------------------------------------------------------------
 1 | // This file is part of the "klex" project, http://github.com/christianparpart/klex>
 2 | //   (c) 2018 Christian Parpart <christian@parpart.family>
 3 | //
 4 | // Licensed under the MIT License (the "License"); you may not use this
 5 | // file except in compliance with the License. You may obtain a copy of
 6 | // the License at: http://opensource.org/licenses/MIT
 7 | 
 8 | #include <algorithm>
 9 | 
10 | namespace klex::cfg {
11 | 
12 | inline bool _Symbols::empty() const noexcept
13 | {
14 | 	return this->begin() == this->end();  // an empty range starts where it ends
15 | }
16 | 
17 | inline size_t _Symbols::size() const noexcept
18 | {
19 | 	return static_cast<size_t>(std::distance(this->begin(), this->end()));  // number of steps from begin to end
20 | }
21 | 
22 | }  // namespace klex::cfg
23 | 


--------------------------------------------------------------------------------
/src/klex/cfg/GrammarValidator.h:
--------------------------------------------------------------------------------
 1 | // This file is part of the "klex" project, http://github.com/christianparpart/klex>
 2 | //   (c) 2018 Christian Parpart <christian@parpart.family>
 3 | //
 4 | // Licensed under the MIT License (the "License"); you may not use this
 5 | // file except in compliance with the License. You may obtain a copy of
 6 | // the License at: http://opensource.org/licenses/MIT
 7 | 
 8 | #pragma once
 9 | 
10 | #include <klex/Report.h>
11 | 
12 | namespace klex::cfg {
13 | 
14 | struct Grammar;
15 | 
16 | class GrammarValidator {  // Checks a Grammar and sends what it finds to a Report.
17 |   public:
18 | 	GrammarValidator(Report* _report) : report_{_report} {}  // keeps the pointer exactly as given; no null check here
19 | 
20 | 	void validate(const Grammar& G);  // defined out of line; G is only read (const reference)
21 | 
22 |   private:
23 | 	Report* report_;  // destination for diagnostics raised by validate()
24 | };
25 | 
26 | }  // namespace klex::cfg
27 | 


--------------------------------------------------------------------------------
/.travis.yml:
--------------------------------------------------------------------------------
 1 | language: cpp
 2 | dist: trusty
 3 | 
 4 | compiler:
 5 |   - gcc
 6 | 
 7 | before_install:
 8 |   - sudo add-apt-repository -y ppa:ubuntu-toolchain-r/test
 9 |   - sudo apt-get update -qq
10 | 
11 | install:
12 |   - sudo apt-get install -qqy g++-7
13 |   - sudo update-alternatives --install /usr/bin/g++ g++ /usr/bin/g++-7 90
14 |   - sudo apt install -qqy python-pip
15 |   - sudo pip install codecov
16 | 
17 | before_script:
18 |   - git submodule update --init --recursive
19 |   - mkdir build
20 |   - cd build
21 |   - cmake -DCMAKE_BUILD_TYPE=RelWithDebInfo -DKLEX_COVERAGE=ON ..
22 | 
23 | script:
24 |   - cmake --build .
25 |   - ./klex_test -v
26 |   - ../cmdlineTests.sh
27 | 
28 | after_success:
29 |   - codecov --flags all --gcov-glob '*/src/klex/util/*'
30 | 


--------------------------------------------------------------------------------
/src/klex/util/overloaded.h:
--------------------------------------------------------------------------------
 1 | // This file is part of the "klex" project, http://github.com/christianparpart/klex>
 2 | //   (c) 2018 Christian Parpart <christian@parpart.family>
 3 | //
 4 | // Licensed under the MIT License (the "License"); you may not use this
 5 | // file except in compliance with the License. You may obtain a copy of
 6 | // the License at: http://opensource.org/licenses/MIT
 7 | 
 8 | #pragma once
 9 | 
10 | // This is a nice helper for conveniently using std::visit() with an arbitrary list of lambdas as
11 | // overloads for pattern matching the variant's input type.
12 | 
13 | template<class... Ts>
14 | struct overloaded : Ts...  // derives from every callable type in the pack
15 | {
16 | 	using Ts::operator()...;  // bring each base's call operator into one overload set
17 | };
18 | 
19 | template<class... Ts>
20 | overloaded(Ts...) -> overloaded<Ts...>;  // deduction guide: overloaded{a, b} deduces Ts from its arguments
21 | 
22 | 


--------------------------------------------------------------------------------
/src/klex/SourceLocation.cpp:
--------------------------------------------------------------------------------
 1 | // This file is part of the "klex" project, http://github.com/christianparpart/klex>
 2 | //   (c) 2018 Christian Parpart <christian@parpart.family>
 3 | //
 4 | // Licensed under the MIT License (the "License"); you may not use this
 5 | // file except in compliance with the License. You may obtain a copy of
 6 | // the License at: http://opensource.org/licenses/MIT
 7 | 
 8 | #include <klex/SourceLocation.h>
 9 | 
10 | #include <fstream>
11 | 
12 | using namespace std;
13 | 
14 | namespace klex
15 | {
16 | 
17 | string SourceLocation::source() const // Returns the text of this range; shorter or empty if it cannot be read.
18 | {
19 |     string code(count, '\0');
20 |     ifstream ifs(filename);
21 |     if (ifs.seekg(offset, ifs.beg))  // also false when the file could not be opened
22 |         ifs.read(&code[0], count);
23 |     code.resize(static_cast<size_t>(ifs.gcount()));  // keep only what was really read (0 if nothing was)
24 |     return code;
25 | }
26 | 
27 | } // namespace klex
28 | 


--------------------------------------------------------------------------------
/src/klex/regular/MultiDFA.h:
--------------------------------------------------------------------------------
 1 | // This file is part of the "klex" project, http://github.com/christianparpart/klex>
 2 | //   (c) 2018 Christian Parpart <christian@parpart.family>
 3 | //
 4 | // Licensed under the MIT License (the "License"); you may not use this
 5 | // file except in compliance with the License. You may obtain a copy of
 6 | // the License at: http://opensource.org/licenses/MIT
 7 | #pragma once
 8 | 
 9 | #include <klex/regular/DFA.h>
10 | #include <klex/regular/State.h>
11 | #include <klex/regular/Symbols.h>
12 | #include <map>
13 | #include <string>
14 | 
15 | namespace klex::regular {
16 | 
17 | struct MultiDFA {  // a single DFA together with named entry states into it
18 | 	using InitialStateMap = std::map<std::string, StateId>;  // name -> entry state id
19 | 
20 | 	InitialStateMap initialStates;  // one entry per named automaton
21 | 	DFA dfa;  // the combined automaton that the state ids above refer to
22 | };
23 | 
24 | MultiDFA constructMultiDFA(std::map<std::string, DFA> many);  // takes `many` by value; builds one MultiDFA from it
25 | 
26 | }  // namespace klex::regular
27 | 


--------------------------------------------------------------------------------
/cmake/mklex.cmake:
--------------------------------------------------------------------------------
 1 | # mklex cmake integration
 2 | 
 3 | function(klex_generate_cpp KLEX_FILE TOKEN_FILE TABLE_FILE)  # TABLE_FILE is the NAME of the variable that receives the table path
 4 |   set(${TABLE_FILE} "${CMAKE_CURRENT_BINARY_DIR}/${KLEX_FILE}.table.cc")  # function-local copy, used below
 5 |   set(${TABLE_FILE} "${CMAKE_CURRENT_BINARY_DIR}/${KLEX_FILE}.table.cc" PARENT_SCOPE)  # same value handed back to the caller
 6 |   set(dot_file "${CMAKE_CURRENT_BINARY_DIR}/${KLEX_FILE}.dot")
 7 |   set(klex_file "${CMAKE_CURRENT_SOURCE_DIR}/${KLEX_FILE}")  # KLEX_FILE is resolved against the current source dir
 8 | 
 9 |   add_custom_command(
10 |       OUTPUT "${TOKEN_FILE}" "${${TABLE_FILE}}"
11 |       COMMAND mklex -f "${klex_file}" -t "${${TABLE_FILE}}" -T "${TOKEN_FILE}" -x "${dot_file}" -p
12 |       DEPENDS mklex ${klex_file}
13 |       COMMENT "Generating lexer table and tokens for ${KLEX_FILE}"
14 |       VERBATIM)
15 |   set_source_files_properties(${TOKEN_FILE} PROPERTIES GENERATED TRUE)  # TOKEN_FILE is used as a path, unlike TABLE_FILE
16 |   set_source_files_properties(${${TABLE_FILE}} PROPERTIES GENERATED TRUE)
17 | endfunction()
18 | 
19 | 


--------------------------------------------------------------------------------
/src/klex/regular/DotVisitor.h:
--------------------------------------------------------------------------------
 1 | // This file is part of the "klex" project, http://github.com/christianparpart/klex>
 2 | //   (c) 2018 Christian Parpart <christian@parpart.family>
 3 | //
 4 | // Licensed under the MIT License (the "License"); you may not use this
 5 | // file except in compliance with the License. You may obtain a copy of
 6 | // the License at: http://opensource.org/licenses/MIT
 7 | 
 8 | #pragma once
 9 | 
10 | #include <klex/regular/State.h>
11 | #include <string_view>
12 | 
13 | namespace klex::regular {
14 | 
15 | class DotVisitor {  // abstract callback interface: every hook below is pure virtual
16 |   public:
17 | 	virtual ~DotVisitor() {}  // virtual so implementations can be destroyed through a DotVisitor pointer
18 | 
19 | 	virtual void start(StateId initialState) = 0;  // presumably the first call of a traversal - confirm against the caller
20 | 	virtual void visitNode(StateId number, bool start, bool accept) = 0;  // flags presumably mark start / accepting states
21 | 	virtual void visitEdge(StateId from, StateId to, Symbol s) = 0;  // one call carries one symbol for the edge from -> to
22 | 	virtual void endVisitEdge(StateId from, StateId to) = 0;  // presumably closes the visitEdge calls of one edge - confirm
23 | 	virtual void end() = 0;  // presumably the last call of a traversal - confirm against the caller
24 | };
25 | 
26 | }  // namespace klex::regular
27 | 


--------------------------------------------------------------------------------
/src/klex/util/IntVector.h:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | 
 3 | #include <vector>
 4 | 
 5 | /// A std::vector<T> of integers that also maintains a rolling FNV-1a style hash of its elements,
 6 | /// so operator== can compare the two hashes before falling back to an element-wise comparison.
 7 | template<typename T>
 8 | class IntVector {
 9 |  public:
10 |   using value_type = T;
11 |   using Vector = std::vector<T>;
12 |   using iterator = typename Vector::iterator;
13 |   using const_iterator = typename Vector::const_iterator;
14 | 
15 |   IntVector() : vector_{}, hash_{2166136261llu} {}  // starts at the 32-bit FNV offset basis
16 | 
17 |   void clear() {
18 |     vector_.clear();
19 |     hash_ = 2166136261llu;  // same value a freshly constructed vector has
20 |   }
21 | 
22 |   void push_back(T v) {
23 |     vector_.push_back(v);
24 | 
25 |     hash_ ^= static_cast<unsigned>(v);  // FNV-1a step, applied per element rather than per byte
26 |     hash_ *= 16777619llu;  // 32-bit FNV prime; the result wraps to the width of unsigned
27 |   }
28 | 
29 |   bool operator==(const IntVector& rhs) const noexcept {
30 |     return hash_ == rhs.hash_ && vector_ == rhs.vector_;  // the cheap hash check runs first
31 |   }
32 | 
33 |   bool operator!=(const IntVector& rhs) const noexcept {
34 |     return !(*this == rhs);
35 |   }
36 | 
37 |  private:
38 |   Vector vector_;
39 |   unsigned hash_;  // hash over all elements in insertion order
40 | };
41 | 


--------------------------------------------------------------------------------
/src/klex/regular/State.cpp:
--------------------------------------------------------------------------------
 1 | // This file is part of the "klex" project, http://github.com/christianparpart/klex>
 2 | //   (c) 2018 Christian Parpart <christian@parpart.family>
 3 | //
 4 | // Licensed under the MIT License (the "License"); you may not use this
 5 | // file except in compliance with the License. You may obtain a copy of
 6 | // the License at: http://opensource.org/licenses/MIT
 7 | 
 8 | #include <klex/regular/State.h>
 9 | 
10 | #include <sstream>
11 | 
12 | using namespace std;
13 | 
14 | namespace klex::regular
15 | {
16 | 
17 | string to_string(const StateIdVec& S, string_view stateLabelPrefix)
18 | {
19 |     // Render from a sorted copy so the text does not depend on the order of S.
20 |     StateIdVec ordered(S.begin(), S.end());
21 |     sort(ordered.begin(), ordered.end());
22 | 
23 |     stringstream out;
24 |     out << "{";
25 |     const char* separator = "";  // nothing is written before the first element
26 |     for (const StateId id: ordered)
27 |     {
28 |         out << separator << stateLabelPrefix << id;
29 |         separator = ", ";
30 |     }
31 |     out << "}";
32 | 
33 |     // Each id is written as the prefix followed by its number, inside one pair of braces.
34 |     return out.str();
35 | }
36 | 
37 | } // namespace klex::regular
38 | 


--------------------------------------------------------------------------------
/klex.vim:
--------------------------------------------------------------------------------
 1 | " klex syntax highlighting
 2 | "
 3 | 
 4 | " quit when a syntax file was already loaded
 5 | if exists("b:current_syntax")
 6 |   finish
 7 | endif
 8 | 
 9 | " # comment LF
10 | " RuleName(option) ::= PATTERN
11 | 
12 | " Syntax rules (every group defined here needs a 'hi def link' of the same name)
13 | syn keyword klexTodo contained TODO FIXME XXX NOTE BUG
14 | syn match klexComment "#.*$" contains=klexTodo
15 | syn match klexOptions '^%\s*pragma\>.*$'
16 | syn match klexRuleName '^\s*\(<[a-zA-Z,]\+>\)\?[a-zA-Z_][a-zA-Z0-9_]*'
17 | syn match klexOperator "(\|)\||"
18 | syn match klexAssign "::="
19 | syn match klexRulePattern /\".*\"/
20 | syn match klexEof "<<EOF>>"
21 | 
22 | " The default highlighting.
23 | hi def link klexComment       Comment
24 | hi def link klexOperator      Operator
25 | hi def link klexAssign        Operator
26 | hi def link klexTodo          Todo
27 | hi def link klexRuleName      Function
28 | hi def link klexRulePattern   Constant
29 | hi def link klexOptions       PreProc
30 | hi def link klexEof           Special
31 | 
32 | let b:current_syntax = "klex"
33 | 
34 | " vim:ts=10
35 | 


--------------------------------------------------------------------------------
/src/klex/regular/MultiDFA.cpp:
--------------------------------------------------------------------------------
 1 | // This file is part of the "klex" project, http://github.com/christianparpart/klex>
 2 | //   (c) 2018 Christian Parpart <christian@parpart.family>
 3 | //
 4 | // Licensed under the MIT License (the "License"); you may not use this
 5 | // file except in compliance with the License. You may obtain a copy of
 6 | // the License at: http://opensource.org/licenses/MIT
 7 | 
 8 | #include <klex/regular/MultiDFA.h>
 9 | 
10 | using namespace std;
11 | 
12 | namespace klex::regular
13 | {
14 | 
15 | MultiDFA constructMultiDFA(map<string, DFA> many)
16 | {
17 |     MultiDFA result {};
18 |     result.dfa.createStates(1 + many.size());
19 |     result.dfa.setInitialState(0);
20 | 
21 |     StateId entry = 1;  // state 0 is the initial state set above
22 |     for (auto& [name, dfa]: many)
23 |     {
24 |         result.dfa.append(move(dfa), entry);
25 |         result.initialStates[name] = entry;
26 |         result.dfa.setTransition(0, static_cast<Symbol>(entry), entry);  // the entry id doubles as the symbol
27 |         ++entry;
28 |     }
29 | 
30 |     return result;
31 | }
32 | 
33 | } // namespace klex::regular
34 | 


--------------------------------------------------------------------------------
/TODO.md:
--------------------------------------------------------------------------------
 1 | # REG
 2 | 
 3 | - ignore whitespaces in REGEX rules
 4 | - `LookaheadLexer<const size_t N=1>`
 5 | 
 6 | # CFG
 7 | 
 8 | - klex::LeftFactoring
 9 | 	Rewrites rules to eliminate common prefixes in order to reduce lookahead from k>1 to k=1
10 | - basic actions
11 | 
12 | # Incomplete TODO items: Lexer
13 | 
14 | - [ ] proper file offset reporting
15 | - [ ] distinguish between Token ID, TokenTraits, and Token class
16 | 
17 | # Incomplete TODO list
18 | 
19 | - [ ] cfg::ll::SyntaxTable::dump() MUST NOT depend on Grammar
20 | - [ ] left-recursion-elimination (direct)
21 |   - call it: struct LeftToRightRecursion {}; that can ideally be used with std::transform()
22 |   - first all left-recursive rules need to be collected
23 | - [ ] left-recursion-elimination (indirect)
24 | - [ ] Analyzer production matching hooks (check ANTLR)
25 | 
26 | ### left-recursion
27 | 
28 | ```
29 | # left
30 | A  ::= A b
31 |      | b;
32 | 
33 | # right
34 | A  ::= A' b;
35 | A' ::= b A';
36 |      | ;
37 | 
38 | # LEFT
39 | Expr ::= Expr '+' Term
40 |        | Expr '-' Term
41 | 	   | Term;
42 | 
43 | Expr ::= Term Expr';
44 | Expr' ::= '+' Expr'
45 |        | '-' Expr';
46 | ```
47 | 


--------------------------------------------------------------------------------
/src/klex/cfg/GrammarValidator.cpp:
--------------------------------------------------------------------------------
 1 | // This file is part of the "klex" project, http://github.com/christianparpart/klex>
 2 | //   (c) 2018 Christian Parpart <christian@parpart.family>
 3 | //
 4 | // Licensed under the MIT License (the "License"); you may not use this
 5 | // file except in compliance with the License. You may obtain a copy of
 6 | // the License at: http://opensource.org/licenses/MIT
 7 | 
 8 | #include <klex/cfg/Grammar.h>
 9 | #include <klex/cfg/GrammarValidator.h>
10 | 
11 | #include <variant>
12 | 
13 | using namespace std;
14 | using namespace klex;
15 | using namespace klex::cfg;
16 | 
17 | void GrammarValidator::validate(const Grammar& G)
18 | {
19 |     // Report every non-terminal that appears in a handle but has no production of its own.
20 |     for (const Production& production: G.productions)
21 |         for (const Symbol symbol: symbols(production.handle))
22 |             if (holds_alternative<NonTerminal>(symbol) && !G.containsProduction(get<NonTerminal>(symbol)))
23 |                 report_->typeError(SourceLocation { /*TODO: symbol.location()*/ },
24 |                                    "Non-terminal {} is missing a production rule.",
25 |                                    symbol);
26 | 
27 |     // TODO: check for unwanted infinite recursions
28 |     // such as: E ::= E
29 | }
30 | 


--------------------------------------------------------------------------------
/src/klex/regular/DFABuilder_test.cpp:
--------------------------------------------------------------------------------
 1 | // This file is part of the "klex" project, http://github.com/christianparpart/klex>
 2 | //   (c) 2018 Christian Parpart <christian@parpart.family>
 3 | //
 4 | // Licensed under the MIT License (the "License"); you may not use this
 5 | // file except in compliance with the License. You may obtain a copy of
 6 | // the License at: http://opensource.org/licenses/MIT
 7 | 
 8 | #include <klex/regular/Compiler.h>
 9 | #include <klex/regular/DFA.h>
10 | #include <klex/regular/DFABuilder.h>
11 | #include <klex/regular/MultiDFA.h>
12 | #include <klex/util/testing.h>
13 | 
14 | #include <memory>
15 | #include <sstream>
16 | 
17 | using namespace klex::regular;
18 | 
19 | TEST(regular_DFABuilder, shadowing)
20 | {
21 |     Compiler compiler;
22 |     compiler.parse(std::make_unique<std::stringstream>(R"(
23 |     Identifier  ::= [a-z][a-z0-9]*
24 |     TrueLiteral ::= "true"
25 |   )"));
26 |     // "true" is matched by Identifier as well, so rule 1 overshadows rule 2
27 |     Compiler::OvershadowMap shadowed;
28 |     DFA compiled = compiler.compileDFA(&shadowed);
29 |     ASSERT_EQ(1, shadowed.size());
30 |     EXPECT_EQ(2, shadowed[0].first);  // the rule that is overshadowed
31 |     EXPECT_EQ(1, shadowed[0].second); // the rule that overshadows it
32 | }
33 | 


--------------------------------------------------------------------------------
/appveyor.yml:
--------------------------------------------------------------------------------
 1 | version: '{build}'
 2 | branches:
 3 |   only:
 4 |   - master
 5 | clone_folder: c:\projects\klex
 6 | image:
 7 | - Visual Studio 2017
 8 | configuration:
 9 | - Release
10 | - Debug
11 | platform:
12 | - x64
13 | - x86
14 | 
15 | matrix:
16 |   fast_finish: true
17 | 
18 | # skip unsupported combinations
19 | init:
20 | - set arch=
21 | - if "%PLATFORM%"=="x64" (set arch= Win64)
22 | - echo %arch%
23 | - echo %APPVEYOR_BUILD_WORKER_IMAGE%
24 | - if "%APPVEYOR_BUILD_WORKER_IMAGE%"=="Visual Studio 2017" ( set generator="Visual Studio 15 2017%arch%" )
25 | - if "%APPVEYOR_BUILD_WORKER_IMAGE%"=="Visual Studio 2015" ( set generator="Visual Studio 14 2015%arch%" )
26 | - if "%APPVEYOR_BUILD_WORKER_IMAGE%"=="Visual Studio 2013" ( set generator="Visual Studio 12 2013%arch%" )
27 | - echo %generator%
28 | 
29 | build_script:
30 |   - git submodule update --init --recursive
31 |   - mkdir build
32 |   - cd build
33 |   - echo %generator%
34 |   - echo %CONFIGURATION%
35 |   - cmake -G %generator% --config %CONFIGURATION% ..
36 |   - cmake --build . --config %CONFIGURATION%
37 | 
38 | test_script:
39 |   - .\%CONFIGURATION%\klex_test.exe
40 | 
41 | only_commits:
42 |   files:
43 |     - CMakeLists.txt
44 |     - appveyor.yml
45 |     - src/
46 |     - examples/
47 |     - docs/
48 |     - cmake/
49 |     - 3rdparty/
50 | 


--------------------------------------------------------------------------------
/klax.vim:
--------------------------------------------------------------------------------
 1 | " klax syntax highlighting
 2 | "
 3 | 
 4 | " quit when a syntax file was already loaded
 5 | if exists("b:current_syntax")
 6 |   finish
 7 | endif
 8 | 
 9 | " # comment LF
10 | " RuleName(option) ::= PATTERN
11 | 
12 | syn match klaxSpecial display contained "\\\(t\|v\|r\|n\|s\)\||\|\[\|\]\|\.\|+\|*\|?\|(\|)"
13 | "syn region klaxString start=+L\="+ skip=+\\\\\|\\"+ end=+"+ contains=klaxSpecial
14 | syn region klaxString start=/\v"/ skip=/\v\\./ end=/\v"/ contains=klaxSpecial
15 | syn region klaxRawString start="'" end="'"
16 | 
17 | " Syntax rules (every group defined here needs a 'hi def link' of the same name)
18 | syn keyword klaxTodo contained TODO FIXME XXX NOTE BUG
19 | syn match klaxComment "#.*$" contains=klaxTodo
20 | syn match klaxOptions '^%\s*pragma\>.*$'
21 | syn match klaxRuleName '^\s*\(<[a-zA-Z,]\+>\)\?[a-zA-Z_][a-zA-Z0-9_]*'
22 | syn match klaxOperator "(\|)\||"
23 | syn match klaxAssign "::="
24 | syn match klaxEof "<<EOF>>"
25 | 
26 | " The default highlighting.
27 | hi def link klaxComment       Comment
28 | hi def link klaxOperator      Operator
29 | hi def link klaxAssign        Operator
30 | hi def link klaxTodo          Todo
31 | hi def link klaxRuleName      Function
32 | hi def link klaxOptions       PreProc
33 | hi def link klaxEof           Special
34 | hi def link klaxString        String
35 | hi def link klaxRawString     String
36 | hi def link klaxSpecial       Special
37 | 
38 | let b:current_syntax = "klax"
39 | 
40 | " vim:ts=10
41 | 
42 | 


--------------------------------------------------------------------------------
/src/klex/SourceLocation.h:
--------------------------------------------------------------------------------
 1 | // This file is part of the "klex" project, http://github.com/christianparpart/klex>
 2 | //   (c) 2018 Christian Parpart <christian@parpart.family>
 3 | //
 4 | // Licensed under the MIT License (the "License"); you may not use this
 5 | // file except in compliance with the License. You may obtain a copy of
 6 | // the License at: http://opensource.org/licenses/MIT
 7 | 
 8 | #pragma once
 9 | 
10 | #include <string>
11 | 
12 | namespace klex {
13 | 
14 | struct SourceLocation {  // `count` characters starting at `offset` inside the file `filename`
15 | 	std::string filename;
16 | 	size_t offset;
17 | 	size_t count;
18 | 
19 | 	[[nodiscard]] long long int compare(const SourceLocation& other) const noexcept
20 | 	{
21 | 		// Different files order by name and yield exactly -1 or 1.
22 | 		if (const int order = filename.compare(other.filename); order != 0)
23 | 			return order < 0 ? -1 : 1;
24 | 
25 | 		// Same file: signed offset distance. long is 32 bits on LLP64 targets, so widen to long long.
26 | 		return static_cast<long long>(offset) - static_cast<long long>(other.offset);
27 | 	}
28 | 
29 | 	[[nodiscard]] std::string source() const;  // defined out of line
30 | 
31 | 	bool operator==(const SourceLocation& other) const noexcept { return compare(other) == 0; }  // count is not compared
32 | 	bool operator<=(const SourceLocation& other) const noexcept { return compare(other) <= 0; }
33 | 	bool operator>=(const SourceLocation& other) const noexcept { return compare(other) >= 0; }
34 | 	bool operator<(const SourceLocation& other) const noexcept { return compare(other) < 0; }
35 | 	bool operator>(const SourceLocation& other) const noexcept { return compare(other) > 0; }
36 | };
37 | 
38 | }  // namespace klex
39 | 


--------------------------------------------------------------------------------
/examples/flowlexer.cpp:
--------------------------------------------------------------------------------
 1 | // This file is part of the "klex" project, http://github.com/christianparpart/klex>
 2 | //   (c) 2018 Christian Parpart <christian@parpart.family>
 3 | //
 4 | // Licensed under the MIT License (the "License"); you may not use this
 5 | // file except in compliance with the License. You may obtain a copy of
 6 | // the License at: http://opensource.org/licenses/MIT
 7 | 
 8 | #include <klex/regular/Lexable.h>
 9 | 
10 | #include <fmt/format.h>
11 | 
12 | #include <fstream>
13 | #include <iostream>
14 | 
15 | #include "token.h" // generated via mklex
16 | 
17 | extern klex::regular::LexerDef lexerDef; // generated via mklex
18 | 
19 | int main(int argc, const char* argv[])
20 | {
21 |     auto ls = argc == 2 ? klex::regular::Lexable<Token, Machine> { lexerDef,
22 |                                                                    std::make_unique<std::ifstream>(argv[1]) }
23 |                         : klex::regular::Lexable<Token, Machine> { lexerDef, std::cin };
24 | 
25 |     for (const auto& token: ls)
26 |     {
27 |         std::cerr << fmt::format("[{}-{}]: token {} (\"{}\")\n",
28 |                                  token.offset,
29 |                                  token.offset + token.literal.length(),
30 |                                  lexerDef.tagName(static_cast<klex::regular::Tag>(token.token)),
31 |                                  token.literal);
32 |     }
33 | 
34 |     return EXIT_SUCCESS;
35 | }
36 | 


--------------------------------------------------------------------------------
/src/klex/regular/State.h:
--------------------------------------------------------------------------------
 1 | // This file is part of the "klex" project, http://github.com/christianparpart/klex>
 2 | //   (c) 2018 Christian Parpart <christian@parpart.family>
 3 | //
 4 | // Licensed under the MIT License (the "License"); you may not use this
 5 | // file except in compliance with the License. You may obtain a copy of
 6 | // the License at: http://opensource.org/licenses/MIT
 7 | #pragma once
 8 | 
 9 | #include <klex/regular/Symbols.h>
10 | 
11 | #include <fmt/format.h>
12 | 
13 | #include <map>
14 | #include <memory>
15 | #include <string>
16 | #include <string_view>
17 | #include <unordered_map>
18 | #include <vector>
19 | 
20 | namespace klex::regular {
21 | 
/// Value associated with a state in an AcceptMap.
using Tag = int;
/// Numeric identifier of a state.
using StateId = size_t;
/// Sequence of state identifiers.
using StateIdVec = std::vector<StateId>;

/// Ordered mapping from a state identifier to its Tag.
using AcceptMap = std::map<StateId, Tag>;

/**
 * Returns a human readable string of @p S, such as "{n0, n1, n2}".
 *
 * @param S                the state identifiers to render.
 * @param stateLabelPrefix label prefix, "n" by default (presumably prepended to every id, as in
 *                         the example above; confirm in State.cpp).
 */
std::string to_string(const StateIdVec& S, std::string_view stateLabelPrefix = "n");
32 | 
33 | }  // namespace klex::regular
34 | 
35 | namespace fmt {
36 | template <>
37 | struct formatter<klex::regular::StateIdVec> {
38 | 	template <typename ParseContext>
39 | 	constexpr auto parse(ParseContext& ctx)
40 | 	{
41 | 		return ctx.begin();
42 | 	}
43 | 
44 | 	template <typename FormatContext>
45 | 	constexpr auto format(const klex::regular::StateIdVec& v, FormatContext& ctx)
46 | 	{
47 | 		return format_to(ctx.out(), "{}", klex::regular::to_string(v));
48 | 	}
49 | };
50 | }  // namespace fmt
51 | 


--------------------------------------------------------------------------------
/Dockerfile:
--------------------------------------------------------------------------------
 1 | FROM ubuntu:18.04 AS build
 2 | MAINTAINER Christian Parpart <christian@parpart.family>
 3 | 
 4 | RUN apt-get -qqy update
 5 | RUN apt-get install -qqy cmake make g++-7
 6 | 
 7 | WORKDIR /app/src
 8 | 
 9 | COPY /3rdparty /app/src/3rdparty
10 | COPY /cmake /app/src/cmake
11 | COPY /src /app/src/src
12 | COPY /CMakeLists.txt $WORKDIR
13 | RUN ls -hlaF
14 | 
15 | ARG BUILD_CONCURRENCY="0"
16 | 
17 | RUN cmake -DCMAKE_BUILD_TYPE=Release \
18 |           -DKLEX_EXAMPLES=OFF \
19 |           -DKLEX_TESTS=OFF \
20 |           -DMKLEX_LINK_STATIC=ON \
21 |           -DCMAKE_CXX_COMPILER=g++-7 \
22 |           $WORKDIR
23 | 
24 | RUN make \
25 |     -j$(awk "BEGIN {                                       \
26 |         if (${BUILD_CONCURRENCY} != 0) {                   \
27 |             print(${BUILD_CONCURRENCY});                   \
28 |         } else {                                           \
29 |             x=($(grep -c ^processor /proc/cpuinfo) * 2/3); \
30 |             if (x > 1) {                                   \
31 |                 printf(\"%d\n\", x);                       \
32 |             } else {                                       \
33 |                 print(1);                                  \
34 |             }                                              \
35 |         }                                                  \
36 |     }")
37 | 
38 | RUN strip mklex
39 | 
40 | FROM scratch
41 | COPY --from=build /app/src/mklex /usr/bin/mklex
42 | ENTRYPOINT ["/usr/bin/mklex"]
43 | CMD ["--help"]
44 | 


--------------------------------------------------------------------------------
/src/klex/regular/TransitionMap-inl.h:
--------------------------------------------------------------------------------
 1 | // This file is part of the "klex" project, http://github.com/christianparpart/klex>
 2 | //   (c) 2018 Christian Parpart <christian@parpart.family>
 3 | //
 4 | // Licensed under the MIT License (the "License"); you may not use this
 5 | // file except in compliance with the License. You may obtain a copy of
 6 | // the License at: http://opensource.org/licenses/MIT
 7 | 
 8 | #include <klex/regular/State.h>
 9 | #include <klex/regular/TransitionMap.h>
10 | #include <algorithm>
11 | 
12 | namespace klex::regular {
13 | 
14 | inline void TransitionMap::define(StateId currentState, Symbol charCat, StateId nextState)
15 | {
16 | 	mapping_[currentState][charCat] = nextState;
17 | }
18 | 
19 | inline StateId TransitionMap::apply(StateId currentState, Symbol charCat) const
20 | {
21 | 	if (auto i = mapping_.find(currentState); i != mapping_.end())
22 | 		if (auto k = i->second.find(charCat); k != i->second.end())
23 | 			return k->second;
24 | 
25 | 	return ErrorState;
26 | }
27 | 
28 | inline std::vector<StateId> TransitionMap::states() const
29 | {
30 | 	std::vector<StateId> v;
31 | 	v.reserve(mapping_.size());
32 | 	for (const auto& i : mapping_)
33 | 		v.push_back(i.first);
34 | 	std::sort(v.begin(), v.end());
35 | 	return v;
36 | }
37 | 
38 | inline std::map<Symbol, StateId> TransitionMap::map(StateId s) const
39 | {
40 | 	std::map<Symbol, StateId> m;
41 | 	if (auto mapping = mapping_.find(s); mapping != mapping_.end())
42 | 		for (const auto& i : mapping->second)
43 | 			m[i.first] = i.second;
44 | 	return m;
45 | }
46 | 
47 | }  // namespace klex::regular
48 | 


--------------------------------------------------------------------------------
/src/klex/regular/Alphabet.cpp:
--------------------------------------------------------------------------------
 1 | // This file is part of the "klex" project, http://github.com/christianparpart/klex>
 2 | //   (c) 2018 Christian Parpart <christian@parpart.family>
 3 | //
 4 | // Licensed under the MIT License (the "License"); you may not use this
 5 | // file except in compliance with the License. You may obtain a copy of
 6 | // the License at: http://opensource.org/licenses/MIT
 7 | 
 8 | #include <klex/regular/Alphabet.h>
 9 | #include <klex/regular/Symbols.h>
10 | 
11 | #include <iomanip>
12 | #include <iostream>
13 | #include <sstream>
14 | 
15 | using namespace std;
16 | 
17 | namespace klex::regular
18 | {
19 | 
// Local debug tracing. Flip the condition to 1 to print every DEBUG() message to cerr;
// with 0 the macro expands to an empty statement and its arguments are never evaluated.
#if 0
    #define DEBUG(msg, ...)                                \
        do                                                 \
        {                                                  \
            cerr << fmt::format(msg, __VA_ARGS__) << "\n"; \
        } while (0)
#else
    #define DEBUG(msg, ...) \
        do                  \
        {                   \
        } while (0)
#endif
32 | 
33 | void Alphabet::insert(Symbol ch)
34 | {
35 |     if (alphabet_.find(ch) == alphabet_.end())
36 |     {
37 |         DEBUG("Alphabet: insert '{:}'", prettySymbol(ch));
38 |         alphabet_.insert(ch);
39 |     }
40 | }
41 | 
42 | string Alphabet::to_string() const
43 | {
44 |     stringstream sstr;
45 | 
46 |     sstr << '{';
47 | 
48 |     for (Symbol c: alphabet_)
49 |         sstr << prettySymbol(c);
50 | 
51 |     sstr << '}';
52 | 
53 |     return sstr.str();
54 | }
55 | 
56 | } // namespace klex::regular
57 | 


--------------------------------------------------------------------------------
/src/klex/regular/Alphabet.h:
--------------------------------------------------------------------------------
 1 | // This file is part of the "klex" project, http://github.com/christianparpart/klex>
 2 | //   (c) 2018 Christian Parpart <christian@parpart.family>
 3 | //
 4 | // Licensed under the MIT License (the "License"); you may not use this
 5 | // file except in compliance with the License. You may obtain a copy of
 6 | // the License at: http://opensource.org/licenses/MIT
 7 | #pragma once
 8 | 
 9 | #include <klex/regular/Symbols.h>
10 | #include <fmt/format.h>
11 | #include <set>
12 | #include <string>
13 | 
14 | namespace klex::regular {
15 | 
16 | /**
17 |  * Represents the alphabet of a finite automaton or regular expression.
18 |  */
19 | class Alphabet {
20 |   public:
21 | 	using set_type = std::set<Symbol>;
22 | 	using iterator = set_type::iterator;
23 | 
24 | 	size_t size() const noexcept { return alphabet_.size(); }
25 | 
26 | 	void insert(Symbol ch);
27 | 
28 | 	std::string to_string() const;
29 | 
30 | 	const iterator begin() const { return alphabet_.begin(); }
31 | 	const iterator end() const { return alphabet_.end(); }
32 | 
33 |   private:
34 | 	set_type alphabet_;
35 | };
36 | 
37 | }  // namespace klex::regular
38 | 
39 | namespace fmt {
40 | template <>
41 | struct formatter<klex::regular::Alphabet> {
42 | 	template <typename ParseContext>
43 | 	constexpr auto parse(ParseContext& ctx)
44 | 	{
45 | 		return ctx.begin();
46 | 	}
47 | 
48 | 	template <typename FormatContext>
49 | 	constexpr auto format(const klex::regular::Alphabet& v, FormatContext& ctx)
50 | 	{
51 | 		return format_to(ctx.out(), "{}", v.to_string());
52 | 	}
53 | };
54 | }  // namespace fmt
55 | 


--------------------------------------------------------------------------------
/src/klex/cfg/GrammarParser.h:
--------------------------------------------------------------------------------
 1 | // This file is part of the "klex" project, http://github.com/christianparpart/klex>
 2 | //	 (c) 2018 Christian Parpart <christian@parpart.family>
 3 | //
 4 | // Licensed under the MIT License (the "License"); you may not use this
 5 | // file except in compliance with the License. You may obtain a copy of
 6 | // the License at: http://opensource.org/licenses/MIT
 7 | 
 8 | #pragma once
 9 | 
10 | #include <klex/cfg/Grammar.h>
11 | #include <klex/cfg/GrammarLexer.h>
12 | #include <fmt/format.h>
13 | #include <iostream>
14 | 
15 | namespace klex {
16 | class Report;
17 | }
18 | 
19 | namespace klex::cfg {
20 | 
/**
 * Parses a context-free-grammar specification.
 *
 * The parser holds its own GrammarLexer and Grammar; the Report pointer handed to the
 * constructor is stored as-is (presumably the sink for diagnostics; confirm in the
 * implementation file).
 */
class GrammarParser
{
  public:
	/// Constructs a parser from an existing lexer, passed as an rvalue.
	GrammarParser(GrammarLexer&& lexer, Report* report);
	/// Constructs a parser from @p source (presumably the grammar text; confirm in the implementation).
	GrammarParser(std::string source, Report* report);

	/// Entry point; returns the resulting Grammar.
	Grammar parse();
	void parseRule();
	Handle parseHandle();

  private:
	using Token = GrammarLexer::Token;

	void parseTokenBlock();

	// Thin accessors that forward to the lexer's current token state.
	[[nodiscard]] const std::string& currentLiteral() const noexcept { return lexer_.currentLiteral(); }
	[[nodiscard]] Token currentToken() const noexcept { return lexer_.currentToken(); }
	void consumeToken();
	void consumeToken(Token expectedToken);

	/// Looks up a terminal rule by name; the optional is empty when there is no result.
	[[nodiscard]] std::optional<const regular::Rule*> findExplicitTerminal(const std::string& terminalName) const;

  private:
	Report* report_;      // stored pointer; ownership is not visible from this header
	GrammarLexer lexer_;
	Grammar grammar_;
};
51 | 
52 | }  // namespace klex::cfg
53 | 
54 | // vim:ts=4:sw=4:noet
55 | 


--------------------------------------------------------------------------------
/examples/cxx.klex:
--------------------------------------------------------------------------------
 1 | # vim:syntax=klex
 2 | 
 3 | # keywords
 4 | If              ::= "if"
 5 | Else            ::= "else"
 6 | While           ::= "while"
 7 | Do              ::= "do"
 8 | 
 9 | # builtin types
10 | Void            ::= "void"
11 | Int             ::= "int"
12 | Signed          ::= "signed"
13 | Unsigned        ::= "unsigned"
14 | 
15 | Auto            ::= "auto"
16 | Const           ::= "const"
17 | ConstExpr       ::= "constexpr"
18 | 
19 | # symbols
20 | CurlyOpen       ::= "{"
21 | CurlyClose      ::= "}"
22 | RndOpen         ::= "("
23 | RndClose        ::= ")"
24 | BrOpen          ::= "["
25 | BrClose         ::= "]"
26 | Assign          ::= "="
27 | Not             ::= "!"
28 | NotEqual        ::= "!="
29 | Equal           ::= "=="
30 | Less            ::= "<"
31 | Greater         ::= ">"
32 | LessEqu         ::= "<="
33 | GreaterEqu      ::= ">="
34 | Shl             ::= "<<"
35 | Shr             ::= ">>"
36 | Plus            ::= "+"
37 | Minus           ::= "-"
38 | Mul             ::= "*"
39 | Div             ::= "/"
40 | PlusPlus        ::= "++"
41 | MinusMinus      ::= "--"
42 | PlusAssign      ::= "+="
43 | MinusAssign     ::= "-="
44 | MulAssign       ::= "*="
45 | DivAssign       ::= "/="
46 | Modulo          ::= "%"
47 | ModuloAssign    ::= "%="
48 | 
# Misc
Spacing(ignore)     ::= [\s\t\n]+
CxxComment(ignore)  ::= "//"[^$]*
CComment(ignore)    ::= "/*".*"*/"
Identifier          ::= [a-zA-Z_][a-zA-Z_0-9]*
# decimal literals are one or more digits (a bare [0-9] would split "123" into three tokens)
NumberLiteral       ::= [0-9]+|0x[0-9a-fA-F]+
Eof                 ::= <<EOF>>
StringLiteral       ::= \"([^\"\n]|\\\")*\"
CharLiteral         ::= '(.)'
58 | 


--------------------------------------------------------------------------------
/src/klex/cfg/LeftRecursion.h:
--------------------------------------------------------------------------------
 1 | // This file is part of the "klex" project, http://github.com/christianparpart/klex>
 2 | //   (c) 2018 Christian Parpart <christian@parpart.family>
 3 | //
 4 | // Licensed under the MIT License (the "License"); you may not use this
 5 | // file except in compliance with the License. You may obtain a copy of
 6 | // the License at: http://opensource.org/licenses/MIT
 7 | 
 8 | #pragma once
 9 | 
10 | #include <klex/cfg/Grammar.h>
11 | #include <list>
12 | #include <utility>
13 | #include <vector>
14 | 
15 | namespace klex::cfg {
16 | 
/**
 * Eliminates left-recursion by rewriting a Grammar into an equivalent right-recursion grammar.
 *
 * The grammar is held by reference, so it must outlive this object.
 *
 * @note This transformation is required for LL parsers.
 */
class LeftRecursion {
  public:
	/// Binds this rewriter to @p _grammar.
	explicit LeftRecursion(Grammar& _grammar);

	/// Tells whether @p grammar is left-recursive, without needing a LeftRecursion instance.
	static bool isLeftRecursive(const Grammar& grammar);

	// Presumably the rewrite steps for direct and for indirect left-recursion
	// respectively; confirm in LeftRecursion.cpp.
	void direct();
	void indirect();

  private:
	std::list<Production*> select(const NonTerminal& lhs, const NonTerminal& first);
	void eliminateDirect(const NonTerminal& nt);

	/**
	 * Creates a unique nonterminal symbol that by name relates to @p nt.
	 */
	[[nodiscard]] NonTerminal createRelatedNonTerminal(const NonTerminal& nt) const;

	/**
	 * Splits all productions of the same nonterminal into a vector of left-recursives and the rest.
	 *
	 * @return a pair whose first element holds the left-recursive productions and whose
	 *         second element holds the remaining ones.
	 */
	[[nodiscard]] std::pair<std::vector<Production*>, std::vector<Production*>> split(std::vector<Production*> productions) const;

  private:
	Grammar& grammar_;
};
48 | 
49 | }  // namespace klex::cfg
50 | 


--------------------------------------------------------------------------------
/examples/wordcount.cpp:
--------------------------------------------------------------------------------
 1 | // This file is part of the "klex" project, http://github.com/christianparpart/klex>
 2 | //   (c) 2018 Christian Parpart <christian@parpart.family>
 3 | //
 4 | // Licensed under the MIT License (the "License"); you may not use this
 5 | // file except in compliance with the License. You may obtain a copy of
 6 | // the License at: http://opensource.org/licenses/MIT
 7 | 
 8 | #include <klex/regular/Compiler.h>
 9 | #include <klex/regular/Lexable.h>
10 | 
11 | #include <cstdlib>
12 | #include <iostream>
13 | #include <memory>
14 | #include <sstream>
15 | #include <string>
16 | 
17 | int main(int argc, const char* argv[])
18 | {
19 |     klex::regular::Compiler cc;
20 |     cc.parse(R"(
21 | 		Word  ::= [a-zA-Z]+
22 | 		LF    ::= \n
23 | 		Other ::= .
24 | 		Eof   ::= <<EOF>>
25 | 	)");
26 | 
27 |     size_t words = 0;
28 |     size_t chars = 0;
29 |     size_t lines = 0;
30 | 
31 |     auto ld = cc.compile();
32 |     klex::regular::Lexable<int, int, false, false> lexer { ld, std::cin };
33 |     for (const auto& ti: lexer)
34 |     {
35 |         switch (token(ti))
36 |         {
37 |             case 4: // EOF
38 |                 break;
39 |             case 3: // Other
40 |                 chars++;
41 |                 break;
42 |             case 2: // LF
43 |                 chars++;
44 |                 lines++;
45 |                 break;
46 |             case 1: // Word
47 |                 words++;
48 |                 chars += literal(ti).size();
49 |                 break;
50 |         }
51 |     }
52 | 
53 |     std::cout << "newlines: " << lines << ", words: " << words << ", characters: " << chars << "\n";
54 | 
55 |     return EXIT_SUCCESS;
56 | }
57 | 


--------------------------------------------------------------------------------
/src/klex/util/literals.h:
--------------------------------------------------------------------------------
 1 | // This file is part of the "x0" project, http://github.com/christianparpart/x0>
 2 | //   (c) 2009-2018 Christian Parpart <christian@parpart.family>
 3 | //
 4 | // Licensed under the MIT License (the "License"); you may not use this
 5 | // file except in compliance with the License. You may obtain a copy of
 6 | // the License at: http://opensource.org/licenses/MIT
 7 | 
 8 | #pragma once
 9 | 
10 | #include <cstdint>
11 | #include <sstream>
12 | #include <string>
13 | 
14 | namespace klex::util::literals {
15 | 
/**
 * Strips a multiline string's indentation prefix.
 *
 * The first character of the literal is taken as the separator. After every line feed,
 * everything up to and including the next separator is dropped; the rest of each line,
 * including its line feed, is kept.
 *
 * Example:
 * \code
 * string s = R"(|line one
 *               |line two
 *               |line three
 *               )"_multiline;
 * fmt::print(s);
 * \endcode
 *
 * This prints three lines: @c "line one\nline two\nline three\n"
 *
 * @param text the literal's characters.
 * @param size number of characters in @p text; the whole range is processed, so literals
 *             with embedded NUL characters are not cut short.
 * @return the text with the separator-delimited prefixes removed; empty for an empty literal.
 */
inline std::string operator""_multiline(const char* text, size_t size)
{
	if (size == 0)
		return {};

	constexpr char LF = '\n';
	const char sep = text[0];

	std::string result;
	result.reserve(size - 1);

	// True while discarding the indentation that precedes the next separator.
	bool skippingPrefix = false;

	for (size_t i = 1; i < size; ++i)
	{
		const char ch = text[i];
		if (skippingPrefix)
		{
			if (ch == sep)
				skippingPrefix = false;
			continue;
		}

		result += ch;
		if (ch == LF)
			skippingPrefix = true;
	}

	return result;
}
72 | 
73 | }  // namespace klex::util::literals
74 | 


--------------------------------------------------------------------------------
/examples/flow.klax:
--------------------------------------------------------------------------------
 1 | # vim:syntax=klax
 2 | 
 3 | token {
 4 |   # explicit token definitions, in klex lexer format:
 5 |   Space(ignore) ::= [\s\t\n]+
 6 |   IDENT         ::= [a-zA-Z_][a-zA-Z0-9_]*
 7 | }
 8 | 
 9 | Start       ::= FlowProgram
10 | FlowProgram ::= HandlerDef*
11 | HandlerDef  ::= 'handler' IDENT BlockStmt
12 | VarDecl     ::= 'var' IDENT '=' Expr ';'
13 | 
14 | # statements
15 | Stmt        ::= IfStmt | WhileStmt | AssignStmt | CallStmt | EmptyStmt
16 | IfStmt      ::= 'if' Expr BlockStmt
17 |               | 'if' Expr BlockStmt 'else' BlockStmt
18 | WhileStmt   ::= 'while' Expr BlockStmt
19 | AssignStmt  ::= IDENT '=' Expr ';'
20 | BlockStmt   ::= '{' VarDecl* Stmt '}'
21 | CallStmt    ::= IDENT ';'
22 |               | IDENT CallArgs ';'
23 |               | IDENT '(' CallArgs ')' ';'
24 | CallArgs    ::= Expr (',' Expr)*
25 |               | NamedArg (',' NamedArg)*
26 | NamedArg    ::= IDENT ':' Expr
27 | 
28 | EmptyStmt   ::= ';'
29 | 
30 | # expressions
31 | Expr        ::= LogicExpr
32 | LogicExpr   ::= NotExpr 'and' NotExpr
33 |               | NotExpr 'xor' NotExpr
34 |               | NotExpr 'or' NotExpr
35 | NotExpr     ::= '?' NotExpr
36 |               | RelExpr
37 | RelExpr     ::= AddExpr _RelOp AddExpr
38 |               | AddExpr
39 | _RelOp      ::= '==' | '!=' | '<=' | '>=' | '<' | '>' | '=~' | '=^' | '=$' | 'in'
40 | 
# TODO the parser must automatically rewrite the rule
AddExpr     ::= MulExpr ('+' MulExpr)*
43 | 
44 | MulExpr     ::= BitNotExpr ('*' BitNotExpr)+
45 |               | BitNotExpr ('/' BitNotExpr)+
46 |               | BitNotExpr
47 | BitNotExpr  ::=
48 | NegExpr     ::=
49 | PrimaryExpr ::=
50 | LiteralExpr ::= Number | IPv4
51 | CastExpr    ::= TypeName '(' Expr ')'
52 | TypeName    ::= 'bool' | 'int' | 'string'
53 | 


--------------------------------------------------------------------------------
/src/klex/regular/NFABuilder.h:
--------------------------------------------------------------------------------
 1 | // This file is part of the "klex" project, http://github.com/christianparpart/klex>
 2 | //   (c) 2018 Christian Parpart <christian@parpart.family>
 3 | //
 4 | // Licensed under the MIT License (the "License"); you may not use this
 5 | // file except in compliance with the License. You may obtain a copy of
 6 | // the License at: http://opensource.org/licenses/MIT
 7 | #pragma once
 8 | 
 9 | #include <klex/regular/Alphabet.h>
10 | #include <klex/regular/NFA.h>
11 | #include <klex/regular/RegExpr.h>
12 | 
13 | #include <fmt/format.h>
14 | #include <list>
15 | #include <map>
16 | #include <memory>
17 | #include <set>
18 | #include <string_view>
19 | #include <tuple>
20 | #include <vector>
21 | 
22 | namespace klex::regular {
23 | 
24 | class DFA;
25 | 
/*!
 * Generates a finite automaton from the given input (a regular expression).
 *
 * Besides the two construct() entry points, the class offers one call operator per
 * regular expression node type (presumably so it can act as the visitor over a RegExpr;
 * confirm in NFABuilder.cpp).
 */
class NFABuilder {
  public:
	explicit NFABuilder() : fa_{} {}

	/// Builds an NFA for @p re; @p tag is an additional Tag supplied by the caller.
	NFA construct(const RegExpr& re, Tag tag);
	/// Builds an NFA for @p re.
	NFA construct(const RegExpr& re);
	void operator()(const LookAheadExpr& lookaheadExpr);
	void operator()(const ConcatenationExpr& concatenationExpr);
	void operator()(const AlternationExpr& alternationExpr);
	void operator()(const CharacterExpr& characterExpr);
	void operator()(const CharacterClassExpr& characterClassExpr);
	void operator()(const ClosureExpr& closureExpr);
	void operator()(const BeginOfLineExpr& bolExpr);
	void operator()(const EndOfLineExpr& eolExpr);
	void operator()(const EndOfFileExpr& eofExpr);
	void operator()(const DotExpr& dotExpr);
	void operator()(const EmptyExpr& emptyExpr);

  private:
	NFA fa_;                              // value-initialized by the constructor
	std::optional<StateId> acceptState_;  // empty until set by the implementation
};
51 | 
52 | }  // namespace klex::regular
53 | 


--------------------------------------------------------------------------------
/autogen.sh:
--------------------------------------------------------------------------------
 1 | #! /bin/bash
 2 | 
 3 | set -e
 4 | 
 5 | BUILDDIR=`pwd`
 6 | ROOT=`dirname $0`
 7 | 
 8 | FILES=(
 9 |   /ar-lib
10 |   /aclocal.m4
11 |   /compile
12 |   /autom4te.cache
13 |   /configure
14 |   /install-sh
15 |   /missing
16 |   /depcomp
17 | )
18 | 
# "clean" mode: remove generated Makefile.in files and the build artifacts listed in FILES.
# Expansions are quoted so a checkout path containing spaces is handled correctly.
if test "$1" = "clean"; then
  find "${ROOT}" -name Makefile.in -exec rm {} \;
  for file in "${FILES[@]}"; do rm -vrf "${ROOT}${file}"; done
  exit 0
fi
24 | 
# Prints the location of the first candidate that resolves as a command and returns.
# If none of the candidates resolves, prints the first candidate unchanged so the
# caller still receives a value.
findexe() {
  local exe
  for exe in "$@"; do
    # `command -v` is a shell builtin, so no external `which` is required.
    if command -v "$exe" 2>/dev/null; then
      return
    fi
  done
  echo "$1"
}
33 | 
34 | # Mac OSX has a special location for more recent LLVM/clang installations
35 | #   $ brew tap homebrew/versions
36 | #   $ brew install llvm
37 | if [[ -d "/usr/local/opt/llvm/bin" ]]; then
38 |   export PATH="/usr/local/opt/llvm/bin:${PATH}"
39 |   export CXXFLAGS="$CXXFLAGS -nostdinc++ -I/usr/local/opt/llvm/include/c++/v1"
40 |   export LDFLAGS="$LDFLAGS -L/usr/local/opt/llvm/lib"
41 | fi
42 | 
43 | # Mac OS/X has `brew install zlib`'d its zlib.pc somewhere non-standard ;-)
44 | pkgdirs=( "/usr/local/opt/zlib/lib/pkgconfig" )
45 | for pkgdir in ${pkgdirs[*]}; do
46 |   if [[ -d "${pkgdir}" ]]; then
47 |     export PKG_CONFIG_PATH=${PKG_CONFIG_PATH}${PKG_CONFIG_PATH:+:}${pkgdir}
48 |   fi
49 | done
50 | 
51 | export CXX=$(findexe $CXX g++-7 clang++-6.0 clang++ g++)
52 | export CXXFLAGS="${CXXFLAGS:--O0 -g}"
53 | 
54 | echo CXX = $CXX
55 | echo CXXFLAGS = $CXXFLAGS
56 | echo PKG_CONFIG_PATH = $PKG_CONFIG_PATH
57 | 
58 | exec cmake "${ROOT}" \
59 |             -DCMAKE_BUILD_TYPE="debug" \
60 |             -DCMAKE_INSTALL_PREFIX="${HOME}/local" \
61 |             -DCMAKE_VERBOSE_MAKEFILE=OFF \
62 |             -DCMAKE_EXPORT_COMPILE_COMMANDS=ON \
63 |             -DENABLE_TIDY=OFF \
64 |             "${@}"
65 | 


--------------------------------------------------------------------------------
/src/klex/cfg/ll/SyntaxTable_test.cpp:
--------------------------------------------------------------------------------
 1 | // This file is part of the "klex" project, http://github.com/christianparpart/klex>
 2 | //	 (c) 2018 Christian Parpart <christian@parpart.family>
 3 | //
 4 | // Licensed under the MIT License (the "License"); you may not use this
 5 | // file except in compliance with the License. You may obtain a copy of
 6 | // the License at: http://opensource.org/licenses/MIT
 7 | 
 8 | #include <klex/Report.h>
 9 | #include <klex/cfg/GrammarLexer.h>
10 | #include <klex/cfg/GrammarParser.h>
11 | #include <klex/cfg/ll/SyntaxTable.h>
12 | #include <klex/util/literals.h>
13 | #include <klex/util/testing.h>
14 | 
15 | using namespace std;
16 | using namespace klex;
17 | using namespace klex::cfg;
18 | using namespace klex::cfg::ll;
19 | using namespace klex::util::literals;
20 | 
21 | TEST(cfg_ll_SyntaxTable, construct_right_recursive)
22 | {
23 |     BufferedReport report;
24 |     Grammar grammar = GrammarParser(
25 |                           GrammarLexer {
26 |                               R"(`token {
27 | 		   `  Spacing(ignore) ::= [\s\t]+
28 | 		   `  Number          ::= [0-9]+
29 | 		   `}
30 | 		   `
31 | 		   `Start  ::= Expr;
32 | 		   `Expr   ::= Term Expr_;
33 | 		   `Expr_  ::= '+' Term Expr_
34 | 		   `         | ;
35 | 		   `Term   ::= Factor Term_;
36 | 		   `Term_  ::= '*' Factor Term_
37 | 		   `         | ;
38 | 		   `Factor ::= '(' Expr ')'
39 | 		   `         | Number
40 | 		   `         ;
41 | 		   `)"_multiline },
42 |                           &report)
43 |                           .parse();
44 | 
45 |     ASSERT_FALSE(report.containsFailures());
46 | 
47 |     grammar.finalize();
48 |     log("Grammar:");
49 |     log(grammar.dump());
50 | 
51 |     ll::SyntaxTable st = ll::SyntaxTable::construct(grammar);
52 | 
53 |     log("Syntax Table:");
54 |     log(st.dump(grammar));
55 | 
56 |     // TODO
57 | }
58 | 
59 | // vim:ts=4:sw=4:noet
60 | 


--------------------------------------------------------------------------------
/src/klex/CharStream.h:
--------------------------------------------------------------------------------
 1 | // This file is part of the "klex" project, http://github.com/christianparpart/klex>
 2 | //   (c) 2018 Christian Parpart <christian@parpart.family>
 3 | //
 4 | // Licensed under the MIT License (the "License"); you may not use this
 5 | // file except in compliance with the License. You may obtain a copy of
 6 | // the License at: http://opensource.org/licenses/MIT
 7 | #pragma once
 8 | 
 9 | #include <iosfwd>
10 | #include <istream>
11 | #include <string>
12 | 
13 | namespace klex {
14 | 
/**
 * Abstract source of characters that supports stepping back and starting over.
 */
class CharStream {
  public:
	virtual ~CharStream() = default;

	/// Tells whether the end of the input has been reached.
	[[nodiscard]] virtual bool isEof() const noexcept = 0;
	/// Reads one character and advances the read position.
	virtual char get() = 0;
	/// Moves the read position back by @p count characters.
	virtual void rollback(int count) = 0;
	/// Moves the read position back to where reading started.
	virtual void rewind() = 0;
};

/**
 * CharStream over a string that the stream stores itself.
 */
class StringStream : public CharStream {
  public:
	explicit StringStream(std::string&& s) : source_{std::move(s)} {}

	[[nodiscard]] bool isEof() const noexcept override { return pos_ >= source_.size(); }

	/**
	 * Returns the character at the read position, or '\0' at or past the end.
	 *
	 * The position advances even past the end, so a rollback() that follows a read at
	 * end-of-input lands where it did before; only the out-of-range indexing is avoided.
	 */
	char get() override
	{
		const char ch = pos_ < source_.size() ? source_[pos_] : '\0';
		++pos_;
		return ch;
	}

	/**
	 * Moves the read position back by @p count, stopping at the start of the string
	 * instead of wrapping around. A negative @p count moves the position forward.
	 */
	void rollback(int count) override
	{
		if (count >= 0)
		{
			const auto distance = static_cast<size_t>(count);
			pos_ = distance < pos_ ? pos_ - distance : 0;
		}
		else
			pos_ += static_cast<size_t>(-static_cast<long long>(count));
	}

	void rewind() override { pos_ = 0; }

  private:
	std::string source_;
	size_t pos_ = 0;
};
38 | 
/**
 * CharStream backed by a std::istream.
 *
 * Only a pointer to the stream is stored; the stream's ownership and lifetime are not
 * visible from this header, so it presumably must outlive this object.
 */
class StandardStream : public CharStream {
  public:
	/// Defined out of line; expected to record @c initialOffset_ (confirm in the .cpp file).
	explicit StandardStream(std::istream* source);

	/// True as soon as the underlying stream is no longer good().
	[[nodiscard]] bool isEof() const noexcept override { return !source_->good(); }
	/// Returns the next character; the int result of istream::get() is narrowed to char.
	char get() override { return static_cast<char>(source_->get()); }

	/// Seeks @p count characters backwards from the current position.
	void rollback(int count) override
	{
		// Clear the state flags first, so the seek also works after a failed read.
		source_->clear();
		source_->seekg(-count, std::ios::cur);
	}

	/// Seeks to @c initialOffset_, measured from the beginning of the stream.
	void rewind() override
	{
		source_->clear();
		source_->seekg(initialOffset_, std::ios::beg);
	}

  private:
	std::istream* source_;
	std::streamoff initialOffset_;
};
62 | 
63 | }  // namespace klex
64 | 


--------------------------------------------------------------------------------
/src/klex/regular/DotWriter_test.cpp:
--------------------------------------------------------------------------------
 1 | // This file is part of the "klex" project, http://github.com/christianparpart/klex>
 2 | //   (c) 2018 Christian Parpart <christian@parpart.family>
 3 | //
 4 | // Licensed under the MIT License (the "License"); you may not use this
 5 | // file except in compliance with the License. You may obtain a copy of
 6 | // the License at: http://opensource.org/licenses/MIT
 7 | 
 8 | #include <klex/regular/DotWriter.h>
 9 | #include <klex/util/testing.h>
10 | 
11 | #include <sstream>
12 | 
13 | using namespace std;
14 | using namespace klex::regular;
15 | 
// Smoke test: drives a DotWriter through two nodes and their edges, including edges
// labelled with whitespace and control characters, and only checks that output was written.
TEST(regular_DotWriter, simple)
{
    stringstream sstr;
    DotWriter dw(sstr, "n");

    dw.start(0);
    dw.visitNode(0, true, true);
    dw.visitEdge(0, 1, 'a');
    dw.endVisitEdge(0, 1);

    // Several symbols on the same self-loop 1 -> 1, closed by a single endVisitEdge().
    dw.visitNode(1, false, true);
    dw.visitEdge(1, 1, 'b');
    dw.visitEdge(1, 1, '\r');
    dw.visitEdge(1, 1, '\n');
    dw.visitEdge(1, 1, '\t');
    dw.visitEdge(1, 1, ' ');
    dw.endVisitEdge(1, 1);
    dw.end();

    log(sstr.str());
    ASSERT_TRUE(!sstr.str().empty());
    // just make sure it processes
}
39 | 
// Smoke test for the DotWriter constructor that takes a MultiDFA::InitialStateMap
// ("foo" -> 1, "bar" -> 2); only checks that output was written.
TEST(regular_DotWriter, multidfa_simple)
{
    stringstream sstr;
    const MultiDFA::InitialStateMap mis { { "foo", 1 }, { "bar", 2 } };
    DotWriter dw(sstr, "n", mis);

    dw.start(0);
    dw.visitNode(0, true, false);
    dw.visitNode(1, false, true);
    dw.visitNode(2, false, true);

    // Edges from node 0 to the two mapped states, labelled 0x01 and 0x02.
    dw.visitEdge(0, 1, 0x01);
    dw.endVisitEdge(0, 1);

    dw.visitEdge(0, 2, 0x02);
    dw.endVisitEdge(0, 2);

    // One self-loop on 'a' for each of the two states.
    dw.visitEdge(1, 1, 'a');
    dw.endVisitEdge(1, 1);

    dw.visitEdge(2, 2, 'a');
    dw.endVisitEdge(2, 2);

    dw.end();

    log(sstr.str());
    ASSERT_TRUE(!sstr.str().empty());
    // just make sure it processes
}
69 | 


--------------------------------------------------------------------------------
/src/klex/regular/DFAMinimizer.h:
--------------------------------------------------------------------------------
 1 | // This file is part of the "klex" project, http://github.com/christianparpart/klex>
 2 | //   (c) 2018 Christian Parpart <christian@parpart.family>
 3 | //
 4 | // Licensed under the MIT License (the "License"); you may not use this
 5 | // file except in compliance with the License. You may obtain a copy of
 6 | // the License at: http://opensource.org/licenses/MIT
 7 | #pragma once
 8 | 
 9 | #include <klex/regular/Alphabet.h>
10 | #include <klex/regular/MultiDFA.h>
11 | #include <klex/regular/State.h>
12 | 
13 | #include <cassert>
14 | #include <cstdlib>
15 | #include <list>
16 | #include <optional>
17 | #include <unordered_map>
18 | #include <vector>
19 | 
20 | namespace klex::regular {
21 | 
22 | class DFA;
23 | 
24 | class DFAMinimizer {  // constructible from a DFA or a MultiDFA; only targetStateId() is defined in this header
25 |   public:
26 | 	explicit DFAMinimizer(const DFA& dfa);
27 | 	explicit DFAMinimizer(const MultiDFA& multiDFA);
28 | 
29 | 	DFA constructDFA();  // NOTE(review): presumably returns the minimized automaton — confirm in the .cpp
30 | 	MultiDFA constructMultiDFA();  // MultiDFA-returning counterpart of constructDFA()
31 | 
32 |   private:
33 | 	using PartitionVec = std::list<StateIdVec>;  // despite the "Vec" name this is a std::list of state-id vectors
34 | 
35 | 	void constructPartitions();
36 | 	StateIdVec nonAcceptStates() const;
37 | 	bool containsInitialState(const StateIdVec& S) const;
38 | 	bool isMultiInitialState(StateId s) const;
39 | 	PartitionVec::iterator findGroup(StateId s);
40 | 	int partitionId(StateId s) const;
41 | 	PartitionVec split(const StateIdVec& S) const;
42 | 	DFA constructFromPartitions(const PartitionVec& P) const;
43 | 	std::optional<StateId> containsBacktrackState(const StateIdVec& Q) const;
44 | 
45 | 	static void dumpGroups(const PartitionVec& T);  // parameter T shadows the member of the same name
46 | 
47 | 	StateId targetStateId(StateId oldId) const  // looks up oldId in targetStateIdMap_ and returns the mapped id
48 | 	{
49 | 		auto i = targetStateIdMap_.find(oldId);
50 | 		assert(i != targetStateIdMap_.end());  // oldId must have been registered; unchecked when NDEBUG is defined
51 | 		return i->second;
52 | 	}
53 | 
54 |   private:
55 | 	const DFA& dfa_;  // non-owning reference; NOTE(review): unclear what it binds to in the MultiDFA constructor — confirm
56 | 	const MultiDFA::InitialStateMap initialStates_;  // stored by value
57 | 	const Alphabet alphabet_;  // stored by value
58 | 	PartitionVec T;  // NOTE(review): presumably the partition set under construction — confirm
59 | 	PartitionVec P;  // NOTE(review): presumably the previous/resulting partition set — confirm
60 | 	std::unordered_map<StateId, StateId> targetStateIdMap_;  // old state id -> new state id, read by targetStateId()
61 | };
62 | 
63 | }  // namespace klex::regular
64 | 


--------------------------------------------------------------------------------
/src/klex/regular/TransitionMap.h:
--------------------------------------------------------------------------------
 1 | // This file is part of the "klex" project, http://github.com/christianparpart/klex>
 2 | //   (c) 2018 Christian Parpart <christian@parpart.family>
 3 | //
 4 | // Licensed under the MIT License (the "License"); you may not use this
 5 | // file except in compliance with the License. You may obtain a copy of
 6 | // the License at: http://opensource.org/licenses/MIT
 7 | #pragma once
 8 | 
 9 | #include <klex/regular/State.h>
10 | #include <map>
11 | #include <vector>
12 | 
13 | namespace klex::regular {
14 | 
15 | using CharCatId = int;  // integer id type; presumably identifies a character category
16 | 
17 | constexpr CharCatId ErrorCharCat = static_cast<CharCatId>(-1);  // sentinel id with value -1
18 | 
19 | /**
20 |  * Represents an error-state, such as invalid input character or unexpected EOF.
21 |  */
22 | constexpr StateId ErrorState{808080};  // sentinel id 808080; commented-out alternative: static_cast<StateId>(-1)
23 | 
24 | /**
25 |  * Transition mapping API to map the input (currentState, charCat) to (newState).
26 |  */
27 | class TransitionMap {
28 |   public:
29 | 	using Container = std::map<StateId, std::map<Symbol, StateId>>;  // source state -> (symbol -> target state)
30 | 
31 | 	TransitionMap() : mapping_{} {}  // starts with no transitions
32 | 
33 | 	TransitionMap(Container mapping) : mapping_{std::move(mapping)} {}  // not explicit: a Container converts implicitly
34 | 
35 | 	/**
36 | 	 * Defines a new mapping for (currentState, charCat) to (nextState).
37 | 	 */
38 | 	void define(StateId currentState, Symbol charCat, StateId nextState);  // note: charCat is typed Symbol, not CharCatId
39 | 
40 | 	/**
41 | 	 * Retrieves the next state for the input (currentState, charCat).
42 | 	 *
43 | 	 * @returns the transition from (currentState, charCat) to (nextState) or ErrorState if not defined.
44 | 	 */
45 | 	StateId apply(StateId currentState, Symbol charCat) const;
46 | 
47 | 	/**
48 | 	 * Retrieves a list of all available states.
49 | 	 */
50 | 	std::vector<StateId> states() const;  // returned by value
51 | 
52 | 	/**
53 | 	 * Retrieves a map of all transitions from given state @p inputState.
54 | 	 */
55 | 	std::map<Symbol, StateId> map(StateId inputState) const;  // returned by value
56 | 
57 |   private:
58 | 	Container mapping_;  // the whole transition table
59 | };
60 | 
61 | }  // namespace klex::regular
62 | 
63 | #include <klex/regular/TransitionMap-inl.h>
64 | 


--------------------------------------------------------------------------------
/src/klex/regular/DFABuilder.h:
--------------------------------------------------------------------------------
 1 | // This file is part of the "klex" project, http://github.com/christianparpart/klex>
 2 | //   (c) 2018 Christian Parpart <christian@parpart.family>
 3 | //
 4 | // Licensed under the MIT License (the "License"); you may not use this
 5 | // file except in compliance with the License. You may obtain a copy of
 6 | // the License at: http://opensource.org/licenses/MIT
 7 | #pragma once
 8 | 
 9 | #include <klex/regular/NFA.h>
10 | #include <map>
11 | #include <utility>
12 | #include <vector>
13 | 
14 | namespace klex::regular {
15 | 
16 | class DFA;
17 | class State;
18 | 
19 | class DFABuilder {
20 |   public:
21 | 	//! Map of rules that shows which rule is overshadowed by which other rule.
22 | 	using OvershadowMap = std::vector<std::pair<Tag, Tag>>;
23 | 
24 | 	explicit DFABuilder(NFA&& nfa) : nfa_{std::move(nfa)} {}  // takes the NFA by rvalue and stores it in nfa_
25 | 
26 | 	/**
27 | 	 * Constructs a DFA out of the NFA.
28 | 	 *
29 | 	 * @param overshadows if not nullptr, it will be used to store semantic information about
30 | 	 *                    which rule tags have been overshadowed by which.
31 | 	 */
32 | 	DFA construct(OvershadowMap* overshadows = nullptr);
33 | 
34 |   private:
35 | 	struct TransitionTable;  // only declared here; defined elsewhere
36 | 
37 | 	DFA constructDFA(const std::vector<StateIdVec>& Q, const TransitionTable& T,
38 | 					 OvershadowMap* overshadows) const;
39 | 
40 | 	/**
41 | 	 * Finds @p t in @p Q and returns its offset (aka configuration number) or an empty optional if not found.
42 | 	 */
43 | 	static std::optional<StateId> configurationNumber(const std::vector<StateIdVec>& Q, const StateIdVec& t);  // NOTE(review): <optional> is not included directly by this header
44 | 
45 | 	/**
46 | 	 * Determines the tag to use for the deterministic state representing @p q.
47 | 	 *
48 | 	 * @param q the set of states that reflect a single state in the DFA equal to the input FA
49 | 	 * @param overshadows a Tag-to-Tag map (note: not the OvershadowMap vector type); presumably receives overshadowed tags — confirm
50 | 	 *
51 | 	 * @returns the determined tag or std::nullopt if none
52 | 	 */
53 | 	std::optional<Tag> determineTag(const StateIdVec& q, std::map<Tag, Tag>* overshadows) const;
54 | 
55 |   private:
56 | 	const NFA nfa_;  // the input automaton, owned by value
57 | };
58 | 
59 | }  // namespace klex::regular
60 | 


--------------------------------------------------------------------------------
/src/klex/regular/RegExpr.h:
--------------------------------------------------------------------------------
 1 | // This file is part of the "klex" project, http://github.com/christianparpart/klex>
 2 | //   (c) 2018 Christian Parpart <christian@parpart.family>
 3 | //
 4 | // Licensed under the MIT License (the "License"); you may not use this
 5 | // file except in compliance with the License. You may obtain a copy of
 6 | // the License at: http://opensource.org/licenses/MIT
 7 | #pragma once
 8 | 
 9 | #include <klex/regular/Symbols.h>
10 | 
11 | #include <limits>
12 | #include <memory>
13 | #include <set>
14 | #include <string>
15 | #include <string_view>
16 | #include <variant>
17 | 
18 | #include <fmt/format.h>
19 | 
20 | namespace klex::regular {
21 | 
22 | struct AlternationExpr;
23 | struct BeginOfLineExpr;
24 | struct CharacterClassExpr;
25 | struct CharacterExpr;
26 | struct ClosureExpr;
27 | struct ConcatenationExpr;
28 | struct DotExpr;
29 | struct EmptyExpr;
30 | struct EndOfFileExpr;
31 | struct EndOfLineExpr;
32 | struct LookAheadExpr;
33 | 
34 | using RegExpr = std::variant<AlternationExpr, BeginOfLineExpr, CharacterClassExpr, CharacterExpr, ClosureExpr, ConcatenationExpr, DotExpr, EmptyExpr, EndOfFileExpr, EndOfLineExpr, LookAheadExpr>;
35 | 
36 | struct LookAheadExpr {  // two owned sub-expressions; NOTE(review): presumably `left` followed by look-ahead `right` — confirm
37 | 	std::unique_ptr<RegExpr> left;
38 | 	std::unique_ptr<RegExpr> right;
39 | };
40 | 
41 | struct AlternationExpr {  // two owned sub-expressions; presumably "left | right"
42 | 	std::unique_ptr<RegExpr> left;
43 | 	std::unique_ptr<RegExpr> right;
44 | };
45 | 
46 | struct ConcatenationExpr {  // two owned sub-expressions; presumably "left right" in sequence
47 | 	std::unique_ptr<RegExpr> left;
48 | 	std::unique_ptr<RegExpr> right;
49 | };
50 | 
51 | struct ClosureExpr {  // one owned sub-expression plus a repetition range
52 | 	std::unique_ptr<RegExpr> subExpr;
53 | 	unsigned minimumOccurrences {0};  // default lower bound: 0
54 | 	unsigned maximumOccurrences {std::numeric_limits<unsigned>::max()};  // default upper bound: largest unsigned value
55 | };
56 | 
57 | struct CharacterExpr {  // holds a single Symbol
58 | 	Symbol value;
59 | };
60 | 
61 | struct CharacterClassExpr {  // holds a set of symbols
62 | 	SymbolSet symbols;
63 | };
64 | 
65 | struct DotExpr {};          // the remaining node types carry no data; the type itself is the information
66 | struct BeginOfLineExpr {};
67 | struct EndOfLineExpr {};
68 | struct EndOfFileExpr {};
69 | struct EmptyExpr {};
70 | 
71 | std::string to_string(const RegExpr& regex);  // textual form of @p regex (declared here, defined elsewhere)
72 | int precedence(const RegExpr& regex);  // NOTE(review): presumably an operator-precedence rank — confirm ordering in the .cpp
73 | bool containsBeginOfLine(const RegExpr& regex);  // presumably true when a BeginOfLineExpr occurs in @p regex — confirm
74 | 
75 | }  // namespace klex::regular
76 | 


--------------------------------------------------------------------------------
/.circleci/config.yml:
--------------------------------------------------------------------------------
 1 | # vim:ts=2:sw=2:et
 2 | version: 2
 3 | 
 4 | # -----------------------------------------------------------------------------------------------
 5 | defaults:  # reusable YAML anchors referenced by the jobs and workflows below
 6 | 
 7 |   - workflow_trigger_on_tags: &workflow_trigger_on_tags  # tag filter matching every tag name
 8 |       filters:
 9 |         tags:
10 |           only: /.*/
11 | 
12 |   - run_prepare: &run_prepare  # installs cmake and clang via apt
13 |       name: Prepare  # NOTE(review): the Ubuntu package is normally named clang-8; confirm 'clang++-8' resolves as intended
14 |       command: |
15 |         set -ex
16 |         apt-get -q update
17 |         apt-get -qy install cmake clang++-8
18 | 
19 |   - run_build: &run_build  # configures into ./build using the job's CMAKE_* environment variables, then runs make
20 |       name: Build
21 |       command: |
22 |         set -ex
23 |         mkdir -p build
24 |         cd build
25 |         cmake .. -G "Unix Makefiles" \
26 |             -DCMAKE_CXX_COMPILER=$CMAKE_CXX_COMPILER \
27 |             -DCMAKE_C_COMPILER=$CMAKE_C_COMPILER \
28 |             -DCMAKE_BUILD_TYPE="$CMAKE_BUILD_TYPE" \
29 |             $CMAKE_OPTIONS
30 |         make -j3
31 | 
32 |   - run_test: &run_test  # runs the test binary produced by the build step
33 |       name: klex_test
34 |       command: ./build/klex_test
35 | 
36 |   - run_git_sm_init: &run_git_sm_init  # fetches git submodules
37 |       name: git submodule init
38 |       command: git submodule update --init
39 | 
40 | # -----------------------------------------------------------------------------------------------
41 | jobs:  # two jobs with identical steps; only the compiler environment differs
42 | 
43 |   build_ubuntu1904_gcc:  # Release build and test with gcc-8 / g++-8
44 |     docker:
45 |       - image: buildpack-deps:disco
46 |     environment:
47 |       CMAKE_BUILD_TYPE: "Release"
48 |       CMAKE_C_COMPILER: "gcc-8"  # NOTE(review): the Prepare step installs no gcc; presumably provided by the image — confirm
49 |       CMAKE_CXX_COMPILER: "g++-8"
50 |     steps:
51 |       - checkout
52 |       - run: *run_git_sm_init
53 |       - run: *run_prepare
54 |       - run: *run_build
55 |       - run: *run_test
56 |       - persist_to_workspace:  # keeps everything under build/
57 |           root: build
58 |           paths:
59 |             - "*"
60 | 
61 |   build_ubuntu1904_clang:  # Release build and test with clang-8 / clang++-8
62 |     docker:
63 |       - image: buildpack-deps:disco
64 |     environment:
65 |       CMAKE_BUILD_TYPE: "Release"
66 |       CMAKE_C_COMPILER: "clang-8"
67 |       CMAKE_CXX_COMPILER: "clang++-8"
68 |     steps:
69 |       - checkout
70 |       - run: *run_git_sm_init
71 |       - run: *run_prepare
72 |       - run: *run_build
73 |       - run: *run_test
74 |       - persist_to_workspace:  # keeps everything under build/
75 |           root: build
76 |           paths:
77 |             - "*"
78 | 
79 | workflows:
80 |   version: 2
81 | 
82 |   build_and_test:  # runs both compiler jobs, each with the tag filter anchor applied
83 |     jobs:
84 |       - build_ubuntu1904_gcc: *workflow_trigger_on_tags
85 |       - build_ubuntu1904_clang: *workflow_trigger_on_tags
86 | 


--------------------------------------------------------------------------------
/klex2flex.sh:
--------------------------------------------------------------------------------
 1 | #! /bin/bash
 2 | # This file is part of the "klex" project, http://github.com/christianparpart/klex>
 3 | #   (c) 2018 Christian Parpart <christian@parpart.family>
 4 | #
 5 | # Licensed under the MIT License (the "License"); you may not use this
 6 | # file except in compliance with the License. You may obtain a copy of
 7 | # the License at: http://opensource.org/licenses/MIT
 8 | 
 9 | set -e  # stop at the first failing command
10 | 
11 | klex_file="${1:?usage: klex2flex.sh <file.klex>}"  # input rules file; abort with a usage message when missing
12 | lex_file="out.lex"      # generated flex input
13 | table_file="table.cc"   # scanner source produced by flex
14 | token_file="token.h"    # generated token enum and to_string()
15 | typeName="Token"        # C++ enum name used in the generated to_string()
16 | 
17 | echo "klex file: ${klex_file}"
18 | echo "table file: ${table_file}"
19 | echo "token file: ${token_file}"
20 | 
21 | generate_token_file() {  # writes ${token_file}: an enum of all non-ignored rules, then a to_string() switch
22 |   awk >${token_file} <"${klex_file}" -f <(echo '
23 |   BEGIN {
24 |     rule_nr = 0;
25 |     printf("#pragma once\n\n");
26 |     printf("#include <cstdlib>      // abort()\n");
27 |     printf("#include <string_view>\n\n");
28 |     printf("enum class Token {\n");
29 |   }
30 | 
31 |   match($0, /^(\w+)\(ignore\)\s*::=\s*(.*)$/, rule) {
32 |   }
33 | 
34 |   match($0, /^(\w+)\s*::=\s*(.*)$/, rule) {
35 |     name = rule[1];
36 |     pattern = rule[2];
37 |     rule_nr++;
38 |     printf("  %-20s = %4s, // %s\n", name, rule_nr, pattern);
39 |   }
40 | 
41 |   END {
42 |     printf("};\n\n"); # end enum
43 |   }
44 |   ')  # first pass: truncates ${token_file}; the "(ignore)" rule above has an empty action and emits nothing
45 | 
46 |   awk >>${token_file} <"${klex_file}" -f <(echo "
47 |   BEGIN {
48 |     printf(\"inline constexpr std::string_view to_string(${typeName} t) {\n\");
49 |     printf(\"  switch (t) { \n\");
50 |   }
51 |   match(\$0, /^(\w+)\s*::=\s*(.*)$/, rule) {
52 |     name = rule[1];
53 |     printf(\"    case ${typeName}::%s: return \\\"%s\\\";\n\", name, name);
54 |   }
55 |   END {
56 |     printf(\"    default: abort();\n\");
57 |     printf(\"  }\n\");
58 |     printf(\"}\n\");
59 |   }
60 |   ")  # second pass: appends; double-quoted so ${typeName} is expanded by the shell before awk runs
61 | }  # NOTE(review): the three-argument match() is a gawk extension — confirm awk is gawk on target systems
62 | 
63 | generate_table_file() {  # writes ${lex_file}: flex input with one rule per klex rule
64 |   awk >"${lex_file}" <"${klex_file}" -f <(echo '
65 |   BEGIN {
66 |     rule_nr = 0;
67 |     printf("%%option noyywrap\n");
68 |     printf("%%%%\n");
69 |   }
70 | 
71 |   match($0, /^(\w+)\(ignore\)\s*::=\s*(.*)$/, rule) {
72 |     name = rule[1];
73 |     pattern = rule[2];
74 |     printf("%-40s { /* %s */ }\n", pattern, name);
75 |   }
76 | 
77 |   match($0, /^(\w+)\s*::=\s*(.*)$/, rule) {
78 |     name = rule[1];
79 |     pattern = rule[2];
80 |     rule_nr++;
81 |     printf("%-40s { return %d; /* %s */ }\n", pattern, rule_nr, name);
82 |   }')  # %option belongs to the flex definitions section, so it is printed before the %% separator
83 | }  # ignored rules get an empty action; other rules return their 1-based index, matching the token enum values
84 | 
85 | generate_table_file    # step 1: write the flex input (${lex_file})
86 | generate_token_file    # step 2: write the token header (${token_file})
87 | 
88 | flex -t "${lex_file}" >"${table_file}"  # step 3: have flex print the scanner to stdout and capture it
89 | 


--------------------------------------------------------------------------------
/src/klex/regular/DotWriter.h:
--------------------------------------------------------------------------------
 1 | // This file is part of the "klex" project, http://github.com/christianparpart/klex>
 2 | //   (c) 2018 Christian Parpart <christian@parpart.family>
 3 | //
 4 | // Licensed under the MIT License (the "License"); you may not use this
 5 | // file except in compliance with the License. You may obtain a copy of
 6 | // the License at: http://opensource.org/licenses/MIT
 7 | #pragma once
 8 | 
 9 | #include <klex/regular/DotVisitor.h>
10 | #include <klex/regular/MultiDFA.h>
11 | #include <klex/regular/State.h>
12 | 
13 | #include <fstream>
14 | #include <map>
15 | #include <memory>
16 | #include <ostream>
17 | #include <string>
18 | #include <string_view>
19 | 
20 | namespace klex::regular {
21 | 
22 | class DotWriter : public DotVisitor {  // DotVisitor implementation that writes to a std::ostream
23 |   public:
24 | 	DotWriter(std::ostream& os, std::string stateLabelPrefix)  // writes to a caller-owned stream; no initial-state map
25 | 		: ownedStream_{},
26 | 		  stream_{os},
27 | 		  stateLabelPrefix_{stateLabelPrefix},
28 | 		  transitionGroups_{},
29 | 		  initialStates_{nullptr},
30 | 		  initialState_{0}
31 | 	{
32 | 	}
33 | 
34 | 	DotWriter(const std::string& filename, std::string stateLabelPrefix)  // opens and owns a std::ofstream for @p filename
35 | 		: ownedStream_{std::make_unique<std::ofstream>(filename)},
36 | 		  stream_{*ownedStream_},  // ownedStream_ is declared before stream_, so it is already initialized here
37 | 		  stateLabelPrefix_{stateLabelPrefix},
38 | 		  transitionGroups_{},
39 | 		  initialStates_{nullptr},
40 | 		  initialState_{0}
41 | 	{
42 | 	}
43 | 
44 | 	DotWriter(std::ostream& os, std::string stateLabelPrefix, const MultiDFA::InitialStateMap& initialStates)  // caller-owned stream
45 | 		: ownedStream_{},
46 | 		  stream_{os},
47 | 		  stateLabelPrefix_{stateLabelPrefix},
48 | 		  transitionGroups_{},
49 | 		  initialStates_{&initialStates},  // stores the address only: @p initialStates must outlive this writer
50 | 		  initialState_{0}
51 | 	{
52 | 	}
53 | 
54 | 	DotWriter(const std::string& filename, std::string stateLabelPrefix,
55 | 			  const MultiDFA::InitialStateMap& initialStates)  // owned file stream plus borrowed initial-state map
56 | 		: ownedStream_{std::make_unique<std::ofstream>(filename)},
57 | 		  stream_{*ownedStream_},  // ownedStream_ is declared before stream_, so it is already initialized here
58 | 		  stateLabelPrefix_{stateLabelPrefix},
59 | 		  transitionGroups_{},
60 | 		  initialStates_{&initialStates},  // stores the address only: @p initialStates must outlive this writer
61 | 		  initialState_{0}
62 | 	{
63 | 	}
64 | 
65 |   public:
66 | 	void start(StateId initialState) override;
67 | 	void visitNode(StateId number, bool start, bool accept) override;
68 | 	void visitEdge(StateId from, StateId to, Symbol s) override;
69 | 	void endVisitEdge(StateId from, StateId to) override;
70 | 	void end() override;
71 | 
72 |   private:
73 | 	std::unique_ptr<std::ostream> ownedStream_;  // set only by the filename constructors; empty otherwise
74 | 	std::ostream& stream_;  // output target: the caller's stream or *ownedStream_
75 | 	std::string stateLabelPrefix_;
76 | 	std::map<StateId /*target state*/, std::vector<Symbol> /*transition symbols*/> transitionGroups_;
77 | 	const MultiDFA::InitialStateMap* initialStates_;  // borrowed, may be nullptr
78 | 	StateId initialState_;  // 0 until changed; presumably assigned by start() — confirm in the .cpp
79 | };
80 | 
81 | }  // namespace klex::regular
82 | 


--------------------------------------------------------------------------------
/src/klex/regular/LexerDef.h:
--------------------------------------------------------------------------------
 1 | // This file is part of the "klex" project, http://github.com/christianparpart/klex>
 2 | //   (c) 2018 Christian Parpart <christian@parpart.family>
 3 | //
 4 | // Licensed under the MIT License (the "License"); you may not use this
 5 | // file except in compliance with the License. You may obtain a copy of
 6 | // the License at: http://opensource.org/licenses/MIT
 7 | #pragma once
 8 | 
 9 | #include <klex/regular/TransitionMap.h>
10 | #include <klex/regular/State.h>
11 | #include <map>
12 | #include <string>
13 | #include <sstream>
14 | 
15 | namespace klex::regular {
16 | 
17 | // special tags
18 | constexpr Tag IgnoreTag = static_cast<Tag>(-1);
19 | constexpr Tag FirstUserTag = 1;
20 | 
21 | using AcceptStateMap = std::map<StateId, Tag>;
22 | 
23 | //! defines a mapping between an accept state ID and another (prior) state ID to roll the input stream back to.
24 | using BacktrackingMap = std::map<StateId, StateId>;
25 | 
26 | struct LexerDef {  // plain aggregate holding everything to_string() prints: states, transitions, accepts, tags
27 |   std::map<std::string, StateId> initialStates;  // name -> initial state id
28 |   bool containsBeginOfLineStates{false};  // initialized so a default-constructed LexerDef never holds an indeterminate value
29 |   TransitionMap transitions;
30 |   AcceptStateMap acceptStates;  // accept state id -> tag
31 |   BacktrackingMap backtrackingStates;  // state id -> state id
32 |   std::map<Tag, std::string> tagNames;  // tag -> name, used by isValidTag() and tagName()
33 | 
34 |   std::string to_string() const;
35 | 
36 |   bool isValidTag(Tag t) const noexcept {  // true when @p t has an entry in tagNames
37 |     return tagNames.find(t) != tagNames.end();
38 |   }
39 | 
40 |   std::string tagName(Tag t) const {  // returns a copy of the name registered for @p t
41 |     auto i = tagNames.find(t);
42 |     assert(i != tagNames.end());  // precondition: isValidTag(t); unchecked when NDEBUG is defined
43 |     return i->second;
44 |   }
45 | };
46 | 
47 | inline std::string LexerDef::to_string() const {  // renders states, transitions, accepts and backtracking as text
48 |   std::stringstream sstr;
49 |   const std::vector<StateId> states = transitions.states();  // states() returns by value: query once, use twice
50 |   sstr << fmt::format("initializerStates:\n");
51 |   for (const auto& [name, stateId] : initialStates)  // bind by reference: no per-entry std::string copy
52 |     sstr << fmt::format("  {}: {}\n", name, stateId);
53 |   sstr << fmt::format("totalStates: {}\n", states.size());
54 | 
55 |   sstr << "transitions:\n";
56 |   for (StateId inputState : states) {
57 |     std::map<StateId, std::vector<Symbol>> T;  // target state -> all symbols leading to it from inputState
58 |     for (const auto& [symbol, target] : transitions.map(inputState)) {
59 |       T[target].push_back(symbol);
60 |     }
61 |     for (auto& t : T) {  // non-const: the symbol vector is moved into groupCharacterClassRanges()
62 |       sstr << fmt::format("- n{} --({})--> n{}\n", inputState, groupCharacterClassRanges(std::move(t.second)), t.first);
63 |     }
64 |   }
65 | 
66 |   sstr << "accepts:\n";
67 |   for (const auto& [stateId, tag] : acceptStates)
68 |     sstr << fmt::format("- n{} to {} ({})\n", stateId, tag, tagName(tag));
69 | 
70 |   if (!backtrackingStates.empty()) {  // the section header is omitted when there is nothing to list
71 |     sstr << "backtracking:\n";
72 |     for (const auto& [from, to] : backtrackingStates)
73 |       sstr << fmt::format("- n{} to n{}\n", from, to);
74 |   }
75 | 
76 |   return sstr.str();
77 | }
78 | 
79 | } // namespace klex::regular
80 | 


--------------------------------------------------------------------------------
/src/klex/cfg/GrammarLexer_test.cpp:
--------------------------------------------------------------------------------
 1 | // This file is part of the "klex" project, http://github.com/christianparpart/klex>
 2 | //	 (c) 2018 Christian Parpart <christian@parpart.family>
 3 | //
 4 | // Licensed under the MIT License (the "License"); you may not use this
 5 | // file except in compliance with the License. You may obtain a copy of
 6 | // the License at: http://opensource.org/licenses/MIT
 7 | 
 8 | #include <klex/cfg/GrammarLexer.h>
 9 | #include <klex/util/literals.h>
10 | #include <klex/util/testing.h>
11 | 
12 | using namespace std;
13 | using namespace klex;
14 | using namespace klex::util::literals;
15 | 
16 | using cfg::Grammar;
17 | using cfg::GrammarLexer;
18 | 
19 | TEST(cfg_GrammarLexer, literals)  // single- and double-quoted literals are recognized with their quotes stripped
20 | {
21 |     GrammarLexer scanner(R"('1' '23' '456' "789")");
22 | 
23 |     ASSERT_EQ(GrammarLexer::Token::Literal, scanner.recognize());
24 |     ASSERT_EQ("1", scanner.currentLiteral());
25 | 
26 |     ASSERT_EQ(GrammarLexer::Token::Literal, scanner.recognize());
27 |     ASSERT_EQ("23", scanner.currentLiteral());
28 | 
29 |     ASSERT_EQ(GrammarLexer::Token::Literal, scanner.recognize());
30 |     ASSERT_EQ("456", scanner.currentLiteral());
31 | 
32 |     ASSERT_EQ(GrammarLexer::Token::Literal, scanner.recognize());
33 |     ASSERT_EQ("789", scanner.currentLiteral());
34 | 
35 |     ASSERT_EQ(GrammarLexer::Token::Eof, scanner.recognize());
36 | }
37 | 
38 | TEST(cfg_GrammarLexer, tokenization)  // walks the full token stream of a two-alternative rule
39 | {
40 |     GrammarLexer scanner(R"(:
41 | 			:Expr			::= Expr '+' Term			{addExpr}
42 | 			:						| Expr '-' Term			{subExpr}
43 | 			:						;
44 | 			:)"_multiline);
45 | 
46 |     ASSERT_EQ(GrammarLexer::Token::Identifier, scanner.recognize());
47 |     ASSERT_EQ(GrammarLexer::Token::Assoc, scanner.recognize());
48 |     ASSERT_EQ(GrammarLexer::Token::Identifier, scanner.recognize());
49 |     ASSERT_EQ(GrammarLexer::Token::Literal, scanner.recognize());
50 |     ASSERT_EQ("+", scanner.currentLiteral());
51 |     ASSERT_EQ(GrammarLexer::Token::Identifier, scanner.recognize());
52 |     ASSERT_EQ(GrammarLexer::Token::SetOpen, scanner.recognize());
53 |     ASSERT_EQ(GrammarLexer::Token::Identifier, scanner.recognize());
54 |     ASSERT_EQ(GrammarLexer::Token::SetClose, scanner.recognize());
55 | 
56 |     ASSERT_EQ(GrammarLexer::Token::Or, scanner.recognize());
57 |     ASSERT_EQ(GrammarLexer::Token::Identifier, scanner.recognize());
58 |     ASSERT_EQ(GrammarLexer::Token::Literal, scanner.recognize());
59 |     ASSERT_EQ("-", scanner.currentLiteral());
60 |     ASSERT_EQ(GrammarLexer::Token::Identifier, scanner.recognize());
61 |     ASSERT_EQ(GrammarLexer::Token::SetOpen, scanner.recognize());
62 |     ASSERT_EQ(GrammarLexer::Token::Identifier, scanner.recognize());
63 |     ASSERT_EQ(GrammarLexer::Token::SetClose, scanner.recognize());
64 | 
65 |     ASSERT_EQ(GrammarLexer::Token::Semicolon, scanner.recognize());
66 |     ASSERT_EQ(GrammarLexer::Token::Eof, scanner.recognize());
67 | }
68 | 
69 | // vim:ts=4:sw=4:noet
70 | 


--------------------------------------------------------------------------------
/src/klex/regular/NFA_test.cpp:
--------------------------------------------------------------------------------
 1 | // This file is part of the "klex" project, http://github.com/christianparpart/klex>
 2 | //   (c) 2018 Christian Parpart <christian@parpart.family>
 3 | //
 4 | // Licensed under the MIT License (the "License"); you may not use this
 5 | // file except in compliance with the License. You may obtain a copy of
 6 | // the License at: http://opensource.org/licenses/MIT
 7 | 
 8 | #include <klex/regular/Alphabet.h>
 9 | #include <klex/regular/NFA.h>
10 | #include <klex/regular/State.h>
11 | #include <klex/util/testing.h>
12 | 
13 | using namespace std;
14 | using namespace klex::regular;
15 | 
16 | TEST(regular_NFA, emptyCtor)  // a default-constructed NFA has no states
17 | {
18 |     const NFA blank;
19 |     ASSERT_EQ(0, blank.size());
20 |     ASSERT_TRUE(blank.empty());
21 | }
22 | 
23 | TEST(regular_NFA, characterCtor)  // a single-symbol NFA: state 0 --a--> state 1
24 | {
25 |     const NFA single { 'a' };
26 |     ASSERT_EQ(2, single.size());
27 |     ASSERT_EQ(0, single.initialStateId());
28 |     ASSERT_EQ(1, single.acceptStateId());
29 |     ASSERT_EQ(StateIdVec { 1 }, single.delta(StateIdVec { 0 }, 'a'));
30 | }
31 | 
32 | TEST(regular_NFA, concatenate)  // "ab": four states, starting at 0 and accepting at 3
33 | {
34 |     const NFA joined = move(NFA { 'a' }.concatenate(NFA { 'b' }));
35 |     ASSERT_EQ(4, joined.size());
36 |     ASSERT_EQ(0, joined.initialStateId());
37 |     ASSERT_EQ(3, joined.acceptStateId());
38 | 
39 |     // TODO: check ab.initial == A.initial
40 |     // TODO: check A.accept == B.initial
41 |     // TODO: check ab.accept == B.accept
42 | }
43 | 
44 | TEST(regular_NFA, alternate)  // "a|b": six states, starting at 2 and accepting at 3
45 | {
46 |     const NFA either = move(NFA { 'a' }.alternate(NFA { 'b' }));
47 |     ASSERT_EQ(6, either.size());
48 |     ASSERT_EQ(2, either.initialStateId());
49 |     ASSERT_EQ(3, either.acceptStateId());
50 | 
51 |     // TODO: check acceptState transitions to A and B
52 |     // TODO: check A and B's outgoing edges to final acceptState
53 | }
54 | 
55 | TEST(regular_NFA, epsilonClosure)  // closures on a single-symbol NFA and on a(b|c) with recurring()
56 | {
57 |     const NFA single { 'a' };
58 |     ASSERT_EQ(0, single.initialStateId());
59 |     ASSERT_EQ(1, single.acceptStateId());
60 |     ASSERT_EQ(StateIdVec { 0 }, single.epsilonClosure(StateIdVec { 0 }));
61 | 
62 |     const NFA composite = move(NFA { 'a' }.concatenate(move(NFA { 'b' }.alternate(NFA { 'c' }).recurring())));
63 |     ASSERT_EQ(StateIdVec { 0 }, composite.epsilonClosure(StateIdVec { 0 }));
64 | 
65 |     const StateIdVec expected { 1, 2, 4, 6, 8, 9 };
66 |     ASSERT_EQ(expected, composite.epsilonClosure(StateIdVec { 1 }));
67 | }
68 | 
69 | TEST(regular_NFA, delta)  // consuming 'a' from state 0 reaches state 1
70 | {
71 |     const NFA single { 'a' };
72 |     ASSERT_EQ(0, single.initialStateId());
73 |     ASSERT_EQ(1, single.acceptStateId());
74 |     ASSERT_EQ(StateIdVec { 1 }, single.delta(StateIdVec { 0 }, 'a'));
75 | }
76 | 
77 | TEST(regular_NFA, alphabet)  // the alphabet's text form lists each symbol used, wrapped in braces
78 | {
79 |     ASSERT_EQ("{}", NFA {}.alphabet().to_string());  // no symbols at all
80 |     ASSERT_EQ("{a}", NFA { 'a' }.alphabet().to_string());
81 |     ASSERT_EQ("{ab}", NFA { 'a' }.concatenate(NFA { 'b' }).alphabet().to_string());
82 |     ASSERT_EQ("{abc}", NFA { 'a' }.concatenate(NFA { 'b' }).alternate(NFA { 'c' }).alphabet().to_string());
83 | }
84 | 


--------------------------------------------------------------------------------
/src/klex/regular/Symbols_test.cpp:
--------------------------------------------------------------------------------
  1 | // This file is part of the "x0" project, http://github.com/christianparpart/x0>
  2 | //   (c) 2009-2018 Christian Parpart <christian@parpart.family>
  3 | //
  4 | // Licensed under the MIT License (the "License"); you may not use this
  5 | // file except in compliance with the License. You may obtain a copy of
  6 | // the License at: http://opensource.org/licenses/MIT
  7 | 
  8 | #include <klex/regular/Symbols.h>
  9 | #include <klex/util/testing.h>
 10 | 
 11 | using namespace std;
 12 | using klex::regular::SymbolSet;
 13 | 
 14 | TEST(regular_SymbolSet, s0)  // a default-constructed set is empty
 15 | {
 16 |     SymbolSet none;
 17 |     ASSERT_EQ(0, none.size());
 18 |     ASSERT_TRUE(none.empty());
 19 | }
 20 | 
 21 | TEST(regular_SymbolSet, s1)  // inserting the same symbol twice keeps a single element
 22 | {
 23 |     SymbolSet symbols;
 24 | 
 25 |     // initial insertion
 26 |     symbols.insert('a');
 27 |     ASSERT_EQ(1, symbols.size());
 28 |     ASSERT_FALSE(symbols.empty());
 29 | 
 30 |     // inserting the same symbol again
 31 |     symbols.insert('a');
 32 |     ASSERT_EQ(1, symbols.size());
 33 |     ASSERT_FALSE(symbols.empty());
 34 | }
 35 | 
 36 | TEST(regular_SymbolSet, initializer_list)  // brace-initialization, including a duplicated element
 37 | {
 38 |     SymbolSet onlyA { 'a' };
 39 |     EXPECT_EQ(1, onlyA.size());
 40 |     EXPECT_TRUE(onlyA.contains('a'));
 41 | 
 42 |     SymbolSet withDuplicate { 'a', 'b', 'b', 'c' };
 43 |     EXPECT_EQ(3, withDuplicate.size());
 44 |     EXPECT_EQ("abc", withDuplicate.to_string());
 45 | }
 46 | 
 47 | TEST(regular_SymbolSet, dot)  // the Dot set excludes '\n' but contains '\0' and ' '
 48 | {
 49 |     SymbolSet anyChar(SymbolSet::Dot);
 50 |     EXPECT_FALSE(anyChar.contains('\n'));
 51 |     EXPECT_TRUE(anyChar.contains('\0'));
 52 |     EXPECT_TRUE(anyChar.contains(' '));
 53 |     EXPECT_TRUE(anyChar.isDot());
 54 |     EXPECT_EQ(".", anyChar.to_string());
 55 | }
 56 | 
 57 | TEST(regular_SymbolSet, complement)  // the complement of {'\n'} prints as "."
 58 | {
 59 |     SymbolSet symbols;
 60 |     symbols.insert('\n');
 61 |     EXPECT_EQ("\\n", symbols.to_string());
 62 |     symbols.complement();
 63 |     EXPECT_EQ(".", symbols.to_string());
 64 | }
 65 | 
 66 | TEST(regular_SymbolSet, range)  // inserted ranges are counted per symbol and printed in sorted order
 67 | {
 68 |     SymbolSet hexDigits;
 69 |     hexDigits.insert(make_pair('a', 'f'));
 70 | 
 71 |     EXPECT_EQ(6, hexDigits.size());
 72 |     EXPECT_EQ("a-f", hexDigits.to_string());
 73 | 
 74 |     hexDigits.insert(make_pair('0', '9'));
 75 |     EXPECT_EQ(16, hexDigits.size());
 76 |     EXPECT_EQ("0-9a-f", hexDigits.to_string());
 77 | }
 78 | 
 79 | TEST(regular_SymbolSet, fmt_format)  // a SymbolSet can be passed to fmt::format directly
 80 | {
 81 |     SymbolSet hexDigits;
 82 |     hexDigits.insert(make_pair('0', '9'));
 83 |     hexDigits.insert(make_pair('a', 'f'));
 84 | 
 85 |     EXPECT_EQ("0-9a-f", fmt::format("{}", hexDigits));
 86 | }
 87 | 
 88 | TEST(regular_SymbolSet, hash_map)  // SymbolSet works as an unordered_map key
 89 | {
 90 |     SymbolSet none;
 91 |     SymbolSet onlyA { 'a' };
 92 |     SymbolSet aAndB { 'a', 'b' };
 93 | 
 94 |     unordered_map<SymbolSet, int> lookup;
 95 |     lookup[none] = 0;
 96 |     lookup[onlyA] = 1;
 97 |     lookup[aAndB] = 2;
 98 | 
 99 |     EXPECT_EQ(0, lookup[none]);
100 |     EXPECT_EQ(1, lookup[onlyA]);
101 |     EXPECT_EQ(2, lookup[aAndB]);
102 | }
103 | 
104 | TEST(regular_SymbolSet, compare)  // equality and inequality operators
105 | {
106 |     SymbolSet first { 'a', 'b' };
107 |     SymbolSet same { 'a', 'b' };
108 |     SymbolSet other { 'a', 'c' };
109 |     ASSERT_TRUE(first == same);
110 |     ASSERT_TRUE(first != other);
111 | }
112 | 


--------------------------------------------------------------------------------
/src/klex/cfg/ll/README.md:
--------------------------------------------------------------------------------
 1 | 
 2 | # LL(1) Syntax Analyzer
 3 | 
 4 | ## Motivations
 5 | 
 6 | - Have a convenience-first API for generating and analyzing context-free grammars (of type LL(1) and LL(k)).
 7 | - Rule rewriting to solve various conflicts or improve power & convenience of the input grammar:
 8 |   - Must solve left-recursion by rewriting into right-recursive rules.
 9 |   - Must rewrite iterations into set of right-recursive rules.
10 |   - Must support epsilon rules.
11 | - Keep C++20's constexpr changes in mind to allow early adoption of compile-time table constructions.
12 | 
13 | ## klax-Grammar File Format
14 | 
15 | ```
16 | Start               ::= ExplicitTokenGroup? GrammarRule+
17 | ExplicitTokenGroup  ::= 'token' '{' KLEX_TOKEN_GRAMMAR* '}'
18 | GrammarRule         ::= NonTerminal '::=' Handle ('|' Handle)* ';'
19 | NonTerminal         ::= _*[A-Z][a-zA-Z0-9_]*
20 | Terminal            ::= _*[a-z][A-Za-z0-9_]*
21 |                       | "'" ... "'"
22 |                       | '([^'\n]|\\\\')*'|\"([^\"\n]|\\\")*\"
23 | Handle              ::= (Terminal | NonTerminal)*
24 | ```
25 | 
26 | ## klax example files
27 | 
28 | ### Expression-Term-Factor
29 | 
30 | ```
31 | token {
32 |   Spacing(ignore) ::= [\s\t\n]+
33 |   Number          ::= 0|[1-9][0-9]*
34 |   Ident           ::= [a-z]+
35 |   Eof             ::= <<EOF>>
36 | }
37 | 
38 | # NTS     ::= HANDLES            {ACTION_LABELS}
39 | 
40 | Start     ::= Expr Eof           {expr}
41 | Expr      ::= Expr '+' Term      {addExpr}
42 |             | Expr '-' Term      {subExpr}
43 |             | Term
44 |             ;
45 | Term      ::= Term '*' Factor    {mulExpr}
46 |             | Term '/' Factor    {divExpr}
47 |             | Factor
48 |             ;
49 | Factor    ::= Number             {numberLiteral}
50 |             | Ident              {variable}
51 |             | '(' Expr ')'
52 |             ;
53 | ```
54 | 
55 | ```cpp
56 | using namespace std;
57 | 
58 | klex::ll::Def pd = klex::ll::Compiler{ETF_RULES}.compile();
59 | klex::ll::Analyzer<int> parser{ pd, "2 + 3 * (10 - 6)" };
60 | parser.action("numberLiteral", [](auto& args) { return stoi(args.literal(1)); })
61 |       .action("mulExpr", [](auto const& args) { return args(1) * args(2); })
62 |       .action("divExpr", [](auto const& args) { return args(1) / args(2); })
63 |       .action("addExpr", [](auto const& args) { return args(1) + args(2); })
64 |       .action("subExpr", [](auto const& args) { return args(1) - args(2); });
65 | unique_ptr<Expr> expr = parser.analyze();
66 | ```
67 | 
68 | The parse-table generator needs to rewrite the left-recursion into right-recursion to make
69 | the grammar LL(1) compatible.
70 | 
71 | # Random Brainstorming Thoughts
72 | 
73 | ```
74 | # should be supportable
75 | AddExpr ::= MulExpr ('+' MulExpr)*
76 | 
77 | # and automatically rewritten into a right-recursive grammar
78 | AddExpr ::= MulExpr
79 |           | MulExpr '+' AddExpr
80 |           | MulExpr '-' AddExpr
81 | 
82 | A  -> aX*b
83 |    into
84 | A  -> ab
85 |     | aX'b
86 | X' -> X X'?
87 | ```
88 | 


--------------------------------------------------------------------------------
/.clang-format:
--------------------------------------------------------------------------------
 1 | ---
 2 | BasedOnStyle: Microsoft
 3 | AccessModifierOffset: '-2'
 4 | AlignAfterOpenBracket: Align
 5 | AlignConsecutiveMacros: 'true'
 6 | AlignConsecutiveDeclarations: 'false'
 7 | AlignEscapedNewlines: Left
 8 | AlignOperands: 'true'
 9 | AlignTrailingComments: 'true'
10 | AllowAllArgumentsOnNextLine: 'true'
11 | AllowAllConstructorInitializersOnNextLine: 'true'
12 | AllowAllParametersOfDeclarationOnNextLine: 'true'
13 | AllowShortBlocksOnASingleLine: 'false'
14 | AllowShortCaseLabelsOnASingleLine: 'true'
15 | AllowShortFunctionsOnASingleLine: InlineOnly
16 | AllowShortIfStatementsOnASingleLine: Never
17 | AllowShortLambdasOnASingleLine: Inline
18 | AllowShortLoopsOnASingleLine: 'false'
19 | AlwaysBreakAfterReturnType: None
20 | AlwaysBreakBeforeMultilineStrings: 'false'
21 | AlwaysBreakTemplateDeclarations: 'Yes'
22 | BinPackArguments: 'false'
23 | BinPackParameters: 'false'
24 | BreakBeforeBinaryOperators: NonAssignment
25 | BreakBeforeBraces: Custom
26 | BreakBeforeTernaryOperators: 'true'
27 | BreakConstructorInitializers: AfterColon
28 | BreakInheritanceList: AfterColon
29 | BreakStringLiterals: 'true'
30 | ColumnLimit: '110'
31 | CompactNamespaces: 'false'
32 | ConstructorInitializerAllOnOneLineOrOnePerLine: 'true'
33 | ConstructorInitializerIndentWidth: '4'
34 | ContinuationIndentWidth: '4'
35 | Cpp11BracedListStyle: 'false'
36 | DerivePointerAlignment: 'false'
37 | FixNamespaceComments: 'true'
38 | IncludeBlocks: Regroup
39 | IndentCaseLabels: true
40 | IndentPPDirectives: BeforeHash
41 | IndentWidth: '4'
42 | IndentWrappedFunctionNames: 'false'
43 | Language: Cpp
44 | MaxEmptyLinesToKeep: '1'
45 | NamespaceIndentation: Inner
46 | PenaltyBreakAssignment: '0'
47 | PointerAlignment: Left
48 | ReflowComments: 'true'
49 | SortIncludes: 'true'
50 | SortUsingDeclarations: 'true'
51 | SpaceAfterCStyleCast: 'true'
52 | SpaceAfterLogicalNot: 'false'
53 | SpaceAfterTemplateKeyword: 'true'
54 | SpaceBeforeAssignmentOperators: 'true'
55 | SpaceBeforeCpp11BracedList: 'true'
56 | SpaceBeforeCtorInitializerColon: 'false'
57 | SpaceBeforeInheritanceColon: 'false'
58 | SpaceBeforeParens: ControlStatements
59 | SpaceBeforeRangeBasedForLoopColon: 'false'
60 | SpaceInEmptyParentheses: 'false'
61 | SpacesInAngles: 'false'
62 | SpacesInCStyleCastParentheses: 'false'
63 | SpacesInContainerLiterals: 'false'
64 | SpacesInParentheses: 'false'
65 | SpacesInSquareBrackets: 'false'
66 | Standard: Cpp11
67 | TabWidth: '4'
68 | UseTab: Never
69 | IncludeCategories:
70 |   - Regex:     '^<(klex)/'
71 |     Priority:  0
72 |   - Regex:     '^<(crispy)/'
73 |     Priority:  4
74 |   - Regex:     '^<(unicode)/'
75 |     Priority:  5
76 |   - Regex:     '^<(fmt)/'
77 |     Priority:  6
78 |   - Regex:     '^<(yaml-cpp)/'
79 |     Priority:  7
80 |   - Regex:     '^<(range)/'
81 |     Priority:  8
82 |   - Regex:     '^<gsl/'
83 |     Priority:  9
84 |   - Regex:     '^<catch2/'
85 |     Priority:  20
86 |   - Regex:     '^<sys/'
87 |     Priority:  30
88 |   - Regex:     '^<[[:alnum:]_]+>'
89 |     Priority:  41
90 |   - Regex:     '<[[:alnum:]_]+\.h>'
91 |     Priority:  42
92 |   - Regex:     '.*'
93 |     Priority:  99
94 | 


--------------------------------------------------------------------------------
/src/klex/regular/RegExprParser.h:
--------------------------------------------------------------------------------
 1 | // This file is part of the "klex" project, http://github.com/christianparpart/klex>
 2 | //   (c) 2018 Christian Parpart <christian@parpart.family>
 3 | //
 4 | // Licensed under the MIT License (the "License"); you may not use this
 5 | // file except in compliance with the License. You may obtain a copy of
 6 | // the License at: http://opensource.org/licenses/MIT
 7 | #pragma once
 8 | 
 9 | #include <klex/regular/Symbols.h>
10 | #include <klex/regular/RegExpr.h>
11 | #include <fmt/format.h>
12 | #include <memory>
13 | #include <string_view>
14 | 
15 | namespace klex::regular {
16 | 
17 | class SymbolSet;
18 | 
19 | class RegExprParser {
20 |   public:
21 | 	RegExprParser();
22 | 
23 | 	RegExpr parse(std::string_view expr, int line, int column);
24 | 
25 | 	RegExpr parse(std::string_view expr) { return parse(std::move(expr), 1, 1); }
26 | 
27 | 	class UnexpectedToken : public std::runtime_error {
28 | 	  public:
29 | 		UnexpectedToken(unsigned int line, unsigned int column, std::string actual, std::string expected)
30 | 			: std::runtime_error{fmt::format("[{}:{}] Unexpected token {}. Expected {} instead.", line,
31 | 											 column, actual, expected)},
32 | 			  line_{line},
33 | 			  column_{column},
34 | 			  actual_{std::move(actual)},
35 | 			  expected_{std::move(expected)}
36 | 		{
37 | 		}
38 | 
39 | 		UnexpectedToken(unsigned int line, unsigned int column, int actual, int expected)
40 | 			: UnexpectedToken{line, column,
41 | 							  actual == -1 ? "EOF" : fmt::format("{}", static_cast<char>(actual)),
42 | 							  std::string(1, static_cast<char>(expected))}
43 | 		{
44 | 		}
45 | 
46 | 		unsigned int line() const noexcept { return line_; }
47 | 		unsigned int column() const noexcept { return column_; }
48 | 		const std::string& actual() const noexcept { return actual_; }
49 | 		const std::string& expected() const noexcept { return expected_; }
50 | 
51 | 	  private:
52 | 		unsigned int line_;
53 | 		unsigned int column_;
54 | 		std::string actual_;
55 | 		std::string expected_;
56 | 	};
57 | 
58 |   private:
59 | 	int currentChar() const;
60 | 	bool eof() const noexcept { return currentChar() == -1; }
61 | 	bool consumeIf(int ch);
62 | 	void consume(int ch);
63 | 	int consume();
64 | 	unsigned parseInt();
65 | 
66 | 	RegExpr parse();                 // expr
67 | 	RegExpr parseExpr();             // lookahead
68 | 	RegExpr parseLookAheadExpr();    // alternation ('/' alternation)?
69 | 	RegExpr parseAlternation();      // concatenation ('|' concatenation)*
70 | 	RegExpr parseConcatenation();    // closure (closure)*
71 | 	RegExpr parseClosure();          // atom ['*' | '?' | '{' NUM [',' NUM] '}']
72 | 	RegExpr parseAtom();             // character | characterClass | '(' expr ')'
73 | 	RegExpr parseCharacterClass();   // '[' characterClassFragment+ ']'
74 | 	void parseCharacterClassFragment(SymbolSet& ss);  // namedClass | character | character '-' character
75 | 	void parseNamedCharacterClass(SymbolSet& ss);     // '[' ':' NAME ':' ']'
76 | 	Symbol parseSingleCharacter();
77 | 
78 |   private:
79 | 	std::string_view input_;
80 | 	std::string_view::iterator currentChar_;
81 | 	unsigned int line_;
82 | 	unsigned int column_;
83 | };
84 | 
85 | }  // namespace klex::regular
86 | 


--------------------------------------------------------------------------------
/src/klex/util/UnboxedRange.h:
--------------------------------------------------------------------------------
 1 | // This file is part of the "x0" project, http://github.com/christianparpart/x0>
 2 | //   (c) 2009-2018 Christian Parpart <christian@parpart.family>
 3 | //
 4 | // Licensed under the MIT License (the "License"); you may not use this
 5 | // file except in compliance with the License. You may obtain a copy of
 6 | // the License at: http://opensource.org/licenses/MIT
 7 | #pragma once
 8 | 
 9 | #include <iterator>
10 | 
11 | namespace klex::util {
12 | 
/**
 * Range adaptor over a container of boxed elements (element types exposing `element_type`
 * and `get()`, such as std::unique_ptr).
 *
 * Iterating the range yields the raw `element_type*` of each box. Only a pair of iterators
 * into the container is stored, so the container must outlive the range.
 */
template <typename T>
class UnboxedRange {
  public:
	using BoxedContainer = T;
	using BoxedIterator = typename BoxedContainer::iterator;
	using element_type = typename BoxedContainer::value_type::element_type;

	class iterator {  // {{{
	  public:
		using difference_type = typename BoxedContainer::iterator::difference_type;
		using value_type = typename BoxedContainer::iterator::value_type::element_type;
		using pointer = typename BoxedContainer::iterator::value_type::element_type*;
		using reference = typename BoxedContainer::iterator::value_type::element_type&;
		// NOTE(review): the category is inherited from the boxed iterator although only
		// increment and (in)equality are implemented here.
		using iterator_category = typename BoxedContainer::iterator::iterator_category;

		explicit iterator(BoxedIterator boxed) : it_(boxed) {}

		// operator-> must return a pointer (or something with its own operator->), not a reference.
		const element_type* operator->() const { return (*it_).get(); }
		element_type* operator->() { return (*it_).get(); }

		// Dereferencing yields the raw pointer held by the box, not the pointee.
		const element_type* operator*() const { return (*it_).get(); }
		element_type* operator*() { return (*it_).get(); }

		iterator& operator++()
		{
			++it_;
			return *this;
		}

		// Post-increment: advance, but hand back the position held before the increment.
		iterator operator++(int)
		{
			iterator previous = *this;
			++it_;
			return previous;
		}

		bool operator==(const iterator& other) const { return it_ == other.it_; }
		bool operator!=(const iterator& other) const { return it_ != other.it_; }

	  private:
		BoxedIterator it_;
	};  // }}}

	UnboxedRange(BoxedIterator begin, BoxedIterator end) : begin_(begin), end_(end) {}
	explicit UnboxedRange(BoxedContainer& c) : begin_(c.begin()), end_(c.end()) {}
	// NOTE(review): constness is cast away here, so elements of a const container are
	// reachable through non-const pointers.
	explicit UnboxedRange(const BoxedContainer& c) : UnboxedRange{const_cast<BoxedContainer&>(c)} {}

	iterator begin() const { return iterator{begin_}; }
	iterator end() const { return iterator{end_}; }
	iterator cbegin() const { return iterator{begin_}; }
	iterator cend() const { return iterator{end_}; }

	// Measured on the underlying iterators: the wrapper iterator offers no operator-, so
	// std::distance on it does not compile when the boxed iterators are random access.
	size_t size() const { return static_cast<size_t>(std::distance(begin_, end_)); }

  private:
	BoxedIterator begin_;
	BoxedIterator end_;
};
68 | 
69 | /**
70 |  * Unboxes boxed element types in containers.
71 |  *
72 |  * Good examples are:
73 |  *
74 |  * \code
75 |  *    std::vector<std::unique_ptr<int>> numbers;
76 |  *    // ...
77 |  *    for (int number: unbox(numbers)) {
78 |  *      // ... juse use number here, instead of number.get() or *number.
79 |  *    };
80 |  * \endcode
81 |  */
82 | template <typename BoxedContainer>
83 | UnboxedRange<BoxedContainer> unbox(BoxedContainer& boxedContainer)
84 | {
85 | 	return UnboxedRange<BoxedContainer>(boxedContainer);
86 | }
87 | 
88 | template <typename BoxedContainer>
89 | UnboxedRange<BoxedContainer> unbox(const BoxedContainer& boxedContainer)
90 | {
91 | 	return UnboxedRange<BoxedContainer>(boxedContainer);
92 | }
93 | 
94 | }  // namespace klex::util
95 | 


--------------------------------------------------------------------------------
/src/klex/Report.cpp:
--------------------------------------------------------------------------------
  1 | // This file is part of the "klex" project, http://github.com/christianparpart/klex>
  2 | //   (c) 2018 Christian Parpart <christian@parpart.family>
  3 | //
  4 | // Licensed under the MIT License (the "License"); you may not use this
  5 | // file except in compliance with the License. You may obtain a copy of
  6 | // the License at: http://opensource.org/licenses/MIT
  7 | 
  8 | #include <klex/Report.h>
  9 | 
 10 | #include <iostream>
 11 | #include <sstream>
 12 | 
 13 | using namespace std;
 14 | using namespace klex;
 15 | 
 16 | // {{{ Message
 17 | string Report::Message::to_string() const
 18 | {
 19 |     switch (type)
 20 |     {
 21 |         case Type::Warning: return fmt::format("[{}] {}", sourceLocation, text);
 22 |         case Type::LinkError: return fmt::format("{}: {}", type, text);
 23 |         default: return fmt::format("[{}] {}: {}", sourceLocation, type, text);
 24 |     }
 25 | }
 26 | 
 27 | bool Report::Message::operator==(const Message& other) const noexcept
 28 | {
 29 |     // XXX ignore SourceLocation's filename & end
 30 |     return type == other.type && sourceLocation.offset == other.sourceLocation.offset && text == other.text;
 31 | }
 32 | // }}}
 33 | // {{{ ConsoleReport
 34 | void ConsoleReport::onMessage(Message&& message)
 35 | {
 36 |     switch (message.type)
 37 |     {
 38 |         case Type::Warning: cerr << fmt::format("Warning: {}\n", message); break;
 39 |         default: cerr << fmt::format("Error: {}\n", message); break;
 40 |     }
 41 | }
 42 | // }}}
 43 | // {{{ BufferedReport
 44 | void BufferedReport::onMessage(Message&& msg)
 45 | {
 46 |     messages_.emplace_back(move(msg));
 47 | }
 48 | 
 49 | void BufferedReport::clear()
 50 | {
 51 |     messages_.clear();
 52 | }
 53 | 
 54 | string BufferedReport::to_string() const
 55 | {
 56 |     stringstream sstr;
 57 |     for (const Message& message: messages_)
 58 |     {
 59 |         switch (message.type)
 60 |         {
 61 |             case Type::Warning: sstr << "Warning: " << message.to_string() << "\n"; break;
 62 |             default: sstr << "Error: " << message.to_string() << "\n"; break;
 63 |         }
 64 |     }
 65 |     return sstr.str();
 66 | }
 67 | 
 68 | bool BufferedReport::operator==(const BufferedReport& other) const noexcept
 69 | {
 70 |     if (size() != other.size())
 71 |         return false;
 72 | 
 73 |     for (size_t i = 0, e = size(); i != e; ++i)
 74 |         if (messages_[i] != other.messages_[i])
 75 |             return false;
 76 | 
 77 |     return true;
 78 | }
 79 | 
 80 | bool BufferedReport::contains(const Message& message) const noexcept
 81 | {
 82 |     for (const Message& m: messages_)
 83 |         if (m == message)
 84 |             return true;
 85 | 
 86 |     return false;
 87 | }
 88 | 
 89 | DifferenceReport difference(const BufferedReport& first, const BufferedReport& second)
 90 | {
 91 |     DifferenceReport diff;
 92 | 
 93 |     for (const Report::Message& m: first)
 94 |         if (!second.contains(m))
 95 |             diff.first.push_back(m);
 96 | 
 97 |     for (const Report::Message& m: second)
 98 |         if (!first.contains(m))
 99 |             diff.second.push_back(m);
100 | 
101 |     return diff;
102 | }
103 | 
104 | ostream& operator<<(ostream& os, const BufferedReport& report)
105 | {
106 |     os << report.to_string();
107 |     return os;
108 | }
109 | // }}}
110 | 


--------------------------------------------------------------------------------
/src/klex/util/iterator.h:
--------------------------------------------------------------------------------
  1 | // This file is part of the "klex" project, http://github.com/christianparpart/klex>
  2 | //	 (c) 2018 Christian Parpart <christian@parpart.family>
  3 | //
  4 | // Licensed under the MIT License (the "License"); you may not use this
  5 | // file except in compliance with the License. You may obtain a copy of
  6 | // the License at: http://opensource.org/licenses/MIT
  7 | 
  8 | #pragma once
  9 | 
 10 | #include <klex/util/iterator-detail.h>
 11 | #include <algorithm>
 12 | #include <cstdint>
 13 | #include <sstream>
 14 | #include <type_traits>
 15 | #include <utility>
 16 | #include <vector>
 17 | 
 18 | namespace klex::util {
 19 | 
 20 | template <typename Container>
 21 | inline auto reversed(Container&& c)
 22 | {
 23 | 	if constexpr (std::is_reference<Container>::value)
 24 | 		return detail::reversed<Container&>{std::forward<Container>(c)};
 25 | 	else
 26 | 		return detail::reversed<Container>{std::forward<Container>(c)};
 27 | }
 28 | 
 29 | template <typename Container>
 30 | inline auto indexed(const Container& c)
 31 | {
 32 | 	return typename std::add_const<detail::indexed<const Container>>::type{c};
 33 | }
 34 | 
 35 | template <typename Container>
 36 | inline auto indexed(Container& c)
 37 | {
 38 | 	return detail::indexed<Container>{c};
 39 | }
 40 | 
 41 | template <typename Container, typename Lambda>
 42 | inline auto translate(const Container& container, Lambda mapfn) {
 43 | 	using namespace std;
 44 | 	using T = decltype(mapfn(*begin(container)));
 45 | 
 46 | 	vector<T> out;
 47 | 	out.reserve(distance(begin(container), end(container)));
 48 | 	transform(begin(container), end(container), back_inserter(out), move(mapfn));
 49 | 
 50 | 	return out;
 51 | }
 52 | 
 53 | template <typename Container>
 54 | inline std::string join(const Container& container, const std::string& separator = ", ")
 55 | {
 56 | 	std::stringstream out;
 57 | 
 58 | 	for (const auto&& [i, v] : indexed(container))
 59 | 		if (i)
 60 | 			out << separator << v;
 61 | 		else
 62 | 			out << v;
 63 | 
 64 | 	return out.str();
 65 | }
 66 | 
 67 | template <typename T, typename Lambda>
 68 | inline auto filter(std::initializer_list<T>&& c, Lambda proc)
 69 | {
 70 | 	return typename std::add_const<detail::filter<const std::initializer_list<T>, Lambda>>::type{c, proc};
 71 | }
 72 | 
 73 | template <typename Container, typename Lambda>
 74 | inline auto filter(const Container& c, Lambda proc)
 75 | {
 76 | 	return typename std::add_const<detail::filter<const Container, Lambda>>::type{c, proc};
 77 | }
 78 | 
 79 | template <typename Container, typename Lambda>
 80 | inline auto filter(Container& c, Lambda proc)
 81 | {
 82 | 	return detail::filter<Container, Lambda>{c, proc};
 83 | }
 84 | 
 85 | /**
 86 |  * Finds the last occurence of a given element satisfying @p test.
 87 |  *
 88 |  * @returns the iterator representing the last item satisfying @p test or @p end if none found.
 89 |  */
 90 | template<typename Container, typename Test>
 91 | auto find_last(const Container& container, Test test) -> decltype(std::cbegin(container))
 92 | {
 93 | 	auto begin = std::cbegin(container);
 94 | 	auto end = std::cend(container);
 95 | 
 96 | 	for (auto i = std::prev(end); i != begin; --i)
 97 | 		if (test(*i))
 98 | 			return i;
 99 | 
100 | 	if (test(*begin))
101 | 		return begin;
102 | 	else
103 | 		return end;
104 | }
105 | 
106 | }  // namespace klex::util
107 | 


--------------------------------------------------------------------------------
/src/klex/cfg/ll/SyntaxTable.h:
--------------------------------------------------------------------------------
 1 | // This file is part of the "klex" project, http://github.com/christianparpart/klex>
 2 | //	 (c) 2018 Christian Parpart <christian@parpart.family>
 3 | //
 4 | // Licensed under the MIT License (the "License"); you may not use this
 5 | // file except in compliance with the License. You may obtain a copy of
 6 | // the License at: http://opensource.org/licenses/MIT
 7 | 
 8 | #pragma once
 9 | 
10 | #include <klex/regular/LexerDef.h>
11 | 
12 | #include <algorithm>
13 | #include <optional>
14 | #include <stack>
15 | #include <unordered_map>
16 | #include <utility>
17 | #include <vector>
18 | 
19 | namespace klex::cfg {
20 | struct Grammar;
21 | }
22 | 
23 | namespace klex::cfg::ll {
24 | 
25 | // using Symbol = int;
26 | // using Handle = std::vector<Symbol>;
27 | 
28 | /** LL(1)-compatible syntax table.
29 |  */
30 | struct SyntaxTable {
31 | 	using Expression = std::vector<int>;  // non-terminals & terminals
32 | 	using LookAheadMap = std::unordered_map<int /*lookahead*/, int /*production*/>;
33 | 	using NonTerminalMap = std::unordered_map<int /*nonterminals*/, LookAheadMap>;
34 | 	using ProductionVec = std::vector<Expression>;
35 | 
36 | 	std::vector<std::string> names;
37 | 	std::vector<std::string> terminalNames;
38 | 	std::vector<std::string> nonterminalNames;
39 | 	std::vector<std::string> actionNames;
40 | 	std::vector<std::string> productionNames;
41 | 	ProductionVec productions;
42 | 	NonTerminalMap table;
43 | 	int startSymbol;
44 | 	regular::LexerDef lexerDef;
45 | 
46 | 	int actionId(const std::string& name) const
47 | 	{
48 | 		return actionMin() + std::distance(std::begin(actionNames),
49 | 				std::find_if(std::begin(actionNames), std::end(actionNames),
50 | 							 [&](const std::string& n) { return n == name; }));
51 | 	}
52 | 
53 | 	std::optional<int> lookup(int nonterminal, int lookahead) const;
54 | 
55 | 	size_t nonterminalCount() const noexcept { return nonterminalNames.size(); }
56 | 	size_t terminalCount() const noexcept { return terminalNames.size(); }
57 | 
58 | 	int nonterminalMin() const noexcept { return 0; }
59 | 	int nonterminalMax() const noexcept
60 | 	{
61 | 		return nonterminalMin() + static_cast<int>(nonterminalNames.size()) - 1;
62 | 	}
63 | 
64 | 	int terminalMin() const noexcept { return nonterminalMax() + 1; }
65 | 	int terminalMax() const noexcept { return terminalMin() + static_cast<int>(terminalNames.size()) - 1; }
66 | 
67 | 	int actionMin() const noexcept { return terminalMax() + 1; }
68 | 	int actionMax() const noexcept { return actionMin() + static_cast<int>(actionNames.size()) - 1; }
69 | 
70 | 	bool isNonTerminal(int id) const noexcept { return id >= nonterminalMin() && id <= nonterminalMax(); }
71 | 	bool isTerminal(int id) const noexcept { return id >= terminalMin() && id <= terminalMax(); }
72 | 	bool isAction(int id) const noexcept { return id >= actionMin() && id <= actionMax(); }
73 | 
74 | 	const std::string& terminalName(int s) const noexcept { return terminalNames[s - terminalMin()]; }
75 | 
76 | 	const std::string& nonterminalName(int s) const noexcept
77 | 	{
78 | 		return nonterminalNames[s - nonterminalMin()];
79 | 	}
80 | 
81 | 	const std::string& actionName(int s) const noexcept { return actionNames[s - actionMin()]; }
82 | 
83 | 	std::string dump(const Grammar& grammar) const;
84 | 
85 | 	static SyntaxTable construct(const Grammar& grammar);
86 | };
87 | 
88 | }  // namespace klex::cfg::ll
89 | 
90 | // vim:ts=4:sw=4:noet
91 | 


--------------------------------------------------------------------------------
/src/klex/cfg/GrammarLexer.h:
--------------------------------------------------------------------------------
  1 | // This file is part of the "klex" project, http://github.com/christianparpart/klex>
  2 | //	 (c) 2018 Christian Parpart <christian@parpart.family>
  3 | //
  4 | // Licensed under the MIT License (the "License"); you may not use this
  5 | // file except in compliance with the License. You may obtain a copy of
  6 | // the License at: http://opensource.org/licenses/MIT
  7 | 
  8 | #pragma once
  9 | 
 10 | #include <klex/cfg/Grammar.h>
 11 | #include <fmt/format.h>
 12 | 
 13 | namespace klex::cfg {
 14 | 
/**
 * Tokenizer for grammar files.
 *
 * Owns a copy of the input text and exposes the most recently recognized token together
 * with its literal.
 */
class GrammarLexer
{
  public:
	explicit GrammarLexer(std::string content);

	/// Token kinds; the trailing comment on each enumerator sketches its lexical pattern.
	enum class Token {
		Illegal,
		Spacing,     // [\s\t\n]+
		Identifier,  // [a-z][a-z0-9]*
		Token,       // 'token'
		Literal,     // '[^']*'|"[^"]*"
		Or,          // '|'
		Semicolon,   // ';'
		Assoc,       // '::='
		SetOpen,     // '{'
		SetClose,    // '}'
		Eof,         // <<EOF>>
	};

	/// True once the read offset has reached or passed the end of the content.
	[[nodiscard]] bool eof() const noexcept { return offset_ >= content_.size(); }
	/// Current read offset into the content.
	[[nodiscard]] size_t currentOffset() const { return offset_; }
	/// Value of the stored current token (presumably set by recognize(); confirm in GrammarLexer.cpp).
	[[nodiscard]] Token currentToken() const { return currentToken_; }
	/// Value of the stored current literal.
	[[nodiscard]] const std::string& currentLiteral() const noexcept { return currentLiteral_; }

	[[nodiscard]] Token recognize();

	[[nodiscard]] std::string consumeLiteralUntilLF();  // NB. only used for sub-language (klex)

  private:
	Token recognizeOne();
	Token consumeIdentifier();
	Token consumeLiteral();
	[[nodiscard]] int currentChar() const;
	[[nodiscard]] int peekChar(size_t offset) const;
	int consumeChar(size_t count = 1);

  private:
	std::string content_;         // input text being tokenized
	size_t offset_;               // read position within content_
	std::string currentLiteral_;  // backing store for currentLiteral()
	Token currentToken_;          // backing store for currentToken()
};
 57 | 
 58 | inline std::string to_string(klex::cfg::GrammarLexer::Token v)
 59 | {
 60 | 	switch (v)
 61 | 	{
 62 | 		case klex::cfg::GrammarLexer::Token::Spacing:
 63 | 			return "Spacing";
 64 | 		case klex::cfg::GrammarLexer::Token::Identifier:
 65 | 			return "Identifier";
 66 | 		case klex::cfg::GrammarLexer::Token::Token:
 67 | 			return "Token";
 68 | 		case klex::cfg::GrammarLexer::Token::Literal:
 69 | 			return "Literal";
 70 | 		case klex::cfg::GrammarLexer::Token::Or:
 71 | 			return "'|'";
 72 | 		case klex::cfg::GrammarLexer::Token::Semicolon:
 73 | 			return "';'";
 74 | 		case klex::cfg::GrammarLexer::Token::Assoc:
 75 | 			return "'::='";
 76 | 		case klex::cfg::GrammarLexer::Token::SetOpen:
 77 | 			return "'{'";
 78 | 		case klex::cfg::GrammarLexer::Token::SetClose:
 79 | 			return "'}'";
 80 | 		case klex::cfg::GrammarLexer::Token::Eof:
 81 | 			return "<<EOF>>";
 82 | 		// case klex::cfg::GrammarLexer::Illegal:
 83 | 		default:
 84 | 			return "Illegal";
 85 | 	}
 86 | }
 87 | 
 88 | }  // namespace klex::cfg
 89 | 
 90 | namespace fmt {
 91 | template <>
 92 | struct formatter<klex::cfg::GrammarLexer::Token> {
 93 | 	template <typename ParseContext>
 94 | 	constexpr auto parse(ParseContext& ctx)
 95 | 	{
 96 | 		return ctx.begin();
 97 | 	}
 98 | 
 99 | 	template <typename FormatContext>
100 | 	constexpr auto format(const klex::cfg::GrammarLexer::Token& v, FormatContext& ctx)
101 | 	{
102 | 		return format_to(ctx.out(), "{}", to_string(v));
103 | 	}
104 | };
105 | }  // namespace fmt
106 | 
107 | // vim:ts=4:sw=4:noet
108 | 


--------------------------------------------------------------------------------
/src/klex/cfg/LeftRecursion_test.cpp:
--------------------------------------------------------------------------------
  1 | // This file is part of the "klex" project, http://github.com/christianparpart/klex>
  2 | //   (c) 2018 Christian Parpart <christian@parpart.family>
  3 | //
  4 | // Licensed under the MIT License (the "License"); you may not use this
  5 | // file except in compliance with the License. You may obtain a copy of
  6 | // the License at: http://opensource.org/licenses/MIT
  7 | 
  8 | #include <klex/Report.h>
  9 | #include <klex/cfg/GrammarParser.h>
 10 | #include <klex/cfg/LeftRecursion.h>
 11 | #include <klex/util/literals.h>
 12 | #include <klex/util/testing.h>
 13 | 
 14 | using namespace std;
 15 | using namespace klex;
 16 | using namespace klex::cfg;
 17 | using namespace klex::util::literals;
 18 | 
// Test helper: parses the grammar text G and asserts that parsing reported no failures.
// NOTE(review): ASSERT_FALSE comes from klex/util/testing.h; confirm it may be used inside
// a function that returns a value.
Grammar makeGrammar(string G)
{
    BufferedReport report;
    Grammar grammar = GrammarParser(move(G), &report).parse();
    ASSERT_FALSE(report.containsFailures());
    return grammar;
}
 26 | 
// Checks isLeftRecursive() against three small grammars: directly left-recursive,
// right-recursive, and non-recursive. All three parses share one report.
TEST(cfg_LeftRecursion, isLeftRecursive)
{
    BufferedReport report;

    // direct left-recursive
    const Grammar grammar = GrammarParser("A ::= A 'b' | 'a';", &report).parse();
    ASSERT_FALSE(report.containsFailures());
    ASSERT_TRUE(isLeftRecursive(grammar));

    // direct right recursive
    const Grammar right = GrammarParser("A ::= 'b' A | 'a';", &report).parse();
    ASSERT_FALSE(report.containsFailures());
    ASSERT_FALSE(isLeftRecursive(right));

    // neither left nor right
    const Grammar neinor = GrammarParser("A ::= 'b' | 'a';", &report).parse();
    ASSERT_FALSE(report.containsFailures());
    ASSERT_FALSE(isLeftRecursive(neinor));
}
 46 | 
// A directly left-recursive grammar must no longer be left-recursive after
// LeftRecursion::direct() has been applied and the grammar finalized.
TEST(cfg_LeftRecursion, simple)
{
    ConsoleReport report;
    Grammar grammar = GrammarParser(R"(`S ::= A;
									   `A ::= A 'b'
									   `    | 'a';
									   `)"_multiline,
                                    &report)
                          .parse();

    // precondition: the input parsed cleanly and is left-recursive
    ASSERT_FALSE(report.containsFailures());
    ASSERT_TRUE(isLeftRecursive(grammar));

    // rewrite the grammar in place
    LeftRecursion { grammar }.direct();

    grammar.finalize();
    logf("grammar: {}", grammar.dump());

    ASSERT_FALSE(isLeftRecursive(grammar));
}
 67 | 
// Same check as the simple case, applied to an Expression-Term-Factor grammar in which both
// Expr and Term are directly left-recursive.
TEST(cfg_LeftRecursion, ETF)
{
    BufferedReport report;
    Grammar grammar = GrammarParser(R"(`token {
									   `  Spacing(ignore) ::= [\s\t]+
									   `  Number          ::= [0-9]+
									   `}
									   `
									   `Start  ::= Expr;
									   `Expr   ::= Expr '+' Term
									   `         | Expr '-' Term
									   `         | Term ;
									   `Term   ::= Term '*' Factor
									   `         | Term '/' Factor
									   `         | Factor ;
									   `Factor ::= '(' Expr ')'
									   `         | Number
									   `         ;
									   `)"_multiline,
                                    &report)
                          .parse();

    // precondition: the input parsed cleanly and is left-recursive
    ASSERT_FALSE(report.containsFailures());
    ASSERT_TRUE(isLeftRecursive(grammar));

    // rewrite the grammar in place
    LeftRecursion { grammar }.direct();

    grammar.finalize();
    logf("grammar: {}", grammar.dump());

    ASSERT_FALSE(isLeftRecursive(grammar));
}
100 | 


--------------------------------------------------------------------------------
/src/klex/regular/Compiler.h:
--------------------------------------------------------------------------------
  1 | // This file is part of the "klex" project, http://github.com/christianparpart/klex>
  2 | //   (c) 2018 Christian Parpart <christian@parpart.family>
  3 | //
  4 | // Licensed under the MIT License (the "License"); you may not use this
  5 | // file except in compliance with the License. You may obtain a copy of
  6 | // the License at: http://opensource.org/licenses/MIT
  7 | #pragma once
  8 | 
  9 | #include <klex/regular/DFABuilder.h>
 10 | #include <klex/regular/LexerDef.h>
 11 | #include <klex/regular/NFA.h>
 12 | #include <klex/regular/Rule.h>
 13 | #include <klex/regular/State.h>
 14 | 
 15 | #include <istream>
 16 | #include <map>
 17 | #include <memory>
 18 | #include <string>
 19 | #include <string_view>
 20 | 
 21 | namespace klex::regular {
 22 | 
 23 | struct MultiDFA;
 24 | 
/**
 * Top-Level API for compiling lexical patterns into table definitions for Lexer.
 *
 * Rules are first collected via parse() or declareAll(); the compile*() functions then
 * turn the collected rules into automata or lexer tables.
 *
 * @see Lexer
 */
class Compiler {
  public:
	using TagNameMap = std::map<Tag, std::string>;    // tag -> name
	using OvershadowMap = DFABuilder::OvershadowMap;
	using AutomataMap = std::map<std::string, NFA>;   // keyed by name; see automata()

	/// Creates a compiler with no rules, no automata, no names, and the begin-of-line flag cleared.
	Compiler() : rules_{}, containsBeginOfLine_{false}, fa_{}, names_{} {}

	/**
	 * Parses a @p stream of textual rule definitions to construct their internal data structures.
	 */
	void parse(std::unique_ptr<std::istream> stream);

	/**
	 * Parses @p text containing textual rule definitions.
	 */
	void parse(std::string text);

	/**
	 * Parses a list of @p rules to construct their internal data structures.
	 */
	void declareAll(RuleList rules);

	/// Rules collected so far.
	const RuleList& rules() const noexcept { return rules_; }

	/// Tag-to-name mapping collected so far.
	const TagNameMap& names() const noexcept { return names_; }

	size_t size() const;

	/**
	 * Compiles all previously parsed rules into a DFA.
	 */
	DFA compileDFA(OvershadowMap* overshadows = nullptr);

	/**
	 * Compiles all previously parsed rules into a MultiDFA.
	 */
	MultiDFA compileMultiDFA(OvershadowMap* overshadows = nullptr);

	/**
	 * Compiles all previously parsed rules into a minimal DFA.
	 */
	DFA compileMinimalDFA();

	/**
	 * Compiles all previously parsed rules into a suitable data structure for Lexer.
	 *
	 * @see Lexer
	 */
	LexerDef compile();

	/**
	 * Compiles all previously parsed rules into a suitable data structure for Lexer, taking care of
	 * multiple conditions as well as begin-of-line.
	 */
	LexerDef compileMulti(OvershadowMap* overshadows = nullptr);

	/**
	 * Translates the given DFA @p dfa with a given TagNameMap @p names into trivial table mappings.
	 *
	 * @see Lexer
	 */
	static LexerDef generateTables(const DFA& dfa, bool requiresBeginOfLine, const TagNameMap& names);
	static LexerDef generateTables(const MultiDFA& dfa, bool requiresBeginOfLine, const TagNameMap& names);

	/// Automata built so far (same type as AutomataMap).
	const std::map<std::string, NFA>& automata() const { return fa_; }

	/// Current value of the begin-of-line flag.
	bool containsBeginOfLine() const noexcept { return containsBeginOfLine_; }

  private:
	/**
	 * Parses a single @p rule to construct their internal data structures.
	 */
	void declare(const Rule& rule, const std::string& conditionSuffix = "");

  private:
	RuleList rules_;
	bool containsBeginOfLine_;
	AutomataMap fa_;
	TagNameMap names_;
};
101 | 
102 | }  // namespace klex::regular
103 | 


--------------------------------------------------------------------------------
/src/klex/cfg/ll/Analyzer.h:
--------------------------------------------------------------------------------
  1 | // This file is part of the "klex" project, http://github.com/christianparpart/klex>
  2 | //	 (c) 2018 Christian Parpart <christian@parpart.family>
  3 | //
  4 | // Licensed under the MIT License (the "License"); you may not use this
  5 | // file except in compliance with the License. You may obtain a copy of
  6 | // the License at: http://opensource.org/licenses/MIT
  7 | 
  8 | #pragma once
  9 | 
 10 | #include <klex/Report.h>
 11 | #include <klex/cfg/ll/SyntaxTable.h>
 12 | #include <klex/regular/Lexer.h>
 13 | 
 14 | #include <deque>
 15 | #include <functional>
 16 | #include <optional>
 17 | #include <utility>
 18 | #include <vector>
 19 | 
 20 | namespace klex::cfg::ll {
 21 | 
 22 | template <typename SemanticValue>
 23 | class Analyzer {
 24 |   public:
 25 | 	using Terminal = regular::Tag;  // typename regular::Lexer<regular::Tag>::value_type;
 26 | 	using NonTerminal = int;
 27 | 	using Action = int;
 28 | 	using Lexer = regular::Lexer<regular::Tag, regular::StateId, true, false>;
 29 | 	using ActionHandler = std::function<SemanticValue(int, const Analyzer<SemanticValue>&)>;
 30 | 
 31 | 	struct StateValue {
 32 | 		int value;
 33 | 		operator int() const noexcept { return value; }
 34 | 		StateValue(int _value) : value{_value} {}
 35 | 	};
 36 | 
 37 | 	Analyzer(const SyntaxTable& table, Report* report, std::string input,
 38 | 			 ActionHandler actionHandler = ActionHandler());
 39 | 
 40 | 	[[nodiscard]] const Lexer& lexer() const noexcept { return lexer_; }
 41 | 	[[nodiscard]] const std::string& lastLiteral() const noexcept { return lastLiteral_; }
 42 | 
 43 | 	[[nodiscard]] const std::string& actionName(int id) const noexcept { return def_.actionNames[id - def_.actionMin()]; }
 44 | 
 45 | 	[[nodiscard]] const SemanticValue& semanticValue(int offset) const {
 46 | 		if (offset < 0)
 47 | 			return valueStack_[valueStack_.size() + offset];
 48 | 		else
 49 | 			return valueStack_[valueStackBase_ + offset];
 50 | 	}
 51 | 
 52 | 	[[nodiscard]] std::optional<SemanticValue> analyze();
 53 | 
 54 |   private:
 55 | 	[[nodiscard]] std::optional<SyntaxTable::Expression> getHandleFor(StateValue nonterminal,
 56 | 																	Terminal currentTerminal) const;
 57 | 
 58 | 	[[nodiscard]] bool isAction(StateValue v) const noexcept;
 59 | 	[[nodiscard]] bool isTerminal(StateValue v) const noexcept;
 60 | 	[[nodiscard]] bool isNonTerminal(StateValue v) const noexcept;
 61 | 
 62 | 	void log(const std::string& msg);
 63 | 
 64 | 	[[nodiscard]] std::string dumpStateStack() const;
 65 | 	[[nodiscard]] std::string dumpSemanticStack() const;
 66 | 	[[nodiscard]] std::string stateValue(StateValue sv) const;
 67 | 	[[nodiscard]] std::string handleString(const SyntaxTable::Expression& handle) const;
 68 | 
 69 |   private:
 70 | 	const SyntaxTable& def_;
 71 | 	Lexer lexer_;
 72 | 	std::string lastLiteral_;
 73 | 	Report* report_;
 74 | 	std::deque<StateValue> stack_;
 75 | 	std::deque<SemanticValue> valueStack_;
 76 | 	size_t valueStackBase_;
 77 | 	ActionHandler actionHandler_;
 78 | };
 79 | 
 80 | }  // namespace klex::cfg::ll
 81 | 
 82 | namespace fmt {
 83 | template <>
 84 | struct formatter<typename klex::cfg::ll::Analyzer<int>::StateValue> {
 85 | 	template <typename ParseContext>
 86 | 	constexpr auto parse(ParseContext& ctx)
 87 | 	{
 88 | 		return ctx.begin();
 89 | 	}
 90 | 
 91 | 	template <typename FormatContext>
 92 | 	constexpr auto format(const klex::cfg::ll::Analyzer<int>::StateValue& v, FormatContext& ctx)
 93 | 	{
 94 | 		return format_to(ctx.out(), "{}", "hello");
 95 | 	}
 96 | };
 97 | }  // namespace fmt
 98 | 
 99 | #include <klex/cfg/ll/Analyzer-inl.h>
100 | 
101 | // vim:ts=4:sw=4:noet
102 | 


--------------------------------------------------------------------------------
/examples/flow.klex:
--------------------------------------------------------------------------------
  1 | # vim:syntax=klex
  2 | # Lexical Grammar for the Flow Language
  3 | 
  4 | # be case insensitive in pattern matching?
  5 | # %pragma ignorecase
  6 | 
  7 | # NUMBER            ::= 0|[1-9][0-9]*
  8 | # IDENT             ::= [a-zA-Z_][a-zA-Z_0-9]*
  9 | # IP4               ::= {IP4Oct}(\.{IP4Oct}){3}
 10 | # IP4Cidr           ::= {IP4}/{CidrMask}
 11 | # 
 12 | # %%
 13 | 
 14 | # symbols
 15 | Assign            ::= "="
 16 | OrAssign          ::= "|="
 17 | AndAssign         ::= "&="
 18 | PlusAssign        ::= "+="
 19 | MinusAssign       ::= "-="
 20 | MulAssign         ::= "*="
 21 | DivAssign         ::= "/="
 22 | Semicolon         ::= ";"
 23 | Question          ::= "?"
 24 | Colon             ::= ":"
 25 | And               ::= "and"
 26 | Or                ::= "or"
 27 | Xor               ::= "xor"
 28 | Equal             ::= "=="
 29 | UnEqual           ::= "!="
 30 | 
 31 | Less              ::= "<"
 32 | Greater           ::= ">"
 33 | LessOrEqual       ::= "<="
 34 | GreaterOrEqual    ::= ">="
 35 | PrefixMatch       ::= "=^"
 36 | SuffixMatch       ::= "=$"
 37 | RegexMatch        ::= "=~"
 38 | In                ::= "in"
 39 | HashRocket        ::= "=>"
 40 | Plus              ::= "+"
 41 | Minus             ::= "-"
 42 | Mul               ::= "/*"
 43 | Div               ::= "/"
 44 | Mod               ::= "%"
 45 | Shl               ::= "shl"
 46 | Shr               ::= "shr"
 47 | Comma             ::= ","
 48 | Pow               ::= "**"
 49 | Not               ::= "not"
 50 | BitNot            ::= "~"
 51 | BitOr             ::= "|"
 52 | BitAnd            ::= "&"
 53 | BitXor            ::= "^"
 54 | BrOpen            ::= "["
 55 | BrClose           ::= "]"
 56 | RndOpen           ::= "("
 57 | RndClose          ::= ")"
 58 | Begin             ::= "{"
 59 | End               ::= "}"
 60 | 
# keywords (written as bare patterns; each one matches exactly the word shown)
# NOTE(review): the Ident rule further down matches these words too - presumably the
# earlier rule wins; confirm against the rule-priority handling.
Handler           ::= handler
If                ::= if
Then              ::= then
Else              ::= else
Unless            ::= unless
Match             ::= match
On                ::= on
While             ::= while
For               ::= for
Import            ::= import
From              ::= from
Var               ::= var

# data types (names of the built-in types)
VoidType          ::= void
BoolType          ::= bool
NumberType        ::= int
StringType        ::= string
 80 | 
# literals
# StringLiteral accepts single- or double-quoted text that does not span lines.
# NumberLiteral accepts plain digits, or 1-3 digits followed by '_'-separated groups of 3 digits.
TrueLiteral       ::= true
FalseLiteral      ::= false
StringLiteral     ::= '([^'\n]|\\\\')*'|\"([^\"\n]|\\\")*\"
NumberLiteral     ::= [0-9]+|[0-9]{1,3}(_[0-9]{3})*

# IPv4: rules marked (ref) are pulled into other rules via {Name}
# (presumably they do not produce tokens on their own - confirm).
IPv4Octet(ref)    ::= [0-9]|[1-9][0-9]|1[0-9][0-9]|2[0-4][0-9]|25[0-5]
IPv4(ref)         ::= {IPv4Octet}(\.{IPv4Octet}){3}
IPv4Literal       ::= {IPv4}

# CIDR notation: an IPv4 address, a slash, and a prefix length in the range 0-32.
CidrPart(ref)     ::= [0-9]|[1-2][0-9]|3[012]
Cidr              ::= {IPv4}\/{CidrPart}

# IPv6: one alternative per position of the "::" abbreviation, plus the ::ffff:<IPv4> form.
ipv6Part(ref)     ::= [[:xdigit:]]{1,4}
IPv6              ::= {ipv6Part}(:{ipv6Part}){7,7}
                    | ({ipv6Part}:){1,7}:
                    | :(:{ipv6Part}){1,7}
                    | ::
                    | ({ipv6Part}:){1}(:{ipv6Part}){0,6}
                    | ({ipv6Part}:){2}(:{ipv6Part}){0,5}
                    | ({ipv6Part}:){3}(:{ipv6Part}){0,4}
                    | ({ipv6Part}:){4}(:{ipv6Part}){0,3}
                    | ({ipv6Part}:){5}(:{ipv6Part}){0,2}
                    | ({ipv6Part}:){6}(:{ipv6Part}){0,1}
                    | ::[fF]{4}:{IPv4}
106 | 
# misc
# Ident: a letter or underscore followed by letters, digits or underscores.
# RegExpGroup: a '$' followed by one or more digits.
Ident             ::= [a-zA-Z_][a-zA-Z_0-9]*
RegExpGroup       ::= \$[0-9]+

# Rule restricted to the condition named RE: everything up to and including the next '/'.
<RE>RegExp        ::= [^/]*/

# specials
# Rules marked (ignore) are tagged as ignored; <*> presumably applies the rule to
# every condition - confirm against the rule parser.
Comment(ignore)   ::= "#.*"
Spacing(ignore)   ::= [\s\t\n]+
<*>Eof            ::= <<EOF>>
117 | 


--------------------------------------------------------------------------------
/.clang-tidy:
--------------------------------------------------------------------------------
 1 | ---
 2 | Checks: >-
 3 |   -*,
 4 |   bugprone-*,
 5 |   -bugprone-easily-swappable-parameters,
 6 |   -bugprone-suspicious-include,
 7 |   -bugprone-unchecked-optional-access,
 8 |   cppcoreguidelines-*,
 9 |   -cppcoreguidelines-avoid-c-arrays,
10 |   -cppcoreguidelines-avoid-magic-numbers,
11 |   -cppcoreguidelines-macro-usage,
12 |   -cppcoreguidelines-no-malloc,
13 |   -cppcoreguidelines-non-private-member-variables-in-classes,
14 |   -cppcoreguidelines-pro-bounds-constant-array-index,
15 |   -cppcoreguidelines-owning-memory,
16 |   -cppcoreguidelines-pro-bounds-array-to-pointer-decay,
17 |   -cppcoreguidelines-pro-bounds-pointer-arithmetic,
18 |   -cppcoreguidelines-pro-type-const-cast,
19 |   -cppcoreguidelines-pro-type-cstyle-cast,
20 |   -cppcoreguidelines-pro-type-static-cast-downcast,
21 |   -cppcoreguidelines-pro-type-vararg,
22 |   -cppcoreguidelines-special-member-functions,
23 |   modernize-*,
24 |   -modernize-avoid-bind,
25 |   -modernize-avoid-c-arrays,
26 |   -modernize-return-braced-init-list,
27 |   -modernize-use-bool-literals,
28 |   -modernize-use-nullptr,
29 |   -modernize-use-trailing-return-type,
30 |   readability-non-const-parameter,
31 |   readability-redundant-*
32 |   -readability-redundant-access-specifiers,
# Check globs whose warnings are promoted to errors.
WarningsAsErrors: >-
  clang-analyzer-*,
  clang-diagnostic-*,
  performance-*,
  -performance-no-int-to-ptr,
  readability-identifier-naming
UseColor: true
# NOTE(review): this pattern only matches files below src/terminal*; confirm that this is
# the intended header filter for this repository.
HeaderFilterRegex: '^src/(terminal.*)/.*\.(h|cpp)$'
AnalyzeTemporaryDtors: false
FormatStyle:     none
# Per-check option overrides, given as key/value pairs.
# NOTE(review): several keys below configure cert-* and google-* checks, which the
# Checks list in this file does not enable; confirm whether they are still needed.
CheckOptions:
  - key:             bugprone-easily-swappable-parameters.MinimumLength
    value:           '3'
  - key:             cert-dcl16-c.NewSuffixes
    value:           'L;LL;LU;LLU'
  - key:             cert-oop54-cpp.WarnOnlyIfThisHasSuspiciousField
    value:           '0'
  - key:             cppcoreguidelines-explicit-virtual-functions.IgnoreDestructors
    value:           '1'
  - key:             cppcoreguidelines-non-private-member-variables-in-classes.IgnoreClassesWithAllMemberVariablesBeingPublic
    value:           '1'
  - key:             google-readability-braces-around-statements.ShortStatementLines
    value:           '1'
  - key:             google-readability-function-size.StatementThreshold
    value:           '800'
  - key:             google-readability-namespace-comments.ShortNamespaceLines
    value:           '10'
  - key:             google-readability-namespace-comments.SpacesBeforeComments
    value:           '2'
  - key:             modernize-loop-convert.MaxCopySize
    value:           '16'
  - key:             modernize-loop-convert.MinConfidence
    value:           reasonable
  - key:             modernize-loop-convert.NamingStyle
    value:           CamelCase
  - key:             modernize-pass-by-value.IncludeStyle
    value:           llvm
  - key:             modernize-replace-auto-ptr.IncludeStyle
    value:           llvm
  - key:             modernize-use-nullptr.NullMacros
    value:           'NULL'
  - key:             modernize-use-default-member-init.UseAssignment
    value:           '1'
  # Identifier-naming rules, currently switched off.
  # - key:   readability-identifier-naming.EnumCase
  #   value: CamelCase
  # - key:   readability-identifier-naming.ClassCase
  #   value: CamelCase
  # - key:   readability-identifier-naming.ClassMemberCase
  #   value: camelBack
  # - key:   readability-identifier-naming.ClassMethodCase
  #   value: camelBack
  # - key:   readability-identifier-naming.ParameterCase
  #   value: camelBack
  # - key:   readability-identifier-naming.ParameterPrefix
  #   value: ''
  # - key:   readability-identifier-naming.ScopedEnumConstantCase
  #   value: CamelCase
90 | 


--------------------------------------------------------------------------------
/src/klex/regular/DotWriter.cpp:
--------------------------------------------------------------------------------
  1 | // This file is part of the "klex" project, http://github.com/christianparpart/klex>
  2 | //   (c) 2018 Christian Parpart <christian@parpart.family>
  3 | //
  4 | // Licensed under the MIT License (the "License"); you may not use this
  5 | // file except in compliance with the License. You may obtain a copy of
  6 | // the License at: http://opensource.org/licenses/MIT
  7 | 
  8 | #include <klex/regular/DotWriter.h>
  9 | #include <klex/regular/Symbols.h>
 10 | 
 11 | #include <fmt/format.h>
 12 | 
 13 | #include <algorithm>
 14 | #include <cassert>
 15 | #include <sstream>
 16 | 
 17 | using namespace std;
 18 | 
 19 | namespace klex::regular
 20 | {
 21 | 
 22 | template <typename StringType>
 23 | static string escapeString(const StringType& str)
 24 | {
 25 |     stringstream stream_;
 26 |     for (char ch: str)
 27 |     {
 28 |         // \t\n\r is already converted to escape sequence
 29 |         switch (ch)
 30 |         {
 31 |             case '\\': stream_ << "\\\\"; break;
 32 |             case '"': stream_ << "\\\""; break;
 33 |             default: stream_ << ch; break;
 34 |         }
 35 |     }
 36 |     return stream_.str();
 37 | }
 38 | 
 39 | void DotWriter::start(StateId initialState)
 40 | {
 41 |     initialState_ = initialState;
 42 |     stream_ << "digraph {\n";
 43 |     stream_ << "  rankdir=LR;\n";
 44 |     // stream_ << "  label=\"" << escapeString("FA" /*TODO*/) << "\";\n";
 45 | }
 46 | 
 47 | void DotWriter::visitNode(StateId number, bool start, bool accept)
 48 | {
 49 |     if (start)
 50 |     {
 51 |         const string_view shape = accept ? "doublecircle" : "circle";
 52 |         stream_ << "  \"\" [shape=plaintext];\n";
 53 |         stream_ << "  node [shape=" << shape << ",color=red];\n";
 54 |         stream_ << "  \"\" -> " << stateLabelPrefix_ << number << ";\n";
 55 |         stream_ << "  node [color=black];\n";
 56 |     }
 57 |     else if (accept)
 58 |     {
 59 |         stream_ << "  node [shape=doublecircle]; " << stateLabelPrefix_ << number << ";\n";
 60 |         stream_ << "  node [shape=circle,color=black];\n";
 61 |     }
 62 |     else
 63 |     {
 64 |         // stream_ << stateLabelPrefix_ << number << ";\n";
 65 |     }
 66 | }
 67 | 
 68 | void DotWriter::visitEdge(StateId from, StateId to, Symbol s)
 69 | {
 70 |     transitionGroups_[to].push_back(s);
 71 | }
 72 | 
 73 | void DotWriter::endVisitEdge(StateId from, StateId to)
 74 | {
 75 |     auto& tgroup = transitionGroups_[to];
 76 |     if (!tgroup.empty())
 77 |     {
 78 |         if (from == initialState_ && initialStates_ != nullptr)
 79 |         {
 80 |             for (Symbol s: tgroup)
 81 |             {
 82 |                 const string label = [this, s]() {
 83 |                     for (const auto& p: *initialStates_)
 84 |                         if (p.second == static_cast<StateId>(s))
 85 |                             return fmt::format("<{}>", p.first);
 86 |                     return prettySymbol(s);
 87 |                 }();
 88 |                 stream_ << fmt::format("  {}{} -> {}{} [label=\"{}\"];\n",
 89 |                                        stateLabelPrefix_,
 90 |                                        from,
 91 |                                        stateLabelPrefix_,
 92 |                                        to,
 93 |                                        escapeString(label));
 94 |             }
 95 |         }
 96 |         else
 97 |         {
 98 |             string label = groupCharacterClassRanges(move(tgroup));
 99 |             stream_ << fmt::format("  {}{} -> {}{} [label=\"{}\"];\n",
100 |                                    stateLabelPrefix_,
101 |                                    from,
102 |                                    stateLabelPrefix_,
103 |                                    to,
104 |                                    escapeString(label));
105 |         }
106 |         tgroup.clear();
107 |     }
108 | }
109 | 
110 | void DotWriter::end()
111 | {
112 |     stream_ << "}\n";
113 | }
114 | 
115 | } // namespace klex::regular
116 | 


--------------------------------------------------------------------------------
/src/klex/regular/NFABuilder.cpp:
--------------------------------------------------------------------------------
  1 | // This file is part of the "klex" project, http://github.com/christianparpart/klex>
  2 | //   (c) 2018 Christian Parpart <christian@parpart.family>
  3 | //
  4 | // Licensed under the MIT License (the "License"); you may not use this
  5 | // file except in compliance with the License. You may obtain a copy of
  6 | // the License at: http://opensource.org/licenses/MIT
  7 | 
  8 | #include <klex/regular/DFA.h>
  9 | #include <klex/regular/NFABuilder.h>
 10 | 
 11 | using namespace std;
 12 | 
 13 | namespace klex::regular
 14 | {
 15 | 
 16 | NFA NFABuilder::construct(const RegExpr& re, Tag tag)
 17 | {
 18 |     visit(*this, re);
 19 | 
 20 |     // fa_.setAccept(acceptState_.value_or(fa_.acceptStateId()), tag);
 21 |     if (acceptState_)
 22 |         fa_.setAccept(acceptState_.value(), tag);
 23 |     else
 24 |         fa_.setAccept(tag);
 25 | 
 26 |     return move(fa_);
 27 | }
 28 | 
 29 | NFA NFABuilder::construct(const RegExpr& re)
 30 | {
 31 |     visit(*this, re);
 32 |     return move(fa_);
 33 | }
 34 | 
 35 | void NFABuilder::operator()(const LookAheadExpr& lookaheadExpr)
 36 | {
 37 |     // fa_ = move(construct(lookaheadExpr.leftExpr()).lookahead(construct(lookaheadExpr.rightExpr())));
 38 |     NFA lhs = construct(*lookaheadExpr.left);
 39 |     NFA rhs = construct(*lookaheadExpr.right);
 40 |     lhs.lookahead(move(rhs));
 41 |     fa_ = move(lhs);
 42 | }
 43 | 
 44 | void NFABuilder::operator()(const AlternationExpr& alternationExpr)
 45 | {
 46 |     NFA lhs = construct(*alternationExpr.left);
 47 |     NFA rhs = construct(*alternationExpr.right);
 48 |     lhs.alternate(move(rhs));
 49 |     fa_ = move(lhs);
 50 | }
 51 | 
 52 | void NFABuilder::operator()(const ConcatenationExpr& concatenationExpr)
 53 | {
 54 |     NFA lhs = construct(*concatenationExpr.left);
 55 |     NFA rhs = construct(*concatenationExpr.right);
 56 |     lhs.concatenate(move(rhs));
 57 |     fa_ = move(lhs);
 58 | }
 59 | 
 60 | void NFABuilder::operator()(const CharacterExpr& characterExpr)
 61 | {
 62 |     fa_ = NFA { characterExpr.value };
 63 | }
 64 | 
 65 | void NFABuilder::operator()(const CharacterClassExpr& characterClassExpr)
 66 | {
 67 |     fa_ = NFA { characterClassExpr.symbols };
 68 | }
 69 | 
 70 | void NFABuilder::operator()(const ClosureExpr& closureExpr)
 71 | {
 72 |     const unsigned xmin = closureExpr.minimumOccurrences;
 73 |     const unsigned xmax = closureExpr.maximumOccurrences;
 74 |     constexpr unsigned Infinity = numeric_limits<unsigned>::max();
 75 | 
 76 |     if (xmin == 0 && xmax == 1)
 77 |         fa_ = move(construct(*closureExpr.subExpr).optional());
 78 |     else if (xmin == 0 && xmax == Infinity)
 79 |         fa_ = move(construct(*closureExpr.subExpr).recurring());
 80 |     else if (xmin == 1 && xmax == Infinity)
 81 |         fa_ = move(construct(*closureExpr.subExpr).positive());
 82 |     else if (xmin < xmax)
 83 |         fa_ = move(construct(*closureExpr.subExpr).repeat(xmin, xmax));
 84 |     else if (xmin == xmax)
 85 |         fa_ = move(construct(*closureExpr.subExpr).times(xmin));
 86 |     else
 87 |         throw invalid_argument { "closureExpr" };
 88 | }
 89 | 
 90 | void NFABuilder::operator()(const BeginOfLineExpr&)
 91 | {
 92 |     fa_ = NFA { Symbols::Epsilon };
 93 | }
 94 | 
 95 | void NFABuilder::operator()(const EndOfLineExpr& eolExpr)
 96 | {
 97 |     // NFA lhs;
 98 |     // NFA rhs{'\n'};
 99 |     // lhs.lookahead(move(rhs));
100 |     // fa_ = move(lhs);
101 |     fa_ = move(NFA {}.lookahead(NFA { '\n' }));
102 | }
103 | 
104 | void NFABuilder::operator()(const EndOfFileExpr& eofExpr)
105 | {
106 |     fa_ = NFA { Symbols::EndOfFile };
107 | }
108 | 
109 | void NFABuilder::operator()(const DotExpr& dotExpr)
110 | {
111 |     // any character except LF
112 |     fa_ = NFA { '\t' };
113 |     for (int ch = 32; ch < 127; ++ch)
114 |     {
115 |         fa_.addTransition(fa_.initialStateId(), ch, fa_.acceptStateId());
116 |     }
117 | }
118 | 
119 | void NFABuilder::operator()(const EmptyExpr& emptyExpr)
120 | {
121 |     fa_ = NFA { Symbols::Epsilon };
122 | }
123 | 
124 | } // namespace klex::regular
125 | 


--------------------------------------------------------------------------------
/src/klex/regular/Rule.h:
--------------------------------------------------------------------------------
  1 | // This file is part of the "klex" project, http://github.com/christianparpart/klex>
  2 | //   (c) 2018 Christian Parpart <christian@parpart.family>
  3 | //
  4 | // Licensed under the MIT License (the "License"); you may not use this
  5 | // file except in compliance with the License. You may obtain a copy of
  6 | // the License at: http://opensource.org/licenses/MIT
  7 | #pragma once
  8 | 
  9 | #include <klex/regular/LexerDef.h>  // IgnoreTag
 10 | #include <klex/regular/RegExpr.h>
 11 | #include <klex/regular/RegExprParser.h>
 12 | #include <klex/regular/State.h>  // Tag
 13 | #include <memory>
 14 | #include <optional>
 15 | #include <string>
 16 | #include <vector>
 17 | 
 18 | namespace klex::regular {
 19 | 
 20 | struct Rule {
 21 | 	unsigned int line;
 22 | 	unsigned int column;
 23 | 	Tag tag;
 24 | 	std::vector<std::string> conditions;
 25 | 	std::string name;
 26 | 	std::string pattern;
 27 | 	std::unique_ptr<RegExpr> regexpr = nullptr;
 28 | 
 29 | 	bool isIgnored() const noexcept { return tag == IgnoreTag; }
 30 | 
 31 | 	Rule clone() const
 32 | 	{
 33 | 		return regexpr ? Rule{line,
 34 | 							  column,
 35 | 							  tag,
 36 | 							  conditions,
 37 | 							  name,
 38 | 							  pattern,
 39 | 							  std::make_unique<RegExpr>(RegExprParser{}.parse(pattern, line, column))}
 40 | 					   : Rule{line, column, tag, conditions, name, pattern, nullptr};
 41 | 	}
 42 | 
 43 | 	Rule() = default;
 44 | 
 45 | 	Rule(unsigned _line, unsigned _column, Tag _tag, std::vector<std::string> _conditions, std::string _name,
 46 | 		 std::string _pattern, std::unique_ptr<RegExpr> _regexpr = nullptr)
 47 | 		: line{_line},
 48 | 		  column{_column},
 49 | 		  tag{_tag},
 50 | 		  conditions{_conditions},
 51 | 		  name{_name},
 52 | 		  pattern{_pattern},
 53 | 		  regexpr{std::move(_regexpr)}
 54 | 	{
 55 | 	}
 56 | 
 57 | 	Rule(const Rule& v)
 58 | 		: line{v.line},
 59 | 		  column{v.column},
 60 | 		  tag{v.tag},
 61 | 		  conditions{v.conditions},
 62 | 		  name{v.name},
 63 | 		  pattern{v.pattern},
 64 | 		  regexpr{v.regexpr ? std::make_unique<RegExpr>(RegExprParser{}.parse(pattern, line, column)) : nullptr}
 65 | 	{
 66 | 	}
 67 | 
 68 | 	Rule& operator=(const Rule& v)
 69 | 	{
 70 | 		line = v.line;
 71 | 		column = v.column;
 72 | 		tag = v.tag;
 73 | 		conditions = v.conditions;
 74 | 		name = v.name;
 75 | 		pattern = v.pattern;
 76 | 		regexpr = v.regexpr ? std::make_unique<RegExpr>(RegExprParser{}.parse(pattern, line, column)) : nullptr;
 77 | 		return *this;
 78 | 	}
 79 | 
 80 | 	bool operator<(const Rule& rhs) const noexcept { return tag < rhs.tag; }
 81 | 	bool operator<=(const Rule& rhs) const noexcept { return tag <= rhs.tag; }
 82 | 	bool operator==(const Rule& rhs) const noexcept { return tag == rhs.tag; }
 83 | 	bool operator!=(const Rule& rhs) const noexcept { return tag != rhs.tag; }
 84 | 	bool operator>=(const Rule& rhs) const noexcept { return tag >= rhs.tag; }
 85 | 	bool operator>(const Rule& rhs) const noexcept { return tag > rhs.tag; }
 86 | };
 87 | 
 88 | using RuleList = std::vector<Rule>;
 89 | 
 90 | inline bool ruleContainsBeginOfLine(const Rule& r)
 91 | {
 92 | 	return containsBeginOfLine(*r.regexpr);
 93 | }
 94 | 
 95 | }  // namespace klex::regular
 96 | 
 97 | namespace fmt {
 98 | template <>
 99 | struct formatter<klex::regular::Rule> {
100 | 	template <typename ParseContext>
101 | 	constexpr auto parse(ParseContext& ctx)
102 | 	{
103 | 		return ctx.begin();
104 | 	}
105 | 
106 | 	template <typename FormatContext>
107 | 	constexpr auto format(const klex::regular::Rule& v, FormatContext& ctx)
108 | 	{
109 | 		if (!v.conditions.empty())
110 | 		{
111 | 			format_to(ctx.out(), "<");
112 | 			for (size_t i = 0; i < v.conditions.size(); ++i)
113 | 				if (i != 0)
114 | 					format_to(ctx.out(), ", {}", v.conditions[i]);
115 | 				else
116 | 					format_to(ctx.out(), "{}", v.conditions[i]);
117 | 			format_to(ctx.out(), ">");
118 | 		}
119 | 		if (v.tag == klex::regular::IgnoreTag)
120 | 			return format_to(ctx.out(), "{}({}) ::= {}", v.name, "ignore", v.pattern);
121 | 		else
122 | 			return format_to(ctx.out(), "{}({}) ::= {}", v.name, v.tag, v.pattern);
123 | 	}
124 | };
125 | }  // namespace fmt
126 | 


--------------------------------------------------------------------------------
/src/klex/util/AnsiColor.h:
--------------------------------------------------------------------------------
  1 | // This file is part of the "x0" project, http://github.com/christianparpart/x0>
  2 | //   (c) 2009-2019 Christian Parpart <christian@parpart.family>
  3 | //
  4 | // Licensed under the MIT License (the "License"); you may not use this
  5 | // file except in compliance with the License. You may obtain a copy of
  6 | // the License at: http://opensource.org/licenses/MIT
  7 | #pragma once
  8 | 
  9 | #include <array>
 10 | 
 11 | namespace AnsiColor {
 12 | 
 13 | enum Code : unsigned {
 14 |     Clear = 0,
 15 |     Reset = Clear,
 16 |     Bold = 0x0001,  // 1
 17 |     Dark = 0x0002,  // 2
 18 |     Undef1 = 0x0004,
 19 |     Underline = 0x0008,  // 4
 20 |     Blink = 0x0010,      // 5
 21 |     Undef2 = 0x0020,
 22 |     Reverse = 0x0040,    // 7
 23 |     Concealed = 0x0080,  // 8
 24 |     AllFlags = 0x00FF,
 25 |     Black = 0x0100,
 26 |     Red = 0x0200,
 27 |     Green = 0x0300,
 28 |     Yellow = 0x0400,
 29 |     Blue = 0x0500,
 30 |     Magenta = 0x0600,
 31 |     Cyan = 0x0700,
 32 |     White = 0x0800,
 33 |     AnyFg = 0x0F00,
 34 |     OnBlack = 0x1000,
 35 |     OnRed = 0x2000,
 36 |     OnGreen = 0x3000,
 37 |     OnYellow = 0x4000,
 38 |     OnBlue = 0x5000,
 39 |     OnMagenta = 0x6000,
 40 |     OnCyan = 0x7000,
 41 |     OnWhite = 0x8000,
 42 |     AnyBg = 0xF000
 43 | };
 44 | 
 45 | /// Combines two ANSI escape sequences into one Code.
 46 | constexpr inline Code operator|(Code a, Code b)
 47 | {
 48 |     return Code{unsigned(a) | unsigned(b)};
 49 | }
 50 | 
 51 | /**
 52 |  * Counts the number of ANSI escape sequences in @p codes.
 53 |  */
 54 | constexpr unsigned count(Code codes)
 55 | {
 56 |     if (codes == Clear)
 57 |         return 1;
 58 | 
 59 |     unsigned i = 0;
 60 | 
 61 |     if (codes & AllFlags)
 62 |         for (int k = 0; k < 8; ++k)
 63 |             if (codes & (1 << k))
 64 |                 ++i;
 65 | 
 66 |     if (codes & AnyFg)
 67 |         ++i;
 68 | 
 69 |     if (codes & AnyBg)
 70 |         ++i;
 71 | 
 72 |     return i;
 73 | }
 74 | 
 75 | /**
 76 |  * Retrieves the number of bytes required to store the ANSI escape sequences of @p codes
 77 |  * without prefix/suffix notation.
 78 |  */
 79 | constexpr unsigned capacity(Code codes)
 80 | {
 81 |     if (codes == Clear)
 82 |         return 1;
 83 | 
 84 |     unsigned i = 0;
 85 | 
 86 |     if (codes & AllFlags)
 87 |         for (int k = 0; k < 8; ++k)
 88 |             if (codes & (1 << k))
 89 |                 ++i;
 90 | 
 91 |     if (codes & AnyFg)
 92 |         i += 2;
 93 | 
 94 |     if (codes & AnyBg)
 95 |         i += 2;
 96 | 
 97 |     return i + (count(codes) - 1);
 98 | }
 99 | 
100 | /// Constructs a sequence of ANSI codes for the colors in this @p codes.
101 | template <const Code value, const bool EOS = true>
102 | constexpr auto codes()
103 | {
104 |     std::array<char, capacity(value) + 3 + (EOS ? 1 : 0)> result{};
105 | 
106 |     size_t n = 0;  // n'th escape sequence being iterate through
107 |     size_t i = 0;  // i'th byte in output array
108 | 
109 |     result[i++] = '\x1B';
110 |     result[i++] = '[';
111 | 
112 |     if constexpr (value != 0)
113 |     {
114 |         if (value & AllFlags)
115 |         {
116 |             for (int k = 0; k < 8; ++k)
117 |             {
118 |                 if (value & (1 << k))
119 |                 {
120 |                     if (n++)
121 |                         result[i++] = ';';
122 |                     result[i++] = k + '1';
123 |                 }
124 |             }
125 |         }
126 | 
127 |         if (value & AnyFg)
128 |         {
129 |             if (n++)
130 |                 result[i++] = ';';
131 |             unsigned const val = ((value >> 8) & 0x0F) + 29;  // 36 -> {'3', '6'}
132 |             result[i++] = (val / 10) + '0';
133 |             result[i++] = (val % 10) + '0';
134 |         }
135 | 
136 |         if (value & AnyBg)
137 |         {
138 |             if (n++)
139 |                 result[i++] = ';';
140 |             unsigned const val = ((value >> 12) & 0x0F) + 39;
141 |             result[i++] = (val / 10) + '0';
142 |             result[i++] = (val % 10) + '0';
143 |         }
144 |     }
145 |     else
146 |         result[i++] = '0';  // reset/clear
147 | 
148 |     result[i++] = 'm';
149 | 
150 |     return result;
151 | }
152 | 
153 | }  // namespace AnsiColor
154 | 


--------------------------------------------------------------------------------
/cmdlineTests.sh:
--------------------------------------------------------------------------------
  1 | #! /bin/bash
  2 | 
  3 | set -e
  4 | 
  5 | TMP=${TMP:-/tmp}
  6 | WORKDIR="$(mktemp -d ${TMP}/cmdlineTests.XXXXXXXX)"
  7 | OUTFILE="${WORKDIR}/stdout.txt"
  8 | TESTDIR="../test"
  9 | MKLEX="./mklex"
 10 | # TESTDIR="$(realpath "$(dirname $0)/test")"
 11 | # MKLEX="$(realpath "${MKLEX:-./mklex}")"
 12 | 
 13 | cleanup() {
 14 |   rm -rf ${WORKDIR}
 15 | }
 16 | 
 17 | einfo() {
 18 |   echo "*** ${*}"
 19 | }
 20 | 
 21 | fail() {
 22 |   # echo 1>&2 "Fail. ${*}"
 23 |   echo "Fail. ${*}"
 24 |   exit 1
 25 | }
 26 | 
 27 | test_invalid_arguments() {
 28 |   einfo "test_invalid_arguments"
 29 |   if $MKLEX --invalid &>${OUTFILE}; then
 30 |     fail "Invalid argument test failed"
 31 |   fi
 32 |   grep -q "Unknown Option" ${OUTFILE} || fail
 33 | }
 34 | 
 35 | test_help() {
 36 |   einfo "test_help"
 37 | 
 38 |   $MKLEX --help &>${OUTFILE}
 39 |   grep -q "output-table" ${OUTFILE} || fail
 40 | 
 41 |   $MKLEX -h &>${OUTFILE}
 42 |   grep -q "output-table" ${OUTFILE} || fail
 43 | }
 44 | 
 45 | test_cxx_without_namespaces() {
 46 |   einfo "test_cxx_without_namespaces"
 47 |   $MKLEX -f "${TESTDIR}/good.klex" \
 48 |          --output-table="${WORKDIR}/table.cc" \
 49 |          --output-token="${WORKDIR}/token.h" \
 50 |          --table-name="lexerDef" \
 51 |          --token-name="Token"
 52 | }
 53 | 
 54 | test_cxx_with_namespaces() {
 55 |   einfo "test_cxx_with_namespaces"
 56 |   $MKLEX -f "${TESTDIR}/good.klex" \
 57 |          --output-table="${WORKDIR}/table.cc" \
 58 |          --output-token="${WORKDIR}/token.h" \
 59 |          --table-name="myns::lexerDef" \
 60 |          --token-name="myns::Token"
 61 | }
 62 | 
 63 | test_cxx_output_stderr() {
 64 |   einfo "test_cxx_with_namespaces"
 65 |   $MKLEX -f "${TESTDIR}/good.klex" \
 66 |          --output-table=- \
 67 |          --output-token=- \
 68 |          --table-name="lexerDef" \
 69 |          --token-name="Token" \
 70 |          2>"${WORKDIR}/output.inc"
 71 | 
 72 |   test -f "${WORKDIR}/output.inc"
 73 | }
 74 | 
 75 | test_debug_nfa() {
 76 |   einfo "test_debug_nfa"
 77 |   $MKLEX -f "${TESTDIR}/good.klex" \
 78 |          --output-table="${WORKDIR}/table.cc" \
 79 |          --output-token="${WORKDIR}/token.h" \
 80 |          --table-name="myns::lexerDef" \
 81 |          --token-name="myns::Token" \
 82 |          --debug-nfa > "${WORKDIR}/nfa.dot"
 83 |   test -f "${WORKDIR}/nfa.dot"
 84 | }
 85 | 
 86 | test_debug_nfa_multi() {
 87 |   einfo "test_debug_nfa_multi"
 88 |   $MKLEX -f "${TESTDIR}/multiple_conditions.klex" \
 89 |          --output-table="${WORKDIR}/table.cc" \
 90 |          --output-token="${WORKDIR}/token.h" \
 91 |          --table-name="lexerDef" \
 92 |          --token-name="Token" \
 93 |          --debug-nfa > "${WORKDIR}/nfa.dot"
 94 |   test -f "${WORKDIR}/nfa.dot"
 95 | }
 96 | 
 97 | test_debug_dfa() {
 98 |   einfo "test_debug_dfa"
 99 |   $MKLEX -f "${TESTDIR}/good.klex" \
100 |          --output-table="${WORKDIR}/table.cc" \
101 |          --output-token="${WORKDIR}/token.h" \
102 |          --table-name="myns::lexerDef" \
103 |          --token-name="myns::Token" \
104 |          --debug-dfa="${WORKDIR}/dfa.dot"
105 |   test -f "${WORKDIR}/dfa.dot"
106 | }
107 | 
# Passes "-" to --debug-dfa and redirects stdout into dfa.dot.
test_debug_dfa_stdout() {
  einfo "test_debug_dfa_stdout"
  local dot="${WORKDIR}/dfa.dot"
  local -a opts=(
    -f "${TESTDIR}/good.klex"
    --output-table="${WORKDIR}/table.cc"
    --output-token="${WORKDIR}/token.h"
    --table-name="myns::lexerDef"
    --token-name="myns::Token"
    --debug-dfa=-
  )
  $MKLEX "${opts[@]}" >"${dot}"
  test -f "${dot}"
}
118 | 
# overshadowed.klex must be rejected: mklex has to exit non-zero and report
# that rule "If" cannot be matched.
test_overshadowed() {
  einfo "test_overshadowed"
  # OUTFILE is quoted so a scratch path with spaces is not an ambiguous redirect.
  $MKLEX -f "${TESTDIR}/overshadowed.klex" \
         --output-table="${WORKDIR}/table.cc" \
         --output-token="${WORKDIR}/token.h" \
         --table-name="lexerDef" \
         --token-name="Token" \
         &>"${OUTFILE}" && fail "Failure expected."
  grep -q "Rule If cannot be matched as rule" "${OUTFILE}" || fail "missing error string"
}
129 | 
# Entry point: prints the configuration, runs every test in order and removes
# the scratch directory once all of them have passed. When a test fails the
# script exits early and the scratch directory is left in place.
main() {
  einfo "WORKDIR: ${WORKDIR}"
  einfo "TESTDIR: ${TESTDIR}"
  einfo "mklex: ${MKLEX}"

  # On interruption clean up AND stop; a trap handler that merely returns would
  # let the remaining tests run against a deleted scratch directory.
  trap 'cleanup; exit 130' INT
  trap 'cleanup; exit 143' TERM

  test_invalid_arguments
  test_help
  test_cxx_without_namespaces
  test_cxx_with_namespaces
  test_cxx_output_stderr
  test_debug_nfa
  test_debug_nfa_multi
  test_debug_dfa
  test_debug_dfa_stdout
  test_overshadowed

  # Every test passed, so do not leak the scratch directory.
  cleanup
}

main
150 | 


--------------------------------------------------------------------------------
/src/klex/cfg/GrammarLexer.cpp:
--------------------------------------------------------------------------------
  1 | // This file is part of the "klex" project, http://github.com/christianparpart/klex
  2 | //	 (c) 2018 Christian Parpart <christian@parpart.family>
  3 | //
  4 | // Licensed under the MIT License (the "License"); you may not use this
  5 | // file except in compliance with the License. You may obtain a copy of
  6 | // the License at: http://opensource.org/licenses/MIT
  7 | 
  8 | #include <klex/cfg/GrammarLexer.h>
  9 | 
 10 | #include <fmt/format.h>
 11 | 
 12 | #include <cassert>
 13 | #include <cctype>
 14 | #include <iostream>
 15 | 
 16 | using namespace std;
 17 | using namespace klex;
 18 | using namespace klex::cfg;
 19 | 
 20 | GrammarLexer::GrammarLexer(string content):
 21 |     content_ { std::move(content) }, offset_ { 0 }, currentLiteral_ {}, currentToken_ { Token::Illegal }
 22 | {
 23 | }
 24 | 
 25 | GrammarLexer::Token GrammarLexer::recognize()
 26 | {
 27 |     for (;;)
 28 |     {
 29 |         if (Token t = recognizeOne(); t != Token::Spacing)
 30 |         {
 31 |             // cout << "recognize: " << fmt::format("{}", t) << "\n";
 32 |             return currentToken_ = t;
 33 |         }
 34 |     }
 35 | }
 36 | 
 37 | GrammarLexer::Token GrammarLexer::recognizeOne()
 38 | {
 39 |     currentLiteral_.clear();
 40 | 
 41 |     switch (currentChar())
 42 |     {
 43 |         case -1: return Token::Eof;
 44 |         case ' ':
 45 |         case '\t':
 46 |         case '\n':
 47 |             do
 48 |                 consumeChar();
 49 |             while (!eof() && isspace(currentChar()));
 50 |             return Token::Spacing;
 51 |         case '{': consumeChar(); return Token::SetOpen;
 52 |         case '}': consumeChar(); return Token::SetClose;
 53 |         case '|': consumeChar(); return Token::Or;
 54 |         case ';': consumeChar(); return Token::Semicolon;
 55 |         case ':':
 56 |             if (peekChar(1) == ':' && peekChar(2) == '=')
 57 |             {
 58 |                 consumeChar(3);
 59 |                 return Token::Assoc;
 60 |             }
 61 |             return Token::Illegal;
 62 |         case '\'':
 63 |         case '"': return consumeLiteral();
 64 |         default:
 65 |             if (isalpha(currentChar()) || currentChar() == '_')
 66 |             {
 67 |                 return consumeIdentifier();
 68 |             }
 69 |             consumeChar();
 70 |             return Token::Illegal;
 71 |     }
 72 | }
 73 | 
 74 | string GrammarLexer::consumeLiteralUntilLF()
 75 | {
 76 |     currentLiteral_.clear();
 77 | 
 78 |     while (!eof() && currentChar() != '\n')
 79 |     {
 80 |         currentLiteral_ += static_cast<char>(currentChar());
 81 |         consumeChar();
 82 |     }
 83 | 
 84 |     if (!eof())
 85 |     {
 86 |         currentLiteral_ += static_cast<char>(currentChar());
 87 |         consumeChar();
 88 |     }
 89 | 
 90 |     return currentLiteral_;
 91 | }
 92 | 
 93 | GrammarLexer::Token GrammarLexer::consumeIdentifier()
 94 | {
 95 |     assert(!eof() && (isalpha(currentChar()) || currentChar() == '_'));
 96 | 
 97 |     do
 98 |     {
 99 |         currentLiteral_ += static_cast<char>(currentChar());
100 |         consumeChar();
101 |     } while (!eof() && (isalnum(currentChar()) || currentChar() == '_'));
102 | 
103 |     if (currentLiteral_ == "token")
104 |         return Token::Token;
105 | 
106 |     return Token::Identifier;
107 | }
108 | 
109 | // ' ... ' | " ... "
110 | GrammarLexer::Token GrammarLexer::consumeLiteral()
111 | {
112 |     assert(!eof() && (currentChar() == '"' || currentChar() == '\''));
113 |     const int delimiter = currentChar();
114 |     consumeChar();
115 | 
116 |     while (!eof() && currentChar() != delimiter)
117 |     {
118 |         currentLiteral_ += static_cast<char>(currentChar());
119 |         consumeChar();
120 |     }
121 | 
122 |     if (eof())
123 |         return Token::Illegal; // Unexpected EOF
124 | 
125 |     consumeChar(); // delimiter
126 | 
127 |     return Token::Literal;
128 | }
129 | 
130 | int GrammarLexer::currentChar() const
131 | {
132 |     if (offset_ < content_.size())
133 |         return content_[offset_];
134 |     else
135 |         return -1; // EOF
136 | }
137 | 
138 | int GrammarLexer::peekChar(size_t offset) const
139 | {
140 |     if (offset_ + offset < content_.size())
141 |         return content_[offset_ + offset];
142 |     else
143 |         return -1; // EOF
144 | }
145 | 
146 | int GrammarLexer::consumeChar(size_t count)
147 | {
148 |     offset_ += min(count, content_.size() - offset_);
149 |     return currentChar();
150 | }
151 | 
152 | // vim:ts=4:sw=4:noet
153 | 


--------------------------------------------------------------------------------
/src/klex/util/iterator-detail.h:
--------------------------------------------------------------------------------
  1 | // This file is part of the "klex" project, http://github.com/christianparpart/klex
  2 | //	 (c) 2018 Christian Parpart <christian@parpart.family>
  3 | //
  4 | // Licensed under the MIT License (the "License"); you may not use this
  5 | // file except in compliance with the License. You may obtain a copy of
  6 | // the License at: http://opensource.org/licenses/MIT
  7 | 
#include <cstddef>
#include <cstdint>
#include <iterator>
#include <type_traits>
#include <utility>
 11 | 
 12 | namespace klex::util::detail {
 13 | 
 14 | template <typename Container>
 15 | struct reversed {
 16 | 	const Container container;
 17 | 
 18 | 	auto begin() { return container.crbegin(); }
 19 | 	auto end() { return container.crend(); }
 20 | };
 21 | 
 22 | template <typename Container>
 23 | struct indexed {
 24 | 	Container& container;
 25 | 
 26 | 	struct iterator {
 27 | 		typename Container::iterator iter;
 28 | 		std::size_t index = 0;
 29 | 
 30 | 		iterator& operator++()
 31 | 		{
 32 | 			++iter;
 33 | 			++index;
 34 | 			return *this;
 35 | 		}
 36 | 
 37 | 		iterator& operator++(int)
 38 | 		{
 39 | 			++*this;
 40 | 			return *this;
 41 | 		}
 42 | 
 43 | 		auto operator*() const { return std::make_pair(index, *iter); }
 44 | 
 45 | 		bool operator==(const iterator& rhs) const noexcept { return iter == rhs.iter; }
 46 | 		bool operator!=(const iterator& rhs) const noexcept { return iter != rhs.iter; }
 47 | 	};
 48 | 
 49 | 	struct const_iterator {
 50 | 		typename Container::const_iterator iter;
 51 | 		std::size_t index = 0;
 52 | 
 53 | 		const_iterator& operator++()
 54 | 		{
 55 | 			++iter;
 56 | 			++index;
 57 | 			return *this;
 58 | 		}
 59 | 
 60 | 		const_iterator& operator++(int)
 61 | 		{
 62 | 			++*this;
 63 | 			return *this;
 64 | 		}
 65 | 
 66 | 		auto operator*() const { return std::make_pair(index, *iter); }
 67 | 
 68 | 		bool operator==(const const_iterator& rhs) const noexcept { return iter == rhs.iter; }
 69 | 		bool operator!=(const const_iterator& rhs) const noexcept { return iter != rhs.iter; }
 70 | 	};
 71 | 
 72 | 	auto begin() const
 73 | 	{
 74 | 		if constexpr (std::is_const<Container>::value)
 75 | 			return const_iterator{container.cbegin()};
 76 | 		else
 77 | 			return iterator{container.begin()};
 78 | 	}
 79 | 
 80 | 	auto end() const
 81 | 	{
 82 | 		if constexpr (std::is_const<Container>::value)
 83 | 			return const_iterator{container.cend()};
 84 | 		else
 85 | 			return iterator{container.end()};
 86 | 	}
 87 | };
 88 | 
 89 | template <typename Container, typename Lambda>
 90 | struct filter {
 91 | 	Container& container;
 92 | 	Lambda proc;
 93 | 
 94 | 	struct iterator {
 95 | 		typename Container::iterator i;
 96 | 		typename Container::iterator e;
 97 | 		Lambda filter;
 98 | 
 99 | 		auto operator*() const { return *i; }
100 | 
101 | 		iterator& operator++()
102 | 		{
103 | 			++i;
104 | 			while (i != e && !filter(*i))
105 | 				++i;
106 | 			return *this;
107 | 		}
108 | 
109 | 		iterator& operator++(int) { return ++*this; }
110 | 
111 | 		bool operator==(const iterator& rhs) const noexcept { return i == rhs.i; }
112 | 		bool operator!=(const iterator& rhs) const noexcept { return !(*this == rhs); }
113 | 	};
114 | 
115 | 	struct const_iterator {
116 | 		typename Container::const_iterator i;
117 | 		typename Container::const_iterator e;
118 | 		Lambda filter;
119 | 
120 | 		auto operator*() const { return *i; }
121 | 
122 | 		const_iterator& operator++()
123 | 		{
124 | 			++i;
125 | 			while (i != e && !filter(*i))
126 | 				++i;
127 | 			return *this;
128 | 		}
129 | 
130 | 		const_iterator& operator++(int) { return ++*this; }
131 | 
132 | 		bool operator==(const const_iterator& rhs) const noexcept { return i == rhs.i; }
133 | 		bool operator!=(const const_iterator& rhs) const noexcept { return !(*this == rhs); }
134 | 	};
135 | 
136 | 	auto begin() const
137 | 	{
138 | 		if constexpr (std::is_const<Container>::value)
139 | 		{
140 | 			auto i = const_iterator{std::cbegin(container), std::cend(container), proc};
141 | 			while (i != end() && !proc(*i))
142 | 				++i;
143 | 			return i;
144 | 		}
145 | 		else
146 | 		{
147 | 			auto i = iterator{std::begin(container), std::end(container), proc};
148 | 			while (i != end() && !proc(*i))
149 | 				++i;
150 | 			return i;
151 | 		}
152 | 	}
153 | 
154 | 	auto end() const
155 | 	{
156 | 		if constexpr (std::is_const<Container>::value)
157 | 			return const_iterator{std::cend(container), std::cend(container), proc};
158 | 		else
159 | 			return iterator{std::end(container), std::end(container), proc};
160 | 	}
161 | };
162 | 
163 | }  // namespace klex::util::detail
164 | 


--------------------------------------------------------------------------------
/src/klex/cfg/LeftRecursion.cpp:
--------------------------------------------------------------------------------
  1 | // This file is part of the "klex" project, http://github.com/christianparpart/klex
  2 | //	 (c) 2018 Christian Parpart <christian@parpart.family>
  3 | //
  4 | // Licensed under the MIT License (the "License"); you may not use this
  5 | // file except in compliance with the License. You may obtain a copy of
  6 | // the License at: http://opensource.org/licenses/MIT
  7 | 
  8 | #include <klex/cfg/Grammar.h>
  9 | #include <klex/cfg/LeftRecursion.h>
 10 | 
 11 | #include <algorithm>
 12 | 
 13 | using namespace std;
 14 | 
 15 | namespace klex::cfg
 16 | {
 17 | 
 18 | LeftRecursion::LeftRecursion(Grammar& _grammar): grammar_ { _grammar }
 19 | {
 20 | }
 21 | 
 22 | bool LeftRecursion::isLeftRecursive(const Grammar& grammar)
 23 | {
 24 |     const vector<NonTerminal> nonterminals = cfg::nonterminals(grammar);
 25 | 
 26 |     return any_of(begin(nonterminals), end(nonterminals), [&](const NonTerminal& nt) {
 27 |         const vector<const Production*> productions = grammar.getProductions(nt);
 28 | 
 29 |         return any_of(begin(productions), end(productions), [](const Production* p) {
 30 |             auto syms = symbols(p->handle);
 31 | 
 32 |             return !syms.empty() && holds_alternative<NonTerminal>(syms[0])
 33 |                    && get<NonTerminal>(syms[0]) == p->name && syms.size() > 1;
 34 |         });
 35 |     });
 36 | }
 37 | 
 38 | void LeftRecursion::direct()
 39 | {
 40 |     for (const NonTerminal& nt: cfg::nonterminals(grammar_))
 41 |         eliminateDirect(nt);
 42 | }
 43 | 
 44 | void LeftRecursion::indirect()
 45 | {
 46 |     const vector<NonTerminal> nonterminals = cfg::nonterminals(grammar_);
 47 | 
 48 |     for (size_t i = 0; i < nonterminals.size(); ++i)
 49 |     {
 50 |         for (size_t k = 0; k < i; ++k)
 51 |         {
 52 |             for (Production* p: select(nonterminals[i], nonterminals[k]))
 53 |             {
 54 |                 (void) p; // TODO
 55 |                 for (Production* q: grammar_.getProductions(nonterminals[k]))
 56 |                 {
 57 |                     (void) q; // TODO
 58 |                     // replace first non-terminal
 59 |                     ; // p->replaceSymbolAt(0, NonTerminal{q->name});
 60 |                 }
 61 |             }
 62 |         }
 63 | 
 64 |         eliminateDirect(nonterminals[i]);
 65 |     }
 66 | }
 67 | 
 68 | list<Production*> LeftRecursion::select(const NonTerminal& lhs, const NonTerminal& first)
 69 | {
 70 |     list<Production*> out;
 71 | 
 72 |     for (Production* p: grammar_.getProductions(lhs))
 73 |         if (const optional<NonTerminal> nt = firstNonTerminal(p->handle); nt.has_value() && *nt == first)
 74 |             out.emplace_back(p);
 75 | 
 76 |     return out;
 77 | }
 78 | 
 79 | void LeftRecursion::eliminateDirect(const NonTerminal& nt)
 80 | {
 81 |     if (auto [head, tail] = split(grammar_.getProductions(nt)); !tail.empty())
 82 |     {
 83 |         const NonTerminal tailSymbol = createRelatedNonTerminal(nt);
 84 |         for (Production* p: head) // b -> b A'
 85 |             p->handle.emplace_back(tailSymbol);
 86 | 
 87 |         for (Production* p: tail)
 88 |         {
 89 |             p->name = tailSymbol.name;
 90 |             p->handle.emplace_back(tailSymbol);
 91 |             p->handle.erase(p->handle.begin());
 92 |         }
 93 | 
 94 |         // inject new epsilon-production.
 95 |         grammar_.productions.emplace_back(Production { tailSymbol.name, {} });
 96 |         // TODO: don't emplace at the back of all but at the back of the last NT's tail symbol.
 97 |         // TODO: fix injected EOF rule, omfg
 98 |     }
 99 | }
100 | 
101 | NonTerminal LeftRecursion::createRelatedNonTerminal(const NonTerminal& nt) const
102 | {
103 |     string tail = nt.name + "_";
104 | 
105 |     while (any_of(begin(grammar_.productions), end(grammar_.productions), [&](const Production& p) {
106 |         return p.name == tail;
107 |     }))
108 |         tail += "_";
109 | 
110 |     return NonTerminal { tail };
111 | }
112 | 
113 | pair<vector<Production*>, vector<Production*>> LeftRecursion::split(vector<Production*> productions) const
114 | {
115 |     vector<Production*> head;
116 |     vector<Production*> tail;
117 | 
118 |     for (Production* p: productions)
119 |     {
120 |         const optional<NonTerminal> nt = firstNonTerminal(p->handle);
121 |         if (nt.has_value() && *nt == p->name && symbols(p->handle).size() > 1)
122 |             tail.emplace_back(p);
123 |         else
124 |             head.emplace_back(p);
125 |     }
126 | 
127 |     return make_pair(std::move(head), std::move(tail));
128 | }
129 | 
130 | } // namespace klex::cfg
131 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
  1 | # klex - A Scanner Generator
  2 | [![Build Status](https://travis-ci.org/christianparpart/klex.svg?branch=master)](https://travis-ci.org/christianparpart/klex) [![Build Status](https://ci.appveyor.com/api/projects/status/l8isxx0k38kdnatq?svg=true)](https://ci.appveyor.com/project/christianparpart/klex) [![codecov](https://codecov.io/gh/christianparpart/klex/branch/master/graph/badge.svg)](https://codecov.io/gh/christianparpart/klex) [![Language grade: C/C++](https://img.shields.io/lgtm/grade/cpp/g/christianparpart/klex.svg?logo=lgtm&logoWidth=18)](https://lgtm.com/projects/g/christianparpart/klex/context:cpp)
  3 | 
  4 | 
  5 | 
  6 | - mklex: CLI tool for compiling regular expressions into state transition tables
  7 | - libklex: C++ library for lexing
  8 | 
  9 | ### mklex CLI
 10 | ```
 11 | mklex - klex lexer generator
 12 | (c) 2018 Christian Parpart <christian@parpart.family>
 13 | 
 14 |  -v, --verbose                Prints some more verbose output
 15 |  -h, --help                   Prints this help and exits
 16 |  -f, --file=PATTERN_FILE      Input file with lexer rules
 17 |  -t, --output-table=FILE      Output file that will contain the compiled tables (use - to represent stderr)
 18 |  -T, --output-token=FILE      Output file that will contain the compiled tables (use - to represent stderr)
 19 |  -n, --table-name=IDENTIFIER  Symbol name for generated table (may include namespace). [lexerDef]
 20 |  -N, --token-name=IDENTIFIER  Symbol name for generated token enum type (may include namespace). [Token]
 21 |  -M, --machine-name=IDENTIFIER
 22 |                               Symbol name for generated machine enum type (must not include namespace). [Machine]
 23 |  -x, --debug-dfa=DOT_FILE     Writes dot graph of final finite automaton. Use - to represent stdout. []
 24 |  -d, --debug-nfa              Writes dot graph of non-deterministic finite automaton to stdout and exits.
 25 |      --no-dfa-minimize        Do not minimize the DFA
 26 |  -p, --perf                   Print performance counters to stderr.
 27 | ```
 28 | 
 29 | ### Example klex Grammar
 30 | 
 31 | ```
 32 | # specials
 33 | Spacing(ignore) ::= "[\t\s]+"
 34 | Eof             ::= <<EOF>>
 35 | 
 36 | # symbols
 37 | Plus            ::= \+
 38 | RndOpen         ::= \(
 39 | RndClose        ::= \)
 40 | 
 41 | # keywords
 42 | If              ::= if
 43 | Then            ::= then
 44 | Else            ::= else
 45 | 
 46 | # literals & identifiers
 47 | NumberLiteral   ::= 0|[1-9][0-9]*
 48 | Identifier      ::= [a-zA-Z_][a-zA-Z0-9_]*
 49 | ```
 50 | 
 51 | ### klex Lexer API
 52 | 
 53 | The great thing about the Lexer API is that it is header-only, because the most complex work
 54 | is already done when the rules are compiled.
 55 | 
 56 | You can compile the above grammar with `mklex -f rules.klex -t myrules.h -T mytokens.h`
 57 | and then compile the code below:
 58 | 
 59 | ```cpp
 60 | #include <klex/Lexer.h>
 61 | #include <fstream>
 62 | #include <memory>
 63 | #include "myrules.h"
 64 | #include "mytokens.h"
 65 | 
 66 | int main(int argc, const char* argv[]) {
 67 |   klex::Lexer<Token> lexer {lexerDef, std::make_unique<std::ifstream>(argv[1])};
 68 | 
 69 |   for (Token t = lexer.recognize(); t != Token::Eof; t = lexer.recognize()) {
 70 |     std::cerr << fmt::format("[{}-{}]: token {} (\"{}\")\n",
 71 |                              lexer.offset().first,
 72 |                              lexer.offset().second,
 73 |                              to_string(t), lexer.word());
 74 |   }
 75 | 
 76 |   return EXIT_SUCCESS;
 77 | }
 78 | ```
 79 | 
 80 | ### klex lexer generator API
 81 | 
 82 | See [examples/mathexpr.cpp](https://github.com/christianparpart/klex/blob/master/examples/mathexpr.cpp)
 83 | as a great example. Here's a snippet:
 84 | 
 85 | ```cpp
 86 | enum class Token { Eof = 1, Plus, Minus, Mul, Div, RndOpen, RndClose, Number, INVALID };
 87 | std::string RULES = R"(
 88 |     Space(ignore) ::= [\s\t]+
 89 |     Eof           ::= <<EOF>>
 90 |     Plus          ::= "+"
 91 |     Minus         ::= "-"
 92 |     Mul           ::= "*"
 93 |     Div           ::= "/"
 94 |     RndOpen       ::= "("
 95 |     RndClose      ::= \)
 96 |     Number        ::= -?([0-9]+|[0-9]{1,3}(_[0-9]{3})*)
 97 |     INVALID       ::= .
 98 | )";
 99 | 
100 | using Number = long long int;
101 | Number expr(Lexer<Token>& lexer) {
102 |   // [... consume lexer tokens here ...]
103 |   return 42;
104 | }
105 | 
106 | int main(int argc, const char* argv[]) {
107 |   klex::Compiler cc;
108 |   cc.declareAll(std::make_unique<std::stringstream>(RULES));
109 | 
110 |   Lexer lexer { cc.compile(), std::make_unique<std::stringstream>("2 + 3 * (5 - 1)") };
111 | 
112 |   lexer.recognize();      // recognize first token
113 |   Number y = expr(lexer);
114 | 
115 |   std::cerr << fmt::format("{} = {}\n", input, y);
116 | 
117 |   return EXIT_SUCCESS;
118 | }
119 | ```
120 | 
121 | ### References
122 | 
123 | - https://swtch.com/~rsc/regexp/
124 | 


--------------------------------------------------------------------------------
/src/klex/cfg/GrammarParser_test.cpp:
--------------------------------------------------------------------------------
  1 | // This file is part of the "klex" project, http://github.com/christianparpart/klex
  2 | //	 (c) 2018 Christian Parpart <christian@parpart.family>
  3 | //
  4 | // Licensed under the MIT License (the "License"); you may not use this
  5 | // file except in compliance with the License. You may obtain a copy of
  6 | // the License at: http://opensource.org/licenses/MIT
  7 | 
  8 | #include <klex/Report.h>
  9 | #include <klex/cfg/GrammarLexer.h>
 10 | #include <klex/cfg/GrammarParser.h>
 11 | #include <klex/util/literals.h>
 12 | #include <klex/util/testing.h>
 13 | 
 14 | #include <algorithm>
 15 | 
 16 | using namespace std;
 17 | using namespace klex;
 18 | using namespace klex::cfg;
 19 | using namespace klex::util::literals;
 20 | 
 21 | const static std::string simpleGrammarSpec =
 22 |     R"(`Start ::= A | B;
 23 | 	   `A     ::= 'a';
 24 | 	   `B     ::= 'b'     {b1}
 25 | 	   `        | 'b' B   {b2};
 26 | 	   `)"_multiline;
 27 | 
 28 | TEST(cfg_GrammarParser, parserSimple)
 29 | {
 30 |     ConsoleReport report;
 31 |     GrammarParser parser(GrammarLexer { simpleGrammarSpec }, &report);
 32 |     Grammar grammar = parser.parse();
 33 |     ASSERT_EQ(5, grammar.productions.size());
 34 | 
 35 |     ASSERT_EQ("Start", grammar.productions[0].name);
 36 |     ASSERT_EQ("A", to_string(grammar.productions[0].handle));
 37 |     ASSERT_EQ("Start", grammar.productions[1].name);
 38 |     ASSERT_EQ("B", to_string(grammar.productions[1].handle));
 39 | 
 40 |     ASSERT_EQ("A", grammar.productions[2].name);
 41 |     ASSERT_EQ("\"a\"", to_string(grammar.productions[2].handle));
 42 | 
 43 |     ASSERT_EQ("B", grammar.productions[3].name);
 44 |     ASSERT_EQ("\"b\" {b1}", to_string(grammar.productions[3].handle));
 45 | 
 46 |     ASSERT_EQ("B", grammar.productions[4].name);
 47 |     ASSERT_EQ("\"b\" B {b2}", to_string(grammar.productions[4].handle));
 48 | }
 49 | 
 50 | TEST(cfg_GrammarParser, unresolved_nonterminals)
 51 | {
 52 |     BufferedReport report;
 53 |     Grammar grammar = GrammarParser(GrammarLexer { "Start ::= Another" }, &report).parse();
 54 |     ASSERT_TRUE(report.containsFailures());
 55 | 
 56 |     // TODO: make sure the failure reported is the unresolved-nonterminals case.
 57 | }
 58 | 
 59 | TEST(cfg_GrammarParser, action)
 60 | {
 61 |     ConsoleReport report;
 62 |     GrammarParser parser = GrammarParser("E ::= 'a' {a};", &report);
 63 |     Grammar grammar = parser.parse();
 64 |     ASSERT_FALSE(report.containsFailures());
 65 | }
 66 | 
 67 | TEST(cfg_GrammarParser, action_on_epsilon)
 68 | {
 69 |     ConsoleReport report;
 70 |     GrammarParser parser = GrammarParser("Rule ::= {action};", &report);
 71 |     Grammar grammar = parser.parse();
 72 |     ASSERT_FALSE(report.containsFailures());
 73 | }
 74 | 
 75 | struct CheckTerminalPattern
 76 | {
 77 |     string pattern;
 78 |     bool operator()(const Terminal& w) const { return pattern == w.pattern(); }
 79 | };
 80 | 
 81 | TEST(cfg_GrammarParser, customTokens)
 82 | {
 83 |     BufferedReport report;
 84 |     Grammar grammar = GrammarParser(
 85 |                           GrammarLexer {
 86 |                               R"(`token {
 87 | 			   `  Spacing(ignore) ::= [\s\t]+
 88 | 			   `  Number          ::= [0-9]+
 89 | 			   `}
 90 | 			   `
 91 | 			   `Start ::= '(' Number ')';
 92 | 			   `)"_multiline },
 93 |                           &report)
 94 |                           .parse();
 95 | 
 96 |     ASSERT_FALSE(report.containsFailures());
 97 |     grammar.finalize();
 98 | 
 99 |     log(grammar.dump());
100 | 
101 |     for (const Terminal& w: grammar.terminals)
102 |         logf("Terminal: {}", w);
103 | 
104 |     // verify presense of all terminals in the grammar
105 |     ASSERT_EQ(5, grammar.terminals.size());
106 |     ASSERT_TRUE(any_of(begin(grammar.terminals), end(grammar.terminals), CheckTerminalPattern { "[0-9]+" }));
107 |     ASSERT_TRUE(
108 |         any_of(begin(grammar.terminals), end(grammar.terminals), CheckTerminalPattern { "[\\s\\t]+" }));
109 |     ASSERT_TRUE(any_of(begin(grammar.terminals), end(grammar.terminals), CheckTerminalPattern { "(" }));
110 |     ASSERT_TRUE(any_of(begin(grammar.terminals), end(grammar.terminals), CheckTerminalPattern { ")" }));
111 | 
112 |     // verify production rule to be in the form as the input mandates
113 |     const auto symbols = klex::cfg::symbols(grammar.productions[0].handle);
114 |     ASSERT_EQ(4, symbols.size());
115 | 
116 |     ASSERT_TRUE(holds_alternative<Terminal>(symbols[0]));
117 |     ASSERT_TRUE(holds_alternative<Terminal>(symbols[1]));
118 |     ASSERT_TRUE(holds_alternative<Terminal>(symbols[2]));
119 |     ASSERT_TRUE(holds_alternative<Terminal>(symbols[3]));
120 | 
121 |     ASSERT_EQ("(", get<Terminal>(symbols[0]).pattern());
122 | 
123 |     ASSERT_EQ("Number", get<Terminal>(symbols[1]).name);
124 |     ASSERT_EQ("[0-9]+", get<Terminal>(symbols[1]).pattern());
125 | 
126 |     ASSERT_EQ(")", get<Terminal>(symbols[2]).pattern());
127 | 
128 |     ASSERT_EQ("EOF", get<Terminal>(symbols[3]).name);
129 |     ASSERT_EQ("<<EOF>>", get<Terminal>(symbols[3]).pattern());
130 | }
131 | 
132 | // vim:ts=4:sw=4:noet
133 | 


--------------------------------------------------------------------------------
/src/klex/util/iterator_test.cpp:
--------------------------------------------------------------------------------
  1 | // This file is part of the "klex" project, http://github.com/christianparpart/klex
  2 | //	 (c) 2018 Christian Parpart <christian@parpart.family>
  3 | //
  4 | // Licensed under the MIT License (the "License"); you may not use this
  5 | // file except in compliance with the License. You may obtain a copy of
  6 | // the License at: http://opensource.org/licenses/MIT
  7 | 
  8 | #include <klex/util/iterator.h>
  9 | #include <klex/util/testing.h>
 10 | 
 11 | #include <array>
 12 | #include <string>
 13 | #include <type_traits>
 14 | #include <vector>
 15 | 
 16 | using namespace std;
 17 | using namespace klex::util;
 18 | 
 19 | TEST(util_iterator_reversed, empty)
 20 | {
 21 |     const vector<int> v;
 22 |     auto x = reversed(v);
 23 |     auto i = begin(x);
 24 |     ASSERT_TRUE(i == end(x));
 25 | }
 26 | 
 27 | TEST(util_iterator_reversed, one)
 28 | {
 29 |     const vector<int> v { 1 };
 30 |     auto x = reversed(v);
 31 |     auto i = begin(x);
 32 |     ASSERT_EQ(1, *i);
 33 |     i++;
 34 |     ASSERT_TRUE(i == end(x));
 35 | }
 36 | 
 37 | TEST(util_iterator_reversed, many)
 38 | {
 39 |     const vector<int> v { 1, 2, 3 };
 40 |     auto x = reversed(v);
 41 |     auto i = begin(x);
 42 |     ASSERT_EQ(3, *i);
 43 |     i++;
 44 |     ASSERT_EQ(2, *i);
 45 |     i++;
 46 |     ASSERT_EQ(1, *i);
 47 |     i++;
 48 |     ASSERT_TRUE(i == end(x));
 49 | }
 50 | 
 51 | TEST(util_iterator_indexed, many_const)
 52 | {
 53 |     const vector<int> v { 10, 20, 30 };
 54 |     const auto x = indexed(v);
 55 |     static_assert(is_const<decltype(x)>::value);
 56 |     auto i = begin(x);
 57 | 
 58 |     ASSERT_EQ(0, (*i).first);
 59 |     ASSERT_EQ(10, (*i).second);
 60 |     i++;
 61 | 
 62 |     ASSERT_EQ(1, (*i).first);
 63 |     ASSERT_EQ(20, (*i).second);
 64 |     i++;
 65 | 
 66 |     ASSERT_EQ(2, (*i).first);
 67 |     ASSERT_EQ(30, (*i).second);
 68 |     i++;
 69 | 
 70 |     ASSERT_TRUE(i == end(x));
 71 | }
 72 | 
 73 | TEST(util_iterator_indexed, many)
 74 | {
 75 |     vector<string> v { "zero", "one", "two" };
 76 |     auto x = indexed(v);
 77 |     auto i = begin(x);
 78 | 
 79 |     ASSERT_EQ(0, (*i).first);
 80 |     ASSERT_EQ("zero", (*i).second);
 81 |     i++;
 82 | 
 83 |     ASSERT_EQ(1, (*i).first);
 84 |     ASSERT_EQ("one", (*i).second);
 85 |     i++;
 86 | 
 87 |     ASSERT_EQ(2, (*i).first);
 88 |     ASSERT_EQ("two", (*i).second);
 89 |     i++;
 90 | 
 91 |     ASSERT_TRUE(i == end(x));
 92 | }
 93 | 
 94 | TEST(util_iterator_indexed, range_based_for_loop)
 95 | {
 96 |     log("const:");
 97 |     const vector<int> v1 { 10, 20, 30 };
 98 |     for (const auto&& [index, value]: indexed(v1))
 99 |         logf("index {}, value {}", index, value);
100 | 
101 |     log("non-const:");
102 |     vector<int> v2 { 10, 20, 30 };
103 |     for (const auto&& [index, value]: indexed(v2))
104 |         logf("index {}, value {}", index, value);
105 | }
106 | 
107 | TEST(util_iterator_filter, for_range)
108 | {
109 |     const vector<int> nums = { 1, 2, 3, 4 };
110 |     vector<int> odds;
111 |     for (const int i: filter(nums, [](int x) { return x % 2 != 0; }))
112 |         odds.push_back(i);
113 | 
114 |     ASSERT_EQ(2, odds.size());
115 |     EXPECT_EQ(1, odds[0]);
116 |     EXPECT_EQ(3, odds[1]);
117 | }
118 | 
119 | TEST(util_iterator_filter, count_proc_invocations)
120 | {
121 |     static const array<int, 4> numbers = { 1, 2, 3, 4 };
122 |     int count = 0;
123 |     auto counter = [&](int) {
124 |         ++count;
125 |         return true;
126 |     };
127 |     const auto f = filter(numbers, counter);
128 |     for_each(begin(f), end(f), [](int) {});
129 |     ASSERT_EQ(4, count);
130 | }
131 | 
132 | TEST(util_iterator_filter, for_range_initializer_list)
133 | {
134 |     static const array<int, 4> numbers = { 1, 2, 3, 4 };
135 |     vector<int> odds;
136 |     auto f_odd = [&](int x) {
137 |         logf("f_odd: x={0}", x);
138 |         return x % 2 != 0;
139 |     };
140 |     for (const int i: filter(numbers, f_odd))
141 |         odds.push_back(i);
142 | 
143 |     ASSERT_EQ(2, odds.size());
144 |     EXPECT_EQ(1, odds[0]);
145 |     EXPECT_EQ(3, odds[1]);
146 | }
147 | 
148 | TEST(util_iterator_translate, vector)
149 | {
150 |     const vector<int> in { 1, 2, 3, 4 };
151 |     const vector<int> out = translate(in, [](int i) -> int { return i * 2; });
152 | 
153 |     for (const auto&& [i, v]: indexed(out))
154 |         logf("out[{}] = {}", i, v);
155 | 
156 |     ASSERT_EQ(4, out.size());
157 | 
158 |     EXPECT_EQ(2, out[0]);
159 |     EXPECT_EQ(4, out[1]);
160 |     EXPECT_EQ(6, out[2]);
161 |     EXPECT_EQ(8, out[3]);
162 | }
163 | 
164 | TEST(util_iterator_translate, chain_translate_join)
165 | {
166 |     const vector<int> in { 1, 2, 3, 4 };
167 |     const string out { join(translate(in, [](int i) -> string { return to_string(i); }), ", ") };
168 | 
169 |     ASSERT_EQ("1, 2, 3, 4", out);
170 | }
171 | 
172 | TEST(util_iterator, find_last)
173 | {
174 |     const vector<int> v { 1, 2, 3, 4 };
175 |     const auto i = find_last(v, [](int i) { return i % 2 != 0; }); // find last odd value -> 3
176 | 
177 |     ASSERT_TRUE(i != end(v));
178 |     ASSERT_EQ(3, *i);
179 | }
180 | 


--------------------------------------------------------------------------------
/src/klex/regular/Symbols.cpp:
--------------------------------------------------------------------------------
  1 | // This file is part of the "klex" project, http://github.com/christianparpart/klex>
  2 | //   (c) 2018 Christian Parpart <christian@parpart.family>
  3 | //
  4 | // Licensed under the MIT License (the "License"); you may not use this
  5 | // file except in compliance with the License. You may obtain a copy of
  6 | // the License at: http://opensource.org/licenses/MIT
  7 | 
  8 | #include <klex/regular/Symbols.h>
  9 | 
 10 | #include <sstream>
 11 | 
 12 | using namespace std;
 13 | 
 14 | namespace klex::regular
 15 | {
 16 | 
 17 | string prettySymbol(Symbol input)
 18 | {
 19 |     switch (input)
 20 |     {
 21 |         case Symbols::Error: return "<<ERROR>>";
 22 |         case Symbols::BeginOfLine: return "<<BOL>>";
 23 |         case Symbols::EndOfLine: return "<<EOL>>";
 24 |         case Symbols::EndOfFile: return "<<EOF>>";
 25 |         case Symbols::Epsilon: return "ε";
 26 |         case '\a': return "\\a";
 27 |         case '\b': return "\\b";
 28 |         case '\f': return "\\f";
 29 |         case '\n': return "\\n";
 30 |         case '\r': return "\\r";
 31 |         case ' ': return "\\s";
 32 |         case '\t': return "\\t";
 33 |         case '\v': return "\\v";
 34 |         case '\0': return "\\0";
 35 |         case '.': return "\\."; // so we can distinguish from dot-operator
 36 |         default:
 37 |             if (isprint(input))
 38 |             {
 39 |                 return fmt::format("{}", (char) input);
 40 |             }
 41 |             else
 42 |             {
 43 |                 return fmt::format("\\x{:02x}", input);
 44 |             }
 45 |     }
 46 | }
 47 | 
 48 | string prettyCharRange(Symbol ymin, Symbol ymax)
 49 | {
 50 |     assert(ymin <= ymax);
 51 | 
 52 |     stringstream sstr;
 53 |     switch (ymax - ymin)
 54 |     {
 55 |         case 0: sstr << prettySymbol(ymin); break;
 56 |         case 1: sstr << prettySymbol(ymin) << prettySymbol(ymin + 1); break;
 57 |         case 2: sstr << prettySymbol(ymin) << prettySymbol(ymin + 1) << prettySymbol(ymax); break;
 58 |         default: sstr << prettySymbol(ymin) << '-' << prettySymbol(ymax); break;
 59 |     }
 60 |     return sstr.str();
 61 | }
 62 | 
 63 | string groupCharacterClassRanges(const vector<bool>& syms)
 64 | {
 65 |     // {1,3,5,a,b,c,d,e,f,z]
 66 |     // ->
 67 |     // {{1}, {3}, {5}, {a-f}, {z}}
 68 | 
 69 |     stringstream sstr;
 70 |     Symbol ymin = '\0';
 71 |     Symbol ymax = ymin;
 72 |     int k = 0;
 73 | 
 74 |     for (size_t i = 0, e = syms.size(); i != e; ++i)
 75 |     {
 76 |         if (!syms[i])
 77 |             continue;
 78 | 
 79 |         const Symbol c = (Symbol) i;
 80 |         if (c == ymax + 1)
 81 |         { // range growing
 82 |             ymax = c;
 83 |         }
 84 |         else
 85 |         { // gap found
 86 |             if (k)
 87 |             {
 88 |                 sstr << prettyCharRange(ymin, ymax);
 89 |             }
 90 |             ymin = ymax = c;
 91 |         }
 92 |         k++;
 93 |     }
 94 |     sstr << prettyCharRange(ymin, ymax);
 95 | 
 96 |     return sstr.str();
 97 | }
 98 | 
 99 | string groupCharacterClassRanges(vector<Symbol> chars)
100 | {
101 |     // we took a copy in tgroup here, so I can sort() later
102 |     sort(chars.begin(), chars.end());
103 | 
104 |     if (chars.size() == 1)
105 |         return prettySymbol(chars.front());
106 | 
107 |     // {1,3,5,a,b,c,d,e,f,z]
108 |     // ->
109 |     // "123a-fz"
110 | 
111 |     stringstream sstr;
112 |     Symbol ymin = 0;
113 |     Symbol ymax = ymin;
114 |     int i = 0;
115 | 
116 |     for (Symbol c: chars)
117 |     {
118 |         if (c == ymax + 1)
119 |         { // range growing
120 |             ymax = c;
121 |         }
122 |         else
123 |         { // gap found
124 |             if (i)
125 |             {
126 |                 sstr << prettyCharRange(ymin, ymax);
127 |             }
128 |             ymin = ymax = c;
129 |         }
130 |         i++;
131 |     }
132 |     sstr << prettyCharRange(ymin, ymax);
133 | 
134 |     return sstr.str();
135 | }
136 | 
137 | SymbolSet::SymbolSet(DotMode): set_(256, true), size_ { 255 }, hash_ { 2166136261 }
138 | {
139 |     set_[(size_t) '\n'] = false;
140 |     for (Symbol s: *this)
141 |     {
142 |         hash_ = (hash_ * 16777619) ^ s;
143 |     }
144 | }
145 | 
146 | bool SymbolSet::isDot() const noexcept
147 | {
148 |     static SymbolSet dot(SymbolSet::Dot);
149 |     return *this == dot;
150 | }
151 | 
152 | string SymbolSet::to_string() const
153 | {
154 |     if (isDot())
155 |         return ".";
156 | 
157 |     return groupCharacterClassRanges(set_);
158 | }
159 | 
160 | void SymbolSet::complement()
161 | {
162 |     // flip bits
163 |     for (size_t i = 0, e = set_.size(); i != e; ++i)
164 |     {
165 |         set_[i] = !set_[i];
166 |     }
167 | 
168 |     // flip size
169 |     size_ = set_.size() - size_;
170 | 
171 |     recalculateHash();
172 | }
173 | 
174 | void SymbolSet::recalculateHash()
175 | {
176 |     // recalculate hash
177 |     hash_ = 2166136261;
178 |     for (Symbol s: *this)
179 |     {
180 |         hash_ = (hash_ * 16777619) ^ s;
181 |     }
182 | }
183 | 
184 | } // namespace klex::regular
185 | 


--------------------------------------------------------------------------------
/src/klex/regular/DFA.h:
--------------------------------------------------------------------------------
  1 | // This file is part of the "klex" project, http://github.com/christianparpart/klex>
  2 | //   (c) 2018 Christian Parpart <christian@parpart.family>
  3 | //
  4 | // Licensed under the MIT License (the "License"); you may not use this
  5 | // file except in compliance with the License. You may obtain a copy of
  6 | // the License at: http://opensource.org/licenses/MIT
  7 | #pragma once
  8 | 
  9 | #include <klex/regular/Alphabet.h>
 10 | #include <klex/regular/State.h>
 11 | #include <algorithm>
 12 | #include <cmath>
 13 | #include <map>
 14 | #include <optional>
 15 | 
 16 | namespace klex::regular {
 17 | 
 18 | class NFA;
 19 | class DFABuilder;
 20 | class DotVisitor;
 21 | 
 22 | /**
 23 |  * Represents a deterministic finite automaton.
 24 |  */
 25 | class DFA {
 26 |   public:
 27 | 	using TransitionMap = std::map<Symbol, StateId>;
 28 | 	struct State {
 29 | 		// std::vector<StateId> states;
 30 | 		TransitionMap transitions;
 31 | 	};
 32 | 	using StateVec = std::vector<State>;
 33 | 
 34 | 	//! defines a mapping between accept state ID and another (prior) ID to track roll back the input stream
 35 | 	//! to.
 36 | 	using BacktrackingMap = std::map<StateId, StateId>;
 37 | 
 38 | 	DFA(const DFA& other) = delete;
 39 | 	DFA& operator=(const DFA& other) = delete;
 40 | 	DFA(DFA&&) = default;
 41 | 	DFA& operator=(DFA&&) = default;
 42 | 	~DFA() = default;
 43 | 
 44 | 	DFA() : states_{}, initialState_{0}, backtrackStates_{}, acceptTags_{} {}
 45 | 
 46 | 	[[nodiscard]] bool empty() const noexcept { return states_.empty(); }
 47 | 	[[nodiscard]] size_t size() const noexcept { return states_.size(); }
 48 | 
 49 | 	[[nodiscard]] StateId lastState() const noexcept
 50 | 	{
 51 | 		assert(!empty());
 52 | 		return states_.size() - 1;
 53 | 	}
 54 | 
 55 | 	//! Retrieves the alphabet of this finite automaton.
 56 | 	Alphabet alphabet() const;
 57 | 
 58 | 	//! Retrieves the initial state.
 59 | 	StateId initialState() const { return initialState_; }
 60 | 
 61 | 	//! Retrieves the list of available states.
 62 | 	const StateVec& states() const { return states_; }
 63 | 	StateVec& states() { return states_; }
 64 | 
 65 | 	StateIdVec stateIds() const
 66 | 	{
 67 | 		StateIdVec v;
 68 | 		v.reserve(states_.size());
 69 | 		for (size_t i = 0, e = states_.size(); i != e; ++i)
 70 | 			v.push_back(i);  // funny, I know
 71 | 		return v;
 72 | 	}
 73 | 
 74 | 	//! Retrieves the list of accepting states.
 75 | 	std::vector<StateId> acceptStates() const;
 76 | 
 77 | 	/**
 78 | 	 * Traverses all states and edges in this NFA and calls @p visitor for each state & edge.
 79 | 	 *
 80 | 	 * Use this function to e.g. get a GraphViz dot-file drawn.
 81 | 	 */
 82 | 	void visit(DotVisitor& visitor) const;
 83 | 
 84 | 	void createStates(size_t count);
 85 | 
 86 | 	void setInitialState(StateId state);
 87 | 
 88 | 	const TransitionMap& stateTransitions(StateId id) const
 89 | 	{
 90 | 		return states_[static_cast<size_t>(id)].transitions;
 91 | 	}
 92 | 
 93 | 	// {{{ backtracking (for lookahead)
 94 | 	void setBacktrack(StateId from, StateId to) { backtrackStates_[from] = to; }
 95 | 
 96 | 	std::optional<StateId> backtrack(StateId acceptState) const
 97 | 	{
 98 | 		if (auto i = backtrackStates_.find(acceptState); i != backtrackStates_.end())
 99 | 			return i->second;
100 | 
101 | 		return std::nullopt;
102 | 	}
103 | 
104 | 	const BacktrackingMap& backtracking() const noexcept { return backtrackStates_; }
105 | 	// }}}
106 | 
107 | 	//! Flags given state as accepting-state with given Tag @p acceptTag.
108 | 	void setAccept(StateId state, Tag acceptTag) { acceptTags_[state] = acceptTag; }
109 | 
110 | 	bool isAccepting(StateId s) const { return acceptTags_.find(s) != acceptTags_.end(); }
111 | 
112 | 	std::optional<Tag> acceptTag(StateId s) const
113 | 	{
114 | 		if (auto i = acceptTags_.find(s); i != acceptTags_.end())
115 | 			return i->second;
116 | 
117 | 		return std::nullopt;
118 | 	}
119 | 
120 | 	std::optional<StateId> delta(StateId state, Symbol symbol) const
121 | 	{
122 | 		const auto& T = states_[state].transitions;
123 | 		if (auto i = T.find(symbol); i != T.end())
124 | 			return i->second;
125 | 
126 | 		return std::nullopt;
127 | 	}
128 | 
129 | 	void setTransition(StateId from, Symbol symbol, StateId to);
130 | 	void removeTransition(StateId from, Symbol symbol);
131 | 
132 | 	StateIdVec nonAcceptStates() const
133 | 	{
134 | 		StateIdVec result;
135 | 		result.reserve(
136 | 			std::abs(static_cast<long int>(states_.size()) - static_cast<long int>(acceptTags_.size())));
137 | 
138 | 		for (StateId s = 0, sE = size(); s != sE; ++s)
139 | 			if (!isAccepting(s))
140 | 				result.push_back(s);
141 | 
142 | 		return result;
143 | 	}
144 | 
145 | 	bool isAcceptor(Tag t) const
146 | 	{
147 | 		for (const std::pair<StateId, Tag>& p : acceptTags_)
148 | 			if (p.second == t)
149 | 				return true;
150 | 
151 | 		return false;
152 | 	}
153 | 
154 | 	StateId append(DFA&& other, StateId q0);
155 | 
156 |   private:
157 | 	void prepareStateIds(StateId baseId, StateId q0);
158 | 
159 |   private:
160 | 	StateVec states_;
161 | 	StateId initialState_;
162 | 	BacktrackingMap backtrackStates_;
163 | 	AcceptMap acceptTags_;
164 | };
165 | 
166 | }  // namespace klex::regular
167 | 


--------------------------------------------------------------------------------
/src/klex/regular/RegExpr.cpp:
--------------------------------------------------------------------------------
  1 | // This file is part of the "klex" project, http://github.com/christianparpart/klex>
  2 | //   (c) 2018 Christian Parpart <christian@parpart.family>
  3 | //
  4 | // Licensed under the MIT License (the "License"); you may not use this
  5 | // file except in compliance with the License. You may obtain a copy of
  6 | // the License at: http://opensource.org/licenses/MIT
  7 | 
  8 | #include <klex/regular/RegExpr.h>
  9 | #include <klex/util/overloaded.h>
 10 | 
 11 | #include <fmt/format.h>
 12 | 
 13 | #include <iostream>
 14 | #include <limits>
 15 | #include <sstream>
 16 | 
 17 | using namespace std;
 18 | 
 19 | /*
 20 |   REGULAR EXPRESSION SYNTAX:
 21 |   --------------------------
 22 | 
 23 |   expr                    := alternation
 24 |   alternation             := concatenation ('|' concatenation)*
 25 |   concatenation           := closure (closure)*
 26 |   closure                 := atom ['*' | '?' | '{' NUM [',' NUM] '}']
 27 |   atom                    := character | characterClass | '(' expr ')'
 28 |   characterClass          := '[' ['^'] characterClassFragment+ ']'
 29 |   characterClassFragment  := character | character '-' character
 30 | */
 31 | 
 32 | namespace klex::regular
 33 | {
 34 | 
 35 | auto embrace(const RegExpr& outer, const RegExpr& inner)
 36 | {
 37 |     if (precedence(outer) > precedence(inner))
 38 |         return "(" + to_string(inner) + ")";
 39 |     else
 40 |         return to_string(inner);
 41 | }
 42 | 
 43 | std::string to_string(const RegExpr& re)
 44 | {
 45 |     return visit(
 46 |         overloaded {
 47 |             [&](const ClosureExpr& e) {
 48 |                 stringstream sstr;
 49 |                 sstr << embrace(re, *e.subExpr);
 50 |                 if (e.minimumOccurrences == 0 && e.maximumOccurrences == 1)
 51 |                     sstr << '?';
 52 |                 else if (e.minimumOccurrences == 0 && e.maximumOccurrences == numeric_limits<unsigned>::max())
 53 |                     sstr << '*';
 54 |                 else if (e.minimumOccurrences == 1 && e.maximumOccurrences == numeric_limits<unsigned>::max())
 55 |                     sstr << '+';
 56 |                 else
 57 |                     sstr << '{' << e.minimumOccurrences << ',' << e.maximumOccurrences << '}';
 58 |                 return sstr.str();
 59 |             },
 60 |             [&](const AlternationExpr& e) { return embrace(re, *e.left) + "|" + embrace(re, *e.right); },
 61 |             [&](const ConcatenationExpr& e) { return embrace(re, *e.left) + embrace(re, *e.right); },
 62 |             [&](const LookAheadExpr& e) { return embrace(re, *e.left) + "/" + embrace(re, *e.right); },
 63 |             [](const CharacterExpr& e) { return string(1, e.value); },
 64 |             [](const EndOfFileExpr& e) { return string { "<<EOF>>" }; },
 65 |             [](const BeginOfLineExpr& e) { return string { "^" }; },
 66 |             [](const EndOfLineExpr& e) { return string { "$" }; },
 67 |             [](const CharacterClassExpr& e) { return e.symbols.to_string(); },
 68 |             [](const DotExpr& e) { return string { "." }; },
 69 |             [](const EmptyExpr& e) { return string {}; },
 70 |         },
 71 |         re);
 72 | }
 73 | 
 74 | int precedence(const RegExpr& regex)
 75 | {
 76 |     return visit(overloaded {
 77 |                      [](const AlternationExpr& e) { return 1; },
 78 |                      [](const BeginOfLineExpr& e) { return 4; },
 79 |                      [](const CharacterClassExpr& e) { return 4; },
 80 |                      [](const CharacterExpr& e) { return 4; },
 81 |                      [](const ClosureExpr& e) { return 3; },
 82 |                      [](const ConcatenationExpr& e) { return 2; },
 83 |                      [](const DotExpr& e) { return 4; },
 84 |                      [](const EmptyExpr& e) { return 4; },
 85 |                      [](const EndOfFileExpr& e) { return 4; },
 86 |                      [](const EndOfLineExpr& e) { return 4; },
 87 |                      [](const LookAheadExpr& e) { return 0; },
 88 |                  },
 89 |                  regex);
 90 | }
 91 | 
 92 | bool containsBeginOfLine(const RegExpr& regex)
 93 | {
 94 |     return visit(overloaded {
 95 |                      [](const AlternationExpr& e) {
 96 |                          return containsBeginOfLine(*e.left) || containsBeginOfLine(*e.right);
 97 |                      },
 98 |                      [](const BeginOfLineExpr& e) { return true; },
 99 |                      [](const CharacterClassExpr& e) { return false; },
100 |                      [](const CharacterExpr& e) { return false; },
101 |                      [](const ClosureExpr& e) { return containsBeginOfLine(*e.subExpr); },
102 |                      [](const ConcatenationExpr& e) {
103 |                          return containsBeginOfLine(*e.left) || containsBeginOfLine(*e.right);
104 |                      },
105 |                      [](const DotExpr& e) { return false; },
106 |                      [](const EmptyExpr& e) { return false; },
107 |                      [](const EndOfFileExpr& e) { return false; },
108 |                      [](const EndOfLineExpr& e) { return false; },
109 |                      [](const LookAheadExpr& e) {
110 |                          return containsBeginOfLine(*e.left) || containsBeginOfLine(*e.right);
111 |                      },
112 |                  },
113 |                  regex);
114 | }
115 | 
116 | } // namespace klex::regular
117 | 


--------------------------------------------------------------------------------
/src/klex/regular/RuleParser.h:
--------------------------------------------------------------------------------
  1 | // This file is part of the "klex" project, http://github.com/christianparpart/klex>
  2 | //   (c) 2018 Christian Parpart <christian@parpart.family>
  3 | //
  4 | // Licensed under the MIT License (the "License"); you may not use this
  5 | // file except in compliance with the License. You may obtain a copy of
  6 | // the License at: http://opensource.org/licenses/MIT
  7 | #pragma once
  8 | 
  9 | #include <klex/regular/Rule.h>
 10 | 
 11 | #include <fmt/format.h>
 12 | #include <istream>
 13 | #include <map>
 14 | #include <memory>
 15 | #include <optional>
 16 | #include <stdexcept>
 17 | #include <string>
 18 | 
 19 | namespace klex::regular {
 20 | 
/**
 * Parses lexer rule definitions from a character stream into a RuleList.
 *
 * Failures are reported through the nested exception types declared below;
 * all of those derive from std::runtime_error.
 */
class RuleParser {
  public:
	//! Constructs a parser reading from @p input; @p firstTerminalId defaults to FirstUserTag.
	explicit RuleParser(std::unique_ptr<std::istream> input, int firstTerminalId = FirstUserTag);
	//! Constructs a parser reading from the text @p input; @p firstTerminalId defaults to FirstUserTag.
	explicit RuleParser(std::string input, int firstTerminalId = FirstUserTag);

	//! Parses the input and returns the resulting rules.
	RuleList parseRules();

	// exception types, defined after this class
	class UnexpectedChar;
	class UnexpectedToken;
	class InvalidRuleOption;
	class InvalidRefRuleWithConditions;
	class DuplicateRule;

  private:
	// grammar productions
	void parseRule(RuleList& rules);
	std::vector<std::string> parseRuleConditions();
	void parseBasicRule(RuleList& rules, std::vector<std::string>&& conditions);
	std::string parseExpression();

  private:
	// low-level input helpers
	std::string consumeToken();
	void consumeAnySP();
	void consumeSP();
	void consumeAssoc();
	void consumeSpace();
	char currentChar() const noexcept;
	char consumeChar(char ch);
	char consumeChar();
	bool eof() const noexcept;
	std::string replaceRefs(const std::string& pattern);

  private:
	std::unique_ptr<std::istream> stream_;  //!< input being parsed
	std::map<std::string, Rule> refRules_;  //!< rules keyed by a string, presumably the rule name (TODO confirm)
	Rule* lastParsedRule_;
	bool lastParsedRuleIsRef_;
	char currentChar_;
	unsigned int line_;    //!< position bookkeeping, presumably for diagnostics (TODO confirm)
	unsigned int column_;
	unsigned int offset_;
	int nextTag_;
};
 63 | 
 64 | class RuleParser::InvalidRefRuleWithConditions : public std::runtime_error {
 65 |   public:
 66 | 	InvalidRefRuleWithConditions(unsigned line, unsigned column, Rule&& rule)
 67 | 		: std::runtime_error{fmt::format(
 68 | 			  "{}:{}: Invalid rule \"{}\". Reference rules must not be labelled with conditions.", line,
 69 | 			  column, rule.name)},
 70 | 		  rule_{std::move(rule)}
 71 | 	{
 72 | 	}
 73 | 
 74 | 	const Rule& rule() const noexcept { return rule_; }
 75 | 
 76 |   private:
 77 | 	const Rule rule_;
 78 | };
 79 | 
 80 | class RuleParser::DuplicateRule : public std::runtime_error {
 81 |   public:
 82 | 	DuplicateRule(Rule&& duplicate, const Rule& other)
 83 | 		: std::runtime_error{fmt::format(
 84 | 			  "{}:{}: Duplicated rule definition with name \"{}\", previously defined in {}:{}.",
 85 | 			  duplicate.line, duplicate.column, duplicate.name, other.line, other.column)},
 86 | 		  duplicate_{std::move(duplicate)},
 87 | 		  other_{other}
 88 | 	{
 89 | 	}
 90 | 
 91 | 	const Rule& duplicate() const noexcept { return duplicate_; }
 92 | 	const Rule& other() const noexcept { return other_; }
 93 | 
 94 |   private:
 95 | 	const Rule duplicate_;
 96 | 	const Rule& other_;
 97 | };
 98 | 
 99 | class RuleParser::UnexpectedToken : public std::runtime_error {
100 |   public:
101 | 	UnexpectedToken(unsigned offset, char actual, std::string expected)
102 | 		: std::runtime_error{fmt::format("{}: Unexpected token {}, expected <{}> instead.", offset, actual,
103 | 										 expected)},
104 | 		  offset_{offset},
105 | 		  actual_{std::move(actual)},
106 | 		  expected_{std::move(expected)}
107 | 	{
108 | 	}
109 | 
110 | 	unsigned offset() const noexcept { return offset_; }
111 | 	char actual() const noexcept { return actual_; }
112 | 	const std::string& expected() const noexcept { return expected_; }
113 | 
114 |   private:
115 | 	unsigned offset_;
116 | 	char actual_;
117 | 	std::string expected_;
118 | };
119 | 
120 | class RuleParser::UnexpectedChar : public std::runtime_error {
121 |   public:
122 | 	UnexpectedChar(unsigned int line, unsigned int column, char actual, char expected)
123 | 		: std::runtime_error{fmt::format("[{}:{}] Unexpected char {}, expected {} instead.", line, column,
124 | 										 quoted(actual), quoted(expected))},
125 | 		  line_{line},
126 | 		  column_{column},
127 | 		  actual_{actual},
128 | 		  expected_{expected}
129 | 	{
130 | 	}
131 | 
132 | 	unsigned int line() const noexcept { return line_; }
133 | 	unsigned int column() const noexcept { return column_; }
134 | 	char actual() const noexcept { return actual_; }
135 | 	char expected() const noexcept { return expected_; }
136 | 
137 |   private:
138 | 	static std::string quoted(char ch)
139 | 	{
140 | 		if (ch < 0)
141 | 			return "<<EOF>>";
142 | 		else
143 | 			return fmt::format("'{}'", ch);
144 | 	}
145 | 
146 |   private:
147 | 	unsigned int line_;
148 | 	unsigned int column_;
149 | 	char actual_;
150 | 	char expected_;
151 | };
152 | 
153 | class RuleParser::InvalidRuleOption : public std::runtime_error {
154 |   public:
155 | 	InvalidRuleOption(unsigned offset, std::string option)
156 | 		: std::runtime_error{fmt::format("{}: Invalid rule option \"{}\".", offset, option)},
157 | 		  offset_{offset},
158 | 		  option_{option}
159 | 	{
160 | 	}
161 | 
162 | 	unsigned offset() const noexcept { return offset_; }
163 | 	const std::string& option() const noexcept { return option_; }
164 | 
165 |   private:
166 | 	unsigned offset_;
167 | 	std::string option_;
168 | };
169 | 
170 | }  // namespace klex::regular
171 | 


--------------------------------------------------------------------------------
/src/klex/regular/DFA.cpp:
--------------------------------------------------------------------------------
  1 | // This file is part of the "klex" project, http://github.com/christianparpart/klex>
  2 | //   (c) 2018 Christian Parpart <christian@parpart.family>
  3 | //
  4 | // Licensed under the MIT License (the "License"); you may not use this
  5 | // file except in compliance with the License. You may obtain a copy of
  6 | // the License at: http://opensource.org/licenses/MIT
  7 | 
  8 | #include <klex/regular/DFA.h>
  9 | #include <klex/regular/DotVisitor.h>
 10 | #include <klex/regular/NFA.h>
 11 | 
 12 | #include <deque>
 13 | #include <iostream>
 14 | #include <map>
 15 | #include <sstream>
 16 | #include <vector>
 17 | 
// DEBUG(msg, ...): tracing helper for this file.
// Flip the `#if 0` below to `#if 1` to print fmt-formatted messages to cerr;
// in the default configuration the macro expands to an empty statement.
#if 0
    #define DEBUG(msg, ...)                                \
        do                                                 \
        {                                                  \
            cerr << fmt::format(msg, __VA_ARGS__) << "\n"; \
        } while (0)
#else
    #define DEBUG(msg, ...) \
        do                  \
        {                   \
        } while (0)
#endif
 30 | 
 31 | using namespace std;
 32 | 
 33 | namespace klex::regular
 34 | {
 35 | 
 36 | Alphabet DFA::alphabet() const
 37 | {
 38 |     Alphabet alphabet;
 39 |     for (const State& state: states_)
 40 |         for (const pair<Symbol, StateId>& t: state.transitions)
 41 |             alphabet.insert(t.first);
 42 | 
 43 |     return alphabet;
 44 | }
 45 | 
 46 | vector<StateId> DFA::acceptStates() const
 47 | {
 48 |     vector<StateId> states;
 49 |     states.reserve(acceptTags_.size());
 50 |     for_each(begin(acceptTags_), end(acceptTags_), [&](const pair<StateId, Tag>& s) {
 51 |         states.push_back(s.first);
 52 |     });
 53 |     return states;
 54 | }
 55 | 
 56 | // --------------------------------------------------------------------------
 57 | 
 58 | void DFA::createStates(size_t count)
 59 | {
 60 |     states_.resize(states_.size() + count);
 61 | }
 62 | 
// Declares state `s` as the initial state. No validation is performed on `s`.
void DFA::setInitialState(StateId s)
{
    // TODO: assert (s is having no predecessors)
    initialState_ = s;
}
 68 | 
 69 | void DFA::setTransition(StateId from, Symbol symbol, StateId to)
 70 | {
 71 |     // if (auto i = states_[from].transitions.find(symbol); i != states_[from].transitions.end())
 72 |     // 	fmt::print("overwriting transition! {} --({})--> {} (new: {})\n", from, prettySymbol(symbol),
 73 |     // 		   i->second, to);
 74 | 
 75 |     // XXX assert(s.transitions.find(symbol) == s.transitions.end());
 76 |     states_[from].transitions[symbol] = to;
 77 | }
 78 | 
 79 | void DFA::removeTransition(StateId from, Symbol symbol)
 80 | {
 81 |     State& s = states_[from];
 82 |     if (auto i = s.transitions.find(symbol); i != s.transitions.end())
 83 |         s.transitions.erase(i);
 84 | }
 85 | 
 86 | StateId DFA::append(DFA&& other, StateId q0)
 87 | {
 88 |     assert(other.initialState() == 0);
 89 | 
 90 |     other.prepareStateIds(states_.size(), q0);
 91 | 
 92 |     states_.reserve(size() + other.size() - 1);
 93 |     states_[q0] = other.states_[0];
 94 |     states_.insert(states_.end(), next(other.states_.begin()), other.states_.end());
 95 |     backtrackStates_.insert(other.backtrackStates_.begin(), other.backtrackStates_.end());
 96 |     acceptTags_.insert(other.acceptTags_.begin(), other.acceptTags_.end());
 97 | 
 98 |     return other.initialState();
 99 | }
100 | 
101 | void DFA::prepareStateIds(StateId baseId, StateId q0)
102 | {
103 |     // adjust transition state IDs
104 |     // traverse through each state's transition set
105 |     //    traverse through each transition in the transition set
106 |     //        traverse through each element and add BASE_ID
107 | 
108 |     auto transformId = [baseId, q0, this](StateId s) -> StateId {
109 |         // we subtract 1, because we already have a slot for q0 elsewhere (pre-allocated)
110 |         return s != initialState_ ? baseId + s - 1 : q0;
111 |     };
112 | 
113 |     // for each state's transitions
114 |     for (State& state: states_)
115 |         for (pair<const Symbol, StateId>& t: state.transitions)
116 |             t.second = transformId(t.second);
117 | 
118 |     AcceptMap remapped;
119 |     for (auto& a: acceptTags_)
120 |         remapped[transformId(a.first)] = a.second;
121 |     acceptTags_ = move(remapped);
122 | 
123 |     BacktrackingMap backtracking;
124 |     for (const auto& bt: backtrackStates_)
125 |         backtracking[transformId(bt.first)] = transformId(bt.second);
126 |     backtrackStates_ = move(backtracking);
127 | 
128 |     initialState_ = q0;
129 | }
130 | 
131 | void DFA::visit(DotVisitor& v) const
132 | {
133 |     v.start(initialState_);
134 | 
135 |     // STATE: initial
136 |     v.visitNode(initialState_, true, isAccepting(initialState_));
137 | 
138 |     // STATE: accepting
139 |     for (StateId s: acceptStates())
140 |         if (s != initialState_)
141 |             v.visitNode(s, false, true);
142 | 
143 |     // STATE: any other
144 |     for (StateId s = 0, sE = lastState(); s != sE; ++s)
145 |         if (s != initialState_ && !isAccepting(s))
146 |             v.visitNode(s, false, false);
147 | 
148 |     // TRANSITIONS
149 |     for (StateId s = 0, sE = size(); s != sE; ++s)
150 |     {
151 |         const TransitionMap& T = states_[s].transitions;
152 |         for_each(T.begin(), T.end(), [&](const auto& t) { v.visitEdge(s, t.second, t.first); });
153 |         for_each(T.begin(), T.end(), [&](const auto& t) { v.endVisitEdge(s, t.second); });
154 |     }
155 |     v.end();
156 | }
157 | 
158 | } // namespace klex::regular
159 | 


--------------------------------------------------------------------------------
/examples/mathexpr.cpp:
--------------------------------------------------------------------------------
  1 | // This file is part of the "klex" project, http://github.com/christianparpart/klex>
  2 | //   (c) 2018 Christian Parpart <christian@parpart.family>
  3 | //
  4 | // Licensed under the MIT License (the "License"); you may not use this
  5 | // file except in compliance with the License. You may obtain a copy of
  6 | // the License at: http://opensource.org/licenses/MIT
  7 | 
  8 | #include <klex/regular/Compiler.h>
  9 | #include <klex/regular/DFA.h>
 10 | #include <klex/regular/DotWriter.h>
 11 | #include <klex/regular/Lexable.h>
 12 | #include <klex/util/Flags.h>
 13 | 
 14 | #include <fmt/format.h>
 15 | 
 16 | #include <algorithm>
 17 | #include <iostream>
 18 | #include <memory>
 19 | #include <sstream>
 20 | #include <stdexcept>
 21 | #include <string_view>
 22 | 
// Token kinds of the math expression example.
// NOTE(review): the enumerator names and their order match the non-ignored
// rules in RULES; presumably the lexer maps rules to these values by
// position or name — confirm against Lexable.
enum class Token
{
    Eof = 1, // numbering starts at 1
    Plus,
    Minus,
    Mul,
    Div,
    RndOpen,
    RndClose,
    Number,
    INVALID
};
// Lexer rule definitions for the math expression example, one rule per line
// in the form `Name ::= pattern`; `Space` carries the `ignore` option.
std::string RULES = R"(
    Space(ignore) ::= [\s\t]+
    Eof           ::= <<EOF>>
    Plus          ::= "+"
    Minus         ::= "-"
    Mul           ::= "*"
    Div           ::= "/"
    RndOpen       ::= "("
    RndClose      ::= \)
    Number        ::= ([0-9]+|[0-9]{1,3}(_[0-9]{3})*)
    INVALID       ::= .
)";
 47 | 
// Lexable: the klex lexable specialised for this example's Token type.
using Lexable = klex::regular::Lexable<Token>;
// Lexer: the iterator of Lexable; the parser functions below read the current
// token from it and advance it with operator++.
using Lexer = Lexable::iterator;
// Number: value type used for all evaluated (sub-)expressions.
using Number = long long int;
 51 | 
 52 | std::string_view to_string(Token t)
 53 | {
 54 |     switch (t)
 55 |     {
 56 |         case Token::INVALID: return "<<INVALID>>";
 57 |         case Token::Eof: return "<<EOF>>";
 58 |         case Token::RndOpen: return "'('";
 59 |         case Token::RndClose: return "')'";
 60 |         case Token::Plus: return "'+'";
 61 |         case Token::Minus: return "'-'";
 62 |         case Token::Mul: return "'*'";
 63 |         case Token::Div: return "'/'";
 64 |         case Token::Number: return "<<NUMBER>>";
 65 |         default: abort();
 66 |     }
 67 | }
 68 | 
 69 | namespace fmt
 70 | {
 71 | template <>
 72 | struct formatter<Token>: formatter<std::string_view>
 73 | {
 74 |     template <typename FormatContext>
 75 |     auto format(Token v, FormatContext& ctx)
 76 |     {
 77 |         return formatter<std::string_view>::format(to_string(v), ctx);
 78 |     }
 79 | };
 80 | } // namespace fmt
 81 | 
// Forward declaration: primaryExpr() recurses into expr() for parenthesised sub-expressions.
Number expr(Lexer&);
 83 | 
 84 | void consume(Lexer& lexer, Token t)
 85 | {
 86 |     if (lexer.token() != t)
 87 |         throw std::runtime_error { fmt::format(
 88 |             "Unexpected token {}. Expected {} instead.", lexer.token(), t) };
 89 |     ++lexer;
 90 | }
 91 | 
 92 | auto primaryExpr(Lexer& lexer)
 93 | {
 94 |     switch (lexer.token())
 95 |     {
 96 |         case Token::Number: {
 97 |             std::string s;
 98 |             std::for_each(begin(literal(lexer)), end(literal(lexer)), [&](char ch) {
 99 |                 if (ch != '_')
100 |                     s += ch;
101 |             });
102 |             auto y = Number { std::stoi(s) };
103 |             ++lexer;
104 |             return y;
105 |         }
106 |         case Token::Minus: return -1 * primaryExpr(++lexer);
107 |         case Token::RndOpen: {
108 |             auto y = expr(++lexer);
109 |             consume(lexer, Token::RndClose);
110 |             return y;
111 |         }
112 |         default:
113 |             throw std::runtime_error { fmt::format(
114 |                 "Unexpected token {}. Expected primary expression instead.", lexer.token()) };
115 |     }
116 | }
117 | 
118 | auto mulExpr(Lexer& lexer)
119 | {
120 |     auto lhs = primaryExpr(lexer);
121 |     for (;;)
122 |     {
123 |         switch (lexer.token())
124 |         {
125 |             case Token::Mul: lhs = lhs * primaryExpr(++lexer); break;
126 |             case Token::Div: lhs = lhs / primaryExpr(++lexer); break;
127 |             default: return lhs;
128 |         }
129 |     }
130 | }
131 | 
132 | auto addExpr(Lexer& lexer)
133 | {
134 |     auto lhs = mulExpr(lexer);
135 |     for (;;)
136 |     {
137 |         switch (lexer.token())
138 |         {
139 |             case Token::Plus: lhs = lhs + mulExpr(++lexer); break;
140 |             case Token::Minus: lhs = lhs - mulExpr(++lexer); break;
141 |             default: return lhs;
142 |         }
143 |     }
144 | }
145 | 
146 | Number expr(Lexer& lexer)
147 | {
148 |     return addExpr(lexer);
149 | }
150 | 
151 | Number parseExpr(Lexable&& lexer)
152 | {
153 |     auto it = begin(lexer);
154 |     auto n = expr(it);
155 |     consume(it, Token::Eof);
156 |     return n;
157 | }
158 | 
159 | int main(int argc, const char* argv[])
160 | {
161 |     auto flags = klex::util::Flags {};
162 |     flags.defineBool("dfa", 'x', "Dumps DFA dotfile and exits.");
163 |     flags.enableParameters("EXPRESSION", "Mathematical expression to calculate");
164 |     flags.parse(argc, argv);
165 | 
166 |     auto cc = klex::regular::Compiler {};
167 |     cc.parse(std::make_unique<std::stringstream>(RULES));
168 | 
169 |     if (flags.getBool("dfa"))
170 |     {
171 |         auto writer = klex::regular::DotWriter { std::cout, "n" };
172 |         auto dfa = cc.compileMinimalDFA();
173 |         dfa.visit(writer);
174 |         return EXIT_SUCCESS;
175 |     }
176 | 
177 |     auto input = std::string { argc == 1 ? std::string("2+3*4") : flags.parameters()[0] };
178 |     auto ld = cc.compile();
179 | 
180 |     auto n = parseExpr(Lexable { ld, std::make_unique<std::stringstream>(input) });
181 |     std::cerr << fmt::format("{} = {}\n", input, n);
182 | 
183 |     return EXIT_SUCCESS;
184 | }
185 | 


--------------------------------------------------------------------------------
/src/klex/regular/Symbols.h:
--------------------------------------------------------------------------------
  1 | // This file is part of the "klex" project, http://github.com/christianparpart/klex>
  2 | //   (c) 2018 Christian Parpart <christian@parpart.family>
  3 | //
  4 | // Licensed under the MIT License (the "License"); you may not use this
  5 | // file except in compliance with the License. You may obtain a copy of
  6 | // the License at: http://opensource.org/licenses/MIT
  7 | #pragma once
  8 | 
  9 | #include <fmt/format.h>
 10 | 
 11 | #include <algorithm>
 12 | #include <cassert>
 13 | #include <list>
 14 | #include <memory>
 15 | #include <set>
 16 | #include <string>
 17 | #include <string_view>
 18 | #include <unordered_map>
 19 | #include <vector>
 20 | 
 21 | namespace klex::regular {
 22 | 
 23 | //! input symbol as used for transitions
 24 | using Symbol = int;
 25 | 
 26 | std::string prettySymbol(Symbol input);
 27 | std::string prettyCharRange(Symbol ymin, Symbol ymax);
 28 | std::string groupCharacterClassRanges(const std::vector<bool>& syms);
 29 | std::string groupCharacterClassRanges(std::vector<Symbol> syms);
 30 | 
 31 | // new way of wrapping up Symbols
 32 | struct Symbols {
 33 | 	constexpr static Symbol Epsilon = -1;
 34 | 	constexpr static Symbol Error = -2;
 35 | 	constexpr static Symbol BeginOfLine = -3;
 36 | 	constexpr static Symbol EndOfLine = -4;
 37 | 	constexpr static Symbol EndOfFile = -5;
 38 | 	constexpr static Symbol Character(char ch) { return Symbol(ch); }
 39 | 
 40 | 	constexpr static bool isSpecial(Symbol s)
 41 | 	{
 42 | 		switch (s)
 43 | 		{
 44 | 			case Symbols::EndOfFile:
 45 | 			case Symbols::EndOfLine:
 46 | 			case Symbols::BeginOfLine:
 47 | 			case Symbols::Epsilon:
 48 | 			case Symbols::Error:
 49 | 				return true;
 50 | 			default:
 51 | 				return false;
 52 | 		}
 53 | 	}
 54 | };
 55 | 
 56 | /**
 57 |  * Represents a set of symbols.
 58 |  */
 59 | class SymbolSet {
 60 |   public:
 61 | 	enum DotMode { Dot };
 62 | 
 63 | 	explicit SymbolSet(DotMode);
 64 | 	SymbolSet() : set_(256, false), size_{0}, hash_{2166136261} {}
 65 | 
 66 | 	explicit SymbolSet(std::initializer_list<Symbol> list) : SymbolSet()
 67 | 	{
 68 | 		std::for_each(list.begin(), list.end(), [this](Symbol s) { insert(s); });
 69 | 	}
 70 | 
 71 | 	bool empty() const noexcept { return size_ == 0; }
 72 | 	size_t size() const noexcept { return size_; }
 73 | 
 74 | 	//! Transforms into the complement set.
 75 | 	void complement();
 76 | 
 77 | 	//! Inserts given Symbol @p s into this set.
 78 | 	void insert(Symbol s)
 79 | 	{
 80 | 		if (!contains(s))
 81 | 		{
 82 | 			set_[s] = true;
 83 | 			hash_ = (hash_ * 16777619) ^ s;
 84 | 			size_++;
 85 | 		}
 86 | 	}
 87 | 
 88 | 	//! Inserts a range of Simples between [a, b].
 89 | 	void insert(const std::pair<Symbol, Symbol>& range)
 90 | 	{
 91 | 		for (Symbol s = range.first; s <= range.second; ++s)
 92 | 		{
 93 | 			insert(s);
 94 | 		}
 95 | 	}
 96 | 
 97 | 	//! @returns whether or not given Symbol @p s is in this set.
 98 | 	bool contains(Symbol s) const
 99 | 	{
100 | 		assert(s >= 0 && s <= 255 && "Only ASCII allowed.");
101 | 		return set_[(size_t) s];
102 | 	}
103 | 
104 | 	//! Tests whether or not this SymbolSet can be represented as dot (.), i.e. all but \n.
105 | 	bool isDot() const noexcept;
106 | 
107 | 	//! @returns a human readable representation of this set
108 | 	std::string to_string() const;
109 | 
110 | 	bool operator==(const SymbolSet& rhs) const noexcept { return hash_ == rhs.hash_ && set_ == rhs.set_; }
111 | 	bool operator!=(const SymbolSet& rhs) const noexcept { return !(*this == rhs); }
112 | 
113 | 	class const_iterator {  // {{{
114 | 	  public:
115 | 		const_iterator(std::vector<bool>::const_iterator beg, std::vector<bool>::const_iterator end, size_t n)
116 | 			: beg_{std::move(beg)}, end_{std::move(end)}, offset_{n}
117 | 		{
118 | 			while (beg_ != end_ && !*beg_)
119 | 			{
120 | 				++beg_;
121 | 				++offset_;
122 | 			}
123 | 		}
124 | 
125 | 		Symbol operator*() const { return static_cast<Symbol>(offset_); }
126 | 
127 | 		const_iterator& operator++(int)
128 | 		{
129 | 			do
130 | 			{
131 | 				++beg_;
132 | 				++offset_;
133 | 			} while (beg_ != end_ && !*beg_);
134 | 			return *this;
135 | 		}
136 | 
137 | 		const_iterator& operator++()
138 | 		{
139 | 			do
140 | 			{
141 | 				beg_++;
142 | 				offset_++;
143 | 			} while (beg_ != end_ && !*beg_);
144 | 			return *this;
145 | 		}
146 | 
147 | 		bool operator==(const const_iterator& rhs) const noexcept { return beg_ == rhs.beg_; }
148 | 		bool operator!=(const const_iterator& rhs) const noexcept { return beg_ != rhs.beg_; }
149 | 
150 | 	  private:
151 | 		std::vector<bool>::const_iterator beg_;
152 | 		std::vector<bool>::const_iterator end_;
153 | 		size_t offset_;
154 | 	};  // }}}
155 | 
156 | 	const_iterator begin() const { return const_iterator(set_.begin(), set_.end(), 0); }
157 | 	const_iterator end() const { return const_iterator(set_.end(), set_.end(), set_.size()); }
158 | 
159 | 	size_t hash() const noexcept { return hash_; }
160 | 
161 |   private:
162 | 	void recalculateHash();
163 | 
164 |   private:
165 | 	// XXX we chose vector<bool> as it is an optimized bit vector
166 | 	std::vector<bool> set_;
167 | 	size_t size_;
168 | 	size_t hash_;
169 | };
170 | 
171 | }  // namespace klex::regular
172 | 
173 | namespace fmt {
174 | template <>
175 | struct formatter<klex::regular::SymbolSet> {
176 | 	template <typename ParseContext>
177 | 	constexpr auto parse(ParseContext& ctx)
178 | 	{
179 | 		return ctx.begin();
180 | 	}
181 | 
182 | 	template <typename FormatContext>
183 | 	constexpr auto format(const klex::regular::SymbolSet& v, FormatContext& ctx)
184 | 	{
185 | 		return format_to(ctx.out(), "{}", v.to_string());
186 | 	}
187 | };
188 | }  // namespace fmt
189 | 
190 | namespace std {
191 | template <>
192 | struct hash<klex::regular::SymbolSet> {
193 | 	size_t operator()(const klex::regular::SymbolSet& set) const { return set.hash(); }
194 | };
195 | }  // namespace std
196 | 


--------------------------------------------------------------------------------
/src/klex/cfg/ll/Analyzer_test.cpp:
--------------------------------------------------------------------------------
  1 | // This file is part of the "klex" project, http://github.com/christianparpart/klex>
  2 | //	 (c) 2018 Christian Parpart <christian@parpart.family>
  3 | //
  4 | // Licensed under the MIT License (the "License"); you may not use this
  5 | // file except in compliance with the License. You may obtain a copy of
  6 | // the License at: http://opensource.org/licenses/MIT
  7 | 
  8 | #include <klex/Report.h>
  9 | #include <klex/cfg/Grammar.h>
 10 | #include <klex/cfg/GrammarLexer.h>
 11 | #include <klex/cfg/GrammarParser.h>
 12 | #include <klex/cfg/ll/Analyzer.h>
 13 | #include <klex/cfg/ll/SyntaxTable.h>
 14 | #include <klex/regular/Compiler.h>
 15 | #include <klex/regular/Lexer.h>
 16 | #include <klex/util/literals.h>
 17 | #include <klex/util/testing.h>
 18 | 
 19 | #include <variant>
 20 | 
 21 | using namespace std;
 22 | using namespace klex;
 23 | using namespace klex::cfg;
 24 | using namespace klex::cfg::ll;
 25 | using namespace klex::util::literals;
 26 | 
// Grammar text for balanced parentheses. Not referenced by any test in this file.
const string balancedParentheses = "A ::= '(' A ')' | '(' ')'";
 28 | 
// Parses the Expr/Term/Factor grammar, builds its syntax table and checks that
// analyzing "2 + 3" (without an action handler) reports no failures and yields a value.
TEST(cfg_ll_Analyzer, ETF)
{
    ConsoleReport report;
    Grammar grammar = GrammarParser(R"(`token {
		   `  Spacing(ignore) ::= [\s\t\n]+
		   `  Number          ::= [0-9]+
		   `}
		   `Start     ::= Expr;
		   `Expr      ::= Term Expr_;
		   `Expr_     ::= '+' Term Expr_
		   `            | ;
		   `Term      ::= Factor Term_;
		   `Term_     ::= '*' Factor Term_
		   `            | ;
		   `Factor    ::= Number
		   `            | '(' Expr ')'
		   `            ;
		   `)"_multiline,
                                    &report)
                          .parse();

    // The grammar text itself must parse cleanly before it is used.
    ASSERT_FALSE(report.containsFailures());
    grammar.finalize();
    log("GRAMMAR:");
    log(grammar.dump());

    SyntaxTable st = SyntaxTable::construct(grammar);

    log("SYNTAX TABLE:");
    log(st.dump(grammar));

    Analyzer<int> parser(st, &report, "2 + 3");

    const optional<int> result = parser.analyze();

    ASSERT_FALSE(report.containsFailures());
    ASSERT_TRUE(result.has_value());
}
 67 | 
 68 | TEST(cfg_ll_Analyzer, action1)
 69 | {
 70 |     BufferedReport report;
 71 |     Grammar grammar = GrammarParser(R"(`
 72 | 			   `token {
 73 | 			   `  Spacing(ignore) ::= [\s\t\n]+
 74 | 			   `  Number          ::= [0-9]+
 75 | 			   `}
 76 | 			   `Start     ::= F '+' F    {add};
 77 | 			   `F         ::= Number     {num};
 78 | 			   `)"_multiline,
 79 |                                     &report)
 80 |                           .parse();
 81 |     ASSERT_FALSE(report.containsFailures());
 82 |     grammar.finalize();
 83 | 
 84 |     log("GRAMMAR:");
 85 |     log(grammar.dump());
 86 | 
 87 |     SyntaxTable st = SyntaxTable::construct(grammar);
 88 | 
 89 |     log("SYNTAX TABLE:");
 90 |     log(st.dump(grammar));
 91 | 
 92 |     deque<vector<int>> valueStack;
 93 |     valueStack.emplace_back(vector<int>());
 94 |     const auto actionHandler = [&](int id, const Analyzer<int>& analyzer) -> int {
 95 |         log(fmt::format("-> run action({}): {}", id, analyzer.actionName(id)));
 96 |         if (analyzer.actionName(id) == "add")
 97 |             // S = F '+' F <<EOF>> {add}
 98 |             return analyzer.semanticValue(-2) + analyzer.semanticValue(-4);
 99 |         else if (analyzer.actionName(id) == "num")
100 |             return stoi(analyzer.lastLiteral()); // return valueStack[-1]
101 |         else
102 |         {
103 |             log("!!! UNKNOWN ACTION !!!");
104 |             return -1;
105 |         }
106 |     };
107 | 
108 |     Analyzer<int> parser(st, &report, "2 + 3", actionHandler);
109 |     optional<int> result = parser.analyze();
110 | 
111 |     ASSERT_TRUE(result.has_value());
112 |     ASSERT_EQ(5, *result);
113 | }
114 | 
115 | TEST(cfg_ll_Analyzer, ETF_with_actions)
116 | {
117 |     ConsoleReport report;
118 |     Grammar grammar = GrammarParser(
119 |                           R"(`token {
120 | 		   `  Spacing(ignore) ::= [\s\t\n]+
121 | 		   `  Number          ::= [0-9]+
122 | 		   `}
123 | 		   `Start     ::= Expr;
124 | 		   `Expr      ::= Term Expr_
125 | 		   `            ;
126 | 		   `Expr_     ::= '+' Term Expr_    {add}
127 | 		   `            |
128 | 		   `            ;
129 | 		   `Term      ::= Factor Term_
130 | 		   `            ;
131 | 		   `Term_     ::= '*' Factor Term_  {mul}
132 | 		   `            |
133 | 		   `            ;
134 | 		   `Factor    ::= Number            {num}
135 | 		   `            | '(' Expr ')'
136 | 		   `            ;
137 | 		   `)"_multiline,
138 |                           &report)
139 |                           .parse();
140 | 
141 |     ASSERT_FALSE(report.containsFailures());
142 |     grammar.finalize();
143 |     log("GRAMMAR:");
144 |     log(grammar.dump());
145 | 
146 |     SyntaxTable st = SyntaxTable::construct(grammar);
147 |     log("SYNTAX TABLE:");
148 |     log(st.dump(grammar));
149 | 
150 |     stack<int> stack;
151 |     const map<int, function<int(const Analyzer<int>&)>> actionMap {
152 |         { st.actionId("num"),
153 |           [&](const Analyzer<int>& analyzer) -> int {
154 |               return stoi(analyzer.lastLiteral());
155 |           } },
156 |         { st.actionId("add"),
157 |           [&](const Analyzer<int>& analyzer) -> int {
158 |               return analyzer.semanticValue(-2) + analyzer.semanticValue(-4);
159 |           } },
160 |         { st.actionId("mul"),
161 |           [&](const Analyzer<int>& analyzer) -> int {
162 |               return analyzer.semanticValue(-2) * analyzer.semanticValue(-4);
163 |           } },
164 |     };
165 | 
166 |     const auto actionHandler = [&](int id, const Analyzer<int>& analyzer) -> int {
167 |         if (const auto x = actionMap.find(id); x != actionMap.end())
168 |         {
169 |             log(fmt::format("-> run action({}): {}", id, analyzer.actionName(id)));
170 |             return x->second(analyzer);
171 |         }
172 |         assert(!"woot");
173 |         return 0;
174 |     };
175 | 
176 |     ASSERT_FALSE(report.containsFailures());
177 |     Analyzer<int> parser(st, &report, "2 + 3 * 4", actionHandler);
178 |     optional<int> result = parser.analyze();
179 | 
180 |     EXPECT_FALSE(report.containsFailures());
181 |     ASSERT_TRUE(result.has_value());
182 |     // TODO EXPECT_EQ(14, *result);
183 | }
184 | 
185 | // vim:ts=4:sw=4:noet
186 | 


--------------------------------------------------------------------------------
/src/klex/util/Flags.h:
--------------------------------------------------------------------------------
  1 | // This file is part of the "x0" project, // http://github.com/christianparpart/x0>
  2 | //   (c) 2009-2018 Christian Parpart <christian@parpart.family>
  3 | //
  4 | // Licensed under the MIT License (the "License"); you may not use this
  5 | // file except in compliance with the License. You may obtain a copy of
  6 | // the License at: http://opensource.org/licenses/MIT
  7 | #pragma once
  8 | 
#include <cstddef>
#include <functional>
#include <list>
#include <optional>
#include <stdexcept>
#include <string>
#include <string_view>
#include <system_error>
#include <unordered_map>
#include <utility>
#include <vector>
 18 | 
 19 | namespace klex::util {
 20 | 
// Command line flag definition and parsing.
//
// Flags are declared with the define*() functions, positional (non-option)
// parameters are enabled with enableParameters(), and a command line is then
// processed with parse() or tryParse(). Values are read back with the get*()
// accessors and parameters().
class Flags {
  public:
    // Kind of value a flag carries.
    enum class FlagType {
        String,
        Number,
        Float,
        Bool,
    };

    // FlagPassingStyle
    enum FlagStyle { ShortSwitch, LongSwitch, ShortWithValue, LongWithValue, UnnamedParameter };

    // Error conditions; usable as std::error_code via make_error_code().
    enum class ErrorCode {
        TypeMismatch,
        UnknownOption,
        MissingOption,
        MissingOptionValue,
        NotFound,
    };

    // Exception type carrying an ErrorCode and the argument it refers to.
    class Error : public std::runtime_error {
      public:
        Error(ErrorCode code, std::string arg);

        ErrorCode code() const noexcept { return code_; }
        const std::string& arg() const noexcept { return arg_; }

      private:
        ErrorCode code_;
        std::string arg_;
    };

    struct FlagDef;
    class Flag;

    Flags();

    // Typed accessors for the value of the flag with the given name.
    std::string getString(const std::string& flag) const;
    std::string asString(const std::string& flag) const;
    long int getNumber(const std::string& flag) const;
    float getFloat(const std::string& flag) const;
    bool getBool(const std::string& flag) const;

    // Positional (non-option) parameters.
    const std::vector<std::string>& parameters() const;
    void setParameters(const std::vector<std::string>& v);

    // Number of flags currently set.
    size_t size() const { return set_.size(); }

    std::string to_s() const;

    void set(const Flag& flag);
    void set(const std::string& opt, const std::string& val, FlagStyle fs, FlagType ft);
    bool isSet(const std::string& flag) const;

    // Flag definitions. Each takes the long option name, the short option
    // character and a help text; value-carrying flags additionally take a
    // placeholder for the value, an optional default and an optional callback.
    // All return *this by reference.
    Flags& defineString(const std::string& longOpt, char shortOpt, const std::string& valuePlaceholder,
                        const std::string& helpText, std::optional<std::string> defaultValue = std::nullopt,
                        std::function<void(const std::string&)> callback = nullptr);

    Flags& defineNumber(const std::string& longOpt, char shortOpt, const std::string& valuePlaceholder,
                        const std::string& helpText, std::optional<long int> defaultValue = std::nullopt,
                        std::function<void(long int)> callback = nullptr);

    Flags& defineFloat(const std::string& longOpt, char shortOpt, const std::string& valuePlaceholder,
                       const std::string& helpText, std::optional<float> defaultValue = std::nullopt,
                       std::function<void(float)> callback = nullptr);

    Flags& defineBool(const std::string& longOpt, char shortOpt, const std::string& helpText,
                      std::function<void(bool)> callback = nullptr);

    // Enables positional parameters and sets the placeholder and help text for them.
    Flags& enableParameters(const std::string& valuePlaceholder, const std::string& helpText);

    // Help text for the defined flags; the short form uses width 78 and help text offset 30.
    std::string helpText(std::string_view const& header = "") const { return helpText(header, 78, 30); }
    std::string helpText(std::string_view const& header, size_t width, size_t helpTextOffset) const;

    // Look up a flag definition by long or short option.
    const FlagDef* findDef(const std::string& longOption) const;
    const FlagDef* findDef(char shortOption) const;

    void parse(int argc, const char* argv[]);
    void parse(const std::vector<std::string>& args);

    // Attempts to parse given arguments and returns an error code in case of parsing errors instead
    // of throwing.
    std::error_code tryParse(const std::vector<std::string>& args);

  private:
    // Common implementation behind the typed define*() functions.
    Flags& define(const std::string& longOpt, char shortOpt, bool required, FlagType type,
                  const std::string& helpText, const std::string& valuePlaceholder,
                  const std::optional<std::string>& defaultValue,
                  std::function<void(const std::string&)> callback);

  private:
    std::list<FlagDef> flagDefs_;
    bool parametersEnabled_;  // non-option parameters enabled?
    std::string parametersPlaceholder_;
    std::string parametersHelpText_;

    // Flags that have been set, keyed by name; each entry holds the flag's type and its value as text.
    typedef std::pair<FlagType, std::string> FlagValue;
    std::unordered_map<std::string, FlagValue> set_;
    std::vector<std::string> raw_;
};
121 | 
// Definition of a single flag: its type, long/short spelling, whether it is
// required, the texts used for help output, an optional default value and an
// optional callback taking the value as a string.
struct Flags::FlagDef {
    FlagType type;
    std::string longOption;
    char shortOption;
    bool required;
    std::string valuePlaceholder;
    std::string helpText;
    std::optional<std::string> defaultValue;
    std::function<void(const std::string&)> callback;

    // Renders the help text for this flag, given the total width and help text offset.
    std::string makeHelpText(size_t width, size_t helpTextOffset) const;
};
134 | 
// A single flag occurrence: its name and value together with the flag's type
// and the style in which it was passed.
class Flags::Flag {
  public:
    Flag(const std::string& opt, const std::string& val, FlagStyle fs, FlagType ft);

    // Convenience constructors for short/long options, with or without a value.
    explicit Flag(char shortOpt);
    Flag(char shortOpt, const std::string& val);
    Flag(const std::string& longOpt);
    Flag(const std::string& longOpt, const std::string& val);

    FlagType type() const { return type_; }
    const std::string& name() const { return name_; }
    const std::string& value() const { return value_; }

  private:
    FlagType type_;
    FlagStyle style_;
    std::string name_;
    std::string value_;
};
154 | 
// std::error_category for Flags errors, accessed through get().
class FlagsErrorCategory : public std::error_category {
  public:
    static FlagsErrorCategory& get();

    const char* name() const noexcept override;
    std::string message(int ec) const override;
};
162 | 
// Converts a Flags::ErrorCode into a std::error_code.
std::error_code make_error_code(Flags::ErrorCode errc);
164 | 
165 | }  // namespace klex::util
166 | 
namespace std {
// Marks Flags::ErrorCode as an error code enumeration so that it converts
// implicitly to std::error_code.
template <>
struct is_error_code_enum<klex::util::Flags::ErrorCode> : public std::true_type {
};
}  // namespace std
172 | 


--------------------------------------------------------------------------------