├── test
│   ├── test.cpp
│   ├── test.hpp
│   ├── whitespace_token.cpp
│   ├── tokenize.hpp
│   ├── tokenizer.cpp
│   ├── literal_token.cpp
│   ├── CMakeLists.txt
│   ├── detail
│   │   ├── string.cpp
│   │   └── trie.cpp
│   ├── identifier_token.cpp
│   ├── ascii.cpp
│   ├── token_regex.cpp
│   └── production_rule_token.cpp
├── .gitmodules
├── foonathan_lex-config.cmake
├── example
│   └── CMakeLists.txt
├── benchmark
│   ├── bm_baseline.hpp
│   ├── CMakeLists.txt
│   ├── bm_tokenizer.hpp
│   ├── bm_manual.hpp
│   ├── README.md
│   ├── bm_trie.hpp
│   ├── bm_manual_opt.hpp
│   ├── bm_tokenizer_manual.hpp
│   └── benchmark.cpp
├── include
│   └── foonathan
│       └── lex
│           ├── whitespace_token.hpp
│           ├── grammar.hpp
│           ├── token_spec.hpp
│           ├── identifier_token.hpp
│           ├── detail
│           │   ├── assert.hpp
│           │   ├── select_integer.hpp
│           │   ├── production_rule_base.hpp
│           │   ├── string.hpp
│           │   └── production_rule_postprocess.hpp
│           ├── literal_token.hpp
│           ├── parser.hpp
│           ├── match_result.hpp
│           ├── spelling.hpp
│           ├── production_kind.hpp
│           ├── ascii.hpp
│           ├── token_kind.hpp
│           ├── token.hpp
│           ├── parse_error.hpp
│           ├── tokenizer.hpp
│           └── rule_production.hpp
├── doc
│   ├── spec_whitespace_token.md
│   ├── spec_token_spec.md
│   ├── doc.md
│   ├── spec_spelling.md
│   ├── spec_literal_token.md
│   ├── spec_match_result.md
│   ├── spec_ascii.md
│   ├── spec_identifier_token.md
│   ├── spec_token_kind.md
│   ├── spec_tokenizer.md
│   ├── spec_token.md
│   └── spec_rule_token.md
├── LICENSE.md
├── azure-pipelines.yml
├── external
│   └── external.cmake
├── .clang-format
├── CMakeLists.txt
└── README.md

--------------------------------------------------------------------------------
/test/test.cpp:
--------------------------------------------------------------------------------
1 | // Copyright (C) 2018-2019 Jonathan Müller
2 | // This file is subject to the license terms in the LICENSE file
3 | // found in the top-level directory of this distribution.
4 | 
5 | #define DOCTEST_CONFIG_IMPLEMENT_WITH_MAIN
6 | #include <doctest.h>
7 | 
--------------------------------------------------------------------------------
/.gitmodules:
--------------------------------------------------------------------------------
1 | [submodule "external/benchmark"]
2 |     path = external/benchmark
3 |     url = https://github.com/google/benchmark.git
4 | [submodule "external/debug_assert"]
5 |     path = external/debug_assert
6 |     url = https://github.com/foonathan/debug_assert
7 | [submodule "external/mp11"]
8 |     path = external/mp11
9 |     url = https://github.com/boostorg/mp11
10 | 
--------------------------------------------------------------------------------
/foonathan_lex-config.cmake:
--------------------------------------------------------------------------------
1 | # Copyright (C) 2018 Jonathan Müller
2 | # This file is subject to the license terms in the LICENSE file
3 | # found in the top-level directory of this distribution.
4 | 
5 | include(CMakeFindDependencyMacro)
6 | find_dependency(debug_assert)
7 | include("${CMAKE_CURRENT_LIST_DIR}/foonathan_lex-targets.cmake")
--------------------------------------------------------------------------------
/example/CMakeLists.txt:
--------------------------------------------------------------------------------
1 | # Copyright (C) 2018-2019 Jonathan Müller
2 | # This file is subject to the license terms in the LICENSE file
3 | # found in the top-level directory of this distribution.
4 | 
5 | add_executable(foonathan_lex_ctokenizer ctokenizer.cpp)
6 | target_link_libraries(foonathan_lex_ctokenizer PUBLIC foonathan_lex)
7 | 
8 | add_executable(foonathan_lex_calculator calculator.cpp)
9 | target_link_libraries(foonathan_lex_calculator PUBLIC foonathan_lex)
10 | 
--------------------------------------------------------------------------------
/test/test.hpp:
--------------------------------------------------------------------------------
1 | // Copyright (C) 2018-2019 Jonathan Müller
2 | // This file is subject to the license terms in the LICENSE file
3 | // found in the top-level directory of this distribution.
4 | 
5 | #ifndef FOONATHAN_LEX_TEST_HPP_INCLUDED
6 | #define FOONATHAN_LEX_TEST_HPP_INCLUDED
7 | 
8 | #if defined(__GNUC__) && !defined(__clang__) && __GNUC__ < 8
9 | #    define FOONATHAN_LEX_TEST_CONSTEXPR
10 | #elif defined(_MSC_VER)
11 | #    define FOONATHAN_LEX_TEST_CONSTEXPR
12 | #else
13 | #    define FOONATHAN_LEX_TEST_CONSTEXPR constexpr
14 | #endif
15 | 
16 | #endif // FOONATHAN_LEX_TEST_HPP_INCLUDED
17 | 
--------------------------------------------------------------------------------
/benchmark/bm_baseline.hpp:
--------------------------------------------------------------------------------
1 | // Copyright (C) 2018-2019 Jonathan Müller
2 | // This file is subject to the license terms in the LICENSE file
3 | // found in the top-level directory of this distribution.
4 | 
5 | #ifndef FOONATHAN_LEX_BM_BASELINE_HPP_INCLUDED
6 | #define FOONATHAN_LEX_BM_BASELINE_HPP_INCLUDED
7 | 
8 | #include <foonathan/lex/spelling.hpp>
9 | 
10 | void baseline(const char* str, const char* end, void (*f)(int, foonathan::lex::token_spelling))
11 | {
12 |     namespace lex = foonathan::lex;
13 | 
14 |     while (str != end)
15 |     {
16 |         f(0, lex::token_spelling(str, 1));
17 |         ++str;
18 |     }
19 | }
20 | 
21 | #endif // FOONATHAN_LEX_BM_BASELINE_HPP_INCLUDED
22 | 
--------------------------------------------------------------------------------
/include/foonathan/lex/whitespace_token.hpp:
--------------------------------------------------------------------------------
1 | // Copyright (C) 2018-2019 Jonathan Müller
2 | // This file is subject to the license terms in the LICENSE file
3 | // found in the top-level directory of this distribution.
4 | 
5 | #ifndef FOONATHAN_LEX_WHITESPACE_TOKEN_HPP_INCLUDED
6 | #define FOONATHAN_LEX_WHITESPACE_TOKEN_HPP_INCLUDED
7 | 
8 | #include <type_traits>
9 | 
10 | namespace foonathan
11 | {
12 | namespace lex
13 | {
14 |     struct whitespace_token
15 |     {};
16 | 
17 |     template <class Token>
18 |     struct is_whitespace_token : std::is_base_of<whitespace_token, Token>
19 |     {};
20 | } // namespace lex
21 | } // namespace foonathan
22 | 
23 | #endif // FOONATHAN_LEX_WHITESPACE_TOKEN_HPP_INCLUDED
--------------------------------------------------------------------------------
/doc/spec_whitespace_token.md:
--------------------------------------------------------------------------------
1 | # Header File `lex/whitespace_token.hpp`
2 | 
3 | The file `whitespace_token.hpp` defines the whitespace token.
4 | 
5 | ```cpp
6 | namespace lex
7 | {
8 |     struct whitespace_token {};
9 | 
10 |     // traits
11 |     template <class Token>
12 |     struct is_whitespace_token;
13 | }
14 | ```
15 | 
16 | A token that inherits from `lex::whitespace_token` in addition to one of the other base classes is a whitespace token.
17 | When tokenizing, whitespace tokens are automatically skipped and never created.
18 | 
19 | ## Traits
20 | 
21 | The traits all derive from either `std::true_type` or `std::false_type`,
22 | depending on the result of the condition.
23 | 
24 | `is_whitespace_token<Token>`: whether or not `Token` is a whitespace token.
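
For a concrete picture, this is how a token opts into being whitespace — the pattern mirrors `test/whitespace_token.cpp`; the name `space` is made up for the example:

```cpp
// A space character the tokenizer should skip: derive from a normal token
// base class *and* from lex::whitespace_token.
struct space : lex::literal_token<' '>, lex::whitespace_token
{};

static_assert(lex::is_whitespace_token<space>::value, "skipped while tokenizing");
```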
--------------------------------------------------------------------------------
/doc/spec_token_spec.md:
--------------------------------------------------------------------------------
1 | # Header File `lex/token_spec.hpp`
2 | 
3 | The file `token_spec.hpp` contains the token specification and base tokens.
4 | 
5 | ```cpp
6 | namespace lex
7 | {
8 |     // token specification
9 |     template <class... Tokens>
10 |     struct token_spec {};
11 | 
12 |     struct error_token {};
13 |     struct eof_token {};
14 | 
15 |     // traits
16 |     template <class T>
17 |     struct is_token;
18 | }
19 | ```
20 | 
21 | ## Token Specification
22 | 
23 | The tokens in a grammar are specified by creating a class that inherits from `lex::token_spec`, passing it all the token types.
24 | 
25 | More details can be found in the [tutorial]().
26 | 
27 | The special token types `lex::error_token`, representing an invalid character (sequence), and `lex::eof_token`, representing the end of the input, are always included.
28 | 
29 | ## Traits
30 | 
31 | The traits all derive from either `std::true_type` or `std::false_type`,
32 | depending on the result of the condition.
33 | 
34 | `is_token<T>`: whether or not `T` is a token type.
--------------------------------------------------------------------------------
/doc/doc.md:
--------------------------------------------------------------------------------
1 | This is the documentation for foonathan/lex,
2 | consisting of a tutorial and detailed specification.
3 | 
4 | * [Tutorial: Token Specification and Tokenization](tut_tokenizer.md)
5 | * Header Reference
6 |   * Token Specification
7 |     * [`lex/identifier_token.hpp`](spec_identifier_token.md)
8 |     * [`lex/literal_token.hpp`](spec_literal_token.md)
9 |     * [`lex/rule_token.hpp`](spec_rule_token.md)
10 |     * [`lex/token_spec.hpp`](spec_token_spec.md)
11 |     * [`lex/whitespace_token.hpp`](spec_whitespace_token.md)
12 |   * Tokenization
13 |     * [`lex/ascii.hpp`](spec_ascii.md)
14 |     * [`lex/match_result.hpp`](spec_match_result.md)
15 |     * [`lex/spelling.hpp`](spec_spelling.md)
16 |     * [`lex/token.hpp`](spec_token.md)
17 |     * [`lex/token_kind.hpp`](spec_token_kind.md)
18 |     * [`lex/tokenizer.hpp`](spec_tokenizer.md)
19 | 
20 | 
21 | Code assumes the namespace alias `lex` instead of `foonathan::lex` and the include path `lex/foo.hpp` instead of `foonathan/lex/foo.hpp` for simplicity.
--------------------------------------------------------------------------------
/doc/spec_spelling.md:
--------------------------------------------------------------------------------
1 | # Header File `lex/spelling.hpp`
2 | 
3 | The file `spelling.hpp` contains the class `lex::token_spelling`, which is the (string) spelling of a token.
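
A rough usage sketch before the synopsis (the values are invented):

```cpp
lex::token_spelling spelling("abc", 3); // pointer + length pair
bool same      = (spelling == "abc");   // comparison is by character contents
bool different = (spelling != "abd");   // not by pointer identity
```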
4 | 
5 | ```cpp
6 | class token_spelling
7 | {
8 | public:
9 |     explicit token_spelling(const char* ptr, std::size_t size);
10 | 
11 |     // access
12 |     char operator[](std::size_t i) const noexcept;
13 | 
14 |     const char* begin() const noexcept;
15 |     const char* end() const noexcept;
16 | 
17 |     const char* data() const noexcept;
18 |     std::size_t size() const noexcept;
19 | };
20 | 
21 | // comparison
22 | bool operator==(token_spelling lhs, token_spelling rhs);
23 | bool operator!=(token_spelling lhs, token_spelling rhs);
24 | 
25 | bool operator==(token_spelling lhs, const char* rhs);
26 | bool operator==(const char* lhs, token_spelling rhs);
27 | bool operator!=(token_spelling lhs, const char* rhs);
28 | bool operator!=(const char* lhs, token_spelling rhs);
29 | ```
30 | 
31 | It is a simple, fully `constexpr` and `noexcept` replacement for `std::string_view`.
32 | 
--------------------------------------------------------------------------------
/include/foonathan/lex/grammar.hpp:
--------------------------------------------------------------------------------
1 | // Copyright (C) 2018-2019 Jonathan Müller
2 | // This file is subject to the license terms in the LICENSE file
3 | // found in the top-level directory of this distribution.
4 | 
5 | #ifndef FOONATHAN_LEX_GRAMMAR_HPP_INCLUDED
6 | #define FOONATHAN_LEX_GRAMMAR_HPP_INCLUDED
7 | 
8 | #include <foonathan/lex/token_spec.hpp>
9 | 
10 | namespace foonathan
11 | {
12 | namespace lex
13 | {
14 |     /// A grammar that is parsed.
15 |     template <class TokenSpec, class StartProduction, class... OtherProductions>
16 |     struct grammar
17 |     {
18 |         using token_spec  = TokenSpec;
19 |         using start       = StartProduction;
20 |         using productions = boost::mp11::mp_list<StartProduction, OtherProductions...>;
21 |     };
22 | 
23 |     namespace detail
24 |     {
25 |         struct base_production : production_rule::production_adl
26 |         {};
27 |     } // namespace detail
28 | 
29 |     /// Whether or not the given type is a production.
30 |     template <class T>
31 |     struct is_production : std::is_base_of<detail::base_production, T>
32 |     {};
33 | } // namespace lex
34 | } // namespace foonathan
35 | 
36 | #endif // FOONATHAN_LEX_GRAMMAR_HPP_INCLUDED
--------------------------------------------------------------------------------
/benchmark/CMakeLists.txt:
--------------------------------------------------------------------------------
1 | # Copyright (C) 2018-2019 Jonathan Müller
2 | # This file is subject to the license terms in the LICENSE file
3 | # found in the top-level directory of this distribution.
4 | 
5 | message(STATUS "Installing google/benchmark via submodule")
6 | execute_process(COMMAND git submodule update --init -- ../external/benchmark
7 |                 WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR})
8 | set(BENCHMARK_ENABLE_TESTING OFF CACHE BOOL "")
9 | set(BENCHMARK_ENABLE_INSTALL OFF CACHE BOOL "")
10 | add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/../external/benchmark ${CMAKE_CURRENT_BINARY_DIR}/../external/benchmark
11 |                  EXCLUDE_FROM_ALL)
12 | 
13 | add_executable(foonathan_lex_benchmark
14 |                benchmark.cpp
15 |                bm_baseline.hpp
16 |                bm_manual.hpp
17 |                bm_manual_opt.hpp
18 |                bm_tokenizer.hpp
19 |                bm_tokenizer_manual.hpp
20 |                bm_trie.hpp)
21 | target_link_libraries(foonathan_lex_benchmark PUBLIC foonathan_lex benchmark)
22 | target_compile_definitions(foonathan_lex_benchmark PUBLIC
23 |                            FOONATHAN_LEX_ENABLE_ASSERTIONS=0
24 |                            FOONATHAN_LEX_ENABLE_PRECONDITIONS=0)
25 | 
--------------------------------------------------------------------------------
/LICENSE.md:
--------------------------------------------------------------------------------
1 | Boost Software License - Version 1.0 - August 17th, 2003
2 | 
3 | Permission is hereby granted, free of charge, to any person or organization
4 | obtaining a copy of the software and accompanying documentation covered by
5 | this license (the "Software") to use, reproduce, display, distribute,
6 | execute, and transmit the Software, and to prepare derivative works of the
7 | Software, and to permit third-parties to whom the Software is furnished to
8 | do so, all subject to the following:
9 | 
10 | The copyright notices in the Software and this entire statement, including
11 | the above license grant, this restriction and the following disclaimer,
12 | must be included in all copies of the Software, in whole or in part, and
13 | all derivative works of the Software, unless such copies or derivative
14 | works are solely in the form of machine-executable object code generated by
15 | a source language processor.
16 | 
17 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 | FITNESS FOR A PARTICULAR PURPOSE, TITLE AND NON-INFRINGEMENT. IN NO EVENT
20 | SHALL THE COPYRIGHT HOLDERS OR ANYONE DISTRIBUTING THE SOFTWARE BE LIABLE
21 | FOR ANY DAMAGES OR OTHER LIABILITY, WHETHER IN CONTRACT, TORT OR OTHERWISE,
22 | ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
23 | DEALINGS IN THE SOFTWARE.
24 | 
--------------------------------------------------------------------------------
/doc/spec_literal_token.md:
--------------------------------------------------------------------------------
1 | # Header File `lex/literal_token.hpp`
2 | 
3 | The file `literal_token.hpp` provides the literal token.
4 | 
5 | ```cpp
6 | namespace lex
7 | {
8 |     // token specification
9 |     template <char... Literal>
10 |     struct literal_token;
11 | 
12 |     #define FOONATHAN_LEX_LITERAL(String)
13 | 
14 |     // traits
15 |     template <class Token>
16 |     struct is_literal_token;
17 | }
18 | ```
19 | 
20 | ## Token Specification
21 | 
22 | ```cpp
23 | template <char... Literal>
24 | struct literal_token
25 | {
26 |     static constexpr char name[] = { Literal..., '\0' };
27 | };
28 | ```
29 | 
30 | A class derived from `lex::literal_token` is a literal token.
31 | 
32 | It creates a token that matches the specified character sequence.
33 | If there are multiple literal tokens sharing a common prefix, the longest literal token is selected.
34 | If a [rule token](spec_rule_token.md#token-specification) and a literal token would both match at the current input,
35 | the literal token will be created unless the rule token has specified it as conflicting.
36 | 
37 | ```cpp
38 | #define FOONATHAN_LEX_LITERAL(String)
39 | ```
40 | 
41 | The macro `FOONATHAN_LEX_LITERAL(String)` is equivalent to `lex::literal_token` instantiated with the characters of `String`.
42 | All null characters are ignored.
43 | 
44 | ## Traits
45 | 
46 | The traits all derive from either `std::true_type` or `std::false_type`,
47 | depending on the result of the condition.
48 | 
49 | `is_literal_token<Token>`: whether or not `Token` is a literal token.
50 | 
51 | 
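For a concrete picture, the two usual ways of declaring a literal token — the pattern mirrors the tests in `test/`; `plus` and `arrow` are made-up names:

```cpp
// Spell the characters out directly...
struct plus : lex::literal_token<'+'>
{};

// ...or let the macro expand a string literal into the character list.
struct arrow : FOONATHAN_LEX_LITERAL("->")
{};
```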
--------------------------------------------------------------------------------
/include/foonathan/lex/token_spec.hpp:
--------------------------------------------------------------------------------
1 | // Copyright (C) 2018-2019 Jonathan Müller
2 | // This file is subject to the license terms in the LICENSE file
3 | // found in the top-level directory of this distribution.
4 | 
5 | #ifndef FOONATHAN_LEX_TOKEN_SPEC_HPP_INCLUDED
6 | #define FOONATHAN_LEX_TOKEN_SPEC_HPP_INCLUDED
7 | 
8 | #include <boost/mp11/list.hpp>
9 | 
10 | namespace foonathan
11 | {
12 | namespace lex
13 | {
14 |     namespace production_rule
15 |     {
16 |         struct production_adl
17 |         {};
18 |     } // namespace production_rule
19 |     namespace token_regex
20 |     {
21 |         struct regex_adl
22 |         {};
23 |     } // namespace token_regex
24 | 
25 |     namespace detail
26 |     {
27 |         struct base_token : production_rule::production_adl, token_regex::regex_adl
28 |         {};
29 |     } // namespace detail
30 | 
31 |     template <class T>
32 |     struct is_token : std::is_base_of<detail::base_token, T>
33 |     {};
34 | 
35 |     struct error_token : detail::base_token
36 |     {
37 |         static constexpr const char* name = "<error>";
38 |     };
39 | 
40 |     struct eof_token : detail::base_token
41 |     {
42 |         static constexpr const char* name = "<eof>";
43 |     };
44 | 
45 |     template <class... Tokens>
46 |     struct token_spec
47 |     {
48 |         using list = boost::mp11::mp_list<Tokens...>;
49 |     };
50 | } // namespace lex
51 | } // namespace foonathan
52 | 
53 | #endif // FOONATHAN_LEX_TOKEN_SPEC_HPP_INCLUDED
54 | 
--------------------------------------------------------------------------------
/azure-pipelines.yml:
--------------------------------------------------------------------------------
1 | name: 'CI build'
2 | trigger:
3 |   branches:
4 |     include:
5 |     - master
6 |     - feature/*
7 |   paths:
8 |     exclude:
9 |     - README.md
10 | 
11 | jobs:
12 | - job: windows
13 |   pool:
14 |     vmImage: 'vs2017-win2016'
15 |   steps:
16 |   - script: |
17 |       mkdir build && cd build/
18 |       cmake ../ && cmake --build . && ctest -C Debug --output-on-failure
19 |     displayName: "Compiling using Visual Studio 2017"
20 | 
21 | - job: linux
22 |   pool:
23 |     vmImage: 'Ubuntu 16.04'
24 |   strategy:
25 |     matrix:
26 |       GCC5:
27 |         IMAGE: 'conanio/gcc5'
28 |       GCC6:
29 |         IMAGE: 'conanio/gcc6'
30 |       GCC7:
31 |         IMAGE: 'conanio/gcc7'
32 |       GCC8:
33 |         IMAGE: 'conanio/gcc8'
34 |       clang4:
35 |         IMAGE: 'conanio/clang40'
36 |       clang5:
37 |         IMAGE: 'conanio/clang50'
38 |       clang6:
39 |         IMAGE: 'conanio/clang60'
40 |       clang7:
41 |         IMAGE: 'conanio/clang7'
42 |       DefaultGCC:
43 |         IMAGE: 'foonathan/micro_cpp_gcc'
44 |       DefaultClang:
45 |         IMAGE: 'foonathan/micro_cpp_clang'
46 |   steps:
47 |   - script: docker run -u root -v "$PWD:/lex" $(IMAGE) bash -c "cmake /lex/ && cmake --build . && ctest --output-on-failure"
48 |     displayName: "Compiling using $(IMAGE)"
49 | 
50 | - job: macos
51 |   pool:
52 |     vmImage: 'macOS-10.13'
53 |   strategy:
54 |     matrix:
55 |       XCode10:
56 |         XCODE: '10'
57 |       XCode9:
58 |         XCODE: '9.4.1'
59 |   steps:
60 |   - script: |
61 |       sudo xcode-select -s /Applications/Xcode_$(XCODE).app/Contents/Developer
62 |       mkdir build && cd build/
63 |       cmake ../ && cmake --build . && ctest --output-on-failure
64 |     displayName: "Compiling using XCode $(XCODE)"
65 | 
--------------------------------------------------------------------------------
/test/whitespace_token.cpp:
--------------------------------------------------------------------------------
1 | // Copyright (C) 2018-2019 Jonathan Müller
2 | // This file is subject to the license terms in the LICENSE file
3 | // found in the top-level directory of this distribution.
4 | 
5 | #include <foonathan/lex/whitespace_token.hpp>
6 | 
7 | #include "tokenize.hpp"
8 | #include <foonathan/lex/literal_token.hpp>
9 | 
10 | namespace
11 | {
12 |     using test_spec = lex::token_spec<struct token_a, struct token_b>;
13 | 
14 |     struct token_a : lex::literal_token<'a'>
15 |     {};
16 | 
17 |     struct token_b : lex::literal_token<'b'>, lex::whitespace_token
18 |     {};
19 | } // namespace
20 | 
21 | TEST_CASE("whitespace_token")
22 | {
23 |     static constexpr const char array[] = "bbabaabbb";
24 |     constexpr auto tokenizer = lex::tokenizer<test_spec>(array);
25 |     FOONATHAN_LEX_TEST_CONSTEXPR auto result = tokenize(tokenizer);
26 | 
27 |     REQUIRE(result.size() == 3);
28 | 
29 |     REQUIRE(result[0].is(token_a{}));
30 |     REQUIRE(result[0].spelling() == "a");
31 |     REQUIRE(result[0].offset(tokenizer) == 2);
32 | 
33 |     REQUIRE(result[1].is(token_a{}));
34 |     REQUIRE(result[1].spelling() == "a");
35 |     REQUIRE(result[1].offset(tokenizer) == 4);
36 | 
37 |     REQUIRE(result[2].is(token_a{}));
38 |     REQUIRE(result[2].spelling() == "a");
39 |     REQUIRE(result[2].offset(tokenizer) == 5);
40 | }
41 | 
42 | TEST_CASE("whitespace_token and reset()")
43 | {
44 |     static constexpr const char array[] = "bbabbba";
45 | 
46 |     auto tokenizer = lex::tokenizer<test_spec>(array);
47 |     REQUIRE(tokenizer.peek().is(token_a{}));
48 |     REQUIRE(tokenizer.peek().offset(tokenizer) == 2);
49 | 
50 |     tokenizer.reset(tokenizer.current_ptr() + 1);
51 |     REQUIRE(tokenizer.peek().is(token_a{}));
52 |     REQUIRE(tokenizer.peek().offset(tokenizer) == 6);
53 | }
54 | 
--------------------------------------------------------------------------------
/include/foonathan/lex/identifier_token.hpp:
--------------------------------------------------------------------------------
1 | // Copyright (C) 2018-2019 Jonathan Müller
2 | // This file is subject to the license terms in the LICENSE file
3 | // found in the top-level directory of this distribution.
4 | 
5 | #ifndef FOONATHAN_LEX_IDENTIFIER_HPP_INCLUDED
6 | #define FOONATHAN_LEX_IDENTIFIER_HPP_INCLUDED
7 | 
8 | #include <foonathan/lex/literal_token.hpp>
9 | #include <foonathan/lex/rule_token.hpp>
10 | 
11 | namespace foonathan
12 | {
13 | namespace lex
14 | {
15 |     template <class Derived, class TokenSpec>
16 |     struct identifier_token : rule_token<Derived, TokenSpec>
17 |     {
18 |         static constexpr const char* name = "<identifier>";
19 |     };
20 | 
21 |     template <class Token>
22 |     struct is_identifier_token : detail::is_token_impl<identifier_token, Token>::value
23 |     {};
24 | 
25 |     template <class Token>
26 |     struct is_non_identifier_rule_token
27 |     : std::integral_constant<bool,
28 |           is_rule_token<Token>::value && !is_identifier_token<Token>::value>
29 |     {};
30 | 
31 |     template <char... Char>
32 |     struct keyword_token : literal_token<Char...>
33 |     {};
34 | 
35 | #define FOONATHAN_LEX_KEYWORD(String)                                                              \
36 |     FOONATHAN_LEX_DETAIL_STRING(foonathan::lex::keyword_token, String)
37 | 
38 |     template <class Token>
39 |     struct is_keyword_token : detail::is_literal_token_impl<keyword_token, Token>::value
40 |     {};
41 | 
42 |     template <class Token>
43 |     struct is_non_keyword_literal_token
44 |     : std::integral_constant<bool,
45 |           is_literal_token<Token>::value && !is_keyword_token<Token>::value>
46 |     {};
47 | } // namespace lex
48 | } // namespace foonathan
49 | 
50 | #endif // FOONATHAN_LEX_IDENTIFIER_HPP_INCLUDED
51 | 
--------------------------------------------------------------------------------
/doc/spec_match_result.md:
--------------------------------------------------------------------------------
1 | # Header File `lex/match_result.hpp`
2 | 
3 | The file `match_result.hpp` defines the class `lex::match_result`, which is the result of a token match operation.
4 | 
5 | ```cpp
6 | template <class TokenSpec>
7 | struct match_result
8 | {
9 |     token_kind<TokenSpec> kind;
10 |     std::size_t           bump;
11 | 
12 |     static match_result unmatched();
13 |     static match_result error(std::size_t bump);
14 |     static match_result success(token_kind<TokenSpec> kind, std::size_t bump);
15 |     static match_result eof();
16 | 
17 |     bool is_unmatched() const;
18 |     bool is_error() const;
19 |     bool is_success() const;
20 |     bool is_eof() const;
21 |     bool is_matched() const;
22 | };
23 | ```
24 | 
25 | All member functions are `constexpr` and `noexcept`.
26 | 
27 | Creation of a `match_result` is only possible using the named functions, which put it in one of four states:
28 | 
29 | 1. `unmatched()`: no token was matched.
30 |    Then `kind` is `lex::error_token` and `bump` is `0`.
31 |    `is_unmatched()` returns `true`, all others return `false`.
32 | 
33 | 2. `error()`: the input is an error.
34 | 
35 |    Then `kind` is `lex::error_token` and `bump` is the number of characters to skip, which must be `> 0`.
36 | 
37 |    `is_error()` and `is_matched()` return `true`, all others return `false`.
38 | 
39 | 3. `success()`: the input matched a token.
40 | 
41 |    Then `kind` and `bump` are as specified; `bump` must be greater than `0` and `kind` neither `lex::error_token` nor `lex::eof_token`.
42 | 
43 |    `is_success()` and `is_matched()` return `true`, all others return `false`.
44 | 
45 | 4. `eof()`: the end of the input was reached.
46 | 
47 |    Then `kind` is `lex::eof_token` and `bump` is `0`.
48 | 
49 |    `is_eof()` and `is_matched()` return `true`, all others return `false`.
50 | 
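To illustrate how the four states are typically consumed, here is a sketch that uses only the documented members (it is not code from the library):

```cpp
// How far a tokenizer would advance for a given result: success, error and
// EOF all count as "matched" and carry a bump count (0 for EOF); unmatched
// consumes nothing.
template <class TokenSpec>
constexpr std::size_t characters_consumed(lex::match_result<TokenSpec> result)
{
    return result.is_matched() ? result.bump : 0;
}
```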
--------------------------------------------------------------------------------
/test/tokenize.hpp:
--------------------------------------------------------------------------------
1 | // Copyright (C) 2018-2019 Jonathan Müller
2 | // This file is subject to the license terms in the LICENSE file
3 | // found in the top-level directory of this distribution.
4 | 
5 | #ifndef FOONATHAN_LEX_TOKENIZE_HPP_INCLUDED
6 | #define FOONATHAN_LEX_TOKENIZE_HPP_INCLUDED
7 | 
8 | #include <foonathan/lex/tokenizer.hpp>
9 | 
10 | #include "test.hpp"
11 | 
12 | namespace
13 | {
14 |     namespace lex = foonathan::lex;
15 | 
16 |     // just a minimal interface to provide what's needed
17 |     template <typename T, std::size_t MaxCapacity>
18 |     class constexpr_vector
19 |     {
20 |         static_assert(std::is_default_constructible<T>::value, "type must be default constructible");
21 | 
22 |     public:
23 |         constexpr constexpr_vector() : array_{}, size_(0u) {}
24 | 
25 |         //=== access ===//
26 |         constexpr bool empty() const noexcept
27 |         {
28 |             return size_ == 0;
29 |         }
30 | 
31 |         constexpr std::size_t size() const noexcept
32 |         {
33 |             return size_;
34 |         }
35 | 
36 |         constexpr T& operator[](std::size_t i) noexcept
37 |         {
38 |             return array_[i];
39 |         }
40 |         constexpr const T& operator[](std::size_t i) const noexcept
41 |         {
42 |             return array_[i];
43 |         }
44 | 
45 |         //=== modifiers ===//
46 |         constexpr void push_back(T element) noexcept
47 |         {
48 |             array_[size_] = element;
49 |             ++size_;
50 |         }
51 | 
52 |     private:
53 |         T           array_[MaxCapacity];
54 |         std::size_t size_;
55 |     };
56 | 
57 |     template <class TokenSpec>
58 |     using vector = constexpr_vector<lex::token<TokenSpec>, 32>;
59 | 
60 |     template <class TokenSpec>
61 |     FOONATHAN_LEX_TEST_CONSTEXPR vector<TokenSpec> tokenize(lex::tokenizer<TokenSpec> tokenizer)
62 |     {
63 |         vector<TokenSpec> result;
64 | 
65 |         while (!tokenizer.is_done())
66 |             result.push_back(tokenizer.get());
67 | 
68 |         return result;
69 |     }
70 | } // namespace
71 | 
72 | #endif // FOONATHAN_LEX_TOKENIZE_HPP_INCLUDED
73 | 
--------------------------------------------------------------------------------
/include/foonathan/lex/detail/assert.hpp:
--------------------------------------------------------------------------------
1 | // Copyright (C) 2018-2019 Jonathan Müller
2 | // This file is subject to the license terms in the LICENSE file
3 | // found in the top-level directory of this distribution.
4 | 
5 | #ifndef FOONATHAN_LEX_DETAIL_ASSERT_HPP_INCLUDED
6 | #define FOONATHAN_LEX_DETAIL_ASSERT_HPP_INCLUDED
7 | 
8 | #include <debug_assert.hpp>
9 | 
10 | #ifndef FOONATHAN_LEX_ENABLE_ASSERTIONS
11 | #    define FOONATHAN_LEX_ENABLE_ASSERTIONS 0
12 | #endif
13 | 
14 | #ifndef FOONATHAN_LEX_ENABLE_PRECONDITIONS
15 | #    ifdef NDEBUG
16 | #        define FOONATHAN_LEX_ENABLE_PRECONDITIONS 0
17 | #    else
18 | #        define FOONATHAN_LEX_ENABLE_PRECONDITIONS 1
19 | #    endif
20 | #endif
21 | 
22 | namespace foonathan
23 | {
24 | namespace lex
25 | {
26 |     namespace detail
27 |     {
28 |         struct assert_handler
29 |         : debug_assert::default_handler,
30 |           debug_assert::set_level<static_cast<unsigned>(FOONATHAN_LEX_ENABLE_ASSERTIONS)>
31 |         {};
32 | 
33 | #define FOONATHAN_LEX_ASSERT(Expr)                                                                 \
34 |     if (foonathan::lex::detail::assert_handler::level > 0 && !(Expr))                             \
35 |     DEBUG_UNREACHABLE(foonathan::lex::detail::assert_handler{}, "internal assertion error: " #Expr)
36 | 
37 |         struct precondition_handler
38 |         : debug_assert::default_handler,
39 |           debug_assert::set_level<static_cast<unsigned>(FOONATHAN_LEX_ENABLE_PRECONDITIONS)>
40 |         {};
41 | 
42 | #define FOONATHAN_LEX_PRECONDITION(Expr, Str)                                                      \
43 |     if (foonathan::lex::detail::precondition_handler::level > 0 && !(Expr))                       \
44 |     DEBUG_UNREACHABLE(foonathan::lex::detail::precondition_handler{}, #Expr ": " Str)
45 |     } // namespace detail
46 | } // namespace lex
47 | } // namespace foonathan
48 | 
49 | #endif // FOONATHAN_LEX_DETAIL_ASSERT_HPP_INCLUDED
50 | 
--------------------------------------------------------------------------------
/include/foonathan/lex/detail/select_integer.hpp:
--------------------------------------------------------------------------------
1 | // Copyright (C) 2018-2019 Jonathan Müller
2 | // This file is subject to the license terms in the LICENSE file
3 | // found in the top-level directory of this distribution.
4 | 
5 | #ifndef FOONATHAN_LEX_DETAIL_SELECT_INTEGER_HPP_INCLUDED
6 | #define FOONATHAN_LEX_DETAIL_SELECT_INTEGER_HPP_INCLUDED
7 | 
8 | #include <climits>
9 | #include <cstdint>
10 | #include <type_traits>
11 | 
12 | namespace foonathan
13 | {
14 | namespace lex
15 | {
16 |     namespace detail
17 |     {
18 |         template <std::size_t Size, typename = void>
19 |         struct select_integer_impl
20 |         {
21 |             static_assert(Size == 0u, "too high");
22 |             using type = void;
23 |         };
24 | 
25 | #define FOONATHAN_LEX_DETAIL_SELECT(Min, Max, Type)                                                \
26 |     template <std::size_t Size>                                                                    \
27 |     struct select_integer_impl<Size, std::enable_if_t<(Size >= (Min) && Size <= (Max))>>           \
28 |     {                                                                                              \
29 |         using type = Type;                                                                         \
30 |     };
31 | 
32 |         FOONATHAN_LEX_DETAIL_SELECT(0, UINT_LEAST8_MAX, std::uint_least8_t)
33 |         FOONATHAN_LEX_DETAIL_SELECT(UINT_LEAST8_MAX + 1ull, UINT_LEAST16_MAX, std::uint_least16_t)
34 |         FOONATHAN_LEX_DETAIL_SELECT(UINT_LEAST16_MAX + 1ull, UINT_LEAST32_MAX, std::uint_least32_t)
35 |         FOONATHAN_LEX_DETAIL_SELECT(UINT_LEAST32_MAX + 1ull, UINT_LEAST64_MAX, std::uint_least64_t)
36 | 
37 | #undef FOONATHAN_LEX_DETAIL_SELECT
38 | 
39 |         template <std::size_t Size>
40 |         using select_integer = typename select_integer_impl<Size>::type;
41 |     } // namespace detail
42 | } // namespace lex
43 | } // namespace foonathan
44 | 
45 | #endif // FOONATHAN_LEX_DETAIL_SELECT_INTEGER_HPP_INCLUDED
46 | 
--------------------------------------------------------------------------------
/test/tokenizer.cpp:
--------------------------------------------------------------------------------
1 | // Copyright (C) 2018-2019 Jonathan Müller
2 | // This file is subject to the license terms in the LICENSE file
3 | // found in the top-level directory of this distribution.
4 | 
5 | #include <foonathan/lex/tokenizer.hpp>
6 | 
7 | #include <doctest.h>
8 | 
9 | namespace lex = foonathan::lex;
10 | 
11 | namespace
12 | {
13 |     using test_spec = lex::token_spec<struct token_a, struct token_bc>;
14 | 
15 |     struct token_a : FOONATHAN_LEX_LITERAL("a")
16 |     {};
17 | 
18 |     struct token_bc : FOONATHAN_LEX_LITERAL("bc")
19 |     {};
20 | 
21 |     template <class Token>
22 |     void verify(const lex::tokenizer<test_spec>& tokenizer, const char* ptr, bool is_done)
23 |     {
24 |         REQUIRE(tokenizer.current_ptr() == ptr);
25 |         REQUIRE(tokenizer.is_done() == is_done);
26 | 
27 |         REQUIRE(tokenizer.peek().is(Token{}));
28 |         REQUIRE(tokenizer.peek().spelling().data() == tokenizer.current_ptr());
29 |     }
30 | } // namespace
31 | 
32 | TEST_CASE("tokenizer")
33 | {
34 |     const char array[] = "abc aabc";
35 |     lex::tokenizer<test_spec> tokenizer(array);
36 |     REQUIRE(tokenizer.begin_ptr() == array);
37 |     REQUIRE(tokenizer.end_ptr() == array + sizeof(array) - 1);
38 | 
39 |     verify<token_a>(tokenizer, array, false);
40 | 
41 |     tokenizer.bump();
42 |     verify<token_bc>(tokenizer, array + 1, false);
43 | 
44 |     tokenizer.bump();
45 |     verify<lex::error_token>(tokenizer, array + 3, false);
46 | 
47 |     auto token = tokenizer.get();
48 |     REQUIRE(token.is(lex::error_token{}));
49 |     REQUIRE(token.spelling().data() == array + 3);
50 |     verify<token_a>(tokenizer, array + 4, false);
51 | 
52 |     tokenizer.reset(array);
53 |     verify<token_a>(tokenizer, array, false);
54 | 
55 |     tokenizer.reset(array + 6);
56 |     verify<token_bc>(tokenizer, array + 6, false);
57 | 
58 |     tokenizer.bump();
59 |     verify<lex::eof_token>(tokenizer, array + 8, true);
60 | 
61 |     tokenizer.bump();
62 |     verify<lex::eof_token>(tokenizer, array + 8, true);
63 | }
64 | 
--------------------------------------------------------------------------------
/external/external.cmake:
--------------------------------------------------------------------------------
1 | # Copyright (C) 2018-2019 Jonathan Müller
2 | # This file is subject to the license terms in the LICENSE file
3 | # found in the top-level directory of this distribution.
4 | 
5 | option(FOONATHAN_LEX_FORCE_FIND_PACKAGE "force find_package() instead of using git submodule" OFF)
6 | set(dependency_via_submodule OFF)
7 | 
8 | if(FOONATHAN_LEX_FORCE_FIND_PACKAGE)
9 |     find_package(debug_assert REQUIRED)
10 | else()
11 |     find_package(debug_assert QUIET)
12 |     if(NOT debug_assert_FOUND)
13 |         set(dependency_via_submodule ON)
14 |         if(TARGET debug_assert)
15 |             message(STATUS "Using inherited debug_assert target")
16 |         else()
17 |             message(STATUS "Installing debug_assert via submodule")
18 |             execute_process(COMMAND git submodule update --init -- external/debug_assert
19 |                             WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR})
20 |             add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/external/debug_assert EXCLUDE_FROM_ALL)
21 |         endif()
22 |     endif()
23 | endif()
24 | 
25 | add_library(foonathan_lex_mp11 INTERFACE)
26 | if(FOONATHAN_LEX_FORCE_FIND_PACKAGE)
27 |     find_package(Boost 1.69.0 REQUIRED)
28 |     target_link_libraries(foonathan_lex_mp11 INTERFACE Boost::boost)
29 | else()
30 |     find_package(Boost 1.69.0)
31 |     if(Boost_FOUND)
32 |         target_link_libraries(foonathan_lex_mp11 INTERFACE Boost::boost)
33 |     else()
34 |         set(dependency_via_submodule ON)
35 |         if(TARGET Boost::mp11)
36 |             message(STATUS "Using inherited Boost::mp11 target")
37 |         else()
38 |             message(STATUS "Installing Boost::mp11 via submodule")
39 |             execute_process(COMMAND git submodule update --init -- external/mp11
40 |                             WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR})
41 |             target_include_directories(foonathan_lex_mp11 SYSTEM INTERFACE ${CMAKE_CURRENT_SOURCE_DIR}/external/mp11/include)
42 |         endif()
43 |     endif()
44 | endif()
45 | 
--------------------------------------------------------------------------------
/include/foonathan/lex/literal_token.hpp:
--------------------------------------------------------------------------------
1 | // Copyright (C) 2018-2019 Jonathan Müller
2 | // This file is subject to the license terms in the LICENSE file
3 | // found in the top-level directory of this distribution.
4 | 
5 | #ifndef FOONATHAN_LEX_LITERAL_TOKEN_HPP_INCLUDED
6 | #define FOONATHAN_LEX_LITERAL_TOKEN_HPP_INCLUDED
7 | 
8 | #include <foonathan/lex/detail/string.hpp>
9 | #include <foonathan/lex/token_spec.hpp>
10 | 
11 | namespace foonathan
12 | {
13 | namespace lex
14 | {
15 |     template <char... Literal>
16 |     struct literal_token : detail::base_token
17 |     {
18 |         static constexpr const char value[sizeof...(Literal) + 1] = {Literal..., '\0'};
19 |         static_assert(value[sizeof...(Literal) - 1] != '\0', "literal must not be null-terminated");
20 | 
21 |         static constexpr const char* name = value;
22 |     };
23 | 
24 |     template <char... Literal>
25 |     constexpr const char literal_token<Literal...>::value[];
26 | 
27 | #define FOONATHAN_LEX_LITERAL(String)                                                              \
28 |     FOONATHAN_LEX_DETAIL_STRING(foonathan::lex::literal_token, String)
29 | 
30 |     namespace detail
31 |     {
32 |         template