├── samples ├── infix │ ├── testsuite │ │ ├── examples │ │ │ ├── mixed_out │ │ │ │ ├── stderr.txt │ │ │ │ ├── exit_code.txt │ │ │ │ └── stdout.txt │ │ │ ├── simple_out │ │ │ │ ├── exit_code.txt │ │ │ │ ├── stderr.txt │ │ │ │ └── stdout.txt │ │ │ ├── multi_define_out │ │ │ │ ├── stderr.txt │ │ │ │ ├── exit_code.txt │ │ │ │ └── stdout.txt │ │ │ ├── multi_ident_out │ │ │ │ ├── exit_code.txt │ │ │ │ ├── stderr.txt │ │ │ │ └── stdout.txt │ │ │ ├── multi_define.infix │ │ │ ├── simple.infix │ │ │ ├── mixed.infix │ │ │ └── multi_ident.infix │ │ ├── CMakeLists.txt │ │ └── infix.cmake │ ├── infix_trieste.cc │ ├── CMakeLists.txt │ ├── internal.h │ ├── infix.h │ ├── infix.cc │ ├── parse.cc │ └── reader.cc └── shrubbery │ ├── shrubbery.cc │ ├── CMakeLists.txt │ ├── shrubbery.h │ ├── wf.h │ └── reader.cc ├── .gitignore ├── cmake ├── triesteConfig.cmake.in ├── runcommand.cmake ├── compare.cmake └── testsuite.cmake ├── CODE_OF_CONDUCT.md ├── include └── trieste │ ├── gen.h │ ├── debug.h │ ├── rewriter.h │ ├── xoroshiro.h │ ├── trieste.h │ ├── defaultmap.h │ ├── regex.h │ ├── reader.h │ ├── token.h │ ├── source.h │ ├── writer.h │ └── passes.h ├── test ├── CMakeLists.txt └── intrusive_ptr_test.cc ├── notes ├── packages.md ├── lookup.md ├── arguments.md ├── von.md ├── dispatch.md ├── regions.md └── control-flow.md ├── parsers ├── CMakeLists.txt ├── test │ ├── json_checker.cc │ ├── json_fuzzer.cc │ ├── yaml_checker.cc │ ├── yaml_fuzzer.cc │ └── CMakeLists.txt ├── json │ ├── internal.h │ ├── CMakeLists.txt │ ├── parse.cc │ ├── reader.cc │ ├── README.md │ └── writer.cc ├── yaml │ ├── CMakeLists.txt │ ├── internal.h │ ├── yamlc.cc │ └── README.md ├── README.md └── include │ └── trieste │ └── yaml.h ├── LICENSE ├── CONTRIBUTING.md ├── CMakePresets.json ├── README.md ├── SECURITY.md ├── .github └── workflows │ └── buildtest.yml ├── .clang-format └── CMakeLists.txt /samples/infix/testsuite/examples/mixed_out/stderr.txt: 
-------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /samples/infix/testsuite/examples/mixed_out/exit_code.txt: -------------------------------------------------------------------------------- 1 | 0 -------------------------------------------------------------------------------- /samples/infix/testsuite/examples/simple_out/exit_code.txt: -------------------------------------------------------------------------------- 1 | 0 -------------------------------------------------------------------------------- /samples/infix/testsuite/examples/simple_out/stderr.txt: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /samples/infix/testsuite/examples/multi_define_out/stderr.txt: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /samples/infix/testsuite/examples/multi_ident_out/exit_code.txt: -------------------------------------------------------------------------------- 1 | 0 -------------------------------------------------------------------------------- /samples/infix/testsuite/examples/multi_ident_out/stderr.txt: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /samples/infix/testsuite/examples/multi_define.infix: -------------------------------------------------------------------------------- 1 | x = 5; 2 | x = 6; -------------------------------------------------------------------------------- /samples/infix/testsuite/examples/multi_define_out/exit_code.txt: -------------------------------------------------------------------------------- 1 | 1 
-------------------------------------------------------------------------------- /samples/infix/testsuite/examples/multi_ident_out/stdout.txt: -------------------------------------------------------------------------------- 1 | 1 22 2 | 2 10 3 | -------------------------------------------------------------------------------- /samples/infix/testsuite/examples/simple_out/stdout.txt: -------------------------------------------------------------------------------- 1 | x 5 2 | 1 + 10 11 3 | -------------------------------------------------------------------------------- /samples/infix/testsuite/examples/mixed_out/stdout.txt: -------------------------------------------------------------------------------- 1 | x -11.700000 2 | y -32.440000 3 | -------------------------------------------------------------------------------- /samples/infix/testsuite/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | include (../../../cmake/testsuite.cmake) 2 | 3 | testsuite(infix) -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | build*/ 2 | .vscode/ 3 | .vs/ 4 | **/dump/* 5 | .*.swp 6 | compile_commands.json 7 | notes/ 8 | .cache 9 | -------------------------------------------------------------------------------- /samples/infix/testsuite/examples/simple.infix: -------------------------------------------------------------------------------- 1 | x = 5; 2 | print "x" x; 3 | y = 2 - 1; 4 | print "1 + 10" 1 + 10; 5 | -------------------------------------------------------------------------------- /samples/infix/testsuite/examples/mixed.infix: -------------------------------------------------------------------------------- 1 | x = 1 + 2 * 3 + 5.3 - 4 - 2 / 0.1; 2 | y = 3.2 * x + 5; 3 | print "x" x; 4 | print "y" y; -------------------------------------------------------------------------------- 
/samples/infix/testsuite/examples/multi_ident.infix: -------------------------------------------------------------------------------- 1 | x = 5 + 10; 2 | y = 1 - 9 + x; 3 | print "1" x + y; 4 | z = (5 * x) / y; 5 | print "2" z; -------------------------------------------------------------------------------- /cmake/triesteConfig.cmake.in: -------------------------------------------------------------------------------- 1 | @PACKAGE_INIT@ 2 | 3 | include("${CMAKE_CURRENT_LIST_DIR}/@PROJECT_NAME@Targets.cmake") 4 | check_required_components("@PROJECT_NAME@") -------------------------------------------------------------------------------- /samples/shrubbery/shrubbery.cc: -------------------------------------------------------------------------------- 1 | #include "shrubbery.h" 2 | 3 | #include 4 | 5 | int main(int argc, char** argv) 6 | { 7 | return trieste::Driver(shrubbery::reader()).run(argc, argv); 8 | } 9 | -------------------------------------------------------------------------------- /samples/infix/testsuite/examples/multi_define_out/stdout.txt: -------------------------------------------------------------------------------- 1 | multi_define.infix:2:1: conflicting definitions of `x`: 2 | x = 5; 3 | ~~~~~ 4 | x = 6; 5 | ~~~~~ 6 | 7 | Errors:Pass expressions failed with 0 error! 
8 | 9 | -------------------------------------------------------------------------------- /samples/infix/infix_trieste.cc: -------------------------------------------------------------------------------- 1 | #include "infix.h" 2 | 3 | #include 4 | 5 | int main(int argc, char** argv) 6 | { 7 | using namespace trieste; 8 | Reader read_and_calculate = infix::reader() >>= infix::calculate(); 9 | return Driver(read_and_calculate).run(argc, argv); 10 | } 11 | -------------------------------------------------------------------------------- /samples/shrubbery/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | add_executable(shrubbery 2 | reader.cc 3 | parse.cc 4 | shrubbery.cc 5 | ) 6 | enable_warnings(shrubbery) 7 | 8 | target_link_libraries(shrubbery 9 | trieste::trieste 10 | ) 11 | 12 | add_test(NAME shrubbery COMMAND shrubbery test -f) 13 | add_test(NAME shrubbery_check COMMAND shrubbery check -w) 14 | 15 | install(TARGETS shrubbery RUNTIME DESTINATION shrubbery) 16 | -------------------------------------------------------------------------------- /CODE_OF_CONDUCT.md: -------------------------------------------------------------------------------- 1 | # Microsoft Open Source Code of Conduct 2 | 3 | This project has adopted the [Microsoft Open Source Code of Conduct](https://opensource.microsoft.com/codeofconduct/). 4 | 5 | Resources: 6 | 7 | - [Microsoft Open Source Code of Conduct](https://opensource.microsoft.com/codeofconduct/) 8 | - [Microsoft Code of Conduct FAQ](https://opensource.microsoft.com/codeofconduct/faq/) 9 | - Contact [opencode@microsoft.com](mailto:opencode@microsoft.com) with questions or concerns 10 | -------------------------------------------------------------------------------- /include/trieste/gen.h: -------------------------------------------------------------------------------- 1 | // Copyright Microsoft and Project Verona Contributors. 
2 | // SPDX-License-Identifier: MIT 3 | #pragma once 4 | 5 | #include "ast.h" 6 | #include "xoroshiro.h" 7 | 8 | #include 9 | 10 | namespace trieste 11 | { 12 | using Rand = xoroshiro::p128r32; 13 | using Seed = uint64_t; 14 | using Result = uint32_t; 15 | 16 | using GenLocationF = std::function; 17 | using GenNodeLocationF = std::function; 18 | } 19 | -------------------------------------------------------------------------------- /samples/infix/testsuite/infix.cmake: -------------------------------------------------------------------------------- 1 | # Arguments for testing infix samples 2 | macro(toolinvoke ARGS testfile outputdir) 3 | set(${ARGS} ${testfile}) 4 | endmacro() 5 | 6 | # Regular expression to match test files 7 | # This regex matches files with the .infix extension 8 | set(TESTSUITE_REGEX ".*\\.infix") 9 | 10 | set(TESTSUITE_EXE "$") 11 | 12 | function (test_output_dir out test) 13 | # Use get_filename_component to remove the file extension and keep the directory structure 14 | get_filename_component(test_dir ${test} DIRECTORY) 15 | get_filename_component(test_name ${test} NAME_WE) 16 | # Create the output directory relative to the test directory 17 | set(${out} "${test_dir}/${test_name}_out" PARENT_SCOPE) 18 | endfunction() 19 | -------------------------------------------------------------------------------- /samples/infix/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | add_executable(infix_trieste 2 | reader.cc 3 | writers.cc 4 | infix_trieste.cc 5 | parse.cc 6 | ) 7 | enable_warnings(infix_trieste) 8 | 9 | target_link_libraries(infix_trieste 10 | trieste::trieste 11 | ) 12 | 13 | add_test(NAME infix COMMAND infix_trieste test -f) 14 | add_test(NAME invalid_input COMMAND infix ./infix) 15 | set_property(TEST invalid_input PROPERTY WILL_FAIL On) 16 | add_test(NAME infix_check COMMAND infix_trieste check -w) 17 | 18 | add_executable(infix 19 | reader.cc 20 | writers.cc 21 | infix.cc 22 | parse.cc 
23 | ) 24 | enable_warnings(infix) 25 | 26 | target_link_libraries(infix 27 | trieste::trieste 28 | ) 29 | 30 | add_subdirectory(testsuite) 31 | 32 | install(TARGETS infix infix_trieste RUNTIME DESTINATION infix) 33 | install(DIRECTORY testsuite/examples DESTINATION infix) 34 | -------------------------------------------------------------------------------- /test/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | 2 | add_executable(trieste_intrusive_ptr_test 3 | intrusive_ptr_test.cc 4 | ) 5 | enable_warnings(trieste_intrusive_ptr_test) 6 | target_link_libraries(trieste_intrusive_ptr_test trieste::trieste) 7 | 8 | # This test might not make so much sense without asan enabled, but might as well 9 | # check that the test compiles and doesn't crash on other compilers. 10 | if(CMAKE_CXX_COMPILER_ID STREQUAL "Clang" AND NOT TRIESTE_SANITIZE) 11 | target_compile_options(trieste_intrusive_ptr_test PUBLIC -g -fsanitize=thread) 12 | target_link_libraries(trieste_intrusive_ptr_test -fsanitize=thread) 13 | endif() 14 | 15 | add_test(NAME trieste_intrusive_ptr_test COMMAND trieste_intrusive_ptr_test WORKING_DIRECTORY $) 16 | 17 | add_executable(trieste_source_test 18 | source_test.cc 19 | ) 20 | enable_warnings(trieste_source_test) 21 | target_link_libraries(trieste_source_test trieste::trieste) 22 | 23 | add_test(NAME trieste_source_test COMMAND trieste_source_test --depth 6 WORKING_DIRECTORY $) 24 | -------------------------------------------------------------------------------- /notes/packages.md: -------------------------------------------------------------------------------- 1 | # Packages 2 | 3 | The string that represents a package is a resolver. 4 | 5 | Packages go in `Top`, inside a class, where the class name is derived from the resolver. Two resolvers that end up at the same package should get the same class name. 6 | 7 | On disk, packages are directories. The package name is the directory name. 
Package directories go in your build directory, not in the source directory. 8 | 9 | ## Build Directory 10 | 11 | ```ts 12 | build 13 | - packages 14 | - package1 15 | - ... 16 | - debug 17 | - program1 18 | - program2 19 | - ... 20 | - release 21 | - program1 22 | - program2 23 | - ... 24 | ``` 25 | 26 | ## Resolvers 27 | 28 | https://cmake.org/cmake/help/latest/module/ExternalProject.html#id1 29 | - URL 30 | - git 31 | - local directory? 32 | 33 | ## Prologue 34 | 35 | This could be added as the prologue to every program: 36 | 37 | ```ts 38 | type std = "std lib resolver" 39 | use std::builtin 40 | 41 | (TypeAlias 42 | (Ident std) 43 | (TypeParams) 44 | (Type (TypeVar $0)) 45 | (Type 46 | (Package (String "std lib resolver")))) 47 | ``` 48 | -------------------------------------------------------------------------------- /cmake/runcommand.cmake: -------------------------------------------------------------------------------- 1 | # This is used to run a command that can fail. 2 | # Dumping all the output and error code into a file 3 | # Also handles timeouts 4 | 5 | file(REMOVE_RECURSE ${OUTPUT_DIR}) 6 | make_directory(${OUTPUT_DIR}) 7 | 8 | include(${COLLECTION}) 9 | 10 | toolinvoke(TOOLINVOKE ${TESTFILE} ${OUTPUT_DIR}) 11 | 12 | list(JOIN TOOLINVOKE " " TOOLINVOKE_SEP) 13 | message ("Running") 14 | message (" ${TEST_EXE} ${TOOLINVOKE_SEP}") 15 | message ("in working directory") 16 | message (" ${WORKING_DIR}") 17 | message ("output sent to") 18 | message (" ${OUTPUT_DIR}") 19 | 20 | # Run command 21 | execute_process( 22 | COMMAND ${TEST_EXE} ${TOOLINVOKE} 23 | WORKING_DIRECTORY ${WORKING_DIR} 24 | OUTPUT_FILE ${OUTPUT_DIR}/stdout.txt 25 | ERROR_FILE ${OUTPUT_DIR}/stderr.txt 26 | TIMEOUT 20 # Timeout at 20 seconds, may need to increase this. 
27 | RESULT_VARIABLE status 28 | ) 29 | 30 | # Push exit code into dump and make sure both stdout and stderr exist 31 | file(WRITE ${OUTPUT_DIR}/exit_code.txt ${status}) 32 | file(TOUCH ${OUTPUT_DIR}/stdout.txt) 33 | file(TOUCH ${OUTPUT_DIR}/stderr.txt) 34 | -------------------------------------------------------------------------------- /parsers/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | add_subdirectory(json) 2 | add_subdirectory(yaml) 3 | 4 | if(TRIESTE_BUILD_PARSER_TESTS) 5 | enable_testing() 6 | add_subdirectory(test) 7 | 8 | if(NOT IS_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/JSONTestSuite) 9 | execute_process(COMMAND ${GIT_EXECUTABLE} clone --depth=1 https://github.com/nst/JSONTestSuite 10 | WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR} 11 | OUTPUT_QUIET) 12 | endif() 13 | 14 | if(NOT IS_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/json-patch-tests) 15 | execute_process(COMMAND ${GIT_EXECUTABLE} clone --depth=1 https://github.com/json-patch/json-patch-tests 16 | WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR} 17 | OUTPUT_QUIET) 18 | endif() 19 | 20 | if(NOT IS_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/yaml-test-suite) 21 | execute_process(COMMAND ${GIT_EXECUTABLE} clone --depth=1 --branch data-2022-01-17 https://github.com/yaml/yaml-test-suite 22 | WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR} 23 | OUTPUT_QUIET) 24 | endif() 25 | endif() 26 | -------------------------------------------------------------------------------- /include/trieste/debug.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #ifdef __cpp_lib_source_location 5 | # include 6 | #endif 7 | 8 | namespace trieste 9 | { 10 | namespace detail 11 | { 12 | /* 13 | * Type used to track where a particular value was constructed in the 14 | * source. This aids in debugging and error reporting. 
15 | */ 16 | #ifdef __cpp_lib_source_location 17 | struct DebugLocation 18 | { 19 | std::source_location location; 20 | 21 | DebugLocation(std::source_location l = std::source_location::current()) 22 | : location(l) 23 | {} 24 | }; 25 | #else 26 | struct DebugLocation 27 | { 28 | // Dummy value as we got a UBSan Misaligned Use without this. 29 | // I am assuming that the empty struct was trigger some kind of compiler bug. (MJP) 30 | size_t dummy{0}; 31 | DebugLocation() {} 32 | }; 33 | #endif 34 | 35 | template 36 | struct Located 37 | { 38 | T value; 39 | DebugLocation location; 40 | 41 | Located(T t, DebugLocation l = {}) 42 | : value(t), location(l) 43 | {} 44 | }; 45 | } 46 | } -------------------------------------------------------------------------------- /parsers/test/json_checker.cc: -------------------------------------------------------------------------------- 1 | #include "trieste/logging.h" 2 | 3 | #include 4 | #include 5 | #include 6 | 7 | using namespace trieste; 8 | 9 | int main(int argc, char** argv) 10 | { 11 | CLI::App app; 12 | 13 | app.set_help_all_flag("--help-all", "Expand all help"); 14 | 15 | bool check_against_wf = false; 16 | app.add_flag( 17 | "-w", check_against_wf, "Check pattern against well-formedness rules"); 18 | 19 | std::vector ignored_tokens; 20 | app.add_option( 21 | "-i,--ignore_token", 22 | ignored_tokens, 23 | "Ignore this token when checking patterns against well-formedness rules."); 24 | 25 | try 26 | { 27 | app.parse(argc, argv); 28 | } 29 | catch (const CLI::ParseError& e) 30 | { 31 | return app.exit(e); 32 | } 33 | 34 | Checker reader_checker = 35 | Checker(json::reader()).check_against_wf(check_against_wf).ignored_tokens(ignored_tokens); 36 | Checker writer_checker = 37 | Checker(json::writer("checker")).check_against_wf(check_against_wf).ignored_tokens(ignored_tokens); 38 | 39 | return reader_checker.check() + writer_checker.check(); 40 | } 41 | 
-------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) Microsoft Corporation. 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE 22 | -------------------------------------------------------------------------------- /parsers/json/internal.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "json.h" 4 | 5 | namespace trieste 6 | { 7 | namespace json 8 | { 9 | inline const auto Comma = TokenDef("json-comma"); 10 | inline const auto Colon = TokenDef("json-colon"); 11 | inline const auto Lhs = TokenDef("json-lhs"); 12 | inline const auto Rhs = TokenDef("json-rhs"); 13 | 14 | Parse parser(); 15 | 16 | inline const auto wf_parse_tokens = 17 | Object | Array | String | Number | True | False | Null | Comma | Colon; 18 | 19 | // clang-format off 20 | inline const auto wf_parse = 21 | (Top <<= File) 22 | | (File <<= ~Group) 23 | | (Value <<= Group) 24 | | (Array <<= Group) 25 | | (Object <<= Group) 26 | | (Member <<= Group) 27 | | (Group <<= wf_parse_tokens++) 28 | ; 29 | // clang-format on 30 | 31 | template 32 | inline auto err(Node node, const T& msg) 33 | { 34 | return Error << (ErrorMsg ^ msg) << (ErrorAst << node->clone()); 35 | } 36 | 37 | template 38 | inline Node err(const NodeRange& r, const T& msg) 39 | { 40 | return Error << (ErrorMsg ^ msg) << (ErrorAst << r); 41 | } 42 | 43 | template 44 | inline auto err(const T& msg) 45 | { 46 | return Error << (ErrorMsg ^ msg); 47 | } 48 | } 49 | } 50 | -------------------------------------------------------------------------------- /cmake/compare.cmake: -------------------------------------------------------------------------------- 1 | # This is used to compare two files using CMake. 2 | # It improves on the default behaviour to print the 3 | # files if there is a difference. 
4 | 5 | execute_process( 6 | COMMAND ${CMAKE_COMMAND} -E compare_files --ignore-eol ${original_file} ${new_file} 7 | RESULT_VARIABLE status 8 | ) 9 | 10 | if (${status} EQUAL 1) 11 | message ("Compare ${original_file} with ${new_file}") 12 | if (diff_tool STREQUAL "") 13 | file(READ ${original_file} original_text) 14 | file(READ ${new_file} new_text) 15 | message("--Original File-----------------------------------------------------------------") 16 | if (NOT original_text STREQUAL "") 17 | message("${original_text}") 18 | endif() 19 | message("--------------------------------------------------------------------------------") 20 | message(" ") 21 | message("--New File----------------------------------------------------------------------") 22 | if (NOT new_text STREQUAL "") 23 | message("${new_text}") # quoted so any ';' in the file text is printed instead of being treated as a list separator 24 | endif() 25 | message("--------------------------------------------------------------------------------") 26 | else () 27 | execute_process( 28 | COMMAND ${diff_tool} ${original_file} ${new_file} 29 | ) 30 | endif () 31 | message(FATAL_ERROR "Files differ!") 32 | endif () -------------------------------------------------------------------------------- /parsers/json/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | set( SOURCES 2 | parse.cc 3 | reader.cc 4 | writer.cc 5 | json.cc 6 | ) 7 | 8 | if( BUILD_SHARED_LIBS ) 9 | add_library(json SHARED ${SOURCES}) 10 | else() 11 | add_library(json STATIC ${SOURCES}) 12 | endif() 13 | enable_warnings(json) 14 | 15 | add_library(trieste::json ALIAS json) 16 | 17 | target_link_libraries(json 18 | PUBLIC 19 | trieste::trieste 20 | ) 21 | 22 | if(MSVC) 23 | target_compile_options(json PUBLIC "/Zc:__cplusplus") 24 | target_compile_definitions(json PUBLIC "_SILENCE_CXX17_CODECVT_HEADER_DEPRECATION_WARNING") 25 | endif() 26 | 27 | if(TRIESTE_USE_CXX17) 28 | target_compile_features(json PUBLIC cxx_std_17) 29 | target_compile_definitions(json PUBLIC TRIESTE_USE_CXX17) 30 | else() 31 | 
target_compile_features(json PUBLIC cxx_std_20) 32 | endif() 33 | 34 | target_include_directories( json 35 | PUBLIC 36 | $ 37 | $ 38 | $ 39 | PRIVATE 40 | ${CMAKE_CURRENT_SOURCE_DIR} 41 | $ 42 | ) 43 | 44 | install(TARGETS json 45 | EXPORT ${PROJECT_NAME}_Targets 46 | ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR} 47 | LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR} 48 | RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR} 49 | ) 50 | 51 | install(FILES ${CMAKE_CURRENT_SOURCE_DIR}/../include/trieste/json.h DESTINATION include/trieste) 52 | -------------------------------------------------------------------------------- /samples/infix/internal.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "infix.h" 4 | 5 | namespace infix 6 | { 7 | inline const auto Paren = TokenDef("infix-paren"); 8 | inline const auto Equals = TokenDef("infix-equals"); 9 | inline const auto Print = TokenDef("infix-print"); 10 | 11 | inline const auto wf_literal = Int | Float; 12 | inline const auto wf_parse_tokens = wf_literal | String | Paren | Print | 13 | Ident | Add | Subtract | Divide | Multiply; 14 | 15 | // clang-format off 16 | inline const auto wf_parser = 17 | (Top <<= File) 18 | | (File <<= (Group | Equals)++) 19 | | (Paren <<= Group++) 20 | | (Equals <<= Group++) 21 | | (Group <<= wf_parse_tokens++) 22 | ; 23 | // clang-format on 24 | 25 | inline const auto Number = T(Int, Float); 26 | 27 | // The Error token allows the creation of a special node which we can 28 | // use to replace the erroneous node. This will then exempt that subtree 29 | // from the well-formedness check. This is the mechanism by which we can 30 | // use the testing system to discover edge cases, i.e. the testing will 31 | // not proceed to the next pass until all of the invalid subtrees have 32 | // been marked as `Error`. 
33 | inline auto err(const NodeRange& r, const std::string& msg) 34 | { 35 | return Error << (ErrorMsg ^ msg) << (ErrorAst << r); 36 | } 37 | 38 | inline auto err(Node node, const std::string& msg) 39 | { 40 | return Error << (ErrorMsg ^ msg) << (ErrorAst << node); 41 | } 42 | 43 | Parse parser(); 44 | } 45 | -------------------------------------------------------------------------------- /notes/lookup.md: -------------------------------------------------------------------------------- 1 | # Lookup 2 | 3 | Given a Node that names something, find the Node that defines it. 4 | 5 | - Names may be scoped. 6 | - After looking up, we may need to look down. 7 | - Names may be overloaded. 8 | - We may need to return multiple results. 9 | - This can come from the same scope or from different scopes. 10 | - It can even come from lookup on an algebraic type. 11 | - Names may be imported from other scopes. 12 | - And those scopes may need additional information, such as type arguments. 13 | 14 | Lookup may want to return more information than just the definition node. For Verona, we want to return a map of type parameter bindings as well. 
15 | 16 | ```f# 17 | // TODO: `use`, multidef, shadowing, lookdown 18 | // multidef => not shadowing 19 | // not multidef => shadowing 20 | 21 | // one unidef: done 22 | // unidef with anything else: {} 23 | // all multidef: union with parent 24 | let LU scope name = 25 | let defs = 26 | { def | 27 | def ∈ scope.map name, !def.type.defbeforeuse or (def < name) } 28 | if ∃def ∈ defs: !def.type.multidef then 29 | if |defs| = 1 then 30 | defs 31 | else 32 | {} 33 | else 34 | defs ∪ (LU scope.parent name) 35 | 36 | let LD map elems = 37 | match elems 38 | | none -> map 39 | | elem, elems -> 40 | // TODO: 41 | let defs = { def | def ∈ map.def, def.type.exported } 42 | 43 | let Resolve scope elems = 44 | match elems 45 | | none -> {} 46 | | elem, elems -> 47 | let maps = LU scope elem.name elem.args 48 | { LD map elems | map ∈ maps } 49 | ``` 50 | -------------------------------------------------------------------------------- /samples/shrubbery/shrubbery.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | 5 | namespace shrubbery 6 | { 7 | using namespace trieste; 8 | 9 | inline const auto Paren = TokenDef("shrub-paren"); // () 10 | inline const auto Bracket = TokenDef("shrub-bracket"); // [] 11 | inline const auto Brace = TokenDef("shrub-brace"); // {} 12 | inline const auto Block = TokenDef("shrub-block"); // : 13 | inline const auto Comma = TokenDef("shrub-comma"); // , 14 | inline const auto Semi = TokenDef("shrub-semicolon"); // ; 15 | inline const auto Alt = TokenDef("shrub-alt"); // | 16 | inline const auto Op = TokenDef("shrub-op", flag::print); // Operators 17 | inline const auto Atom = TokenDef("shrub-atom", flag::print); // Everything else 18 | 19 | // Used for final structure 20 | inline const auto Terms = TokenDef("shrub-terms"); 21 | inline const auto None = TokenDef("shrub-none"); 22 | 23 | // Used as identifiers 24 | inline const auto Id = TokenDef("shrub-id"); 25 | inline const auto 
Lhs = TokenDef("shrub-lhs"); 26 | inline const auto Rhs = TokenDef("shrub-rhs"); 27 | 28 | // clang-format off 29 | inline const auto wf = 30 | (Top <<= File) 31 | | (File <<= Group++) 32 | | (Paren <<= Group++) 33 | | (Bracket <<= Group++) 34 | | (Brace <<= Group++) 35 | | (Block <<= Group++) 36 | | (Alt <<= Block++[1]) 37 | | (Group <<= Terms * (Block >>= Block | None) * (Alt >>= Alt | None)) 38 | | (Terms <<= (Paren | Bracket | Brace | Op | Atom)++) 39 | ; 40 | // clang-format on 41 | 42 | Parse parser(); 43 | Reader reader(); 44 | } 45 | -------------------------------------------------------------------------------- /parsers/test/json_fuzzer.cc: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | 5 | using namespace trieste; 6 | 7 | int main(int argc, char** argv) 8 | { 9 | CLI::App app; 10 | 11 | app.set_help_all_flag("--help-all", "Expand all help"); 12 | 13 | std::string transform; 14 | app.add_option("transform", transform, "Transform to test") 15 | ->check(CLI::IsMember({"reader", "writer"})) 16 | ->required(true); 17 | 18 | uint32_t seed = std::random_device()(); 19 | app.add_option("-s,--seed", seed, "Random seed"); 20 | 21 | uint32_t count = 100; 22 | app.add_option("-c,--count", count, "Number of seed to test"); 23 | 24 | bool failfast = false; 25 | app.add_flag("-f,--failfast", failfast, "Stop on first failure"); 26 | 27 | std::string log_level; 28 | app 29 | .add_option( 30 | "-l,--log_level", 31 | log_level, 32 | "Set Log Level to one of " 33 | "Trace, Debug, Info, " 34 | "Warning, Output, Error, " 35 | "None") 36 | ->check(logging::set_log_level_from_string); 37 | 38 | try 39 | { 40 | app.parse(argc, argv); 41 | } 42 | catch (const CLI::ParseError& e) 43 | { 44 | return app.exit(e); 45 | } 46 | 47 | logging::Output() << "Testing x" << count << ", seed: " << seed << std::endl; 48 | 49 | Fuzzer fuzzer; 50 | Reader reader = json::reader(); 51 | if (transform == "reader") 52 | { 
53 | fuzzer = Fuzzer(reader); 54 | } 55 | else 56 | { 57 | fuzzer = Fuzzer(json::writer("fuzzer"), reader.parser().generators()); 58 | } 59 | 60 | return fuzzer.start_seed(seed).seed_count(count).failfast(failfast).test(); 61 | } 62 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # Contributing 2 | 3 | Most contributions require you to agree to a 4 | Contributor License Agreement (CLA) declaring that you have the right to, and actually do, grant us 5 | the rights to use your contribution. For details, visit https://cla.microsoft.com. 6 | 7 | When you submit a pull request, a CLA-bot will automatically determine whether you need to provide 8 | a CLA and decorate the PR appropriately (e.g., label, comment). Simply follow the instructions 9 | provided by the bot. You will only need to do this once across all repos using our CLA. 10 | 11 | This project has adopted the [Microsoft Open Source Code of Conduct](https://opensource.microsoft.com/codeofconduct/). 12 | For more information see the [Code of Conduct FAQ](https://opensource.microsoft.com/codeofconduct/faq/) or 13 | contact [opencode@microsoft.com](mailto:opencode@microsoft.com) with any additional questions or comments. 14 | 15 | # Bugs and patches 16 | 17 | We use Github [issues](https://github.com/microsoft/Trieste/issues) to track bugs and features. If you found a bug or want to propose a new feature, please file an issue on our project. 18 | 19 | We use Github [pull requests](https://github.com/microsoft/Trieste/pulls) for contributions in code, documents, tests, etc. 20 | 21 | Every PR must pass the CI builds (which include CLA checks and formatting) and the appropriate set of tests on Windows, Linux (clang & gcc) and Mac, on x86_64. PRs cannot be merged if any of the tests fail. 
22 | 23 | You are not, however, required to run all these tests on your own, before submitting the PR. Running on at least one of those above and passing should be fine. We can work out the remaining issues during the review process. 24 | -------------------------------------------------------------------------------- /notes/arguments.md: -------------------------------------------------------------------------------- 1 | # Type Based Dispatch 2 | 3 | TODO: don't know how to do this yet 4 | 5 | # Partial Application 6 | 7 | Given the initial N₀ arguments and a sequence of applications, each of which adds Nᵢ arguments, the number of arguments at a given application j is argsⱼ = N₀ + ∑ᵢ₌₀₋ⱼ Nᵢ. Over j ∈ i..0, select the lowest arity function that takes at least argsⱼ arguments. Absorb j applications into a single call. Any remaining applications are applied to the result of the call. 8 | 9 | # Default Arguments 10 | 11 | A default argument at the end effectively creates a pair of functions. The implicit function is one arity shorter, and calls the longer one with the default argument. 12 | 13 | The caller can't select the default argument, because it may depend on virtual dispatch. 14 | 15 | ```ts 16 | f(a, b, c = 3) 17 | { 18 | ... 19 | } 20 | 21 | let x = f(a, b) // default argument, not partial application 22 | ``` 23 | 24 | # Default Field Values 25 | 26 | If there's no `create` method, implicitly generate one with all the fields as parameters, with each field taking the default value. 27 | 28 | # Named Arguments 29 | 30 | Possible format for a named argument: 31 | `(assign (expr dot ident) (expr...))` 32 | 33 | ```ts 34 | let x = f(.b = 2, .a = 1) 35 | ``` 36 | 37 | # Function Types 38 | 39 | A function may be: 40 | - `lin`: the function may only be called once. 41 | - `in`: the function may mutate its free variables. 42 | - `const`: the function only closes over `const` variables. 43 | - `?`: the function is sendable, but may mutate its free variables. 
44 | 45 | An `iso` function could be `Region[A->B] & lin`. 46 | 47 | ```ts 48 | f: A...->B 49 | 50 | type Fun[A..., B, K] = { apply(Self & K, A...): B } & K 51 | 52 | ``` 53 | -------------------------------------------------------------------------------- /parsers/yaml/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | set( SOURCES 2 | parse.cc 3 | event_writer.cc 4 | writer.cc 5 | reader.cc 6 | to_json.cc 7 | yaml.cc 8 | ) 9 | 10 | if( BUILD_SHARED_LIBS ) 11 | add_library(yaml SHARED ${SOURCES}) 12 | else() 13 | add_library(yaml STATIC ${SOURCES}) 14 | endif() 15 | enable_warnings(yaml) 16 | 17 | add_library(trieste::yaml ALIAS yaml) 18 | 19 | target_link_libraries(yaml 20 | PUBLIC 21 | trieste::trieste 22 | trieste::json 23 | ) 24 | 25 | if(MSVC) 26 | target_compile_options(yaml PUBLIC "/Zc:__cplusplus") 27 | target_compile_definitions(yaml PUBLIC "_SILENCE_CXX17_CODECVT_HEADER_DEPRECATION_WARNING") 28 | endif() 29 | 30 | if(YAML_USE_CXX17) 31 | target_compile_features(yaml PUBLIC cxx_std_17) 32 | target_compile_definitions(yaml PUBLIC YAML_USE_CXX17) 33 | else() 34 | target_compile_features(yaml PUBLIC cxx_std_20) 35 | endif() 36 | 37 | target_include_directories( yaml 38 | PUBLIC 39 | $ 40 | $ 41 | $ 42 | PRIVATE 43 | ${CMAKE_CURRENT_SOURCE_DIR} 44 | $ 45 | ) 46 | 47 | install(TARGETS yaml 48 | EXPORT ${PROJECT_NAME}_Targets 49 | ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR} 50 | LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR} 51 | RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR} 52 | ) 53 | 54 | install(FILES ${CMAKE_CURRENT_SOURCE_DIR}/../include/trieste/yaml.h DESTINATION include/trieste) 55 | 56 | if( TRIESTE_BUILD_PARSER_TOOLS ) 57 | add_executable(yamlc yamlc.cc) 58 | enable_warnings(yamlc) 59 | 60 | target_link_libraries(yamlc 61 | PRIVATE 62 | yaml 63 | ) 64 | 65 | install(TARGETS yamlc RUNTIME DESTINATION parsers) 66 | endif() 67 | -------------------------------------------------------------------------------- 
/parsers/test/yaml_checker.cc: -------------------------------------------------------------------------------- 1 | #include "trieste/logging.h" 2 | 3 | #include 4 | #include 5 | #include 6 | 7 | using namespace trieste; 8 | 9 | int main(int argc, char** argv) 10 | { 11 | CLI::App app; 12 | 13 | app.set_help_all_flag("--help-all", "Expand all help"); 14 | 15 | bool check_against_wf = false; 16 | app.add_flag( 17 | "-w", check_against_wf, "Check pattern against well-formedness rules"); 18 | 19 | std::vector ignored_tokens; 20 | app.add_option( 21 | "-i,--ignore_token", 22 | ignored_tokens, 23 | "Ignore this token when checking patterns against well-formedness rules."); 24 | 25 | try 26 | { 27 | app.parse(argc, argv); 28 | } 29 | catch (const CLI::ParseError& e) 30 | { 31 | return app.exit(e); 32 | } 33 | 34 | logging::Output() << "Checking patterns" << std::endl; 35 | 36 | Checker reader_checker = Checker(yaml::reader()) 37 | .check_against_wf(check_against_wf) 38 | .ignored_tokens(ignored_tokens); 39 | Checker writer_checker = Checker(yaml::writer("checker")) 40 | .check_against_wf(check_against_wf) 41 | .ignored_tokens(ignored_tokens); 42 | Checker event_writer_checker = Checker(yaml::event_writer("checker")) 43 | .check_against_wf(check_against_wf) 44 | .ignored_tokens(ignored_tokens); 45 | Checker to_json_checker = Checker(yaml::to_json()) 46 | .check_against_wf(check_against_wf) 47 | .ignored_tokens(ignored_tokens); 48 | 49 | return reader_checker.check() + writer_checker.check() + 50 | event_writer_checker.check() + to_json_checker.check(); 51 | } 52 | -------------------------------------------------------------------------------- /samples/shrubbery/wf.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "shrubbery.h" 4 | 5 | namespace shrubbery 6 | { 7 | using namespace wf::ops; 8 | 9 | inline const auto wf_term = Paren | Bracket | Brace | 10 | Block | Alt | Op | Atom; 11 | 12 | inline const 
auto wf_grouping_construct = Comma | Semi | Group; 13 | 14 | // clang-format off 15 | 16 | // After parsing, commas and semicolons can appear virtually everywhere 17 | inline const auto wf_parser = 18 | (Top <<= File) 19 | | (File <<= wf_grouping_construct++) 20 | | (Paren <<= wf_grouping_construct++) 21 | | (Bracket <<= wf_grouping_construct++) 22 | | (Brace <<= wf_grouping_construct++) 23 | | (Block <<= wf_grouping_construct++) 24 | | (Alt <<= wf_grouping_construct++) 25 | | (Comma <<= (Semi | Group)++) 26 | | (Semi <<= (Comma | Group)++) 27 | | (Group <<= wf_term++) 28 | ; 29 | 30 | // The first pass ensures that commas and semi-colons are in the right places 31 | inline const auto wf_check_parser = 32 | wf_parser 33 | | (File <<= (Group | Semi)++) 34 | | (Paren <<= (Group | Comma)++) 35 | | (Bracket <<= (Group | Comma)++) 36 | | (Brace <<= (Group | Comma)++) 37 | | (Block <<= (Group | Semi)++) 38 | | (Alt <<= (Group | Semi)++[1]) 39 | | (Comma <<= Group++[1]) 40 | | (Semi <<= Group++) 41 | ; 42 | 43 | // Merge alternatives into one node with a sequence of blocks 44 | inline const auto wf_alternatives = 45 | wf_check_parser 46 | | (Alt <<= Block++[1]) 47 | ; 48 | 49 | // Get rid of commas and semi-colons 50 | inline const auto wf_no_semis_or_commas = 51 | wf_alternatives 52 | | (File <<= Group++) 53 | | (Paren <<= Group++) 54 | | (Bracket <<= Group++) 55 | | (Brace <<= Group++) 56 | | (Block <<= Group++) 57 | ; 58 | 59 | // clang-format on 60 | } 61 | -------------------------------------------------------------------------------- /parsers/test/yaml_fuzzer.cc: -------------------------------------------------------------------------------- 1 | #include "trieste/logging.h" 2 | 3 | #include 4 | #include 5 | #include 6 | 7 | using namespace trieste; 8 | 9 | int main(int argc, char** argv) 10 | { 11 | CLI::App app; 12 | 13 | app.set_help_all_flag("--help-all", "Expand all help"); 14 | 15 | std::string transform; 16 | app.add_option("transform", transform, "Transform 
to test") 17 | ->check(CLI::IsMember({"reader", "writer", "event_writer", "to_json"})) 18 | ->required(true); 19 | 20 | uint32_t seed = std::random_device()(); 21 | app.add_option("-s,--seed", seed, "Random seed"); 22 | 23 | uint32_t count = 100; 24 | app.add_option("-c,--count", count, "Number of seed to test"); 25 | 26 | bool failfast = false; 27 | app.add_flag("-f,--failfast", failfast, "Stop on first failure"); 28 | 29 | std::string log_level; 30 | app 31 | .add_option( 32 | "-l,--log_level", 33 | log_level, 34 | "Set Log Level to one of " 35 | "Trace, Debug, Info, " 36 | "Warning, Output, Error, " 37 | "None") 38 | ->check(logging::set_log_level_from_string); 39 | 40 | try 41 | { 42 | app.parse(argc, argv); 43 | } 44 | catch (const CLI::ParseError& e) 45 | { 46 | return app.exit(e); 47 | } 48 | 49 | logging::Output() << "Testing x" << count << ", seed: " << seed << std::endl; 50 | 51 | Fuzzer fuzzer; 52 | Reader reader = yaml::reader(); 53 | if (transform == "reader") 54 | { 55 | fuzzer = Fuzzer(reader); 56 | } 57 | else if (transform == "writer") 58 | { 59 | fuzzer = Fuzzer(yaml::writer("fuzzer"), reader.parser().generators()); 60 | } 61 | else if (transform == "event_writer") 62 | { 63 | fuzzer = Fuzzer(yaml::event_writer("fuzzer"), reader.parser().generators()); 64 | } 65 | else if (transform == "to_json") 66 | { 67 | fuzzer = Fuzzer(yaml::to_json(), reader.parser().generators()); 68 | } 69 | 70 | return fuzzer.start_seed(seed).seed_count(count).failfast(failfast).test(); 71 | } 72 | -------------------------------------------------------------------------------- /samples/infix/infix.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | 5 | namespace infix 6 | { 7 | using namespace trieste; 8 | 9 | inline const auto Int = TokenDef("infix-int", flag::print); 10 | inline const auto Float = TokenDef("infix-float", flag::print); 11 | inline const auto String = TokenDef("infix-string", 
flag::print); 12 | inline const auto Ident = TokenDef("infix-ident", flag::print); 13 | 14 | inline const auto Calculation = 15 | TokenDef("infix-calculation", flag::symtab | flag::defbeforeuse); 16 | inline const auto Expression = TokenDef("infix-expression"); 17 | inline const auto Assign = 18 | TokenDef("infix-assign", flag::lookup | flag::shadowing); 19 | inline const auto Output = TokenDef("infix-output"); 20 | inline const auto Ref = TokenDef("infix-ref"); 21 | 22 | inline const auto Add = TokenDef("infix-add"); 23 | inline const auto Subtract = TokenDef("infix-subtract"); 24 | inline const auto Multiply = TokenDef("infix-multiply"); 25 | inline const auto Divide = TokenDef("infix-divide"); 26 | inline const auto Literal = TokenDef("infix-literal"); 27 | 28 | inline const auto Id = TokenDef("infix-id"); 29 | inline const auto Op = TokenDef("infix-op"); 30 | inline const auto Lhs = TokenDef("infix-lhs"); 31 | inline const auto Rhs = TokenDef("infix-rhs"); 32 | 33 | // clang-format off 34 | const auto wf = 35 | (Top <<= Calculation) 36 | | (Calculation <<= (Assign | Output)++) 37 | | (Assign <<= Ident * Expression)[Ident] 38 | | (Output <<= String * Expression) 39 | | (Expression <<= (Add | Subtract | Multiply | Divide | Ref | Float | Int)) 40 | | (Ref <<= Ident) 41 | | (Add <<= Expression * Expression) 42 | | (Subtract <<= Expression * Expression) 43 | | (Multiply <<= Expression * Expression) 44 | | (Divide <<= Expression * Expression) 45 | ; 46 | // clang-format on 47 | 48 | Reader reader(); 49 | Writer writer(const std::filesystem::path& path = "infix"); 50 | Writer postfix_writer(const std::filesystem::path& path = "postfix"); 51 | Rewriter calculate(); 52 | } -------------------------------------------------------------------------------- /CMakePresets.json: -------------------------------------------------------------------------------- 1 | { 2 | "version": 3, 3 | "configurePresets": [ 4 | { 5 | "name": "debug-clang", 6 | "displayName": "Debug Build 
using clang", 7 | "description": "Sets up a debug build that uses Clang++", 8 | "generator": "Ninja", 9 | "cacheVariables": { 10 | "CMAKE_BUILD_TYPE": "Debug", 11 | "CMAKE_INSTALL_PREFIX": "${sourceDir}/build/dist", 12 | "CMAKE_CXX_COMPILER": "clang++", 13 | "TRIESTE_BUILD_SAMPLES": "ON", 14 | "TRIESTE_CLEAN_INSTALL": "ON" 15 | } 16 | }, 17 | { 18 | "name": "debug", 19 | "displayName": "Debug Build", 20 | "description": "Sets up a debug build that uses the default compiler and generator", 21 | "cacheVariables": { 22 | "CMAKE_BUILD_TYPE": "Debug", 23 | "CMAKE_INSTALL_PREFIX": "${sourceDir}/build/dist", 24 | "TRIESTE_BUILD_SAMPLES": "ON", 25 | "TRIESTE_CLEAN_INSTALL": "ON" 26 | } 27 | }, 28 | { 29 | "name": "release-clang", 30 | "displayName": "Release Build using clang", 31 | "description": "Sets up a release build that uses Clang++", 32 | "generator": "Ninja", 33 | "cacheVariables": { 34 | "CMAKE_BUILD_TYPE": "Release", 35 | "CMAKE_INSTALL_PREFIX": "${sourceDir}/build/dist", 36 | "CMAKE_CXX_COMPILER": "clang++", 37 | "TRIESTE_BUILD_SAMPLES": "ON", 38 | "TRIESTE_CLEAN_INSTALL": "ON" 39 | } 40 | }, 41 | { 42 | "name": "release", 43 | "displayName": "Release Build", 44 | "description": "Sets up a release build that uses the default compiler and generator", 45 | "cacheVariables": { 46 | "CMAKE_BUILD_TYPE": "Release", 47 | "CMAKE_INSTALL_PREFIX": "${sourceDir}/build/dist", 48 | "TRIESTE_BUILD_SAMPLES": "ON", 49 | "TRIESTE_CLEAN_INSTALL": "ON" 50 | } 51 | } 52 | ] 53 | } -------------------------------------------------------------------------------- /samples/infix/infix.cc: -------------------------------------------------------------------------------- 1 | #include "infix.h" 2 | 3 | #include 4 | 5 | using namespace trieste; 6 | 7 | int main(int argc, char** argv) 8 | { 9 | CLI::App app; 10 | 11 | std::filesystem::path input_path; 12 | app.add_option("input", input_path, "Path to the input file ")->required(); 13 | 14 | std::filesystem::path output_path; 15 | 
app.add_option("output", output_path, "Path to the output file "); 16 | 17 | auto modes = {"calculate", "infix", "postfix"}; 18 | std::string mode = "calculate"; 19 | app.add_option("-m,--mode", mode, "Output mode.") 20 | ->transform(CLI::IsMember(modes)); 21 | 22 | try 23 | { 24 | app.parse(argc, argv); 25 | } 26 | catch (const CLI::ParseError& e) 27 | { 28 | return app.exit(e); 29 | } 30 | 31 | auto reader = infix::reader().file(input_path); 32 | Destination dest = 33 | output_path.empty() ? DestinationDef::console() : DestinationDef::dir("."); 34 | if (output_path.empty()) 35 | { 36 | output_path = mode; 37 | } 38 | 39 | try 40 | { 41 | ProcessResult result; 42 | if (mode == "calculate") 43 | { 44 | result = reader >> infix::calculate(); 45 | if (!result.ok) 46 | { 47 | logging::Error err; 48 | result.print_errors(err); 49 | return 1; 50 | } 51 | 52 | Node calc = result.ast->front(); 53 | for (const Node& output : *calc) 54 | { 55 | auto str = output->front()->location().view(); 56 | auto val = output->back()->location().view(); 57 | std::cout << str << " " << val << std::endl; 58 | } 59 | 60 | return 0; 61 | } 62 | if (mode == "infix") 63 | { 64 | result = reader >> infix::writer(output_path).destination(dest); 65 | } 66 | else if (mode == "postfix") 67 | { 68 | result = reader >> infix::postfix_writer(output_path).destination(dest); 69 | } 70 | 71 | if (!result.ok) 72 | { 73 | logging::Error err; 74 | result.print_errors(err); 75 | return 1; 76 | } 77 | } 78 | catch (const std::exception& e) 79 | { 80 | std::cerr << e.what() << std::endl; 81 | return 1; 82 | } 83 | 84 | return 0; 85 | } 86 | -------------------------------------------------------------------------------- /include/trieste/rewriter.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "passes.h" 4 | 5 | namespace trieste 6 | { 7 | class Rewriter 8 | { 9 | private: 10 | std::string name_; 11 | std::vector passes_; 12 | const 
wf::Wellformed* wf_; 13 | bool debug_enabled_; 14 | bool wf_check_enabled_; 15 | std::filesystem::path debug_path_; 16 | 17 | public: 18 | Rewriter( 19 | const std::string& name, 20 | const std::vector& passes, 21 | const wf::Wellformed& input_wf) 22 | : name_(name), 23 | passes_(passes), 24 | wf_(&input_wf), 25 | debug_enabled_(false), 26 | wf_check_enabled_(true), 27 | debug_path_(".") 28 | {} 29 | 30 | ProcessResult rewrite(Node ast) 31 | { 32 | PassRange pass_range(passes_, *wf_, name_); 33 | 34 | logging::Info summary; 35 | std::filesystem::path debug_path; 36 | if (debug_enabled_) 37 | { 38 | debug_path = debug_path_; 39 | } 40 | 41 | summary << "---------" << std::endl; 42 | auto result = Process(pass_range) 43 | .set_check_well_formed(wf_check_enabled_) 44 | .set_default_pass_complete(summary, name_, debug_path) 45 | .run(ast); 46 | summary << "---------" << std::endl; 47 | return result; 48 | } 49 | 50 | Rewriter& debug_enabled(bool value) 51 | { 52 | debug_enabled_ = value; 53 | return *this; 54 | } 55 | 56 | bool debug_enabled() const 57 | { 58 | return debug_enabled_; 59 | } 60 | 61 | Rewriter& wf_check_enabled(bool value) 62 | { 63 | wf_check_enabled_ = value; 64 | return *this; 65 | } 66 | 67 | bool wf_check_enabled() const 68 | { 69 | return wf_check_enabled_; 70 | } 71 | 72 | Rewriter& debug_path(const std::filesystem::path& path) 73 | { 74 | debug_path_ = path; 75 | return *this; 76 | } 77 | 78 | const std::filesystem::path& debug_path() const 79 | { 80 | return debug_path_; 81 | } 82 | 83 | const wf::Wellformed& input_wf() const 84 | { 85 | return *wf_; 86 | } 87 | 88 | const wf::Wellformed& output_wf() const 89 | { 90 | return passes_.back()->wf(); 91 | } 92 | 93 | const std::vector& passes() const 94 | { 95 | return passes_; 96 | } 97 | }; 98 | } 99 | -------------------------------------------------------------------------------- /include/trieste/xoroshiro.h: -------------------------------------------------------------------------------- 1 | 
#pragma once 2 | 3 | #include 4 | #include 5 | 6 | namespace xoroshiro 7 | { 8 | namespace detail 9 | { 10 | template 11 | class XorOshiro 12 | { 13 | private: 14 | static constexpr unsigned STATE_BITS = 8 * sizeof(STATE); 15 | static constexpr unsigned RESULT_BITS = 8 * sizeof(RESULT); 16 | 17 | static_assert( 18 | STATE_BITS >= RESULT_BITS, 19 | "STATE must have at least as many bits as RESULT"); 20 | 21 | STATE x; 22 | STATE y; 23 | 24 | static inline STATE rotl(STATE x, STATE k) 25 | { 26 | return (x << k) | (x >> (STATE_BITS - k)); 27 | } 28 | 29 | public: 30 | using result_type = RESULT; 31 | 32 | XorOshiro(STATE x_ = 5489, STATE y_ = 0) : x(x_), y(y_) 33 | { 34 | // If both zero, then this does not work 35 | if (x_ == 0 && y_ == 0) 36 | abort(); 37 | 38 | (*this)(); 39 | } 40 | 41 | void set_state(STATE x_, STATE y_ = 0) 42 | { 43 | // If both zero, then this does not work 44 | if (x_ == 0 && y_ == 0) 45 | abort(); 46 | 47 | x = x_; 48 | y = y_; 49 | (*this)(); 50 | } 51 | 52 | constexpr static result_type min() 53 | { 54 | return std::numeric_limits::min(); 55 | } 56 | 57 | constexpr static result_type max() 58 | { 59 | return std::numeric_limits::max(); 60 | } 61 | 62 | result_type operator()() 63 | { 64 | STATE r = x + y; 65 | y ^= x; 66 | x = rotl(x, A) ^ y ^ (y << B); 67 | y = rotl(y, C); 68 | // If both zero, then this does not work 69 | if (x == 0 && y == 0) 70 | abort(); 71 | return r >> (STATE_BITS - RESULT_BITS); 72 | } 73 | }; 74 | } 75 | 76 | using p128r64 = detail::XorOshiro; 77 | using p128r32 = detail::XorOshiro; 78 | using p64r32 = detail::XorOshiro; 79 | using p64r16 = detail::XorOshiro; 80 | using p32r16 = detail::XorOshiro; 81 | using p32r8 = detail::XorOshiro; 82 | using p16r8 = detail::XorOshiro; 83 | } 84 | -------------------------------------------------------------------------------- /samples/infix/parse.cc: -------------------------------------------------------------------------------- 1 | #include "internal.h" 2 | 3 | namespace 
infix 4 | { 5 | const std::initializer_list terminators = {Equals}; 6 | 7 | Parse parser() 8 | { 9 | Parse p(depth::file, wf_parser); 10 | auto indent = std::make_shared>(); 11 | 12 | p("start", // this indicates the 'mode' these rules are associated with 13 | { 14 | // Whitespace between tokens. 15 | "[[:blank:]]+" >> [](auto&) {}, // no-op 16 | 17 | // Equals. 18 | "=" >> [](auto& m) { m.seq(Equals); }, 19 | 20 | // Terminator. 21 | ";[\r\n]*" >> [](auto& m) { m.term(terminators); }, 22 | 23 | // Parens. 24 | R"((\()[[:blank:]]*)" >> 25 | [indent](auto& m) { 26 | // we push a Paren node. Subsequent nodes will be added 27 | // as its children. 28 | m.push(Paren, 1); 29 | }, 30 | 31 | R"(\))" >> 32 | [indent](auto& m) { 33 | // terminate the current group 34 | m.term(terminators); 35 | // pop back up out of the Paren 36 | m.pop(Paren); 37 | }, 38 | 39 | // Float. 40 | R"([[:digit:]]+\.[[:digit:]]+(?:e[+-]?[[:digit:]]+)?\b)" >> 41 | [](auto& m) { m.add(Float); }, 42 | 43 | // String. 44 | R"("[^"]*")" >> [](auto& m) { m.add(String); }, 45 | 46 | // Int. 47 | R"([[:digit:]]+\b)" >> [](auto& m) { m.add(Int); }, 48 | 49 | // Line comment. 50 | "//[^\r\n]*" >> [](auto&) {}, // another no-op 51 | 52 | // Print. 53 | R"(print\b)" >> [](auto& m) { m.add(Print); }, 54 | 55 | // Identifier. 
56 | R"([_[:alpha:]][_[:alnum:]]*\b)" >> [](auto& m) { m.add(Ident); }, 57 | 58 | // Add ('+' is a reserved RegEx character) 59 | R"(\+)" >> [](auto& m) { m.add(Add); }, 60 | 61 | // Subtract 62 | "-" >> [](auto& m) { m.add(Subtract); }, 63 | 64 | // Multiply ('*' is a reserved RegEx character) 65 | R"(\*)" >> [](auto& m) { m.add(Multiply); }, 66 | 67 | // Divide 68 | "/" >> [](auto& m) { m.add(Divide); }, 69 | }); 70 | 71 | p.gen({ 72 | Int >> [](auto& rnd) { return std::to_string(rnd() % 100); }, 73 | Float >> 74 | [](auto& rnd) { 75 | std::uniform_real_distribution<> dist(-10.0, 10.0); 76 | return std::to_string(dist(rnd)); 77 | }, 78 | }); 79 | 80 | return p; 81 | } 82 | } 83 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Project Trieste 2 | 3 | Project Trieste is a term rewriting system designed for rapidly prototyping programming languages. 4 | Trieste provides three C++ DSLs to enable rapid prototyping: 5 | 6 | * Parsing - Enables generation of an untyped abstract syntax tree (AST) from one or many input files. 7 | * Rewriting - Enables the restructuring and analysis of the AST. It can be used to simplify, elaborate to alternative representations, and lower to backend IRs or bytecodes. 8 | * Well-formedness - Trieste provides a DSL for checking that the current AST conforms to a specification. 9 | 10 | Using the well-formedness definitions, Trieste can rapidly harden a language by automatically checking conformance to the specification. 11 | Additionally, Trieste uses the well-formedness definitions to generate test cases that fuzz each rewriting pass and check that it conforms to its specification. 
12 | 13 | ## Getting Started 14 | 15 | If you want to dive right into understanding how to use Trieste, take 16 | a look at the [`infix` tutorial language](./samples/infix/README.md), 17 | which will walk you through implementing a simple calculator language 18 | in Trieste. 19 | 20 | ## Using Trieste 21 | 22 | Trieste is a header-only C++20 library. To get started, you'll need to define your own `trieste::Driver`, and run it from `main`: 23 | 24 | ```c++ 25 | #include 26 | 27 | int main(int argc, char** argv) 28 | { 29 | // Define your driver... 30 | trieste::Driver driver(...); 31 | return driver.run(argc, argv); 32 | } 33 | ``` 34 | 35 | ## Building the Samples 36 | 37 | Here's an example of how to build the `infix` sample and run the self-tests. Other build systems and compilers may work as well. 38 | 39 | ```sh 40 | git clone https://github.com/microsoft/trieste 41 | cd trieste 42 | mkdir build 43 | cd build 44 | cmake -G Ninja .. -DCMAKE_BUILD_TYPE=Debug -DCMAKE_CXX_COMPILER=clang++-14 45 | ninja install 46 | ./dist/infix/infix test 47 | ``` 48 | 49 | ## Using Trieste in Your Project 50 | 51 | You can use Trieste via FetchContent by including the following lines 52 | in your CMake: 53 | 54 | ``` cmake 55 | FetchContent_Declare( 56 | trieste 57 | GIT_REPOSITORY https://github.com/microsoft/Trieste 58 | GIT_TAG a2a7fada4ab5250a4f8d1313b749ad336202841b 59 | ) 60 | 61 | FetchContent_MakeAvailable(trieste) 62 | ``` 63 | 64 | And then adding it as a target link library, e.g. 65 | 66 | ``` cmake 67 | target_link_libraries(verona 68 | Threads::Threads 69 | CLI11::CLI11 70 | trieste::trieste 71 | ) 72 | ``` 73 | 74 | ## Contributing 75 | 76 | If you are interested in contributing to Trieste, please see our [contributing document](CONTRIBUTING.md). 
77 | -------------------------------------------------------------------------------- /SECURITY.md: -------------------------------------------------------------------------------- 1 | 2 | 3 | ## Security 4 | 5 | Microsoft takes the security of our software products and services seriously, which includes all source code repositories managed through our GitHub organizations, which include [Microsoft](https://github.com/microsoft), [Azure](https://github.com/Azure), [DotNet](https://github.com/dotnet), [AspNet](https://github.com/aspnet), [Xamarin](https://github.com/xamarin), and [our GitHub organizations](https://opensource.microsoft.com/). 6 | 7 | If you believe you have found a security vulnerability in any Microsoft-owned repository that meets [Microsoft's definition of a security vulnerability](https://aka.ms/opensource/security/definition), please report it to us as described below. 8 | 9 | ## Reporting Security Issues 10 | 11 | **Please do not report security vulnerabilities through public GitHub issues.** 12 | 13 | Instead, please report them to the Microsoft Security Response Center (MSRC) at [https://msrc.microsoft.com/create-report](https://aka.ms/opensource/security/create-report). 14 | 15 | If you prefer to submit without logging in, send email to [secure@microsoft.com](mailto:secure@microsoft.com). If possible, encrypt your message with our PGP key; please download it from the [Microsoft Security Response Center PGP Key page](https://aka.ms/opensource/security/pgpkey). 16 | 17 | You should receive a response within 24 hours. If for some reason you do not, please follow up via email to ensure we received your original message. Additional information can be found at [microsoft.com/msrc](https://aka.ms/opensource/security/msrc). 18 | 19 | Please include the requested information listed below (as much as you can provide) to help us better understand the nature and scope of the possible issue: 20 | 21 | * Type of issue (e.g. 
buffer overflow, SQL injection, cross-site scripting, etc.) 22 | * Full paths of source file(s) related to the manifestation of the issue 23 | * The location of the affected source code (tag/branch/commit or direct URL) 24 | * Any special configuration required to reproduce the issue 25 | * Step-by-step instructions to reproduce the issue 26 | * Proof-of-concept or exploit code (if possible) 27 | * Impact of the issue, including how an attacker might exploit the issue 28 | 29 | This information will help us triage your report more quickly. 30 | 31 | If you are reporting for a bug bounty, more complete reports can contribute to a higher bounty award. Please visit our [Microsoft Bug Bounty Program](https://aka.ms/opensource/security/bounty) page for more details about our active programs. 32 | 33 | ## Preferred Languages 34 | 35 | We prefer all communications to be in English. 36 | 37 | ## Policy 38 | 39 | Microsoft follows the principle of [Coordinated Vulnerability Disclosure](https://aka.ms/opensource/security/cvd). 
40 | 41 | 42 | -------------------------------------------------------------------------------- /include/trieste/trieste.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "parse.h" 4 | #include "passes.h" 5 | #include "reader.h" 6 | #include "rewriter.h" 7 | #include "writer.h" 8 | 9 | namespace CLI 10 | { 11 | class App; 12 | } 13 | 14 | namespace trieste 15 | { 16 | struct Options 17 | { 18 | virtual void configure(CLI::App&) {} 19 | }; 20 | 21 | inline ProcessResult operator>>(Reader& reader, Rewriter& rewriter) 22 | { 23 | ProcessResult result = reader.read(); 24 | if (result.ok) 25 | { 26 | return rewriter.rewrite(result.ast); 27 | } 28 | 29 | return result; 30 | } 31 | 32 | inline ProcessResult operator>>(Reader& reader, Rewriter&& rewriter) 33 | { 34 | ProcessResult result = reader.read(); 35 | if (result.ok) 36 | { 37 | return rewriter.rewrite(result.ast); 38 | } 39 | 40 | return result; 41 | } 42 | 43 | inline ProcessResult operator>>(Reader& reader, Writer& writer) 44 | { 45 | ProcessResult result = reader.read(); 46 | if (result.ok) 47 | { 48 | return writer.write(result.ast); 49 | } 50 | 51 | return result; 52 | } 53 | 54 | inline ProcessResult operator>>(Reader& reader, Writer&& writer) 55 | { 56 | ProcessResult result = reader.read(); 57 | if (result.ok) 58 | { 59 | return writer.write(result.ast); 60 | } 61 | 62 | return result; 63 | } 64 | 65 | inline ProcessResult 66 | operator>>(const ProcessResult& result, Rewriter& rewriter) 67 | { 68 | if (result.ok) 69 | { 70 | return rewriter.rewrite(result.ast); 71 | } 72 | 73 | return result; 74 | } 75 | 76 | inline ProcessResult 77 | operator>>(const ProcessResult& result, Rewriter&& rewriter) 78 | { 79 | if (result.ok) 80 | { 81 | return rewriter.rewrite(result.ast); 82 | } 83 | 84 | return result; 85 | } 86 | 87 | inline ProcessResult operator>>(const ProcessResult& result, Writer& writer) 88 | { 89 | if (result.ok) 90 | { 91 | return 
writer.write(result.ast); 92 | } 93 | 94 | return result; 95 | } 96 | 97 | inline ProcessResult operator>>(const ProcessResult& result, Writer&& writer) 98 | { 99 | if (result.ok) 100 | { 101 | return writer.write(result.ast); 102 | } 103 | 104 | return result; 105 | } 106 | 107 | inline ProcessResult operator>>(const Node& ast, Rewriter& rewriter) 108 | { 109 | return rewriter.rewrite(ast->clone()); 110 | } 111 | 112 | inline ProcessResult operator>>(const Node& ast, Rewriter&& rewriter) 113 | { 114 | return rewriter.rewrite(ast->clone()); 115 | } 116 | 117 | inline ProcessResult operator>>(const Node& ast, Writer& writer) 118 | { 119 | return writer.write(ast->clone()); 120 | } 121 | 122 | inline ProcessResult operator>>(const Node& ast, Writer&& writer) 123 | { 124 | return writer.write(ast->clone()); 125 | } 126 | 127 | inline Reader operator>>=(Reader&& reader, Rewriter&& rewriter) 128 | { 129 | auto reader_passes = reader.passes(); 130 | for (auto pass : rewriter.passes()) 131 | { 132 | reader_passes.push_back(pass); 133 | } 134 | return Reader( 135 | reader.language_name(), 136 | reader_passes, 137 | reader.parser()); 138 | } 139 | 140 | } // namespace trieste 141 | -------------------------------------------------------------------------------- /parsers/json/parse.cc: -------------------------------------------------------------------------------- 1 | #include "internal.h" 2 | 3 | namespace trieste 4 | { 5 | namespace json 6 | { 7 | Parse parser() 8 | { 9 | Parse p(depth::file, wf_parse); 10 | std::shared_ptr> stack = 11 | std::make_shared>(); 12 | 13 | p("start", 14 | {"[ \r\n\t]+" >> [](auto&) { return; }, 15 | 16 | ":" >> [](auto& m) { m.add(Colon); }, 17 | 18 | "," >> [](auto& m) { m.add(Comma); }, 19 | 20 | "{" >> 21 | [stack](auto& m) { 22 | m.push(Object); 23 | m.push(Group); 24 | stack->push_back('{'); 25 | }, 26 | 27 | "}" >> 28 | [stack](auto& m) { 29 | if (stack->empty() || stack->back() != '{') 30 | { 31 | m.error("Mismatched braces"); 32 | 
return; 33 | } 34 | stack->pop_back(); 35 | m.term(); 36 | m.pop(Object); 37 | }, 38 | 39 | R"(\[)" >> 40 | [stack](auto& m) { 41 | m.push(Array); 42 | m.push(Group); 43 | stack->push_back('['); 44 | }, 45 | 46 | "]" >> 47 | [stack](auto& m) { 48 | if (stack->empty() || stack->back() != '[') 49 | { 50 | m.error("Mismatched brackets"); 51 | return; 52 | } 53 | stack->pop_back(); 54 | m.term(); 55 | m.pop(Array); 56 | }, 57 | 58 | "true" >> [](auto& m) { m.add(True); }, 59 | 60 | "false" >> [](auto& m) { m.add(False); }, 61 | 62 | "null" >> [](auto& m) { m.add(Null); }, 63 | 64 | // RE for a JSON number: 65 | // -? : optional minus sign 66 | // (?:0|[1-9][0-9]*) : either a single 0, or 1-9 followed by any digits 67 | // (?:\.[0-9]+)? : optionally, a single period followed by one or more 68 | // digits (fraction) 69 | // (?:[eE][-+]?[0-9]+)? : optionally, an exponent. This can start with 70 | // e or E, 71 | // have +/-/nothing, and then 1 or more digits 72 | R"(-?(?:0|[1-9][0-9]*)(?:\.[0-9]+)?(?:[eE][-+]?[0-9]+)?)" >> 73 | [](auto& m) { m.add(Number); }, 74 | 75 | // RE for a JSON string: 76 | // " : a double quote followed by either: 77 | // 1. [^"\\\x00-\x1F]+ : one or more characters that are not a double 78 | // quote, backslash, 79 | // or a control character from 00-1f 80 | // 2. \\["\\\/bfnrt] : a backslash followed by one of the characters ", 81 | // \, /, b, f, n, r, or t 82 | // 3. \\u[[:xdigit:]]{4} : a backslash followed by u, followed by 4 hex 83 | // digits zero or more times and then " : a double quote 84 | R"("(?:[^"\\\x00-\x1F]+|\\["\\\/bfnrt]|\\u[[:xdigit:]]{4})*")" >> 85 | [](auto& m) { m.add(String); }, 86 | 87 | "." 
>> [](auto& m) { m.error("Invalid character"); }}); 88 | 89 | p.done([stack](auto& m) { 90 | if (!stack->empty()) 91 | { 92 | m.error("Mismatched braces or brackets"); 93 | } 94 | stack->clear(); 95 | }); 96 | 97 | return p; 98 | } 99 | } 100 | } 101 | -------------------------------------------------------------------------------- /parsers/test/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | ## JSON 2 | 3 | add_executable(json_fuzzer json_fuzzer.cc) 4 | enable_warnings(json_fuzzer) 5 | 6 | target_link_libraries(json_fuzzer 7 | PRIVATE 8 | trieste::json) 9 | 10 | add_executable(json_pattern_checker json_checker.cc) 11 | enable_warnings(json_pattern_checker) 12 | 13 | target_link_libraries(json_pattern_checker 14 | PRIVATE 15 | trieste::json) 16 | 17 | add_executable(json_test json_test.cc) 18 | enable_warnings(json_test) 19 | 20 | target_link_libraries(json_test 21 | PRIVATE 22 | trieste::json) 23 | 24 | add_executable(json_patch_test json_patch_test.cc) 25 | enable_warnings(json_patch_test) 26 | 27 | target_link_libraries(json_patch_test 28 | PRIVATE 29 | trieste::json) 30 | 31 | set(JSON_TEST_SUITE_ROOT ${CMAKE_CURRENT_BINARY_DIR}/../JSONTestSuite/test_parsing) 32 | set(JSON_PATCH_TEST_SUITE_ROOT ${CMAKE_CURRENT_BINARY_DIR}/../json-patch-tests/) 33 | 34 | add_test(NAME json_fuzz_reader COMMAND json_fuzzer reader -f WORKING_DIRECTORY $) 35 | add_test(NAME json_fuzz_writer COMMAND json_fuzzer writer -f WORKING_DIRECTORY $) 36 | add_test(NAME json_check_patterns COMMAND json_pattern_checker -w -i json-value WORKING_DIRECTORY $) 37 | add_test(NAME json_test COMMAND json_test -wf ${JSON_TEST_SUITE_ROOT} WORKING_DIRECTORY $) 38 | add_test(NAME json_test_manual COMMAND json_test -wf -n manual WORKING_DIRECTORY $) 39 | add_test(NAME json_patch_tests COMMAND json_patch_test -f ${JSON_PATCH_TEST_SUITE_ROOT}/tests.json WORKING_DIRECTORY $) 40 | add_test(NAME json_patch_spec_tests COMMAND json_patch_test -f 
${JSON_PATCH_TEST_SUITE_ROOT}/spec_tests.json WORKING_DIRECTORY $) 41 | 42 | install(TARGETS json_fuzzer json_test json_patch_test RUNTIME DESTINATION parsers) 43 | 44 | ## YAML 45 | 46 | add_executable(yaml_fuzzer yaml_fuzzer.cc) 47 | enable_warnings(yaml_fuzzer) 48 | 49 | target_link_libraries(yaml_fuzzer 50 | PRIVATE 51 | trieste::yaml) 52 | 53 | add_executable(yaml_pattern_checker yaml_checker.cc) 54 | enable_warnings(yaml_pattern_checker) 55 | 56 | target_link_libraries(yaml_pattern_checker 57 | PRIVATE 58 | trieste::yaml) 59 | 60 | add_executable(yaml_test yaml_test.cc) 61 | enable_warnings(yaml_test) 62 | 63 | target_link_libraries(yaml_test 64 | PRIVATE 65 | trieste::yaml) 66 | 67 | set(YAML_TEST_SUITE_ROOT ${CMAKE_CURRENT_BINARY_DIR}/../yaml-test-suite) 68 | 69 | add_test(NAME yaml_fuzz_reader COMMAND yaml_fuzzer reader -f WORKING_DIRECTORY $) 70 | add_test(NAME yaml_fuzz_writer COMMAND yaml_fuzzer writer -f WORKING_DIRECTORY $) 71 | add_test(NAME yaml_fuzz_event_writer COMMAND yaml_fuzzer event_writer -f WORKING_DIRECTORY $) 72 | add_test(NAME yaml_fuzz_to_json COMMAND yaml_fuzzer to_json -f WORKING_DIRECTORY $) 73 | add_test(NAME yaml_check_patterns COMMAND yaml_pattern_checker WORKING_DIRECTORY $) 74 | add_test(NAME yaml_test COMMAND yaml_test -wf ${YAML_TEST_SUITE_ROOT} WORKING_DIRECTORY $) 75 | add_test(NAME yaml_test_crlf COMMAND yaml_test --crlf -wf ${YAML_TEST_SUITE_ROOT} WORKING_DIRECTORY $) 76 | 77 | install(TARGETS yaml_fuzzer yaml_test RUNTIME DESTINATION parsers) 78 | -------------------------------------------------------------------------------- /parsers/yaml/internal.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "trieste/yaml.h" 4 | 5 | #include 6 | #include 7 | #include 8 | 9 | namespace trieste 10 | { 11 | namespace yaml 12 | { 13 | Parse parser(); 14 | 15 | enum class Chomp 16 | { 17 | Clip, 18 | Strip, 19 | Keep, 20 | }; 21 | 22 | std::string 23 | escape_chars(const 
std::string_view& str, const std::set& to_escape); 24 | std::string unescape_url_chars(const std::string_view& input); 25 | std::string replace_all( 26 | const std::string_view& v, 27 | const std::string_view& find, 28 | const std::string_view& replace); 29 | 30 | inline const auto Whitespace = TokenDef("yaml-whitespace", flag::print); 31 | inline const auto Hyphen = TokenDef("yaml-hyphen"); 32 | inline const auto Colon = TokenDef("yaml-colon"); 33 | inline const auto Comma = TokenDef("yaml-comma"); 34 | inline const auto NewLine = TokenDef("yaml-newline"); 35 | inline const auto Comment = TokenDef("yaml-comment", flag::print); 36 | inline const auto VerbatimTag = TokenDef("yaml-verbatimtag", flag::print); 37 | inline const auto ShorthandTag = TokenDef("yaml-shorthandtag", flag::print); 38 | inline const auto Tag = TokenDef("yaml-tag"); 39 | inline const auto IndentIndicator = 40 | TokenDef("yaml-indentation-indicator", flag::print); 41 | inline const auto FlowSequenceStart = 42 | TokenDef("yaml-flowseqstart", flag::print); 43 | inline const auto FlowSequenceEnd = 44 | TokenDef("yaml-flowseqend", flag::print); 45 | inline const auto FlowMappingStart = 46 | TokenDef("yaml-flowmapstart", flag::print); 47 | inline const auto FlowMappingEnd = TokenDef("yaml-flowmapend", flag::print); 48 | inline const auto MaybeDirective = 49 | TokenDef("yaml-maybedirective", flag::print); 50 | inline const auto Block = TokenDef("yaml-block"); 51 | inline auto WhitespaceLine = TokenDef("yaml-whitespace-line", flag::print); 52 | 53 | inline const auto wf_parse_tokens = Stream | Document | Hyphen | NewLine | 54 | Whitespace | Value | Int | Float | Hex | True | False | Null | Colon | 55 | TagDirective | Anchor | Alias | SingleQuote | DoubleQuote | 56 | VersionDirective | UnknownDirective | DocumentStart | DocumentEnd | Tag | 57 | TagPrefix | ShorthandTag | VerbatimTag | TagPrefix | TagHandle | Literal | 58 | Folded | IndentIndicator | ChompIndicator | Key | FlowMapping | 59 | 
FlowMappingStart | FlowMappingEnd | FlowSequence | FlowSequenceStart | 60 | FlowSequenceEnd | Comma | Comment | MaybeDirective; 61 | 62 | // clang-format off 63 | inline const auto wf_parse = 64 | (Top <<= File) 65 | | (File <<= Group) 66 | | (Stream <<= Group++) 67 | | (Document <<= Group) 68 | | (Tag <<= Group) 69 | | (FlowMapping <<= Group++) 70 | | (FlowSequence <<= Group++) 71 | | (TagDirective <<= Group) 72 | | (Group <<= wf_parse_tokens++[1]) 73 | ; 74 | // clang-format on 75 | 76 | inline auto err(Node node, const std::string& msg) 77 | { 78 | return Error << (ErrorMsg ^ msg) << (ErrorAst << node->clone()); 79 | } 80 | 81 | inline Node err(const NodeRange& r, const std::string& msg) 82 | { 83 | return Error << (ErrorMsg ^ msg) << (ErrorAst << r); 84 | } 85 | 86 | inline auto err(const std::string& msg) 87 | { 88 | return Error << (ErrorMsg ^ msg); 89 | } 90 | } 91 | } 92 | -------------------------------------------------------------------------------- /include/trieste/defaultmap.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | 6 | #include "token.h" 7 | 8 | namespace trieste::detail 9 | { 10 | /** 11 | * Maps tokens to values, with a modifiable default value. 12 | * 13 | * This is used by matching system. If a rule applies generally, it is added 14 | * to all tokens, and if it applies to a specific token, it is added to that 15 | * token only. 16 | */ 17 | template 18 | class DefaultMap 19 | { 20 | // The default value for this map. This is returned when a specific value 21 | // has has not been set for the looked up token. 22 | T def{}; 23 | 24 | // The map of specific values for tokens. 25 | std::array map; 26 | 27 | // If this is true, then the map is empty, and the default value has not 28 | // been modified. 
29 | bool empty_{true}; 30 | 31 | bool is_index_default(size_t index) const 32 | { 33 | return map[index] == &def; 34 | } 35 | 36 | size_t token_index(const Token& t) const 37 | { 38 | return t.default_map_hash(); 39 | } 40 | 41 | public: 42 | DefaultMap() 43 | { 44 | map.fill(&def); 45 | } 46 | 47 | DefaultMap(const DefaultMap& dm) : def(dm.def), empty_(dm.empty_) 48 | { 49 | for (size_t index = 0; index < map.size(); index++) 50 | { 51 | if (dm.is_index_default(index)) 52 | map[index] = &def; 53 | else 54 | map[index] = new T(*dm.map[index]); 55 | } 56 | } 57 | 58 | /** 59 | * Modify all values in the map, including the default value. 60 | * 61 | * This is used for adding rules that do not specify an explicit start 62 | * token, or an explicit parent, so they need to apply generally. 63 | */ 64 | template 65 | void modify_all(F f) 66 | { 67 | empty_ = false; 68 | for (size_t i = 0; i < map.size(); i++) 69 | if (!is_index_default(i)) 70 | f(*map[i]); 71 | f(def); 72 | } 73 | 74 | /** 75 | * Get a mutable reference to the value for a token. If this does not have 76 | * a current value, first fill it with the current default value. 77 | */ 78 | T& modify(const Token& t) 79 | { 80 | auto i = token_index(t); 81 | empty_ = false; 82 | // Use existing default set of rules. 83 | if (is_index_default(i)) 84 | map[i] = new T(def); 85 | return *map[i]; 86 | } 87 | 88 | /** 89 | * Get the value for a token. If this token has no specific value, return 90 | * the default value. 91 | */ 92 | T& get(const Token& t) 93 | { 94 | return *map[token_index(t)]; 95 | } 96 | 97 | /** 98 | * Clear all the values in the map, and the default value. 
99 | */ 100 | void clear() 101 | { 102 | empty_ = true; 103 | for (size_t i = 0; i < map.size(); i++) 104 | { 105 | if (!is_index_default(i)) 106 | { 107 | delete map[i]; 108 | map[i] = &def; 109 | } 110 | } 111 | def.clear(); 112 | } 113 | 114 | ~DefaultMap() 115 | { 116 | clear(); 117 | } 118 | 119 | /** 120 | * Returns true if modify has not been called since the last clear. 121 | */ 122 | bool empty() const 123 | { 124 | return empty_; 125 | } 126 | }; 127 | } 128 | -------------------------------------------------------------------------------- /.github/workflows/buildtest.yml: -------------------------------------------------------------------------------- 1 | name: CMake 2 | 3 | on: 4 | push: 5 | branches: [ "main" ] 6 | pull_request: 7 | branches: [ "main" ] 8 | 9 | env: 10 | global-cmake-flags: -DTRIESTE_ENABLE_TESTING=1 11 | 12 | jobs: 13 | build-test: 14 | strategy: 15 | matrix: 16 | platform: [ "ubuntu-22.04", "macos-latest", "windows-latest" ] 17 | build-type: [ "Release", "Debug" ] 18 | # Note: cmake-options is missing here on purpose to let it be overridden by includes 19 | standard: [ "", "-DTRIESTE_USE_CXX17=ON" ] 20 | compiler: [ "", "clang" ] 21 | variant: [""] 22 | 23 | include: 24 | # ensures ubuntu-22.04 clang uses Ninja (modifies the matrix entry) 25 | - platform: "ubuntu-22.04" 26 | compiler: "clang" 27 | cmake-options: "-DCMAKE_CXX_COMPILER=clang++-15 -DCMAKE_C_COMPILER=clang-15 -DTRIESTE_BUILD_PARSER_TESTS=1" 28 | generator: "-G Ninja" 29 | dependencies: "sudo apt install ninja-build clang-15" 30 | 31 | - platform: "windows-latest" 32 | variant: "build-parser-tests" 33 | build-type: "Release" 34 | cmake-options: "-DTRIESTE_BUILD_PARSER_TESTS=1" 35 | 36 | - platform: "macos-latest" 37 | variant: "build-parser-tests" 38 | build-type: "Release" 39 | cmake-options: "-DTRIESTE_BUILD_PARSER_TESTS=1" 40 | 41 | - platform: "ubuntu-22.04" 42 | variant: "asan" 43 | build-type: "Release" 44 | cmake-options: "-DCMAKE_CXX_COMPILER=clang++-15 
-DCMAKE_C_COMPILER=clang-15 -DTRIESTE_SANITIZE=address -DTRIESTE_BUILD_PARSER_TESTS=1" 45 | dependencies: "sudo apt install ninja-build clang-15" 46 | 47 | - platform: "macos-latest" 48 | variant: "asan" 49 | build-type: "Release" 50 | cmake-options: "-DTRIESTE_SANITIZE=address -DTRIESTE_BUILD_PARSER_TESTS=1" 51 | 52 | - platform: "ubuntu-22.04" 53 | variant: "ubsan" 54 | build-type: "Release" 55 | cmake-options: "-DCMAKE_CXX_COMPILER=clang++-15 -DCMAKE_C_COMPILER=clang-15 -DTRIESTE_SANITIZE=undefined -DTRIESTE_BUILD_PARSER_TESTS=1" 56 | dependencies: "sudo apt install ninja-build clang-15" 57 | 58 | exclude: 59 | # Mac is already using clang. 60 | - platform: "macos-latest" 61 | compiler: "clang" 62 | # Windows is only using MSVC. 63 | - platform: "windows-latest" 64 | compiler: "clang" 65 | 66 | # Don't abort runners if a single one fails 67 | fail-fast: false 68 | 69 | runs-on: ${{matrix.platform}} 70 | 71 | name: ${{matrix.platform}} ${{matrix.build-type}} ${{matrix.standard}} ${{matrix.compiler}} ${{matrix.variant}} 72 | 73 | steps: 74 | - uses: actions/checkout@v3 75 | 76 | - name: Install build dependencies 77 | run: ${{matrix.dependencies}} 78 | 79 | - name: Configure CMake 80 | run: cmake -B ${{github.workspace}}/build ${{env.global-cmake-flags}} -DCMAKE_BUILD_TYPE=${{matrix.build-type}} ${{matrix.generator}} ${{matrix.standard}} ${{matrix.cmake-options}} 81 | 82 | - name: Build 83 | run: cmake --build ${{github.workspace}}/build --config ${{matrix.build-type}} 84 | 85 | - name: Test 86 | working-directory: ${{github.workspace}}/build 87 | run: ctest -C ${{matrix.build-type}} --output-on-failure --timeout 400 --interactive-debug-mode 0 88 | 89 | all-checks: 90 | # Join of all build-test jobs 91 | needs: [build-test] 92 | runs-on: ubuntu-22.04 93 | steps: 94 | - name: Check all jobs 95 | run: echo "All jobs passed" 96 | -------------------------------------------------------------------------------- /.clang-format: 
-------------------------------------------------------------------------------- 1 | --- 2 | Language: Cpp 3 | # BasedOnStyle: LLVM 4 | AccessModifierOffset: -2 5 | AlignAfterOpenBracket: AlwaysBreak 6 | AlignConsecutiveAssignments: false 7 | AlignConsecutiveDeclarations: false 8 | AlignEscapedNewlines: DontAlign 9 | AlignOperands: false 10 | AlignTrailingComments: false 11 | AllowAllParametersOfDeclarationOnNextLine: true 12 | AllowShortBlocksOnASingleLine: false 13 | AllowShortCaseLabelsOnASingleLine: false 14 | AllowShortFunctionsOnASingleLine: Empty 15 | AllowShortIfStatementsOnASingleLine: false 16 | AllowShortLoopsOnASingleLine: false 17 | AlwaysBreakAfterDefinitionReturnType: None 18 | AlwaysBreakAfterReturnType: None 19 | AlwaysBreakBeforeMultilineStrings: true 20 | AlwaysBreakTemplateDeclarations: true 21 | BinPackArguments: false 22 | BinPackParameters: false 23 | BraceWrapping: 24 | AfterCaseLabel: true 25 | AfterClass: true 26 | AfterControlStatement: true 27 | AfterEnum: true 28 | AfterFunction: true 29 | AfterNamespace: true 30 | AfterObjCDeclaration: true 31 | AfterStruct: true 32 | AfterUnion: true 33 | AfterExternBlock: true 34 | BeforeCatch: true 35 | BeforeElse: true 36 | IndentBraces: false 37 | SplitEmptyFunction: false 38 | SplitEmptyRecord: false 39 | SplitEmptyNamespace: false 40 | BreakBeforeBinaryOperators: None 41 | BreakBeforeBraces: Custom 42 | BreakBeforeInheritanceComma: false 43 | BreakBeforeTernaryOperators: false 44 | BreakConstructorInitializersBeforeComma: false 45 | BreakConstructorInitializers: BeforeColon 46 | BreakAfterJavaFieldAnnotations: false 47 | BreakStringLiterals: true 48 | ColumnLimit: 80 49 | CommentPragmas: '^ IWYU pragma:' 50 | CompactNamespaces: false 51 | ConstructorInitializerAllOnOneLineOrOnePerLine: true 52 | ConstructorInitializerIndentWidth: 0 53 | ContinuationIndentWidth: 2 54 | Cpp11BracedListStyle: true 55 | DerivePointerAlignment: false 56 | DisableFormat: false 57 | ExperimentalAutoDetectBinPacking: 
false 58 | FixNamespaceComments: false 59 | ForEachMacros: 60 | - Q_FOREACH 61 | - BOOST_FOREACH 62 | IncludeBlocks: Regroup 63 | IncludeCategories: 64 | - Regex: '^"(llvm|llvm-c|clang|clang-c)/' 65 | Priority: 2 66 | - Regex: '^(<|"(gtest|gmock|isl|json)/)' 67 | Priority: 3 68 | - Regex: '.*' 69 | Priority: 1 70 | IncludeIsMainRegex: '(Test)?$' 71 | IndentCaseLabels: true 72 | IndentPPDirectives: AfterHash 73 | IndentWidth: 2 74 | IndentWrappedFunctionNames: false 75 | InsertNewlineAtEOF: true 76 | JavaScriptQuotes: Leave 77 | JavaScriptWrapImports: true 78 | KeepEmptyLinesAtTheStartOfBlocks: false 79 | MacroBlockBegin: '' 80 | MacroBlockEnd: '' 81 | MaxEmptyLinesToKeep: 1 82 | NamespaceIndentation: All 83 | ObjCBlockIndentWidth: 2 84 | ObjCSpaceAfterProperty: false 85 | ObjCSpaceBeforeProtocolList: true 86 | PenaltyBreakAssignment: 2 87 | PenaltyBreakBeforeFirstCallParameter: 19 88 | PenaltyBreakComment: 300 89 | PenaltyBreakFirstLessLess: 120 90 | PenaltyBreakString: 1000 91 | PenaltyExcessCharacter: 1000000 92 | PenaltyReturnTypeOnItsOwnLine: 60 93 | PointerAlignment: Left 94 | ReflowComments: true 95 | SortIncludes: true 96 | SortUsingDeclarations: true 97 | SpaceAfterCStyleCast: false 98 | SpaceAfterTemplateKeyword: false 99 | SpaceBeforeAssignmentOperators: true 100 | SpaceBeforeParens: ControlStatements 101 | SpaceInEmptyParentheses: false 102 | SpacesBeforeTrailingComments: 1 103 | SpacesInAngles: false 104 | SpacesInContainerLiterals: false 105 | SpacesInCStyleCastParentheses: false 106 | SpacesInParentheses: false 107 | SpacesInSquareBrackets: false 108 | Standard: Cpp11 109 | TabWidth: 2 110 | UseTab: Never 111 | ... 112 | 113 | -------------------------------------------------------------------------------- /parsers/README.md: -------------------------------------------------------------------------------- 1 | # Parsers 2 | 3 | In addition to providing the Trieste library, we have also provided reference parsers for JSON and YAML as practical examples. 
You (the language implementer) can use these language implementations as templates and guides for how to implement your own language toolchain using Trieste. In this document we will highlight the key features we believe a language implementation should have. You can learn more about implementing helper classes in the [`infix` tutorial](../samples/infix/README.md). 4 | 5 | ## WF Definition 6 | 7 | Each of the language implementations exposes various things in their respective namespaces. For example, the JSON implementation exposes this WF definition at `trieste::json::wf`: 8 | 9 | ```c++ 10 | inline const auto Object = TokenDef("json-object"); 11 | inline const auto Array = TokenDef("json-array"); 12 | inline const auto String = TokenDef("json-string", flag::print); 13 | inline const auto Number = TokenDef("json-number", flag::print); 14 | inline const auto True = TokenDef("json-true"); 15 | inline const auto False = TokenDef("json-false"); 16 | inline const auto Null = TokenDef("json-null"); 17 | inline const auto Member = TokenDef("json-member"); 18 | 19 | inline const auto wf_value_tokens = 20 | Object | Array | String | Number | True | False | Null; 21 | 22 | // clang-format off 23 | inline const auto wf = 24 | (Top <<= wf_value_tokens++[1]) 25 | | (Object <<= Member++) 26 | | (Member <<= String * (Value >>= wf_value_tokens)) 27 | | (Array <<= wf_value_tokens++) 28 | ; 29 | // clang-format on 30 | ``` 31 | 32 | This is the well-formedness definition which corresponds to the AST of a successfully parsed JSON document. There are two best practices here to keep in mind. The first is the C++ namespace. As a rule, it is a good idea for your language implementation to provide its own namespace, in which the tokens, WF definitions, and helper constructs will be exposed to your users. 
The second is the token prefixing, *i.e.*: 33 | 34 | ```c++ 35 | inline const auto Object = TokenDef("json-object"); 36 | ``` 37 | 38 | Tokens in Trieste must have unique names, and so using a prefix like `json-` ensures that the tokens used do not collide with tokens defined by other language implementations which may be in use in the project. 39 | 40 | ## Reader 41 | 42 | Every language implementation should expose a `Reader` helper in its namespace. For example, here is the one for JSON: 43 | 44 | ```c++ 45 | Reader reader(bool allow_multiple = false); 46 | ``` 47 | 48 | Any parsing settings should be passed to this method. For example, here we see a flag saying whether the parser allows there to be multiple JSON values at the top level of a document (a very common variant of the language). As a best practice, these should always be provided with a default value (if possible) so that your users can create a `Reader` object as simply as possible. 49 | 50 | ## Writer 51 | 52 | Whether your language implementation exposes a `Writer` depends entirely on the language. In the case of data formats like JSON and YAML, it makes a lot of sense and as such we have included them in our implementations. Naming them like `json::writer()` or `yaml::event_writer()`, that is with `writer` in the name and a prefix indicating if it is not what the "default" writer would be (for example, `yaml::event_writer()` for YAML event files) is considered best practice. 53 | 54 | ## Rewriter 55 | 56 | One of the unique affordances given by Trieste as a library is that the workflow to translate from one language to another is the same as everything else: multi-pass AST rewriting. To provide an example of this we expose `yaml::to_json()`, which returns a `Rewriter` that converts a YAML AST to a JSON AST. Best practice for exposing these helpers is as shown here, that is to say `<source-language>::to_<target-language>()`. 
57 | -------------------------------------------------------------------------------- /parsers/yaml/yamlc.cc: -------------------------------------------------------------------------------- 1 | #include "trieste/json.h" 2 | #include "trieste/trieste.h" 3 | #include "trieste/yaml.h" 4 | 5 | #include 6 | 7 | using namespace trieste; 8 | 9 | int main(int argc, char** argv) 10 | { 11 | CLI::App app; 12 | 13 | std::filesystem::path input_path; 14 | app.add_option("input", input_path, "Path to the input file ")->required(); 15 | 16 | std::filesystem::path output_path; 17 | app.add_option("output", output_path, "Path to the output file"); 18 | 19 | std::filesystem::path debug_path; 20 | app.add_option( 21 | "-a,--ast", 22 | debug_path, 23 | "Output the AST (debugging for the reader/rewriter/writer workflows)"); 24 | 25 | bool wf_checks{false}; 26 | app.add_flag("-w,--wf", wf_checks, "Enable well-formedness checks (slow)"); 27 | 28 | bool prettyprint{false}; 29 | app.add_flag( 30 | "--prettyprint", prettyprint, "Pretty print the output (for JSON)"); 31 | 32 | bool sort_keys{false}; 33 | app.add_flag( 34 | "--sort-keys", sort_keys, "Sort object keys in the output (for JSON)"); 35 | 36 | auto modes = {"event", "json", "yaml"}; 37 | std::string mode; 38 | app.add_option("-m,--mode", mode, "Output mode.") 39 | ->transform(CLI::IsMember(modes)); 40 | 41 | try 42 | { 43 | app.parse(argc, argv); 44 | } 45 | catch (const CLI::ParseError& e) 46 | { 47 | return app.exit(e); 48 | } 49 | 50 | if (mode.empty()) 51 | { 52 | if (output_path.extension() == ".event") 53 | { 54 | mode = "event"; 55 | } 56 | else if (output_path.extension() == ".json") 57 | { 58 | mode = "json"; 59 | } 60 | else if (output_path.extension() == ".yaml") 61 | { 62 | mode = "yaml"; 63 | } 64 | else 65 | { 66 | std::cerr << "Output mode not specified and could not be inferred from " 67 | "the output file extension." 
68 | << std::endl; 69 | return 1; 70 | } 71 | } 72 | 73 | trieste::Reader reader = yaml::reader() 74 | .file(input_path) 75 | .debug_enabled(!debug_path.empty()) 76 | .debug_path(debug_path / "inyaml") 77 | .wf_check_enabled(wf_checks); 78 | Destination dest = output_path.empty() ? 79 | DestinationDef::console() : 80 | DestinationDef::dir(output_path.parent_path()); 81 | if (output_path.empty()) 82 | { 83 | output_path = mode; 84 | } 85 | 86 | ProcessResult result; 87 | if (mode == "event") 88 | { 89 | result = reader >> yaml::event_writer(output_path) 90 | .destination(dest) 91 | .debug_enabled(!debug_path.empty()) 92 | .debug_path(debug_path / "event") 93 | .wf_check_enabled(wf_checks); 94 | ; 95 | } 96 | else if (mode == "json") 97 | { 98 | result = reader >> yaml::to_json() 99 | .debug_enabled(!debug_path.empty()) 100 | .debug_path(debug_path / "json") 101 | .wf_check_enabled(wf_checks) >> 102 | json::writer(output_path, prettyprint, sort_keys) 103 | .destination(dest) 104 | .debug_enabled(!debug_path.empty()) 105 | .debug_path(debug_path) 106 | .wf_check_enabled(wf_checks); 107 | ; 108 | } 109 | else 110 | { 111 | result = reader >> yaml::writer(output_path.filename().string()) 112 | .destination(dest) 113 | .debug_enabled(!debug_path.empty()) 114 | .debug_path(debug_path / "outyaml") 115 | .wf_check_enabled(wf_checks); 116 | ; 117 | } 118 | 119 | if (!result.ok) 120 | { 121 | logging::Error err; 122 | result.print_errors(err); 123 | return 1; 124 | } 125 | 126 | return 0; 127 | } 128 | -------------------------------------------------------------------------------- /notes/von.md: -------------------------------------------------------------------------------- 1 | # Verona Object Notation 2 | 3 | Design goals, in order: 4 | - Efficient zero-copy random access. 5 | - Typed. 6 | - Arbitrary graph. 7 | - Efficient to encode. 8 | - Possible to read some values with incomplete data (streaming friendly). 9 | - Bijective human readable format. 
10 | 11 | Non-goals: 12 | - Updateable or versioning. A consumer may understand a different type table (or type table digest) to indicate a new version, but there is no expectation that an unaware consumer can understand a new version. 13 | - Compression. This can be applied during storage or transport, but it isn't part of the format. 14 | 15 | ## Types 16 | 17 | A type for a file is a 64-bit count of the number of types, followed by an entry for each type. This is followed by a 64-bit index into the type table for the root object type. 18 | 19 | A `name` in a type is a 64-bit length followed by a number of bytes equal to the length. A `name` is a valid UTF-8 string. These are used only for human readable accessors. 20 | 21 | A type is described as follows: 22 | 23 | 00000000: void 24 | 00000001: bit 25 | 00000010: byte 26 | 00000011: fixed size vector, followed by a type and 64-bit length 27 | 00000100: variable size vector, followed by a type 28 | 00000101: variant, followed by a 64-bit option count and a number of types equal to the option count 29 | 00000110: record, followed by a 64-bit field count and a number of pairs of `type, name` equal to the field count, where the `name` must not be repeated 30 | 00000111: pointer, followed by a 64-bit index into the type table 31 | 00001000: type index, followed by a 64-bit index into the type table 32 | 001xxxxx: floating point, with a 5 bit size class 33 | 010xxxxx: unsigned integer, with a 5 bit size class 34 | 011xxxxx: integer, with a 5 bit size class 35 | 36 | ## File Format 37 | 38 | The file begins with a 1-bit indicator whether it is self-describing or if the consumer is expected to understand the type. 39 | 40 | Next is a 63-bit file length. All offsets must be less than this length. Offsets are relative to the start of the file, and are encoded differently depending on the size of the file. 41 | 42 | If the file length is less than 2^16, then all offsets are 16-bit. 
If the file length is less than 2^32, then all offsets are 32-bit. Otherwise, all offsets are 64-bit. This unsigned integer size will be called a size_t in the rest of this document. 43 | 44 | If the file is self-describing, next is the type table. Otherwise, next is a SHA-256 digest of the type table. The consumer is expected to be able to map this digest to the type table. 45 | 46 | The root object follows. This must be a valid object of the root type. 47 | 48 | ## Values 49 | 50 | Void is zero length. 51 | 52 | A bit is a single byte with value 0 or 1. 53 | 54 | Bytes are encoded directly. 55 | 56 | Fixed size vectors are encoded as a series of values of the correct type. A bit vector is packed into the minimal byte count. 57 | 58 | Variable size vectors are encoded as a size_t offset into the file. At that location in the file, there is a size_t length followed by a series of values of the correct type. 59 | 60 | Variants are encoded as a size_t option index followed by a value of the correct type. This is padded with 0 bits to the size of the largest option. It may be more efficient to encode a variant as a pointer to a variant. 61 | 62 | Records are encoded as a series of values of the correct type. 63 | 64 | Floating point numbers are encoded as IEEE 754-2019 bit patterns. The size of the bit pattern is taken from the type. 65 | 66 | Integers and unsigned integers are directly encoded. The type gives the size of the encoding. 67 | 68 | Pointers are encoded as size_t offsets into the file. The object at that offset must be of the type specified by the pointer. If the offset is 0, then the pointer is null. 69 | 70 | ## Human Readable Format 71 | 72 | ```ts 73 | types 74 | { 75 | sha256: array 32 byte 76 | foobar: record { foo: foo; bar: bar } 77 | foo: variant { i32; string } 78 | bar: string 79 | string: array * byte 80 | opt_foobar: variant { pointer foobar; void } 81 | } 82 | 83 | $1: foobar = { foo = 0 42; bar = $2 } 84 | $2: string = 13:Hello, World! 
85 | ``` 86 | -------------------------------------------------------------------------------- /parsers/yaml/README.md: -------------------------------------------------------------------------------- 1 | # YAML 2 | 3 | The files in this directory provide a [YAML 1.2.2](https://yaml.org/spec/1.2.2/) compliant language implementation using Trieste. In addition to parsing YAML, we also provide the capability to output YAML event files, JSON, and canonically formatted YAML. 4 | 5 | We validate our implementation against the official [YAML test suite](https://github.com/yaml/yaml-test-suite). 6 | 7 | ## Getting Started 8 | 9 | To use Trieste YAML in your own codebase you will need to configure your CMake project with the `TRIESTE_BUILD_PARSERS` flag set. Trieste YAML definitions are in the `trieste::yaml` namespace, and to access them you need to include the `trieste/yaml.h` header. The YAML implementation is based around the following well-formedness definition (copied here from the header file): 10 | 11 | ```c++ 12 | inline const auto wf_tokens = Mapping | Sequence | Value | Int | Float | 13 | True | False | Hex | Null | SingleQuote | DoubleQuote | Plain | 14 | AnchorValue | Alias | TagValue | Literal | Folded | Empty | FlowMapping | 15 | FlowSequence; 16 | 17 | inline const auto wf_flow_tokens = wf_tokens - (Mapping | Sequence); 18 | 19 | inline const auto wf = 20 | (Top <<= Stream) 21 | | (Stream <<= Directives * Documents) 22 | | (Documents <<= Document++) 23 | | (Document <<= Directives * DocumentStart * (Value >>= wf_tokens) * DocumentEnd) 24 | | (Directives <<= (TagDirective | VersionDirective | UnknownDirective)++) 25 | | (TagDirective <<= TagPrefix * TagHandle)[TagPrefix] 26 | | (Mapping <<= MappingItem++[1]) 27 | | (MappingItem <<= (Key >>= wf_tokens) * (Value >>= wf_tokens)) 28 | | (FlowMapping <<= FlowMappingItem++) 29 | | (FlowMappingItem <<= (Key >>= wf_flow_tokens) * (Value >>= wf_flow_tokens)) 30 | | (AnchorValue <<= Anchor * (Value >>= 
wf_tokens))[Anchor] 31 | | (TagValue <<= TagPrefix * TagName * (Value >>= wf_tokens)) 32 | | (Sequence <<= wf_tokens++[1]) 33 | | (FlowSequence <<= wf_flow_tokens++) 34 | | (SingleQuote <<= (BlockLine|EmptyLine)++[1]) 35 | | (DoubleQuote <<= (BlockLine|EmptyLine)++[1]) 36 | | (Literal <<= AbsoluteIndent * ChompIndicator * Lines) 37 | | (Folded <<= AbsoluteIndent * ChompIndicator * Lines) 38 | | (Lines <<= (BlockLine|EmptyLine)++) 39 | | (Plain <<= (BlockLine|EmptyLine)++[1]) 40 | ; 41 | ``` 42 | 43 | The language implementation exposes the following helpers: 44 | 45 | - `reader()` - `Reader` that parses any valid 1.2.2 YAML file and produces an AST that conforms to `yaml::wf`. 46 | - `writer()` - `Writer` that takes a YAML AST that conforms to `yaml::wf` and produces a YAML file. 47 | - `event_writer()` - `Writer` that takes a YAML AST and produces a YAML event file. 48 | - `to_json` - `Rewriter` that takes a YAML AST and converts it to a JSON AST that conforms to `json::wf`. 49 | 50 | ## Using the `yamlc` Executable 51 | 52 | One of the targets that will be written to the `dist/parsers` directory is an executable named `yamlc` (which will 53 | only be built if the `TRIESTE_BUILD_PARSER_TOOLS` flag is set). It has the following usage information: 54 | 55 | ``` 56 | Usage: parsers/yamlc [OPTIONS] input [output] 57 | 58 | Positionals: 59 | input TEXT REQUIRED Path to the input file 60 | output TEXT Path to the output file 61 | 62 | Options: 63 | -h,--help Print this help message and exit 64 | -a,--ast TEXT Output the AST (debugging for the reader/rewriter/writer workflows) 65 | -w,--wf Enable well-formedness checks (slow) 66 | --prettyprint Pretty print the output (for JSON) 67 | -m,--mode TEXT:{event,json,yaml} 68 | Output mode. 
69 | ``` 70 | 71 | It can be used to convert YAML to one of three output formats: 72 | 73 | - `event` a YAML event file 74 | - `json` a JSON file (if the YAML stream has more than one document, this JSON will have multiple values at the top level) 75 | - `yaml` a canonical YAML file. While there is no formal definition for canonical YAML, we have tried to produce a file that would be easily parseable by the majority of YAML parsers. 76 | 77 | > **Note** 78 | > Our implementation is CRLF aware and compatible, in that if the input file contains CRLF line endings they 79 | > will be handled correctly and preserved in the output. -------------------------------------------------------------------------------- /parsers/json/reader.cc: -------------------------------------------------------------------------------- 1 | #include "internal.h" 2 | 3 | namespace 4 | { 5 | using namespace trieste; 6 | using namespace trieste::json; 7 | 8 | std::size_t 9 | invalid_tokens(Node node, const std::map& token_messages) 10 | { 11 | std::size_t changes = 0; 12 | 13 | node->traverse([&](Node& n) { 14 | if (n->type() == Error) 15 | return false; 16 | 17 | for (Node& child : *n) 18 | { 19 | if (token_messages.count(child->type()) > 0) 20 | { 21 | n->replace(child, err(child, token_messages.at(child->type()))); 22 | changes += 1; 23 | } 24 | } 25 | return true; 26 | }); 27 | 28 | return changes; 29 | } 30 | 31 | 32 | // clang-format off 33 | inline const auto wf_groups = 34 | (Top <<= wf_value_tokens++[1]) 35 | | (Object <<= ObjectGroup) 36 | | (Array <<= ArrayGroup) 37 | | (ObjectGroup <<= (wf_value_tokens | Colon | Comma)++) 38 | | (ArrayGroup <<= (wf_value_tokens | Comma)++) 39 | ; 40 | // clang-format on 41 | 42 | const auto ValueToken = T(Object, Array, String, Number, True, False, Null); 43 | 44 | PassDef groups(bool allow_multiple) 45 | { 46 | PassDef groups = { 47 | "groups", 48 | wf_groups, 49 | dir::bottomup, 50 | { 51 | In(Array) * T(Group)[Group] >> 52 | [](Match& _) { return 
ArrayGroup << *_[Group]; }, 53 | 54 | In(Object) * T(Group)[Group] >> 55 | [](Match& _) { return ObjectGroup << *_[Group]; }, 56 | 57 | In(Top) * 58 | (T(File) << ((T(Group) << (ValueToken++[Value] * End)) * End)) >> 59 | [allow_multiple](Match& _) { 60 | auto values = _[Value]; 61 | if (values.empty()) 62 | { 63 | return err("Invalid JSON"); 64 | } 65 | 66 | if (values.size() > 1 && !allow_multiple) 67 | { 68 | return err("Multiple top-level values not allowed"); 69 | } 70 | 71 | return Seq << _[Value]; 72 | }, 73 | 74 | // errors 75 | In(Top) * T(File)[File] >> 76 | [](Match& _) { return err(_[File], "Invalid JSON"); }, 77 | 78 | In(ArrayGroup) * T(Colon)[Colon] >> 79 | [](Match& _) { return err(_[Colon], "Invalid colon in array"); }, 80 | }}; 81 | 82 | return groups; 83 | } 84 | 85 | PassDef structure() 86 | { 87 | PassDef structure = { 88 | "structure", 89 | json::wf, 90 | dir::bottomup, 91 | { 92 | In(ArrayGroup) * (Start * ValueToken[Value]) >> 93 | [](Match& _) { return (Value << _(Value)); }, 94 | 95 | In(ArrayGroup) * (T(Value)[Lhs] * T(Comma) * ValueToken[Rhs]) >> 96 | [](Match& _) { return Seq << _(Lhs) << (Value << _(Rhs)); }, 97 | 98 | In(Array) * (T(ArrayGroup) << (T(Value)++[Array] * End)) >> 99 | [](Match& _) { return Seq << _[Array]; }, 100 | 101 | In(Array) * T(Value)[Value] >> 102 | [](Match& _) { return _(Value)->front(); }, 103 | 104 | In(ObjectGroup) * 105 | (Start * T(String)[Lhs] * T(Colon) * ValueToken[Rhs]) >> 106 | [](Match& _) { 107 | Location key = _(Lhs)->location(); 108 | key.pos += 1; 109 | key.len -= 2; 110 | return (Member << (Key ^ key) << _(Rhs)); 111 | }, 112 | 113 | In(ObjectGroup) * 114 | (T(Member)[Member] * T(Comma) * T(String)[Lhs] * T(Colon) * 115 | ValueToken[Rhs]) >> 116 | [](Match& _) { 117 | Location key = _(Lhs)->location(); 118 | key.pos += 1; 119 | key.len -= 2; 120 | return Seq << _(Member) << (Member << (Key ^ key) << _(Rhs)); 121 | }, 122 | 123 | In(Object) * (T(ObjectGroup) << (T(Member)++[Object] * End)) >> 124 
| [](Match& _) { return Seq << _[Object]; }, 125 | }}; 126 | 127 | structure.post([&](Node n) { 128 | return invalid_tokens( 129 | n, {{ObjectGroup, "Invalid object"}, {ArrayGroup, "Invalid array"}}); 130 | }); 131 | 132 | return structure; 133 | } 134 | } 135 | 136 | namespace trieste 137 | { 138 | namespace json 139 | { 140 | Reader reader(bool allow_multiple) 141 | { 142 | return Reader{"json", {groups(allow_multiple), structure()}, parser()}; 143 | } 144 | } 145 | 146 | } 147 | -------------------------------------------------------------------------------- /notes/dispatch.md: -------------------------------------------------------------------------------- 1 | # Dispatch 2 | 3 | For some `(call (selector f) (args (copy a) (copy b)))`, this could have been: 4 | - `a.f(b)` 5 | - `a f b` 6 | - `f(a, b)` 7 | 8 | We need to discover all possible functions `f` that could be called. This includes: 9 | - Functions `f` that can be looked up from the source location (static dispatch). 10 | - Functions `f` defined on the dynamic type of `a` (dynamic dispatch). 11 | 12 | When calling `f`, we need to rank the possible functions and choose one. 13 | 14 | ## Motivation 15 | 16 | The supposition is that using the same syntax for static and dynamic dispatch allows for better EDSLs. 17 | 18 | ## Extending Classes 19 | 20 | For modularity, extensions should be scoped. 21 | - You can import the extensions with `use `. 22 | 23 | When making a dynamic call: 24 | - We know all extension methods that are in scope. 25 | - For each extended selector, build a map of `type descriptor -> function pointer`. 26 | - If the selector has been extended: 27 | - Look up the function pointer in the selector map. 28 | - If there's no entry in the selector map, look up the function pointer in the receiver's type descriptor. 29 | - If no `class` that's been extended with this selector is a subtype of the receiver's static type, this check can be elided. 
30 | - Else look up the selector on the receiver's type descriptor. 31 | - If the static type of the receiver is concrete, all dynamic dispatch including extension can be turned into static dispatch. 32 | 33 | When making a dynamic call: 34 | - We know all extension methods that are in scope. 35 | - For each extended type, build an alternate local type descriptor. 36 | - Keep a local map of `type descriptor -> type descriptor`. 37 | - If the selector has been extended for any type: 38 | - Look up the local type descriptor in the map. 39 | - This can be optimised such that for any given object, the local type descriptor is only looked up once. 40 | - If there's no entry in the map, use the receiver's type descriptor. 41 | - Else use the receiver's type descriptor. 42 | 43 | ```rust 44 | type Eq = 45 | { 46 | ==(self: Self, other: Self): Bool 47 | } 48 | 49 | class Foo 50 | { 51 | var x: I32 52 | } 53 | 54 | Foo::==(self: Foo, other: Foo): Bool 55 | { 56 | // No access to private members of Foo unless the enclosing scope has access 57 | // to private members of Foo. 58 | self.x == other.x 59 | } 60 | 61 | ``` 62 | 63 | ## Ranking 64 | 65 | Static or Dynamic 66 | - Dot notation and unscoped names are dynamic dispatch. 67 | - Scoped names are static dispatch. 68 | - Allow `::name` as a scoped name, meaning lookup only, no lookdown phase. 69 | 70 | Arity 71 | - Given a call of arity N, select only `f/N`. 72 | - There must be only one `f/N` for a given `N`. 73 | - Generate partial application functions. 74 | - For f/3, generate f/2, f/1, f/0, if they don't exist. 75 | 76 | No static type based overloading. 
77 | 78 | ## Type Inference 79 | 80 | Dynamic: 81 | ```ts 82 | (bind r T1 (call (selector f/2 (typeargs)) (args a b))) 83 | ``` 84 | Result: 85 | - `a: T2` 86 | - `b: T3` 87 | - `T2 <: { f/2: (T2, T3)->T1 }` 88 | 89 | Static: 90 | ```ts 91 | (bind r T1 (call (functionname f/2 (typeargs)) (args a b))) 92 | ``` 93 | Result: 94 | - `a: T2` 95 | - `b: T3` 96 | - `f/2: T4` 97 | - `T4 <: (T2, T3)->T1` 98 | 99 | ## Partial Application 100 | 101 | ```ts 102 | class A 103 | { 104 | class $f_0 105 | { 106 | create(): $f_0 107 | { 108 | new () 109 | } 110 | 111 | apply(self: $f_0, a: A): R 112 | { 113 | $f_1::create(a) 114 | } 115 | 116 | apply(self: $f_0, a: A, b: B): R 117 | { 118 | $f_2::create(a, b) 119 | } 120 | 121 | apply(self: $f_0, a: A, b: B, c: C): R 122 | { 123 | A::f(a, b, c) 124 | } 125 | } 126 | 127 | class $f_1 128 | { 129 | let a: A 130 | 131 | create(a: A): $f_1 132 | { 133 | new (a) 134 | } 135 | 136 | apply(self: $f_1, b: B): R 137 | { 138 | $f_2::create(self.a, b) 139 | } 140 | 141 | apply(self: $f_1, b: B, c: C): R 142 | { 143 | A::f(self.a, b, c) 144 | } 145 | } 146 | 147 | class $f_2 148 | { 149 | let a: A 150 | let b: B 151 | 152 | create(a: A, b: B): $f_2 153 | { 154 | new (a, b) 155 | } 156 | 157 | apply(self: $f_2, c: C): R 158 | { 159 | A::f(self.a, self.b, c) 160 | } 161 | } 162 | 163 | f(a: A, b: B, c: C): R 164 | { 165 | // ... 
166 | } 167 | 168 | f(a: A, b: B): $f_2 169 | { 170 | $f_2::create(a, b) 171 | } 172 | 173 | f(a: A): $f_1 174 | { 175 | $f_1::create(a) 176 | } 177 | 178 | f(): $f_0 179 | { 180 | $f_0::create() 181 | } 182 | } 183 | ``` 184 | -------------------------------------------------------------------------------- /test/intrusive_ptr_test.cc: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | 7 | struct Dummy : public trieste::intrusive_refcounted 8 | { 9 | size_t tag; 10 | 11 | Dummy(size_t tag_) : tag{tag_} {} 12 | }; 13 | 14 | using ptr_t = trieste::intrusive_ptr; 15 | using ActionFn = ptr_t(ptr_t); 16 | 17 | std::vector actions{ 18 | [](ptr_t ptr) -> ptr_t { 19 | if (ptr == nullptr) 20 | { 21 | std::cout << "Should only be setting to nullptr once per thread!" 22 | << std::endl; 23 | std::abort(); 24 | } 25 | return nullptr; // dec_ref on this ptr 26 | }, 27 | [](ptr_t ptr) { 28 | auto tmp = std::move(ptr); 29 | return tmp; 30 | }, 31 | [](ptr_t ptr) { 32 | auto tmp = ptr; 33 | return ptr; 34 | }, 35 | [](ptr_t ptr) { 36 | auto& alias = ptr; 37 | alias = ptr; 38 | return ptr; 39 | }, 40 | }; 41 | 42 | struct Behavior 43 | { 44 | size_t action_idx; 45 | size_t ptr_idx; 46 | 47 | bool operator<(const Behavior& other) const 48 | { 49 | return std::pair{action_idx, ptr_idx} < 50 | std::pair{other.action_idx, other.ptr_idx}; 51 | } 52 | }; 53 | 54 | struct Test 55 | { 56 | size_t ptr_count; 57 | std::vector> thread_behaviors; 58 | 59 | void run() const 60 | { 61 | // Each thread gets its own copy of an array of N pointers, where every 62 | // thread shares refcounts with every other thread. 
63 | std::vector> ptrs_per_thread; 64 | ptrs_per_thread.emplace_back(); 65 | for (size_t i = 0; i < ptr_count; ++i) 66 | { 67 | ptrs_per_thread.front().push_back(ptr_t::make(i)); 68 | } 69 | while (ptrs_per_thread.size() < thread_behaviors.size()) 70 | { 71 | ptrs_per_thread.push_back(ptrs_per_thread.back()); 72 | } 73 | 74 | std::vector threads; 75 | for (size_t i = 0; i < thread_behaviors.size(); ++i) 76 | { 77 | threads.emplace_back([&, i]() { 78 | for (auto& behavior : thread_behaviors.at(i)) 79 | { 80 | auto& ptr = ptrs_per_thread.at(i).at(behavior.ptr_idx); 81 | ptr = actions[behavior.action_idx](ptr); 82 | } 83 | }); 84 | } 85 | 86 | for (auto& thread : threads) 87 | { 88 | thread.join(); 89 | } 90 | 91 | // Sanity check: every thread should be setting their ptr to nullptr at some 92 | // point 93 | for (const auto& ptrs : ptrs_per_thread) 94 | { 95 | for (const auto& ptr : ptrs) 96 | { 97 | if (ptr != nullptr) 98 | { 99 | std::cout << "non-null ptr!" << std::endl; 100 | std::abort(); 101 | } 102 | } 103 | } 104 | } 105 | }; 106 | 107 | std::vector 108 | build_tests(size_t ptr_count, size_t thread_count, size_t permutations) 109 | { 110 | std::vector all_behaviors; 111 | for (size_t action_idx = 0; action_idx < actions.size(); ++action_idx) 112 | { 113 | for (size_t ptr_idx = 0; ptr_idx < ptr_count; ++ptr_idx) 114 | { 115 | all_behaviors.push_back({ 116 | action_idx, 117 | ptr_idx, 118 | }); 119 | } 120 | } 121 | 122 | std::vector tests = {{ptr_count, {}}}; 123 | for (size_t i = 0; i < thread_count; ++i) 124 | { 125 | std::vector next_tests; 126 | for (const auto& test : tests) 127 | { 128 | // Allow adding some extra permutations if you think you're stuck at the 129 | // first few. 
130 | for (size_t permutation_idx = 0; permutation_idx < permutations; 131 | ++permutation_idx) 132 | { 133 | auto mod_test = test; 134 | mod_test.thread_behaviors.push_back(all_behaviors); 135 | next_tests.push_back(mod_test); 136 | 137 | // Unconditionally permute the behaviors. We're not looking for total 138 | // coverage, just variety. 139 | std::next_permutation(all_behaviors.begin(), all_behaviors.end()); 140 | } 141 | } 142 | tests = next_tests; 143 | } 144 | return tests; 145 | } 146 | 147 | // The intention of this test is to do a lot of work to refcounts, while under 148 | // some kind of thread sanitizer. Changing the intrusive_ptr implementation to 149 | // use non-atomic refcounting should make Clang's thread sanitizer unhappy, for 150 | // instance, whereas with the current thread-safe implementation everything 151 | // _should_ be fine. 152 | int main() 153 | { 154 | // Be very careful when increasing these numbers... they can quickly eat up 155 | // your memory and time. 156 | auto tests = build_tests(3, 6, 4); 157 | std::cout << "Found " << tests.size() << " permutations." << std::endl; 158 | 159 | for (auto test : tests) 160 | { 161 | test.run(); 162 | } 163 | 164 | std::cout << "Ran " << tests.size() << " permutations." << std::endl; 165 | return 0; 166 | } 167 | -------------------------------------------------------------------------------- /parsers/json/README.md: -------------------------------------------------------------------------------- 1 | # JSON 2 | 3 | The files in this directory provide a [RFC 8259](https://www.rfc-editor.org/rfc/rfc8259) compliant JSON language implementation using Trieste. It is able to both read and write JSON files. 4 | 5 | We are grateful to the maintainers of the [JSONTestSuite](https://github.com/nst/JSONTestSuite), which we use to ensure compliance with RFC 8259. 
6 | 7 | ## Getting Started 8 | 9 | To use Trieste JSON in your own codebase you will need to configure your CMake project with the `TRIESTE_BUILD_PARSERS` flag set. Trieste JSON definitions are in the `trieste::json` namespace, and to access them you need to include the `trieste/json.h` header. The JSON implementation is based around the following well-formedness definition (copied here from the header file): 10 | 11 | ```cpp 12 | inline const auto wf_value_tokens = 13 | Object | Array | String | Number | True | False | Null; 14 | 15 | // clang-format off 16 | inline const auto wf = 17 | (Top <<= wf_value_tokens++[1]) 18 | | (Object <<= Member++) 19 | | (Member <<= String * (Value >>= wf_value_tokens)) 20 | | (Array <<= wf_value_tokens++) 21 | ; 22 | // clang-format on 23 | ``` 24 | 25 | The language implementation exposes the following helpers: 26 | 27 | - `reader()` - `Reader` that parses any valid JSON file and produces an AST that conforms to `json::wf`. Optionally, this reader will read non-compliant JSON files which contain more than one JSON value at the top level of the file. 28 | - `writer()` - `Writer` that takes a JSON AST that conforms to `json::wf` and produces a JSON file. 
29 | 30 | ## Creating JSON Nodes 31 | 32 | The public API exposes several methods to help create JSON documents: 33 | 34 | ```cpp 35 | Node object = json::object( 36 | {json::member("key_a_str", "value"), 37 | json::member("key_b_number", 42), 38 | json::member("key_c_bool", json::boolean(true)), 39 | json::member("key_d_null", json::null()), 40 | json::member("key_e_array", json::array({json::value(1), json::value(2)})), 41 | json::member( 42 | "key_f_object", 43 | json::object({json::member("key", json::value("value"))}))}); 44 | 45 | std::cout << json::to_string(object) << std::endl; 46 | // {"key_a_str":"value","key_b_number":42,"key_c_bool":true,"key_d_null":null,"key_e_array":[1,2],"key_f_object":{"key":"value"}} 47 | 48 | Nodes elements; 49 | elements.push_back(json::value(1)); 50 | elements.push_back(json::value("two")); 51 | elements.push_back(json::boolean(false)); 52 | elements.push_back(json::null()); 53 | Node array = json::array(elements.begin(), elements.end()); 54 | ``` 55 | 56 | ## Reading values 57 | There are also methods to get C++ values back out of nodes, such as: 58 | 59 | - `optional get_number(const Node&)` 60 | 61 | Attempts to get a number out of a `Node`. If the node is not of type `Number`, or cannot be parsed as a double, returns `nullopt`. 62 | - `optional get_boolean(const Node&)` 63 | 64 | Attempts to get a boolean value out of a `Node`. If the node is not of type `True` or `False`, returns `nullopt`. 65 | - `optional get_string(const Node&)` 66 | 67 | Attempts to get a string out of a `Node`. This will remove the double quotation marks. If the node is not of type `String`, returns `nullopt`. 
68 | 69 | You can also use a [JSON Pointer](https://www.rfc-editor.org/rfc/rfc6901) to select nodes out of a document using the `select()` function: 70 | 71 | ```cpp 72 | std::cout << "c: " << json::select(object, {"/key_c_bool"}).value(); 73 | // c: (json-true) 74 | 75 | std::cout << "a: " << json::select_string(object, {"/key_a_str"}).value() << std::endl; 76 | // a: value 77 | 78 | std::cout << "e[1]: " << json::select_number(object, {"/key_e_array/1"}).value() << std::endl; 79 | // e[1]: 2 80 | 81 | std::cout << "missingkey: " << json::select(object, {"/missingkey"}); 82 | // missingkey: missing key: (error (errormsg 42:Member does not exist with key: missingkey) (errorast ... ) 83 | ``` 84 | 85 | Note the `select_` functions which mirror those above and perform a select and then a `get_` call. 86 | 87 | ## JSON Patch 88 | The `patch(const Node&, const Node&)` function provides support for [JSON Patch](https://www.rfc-editor.org/rfc/rfc6902). 89 | The implementation is fully compliant with RFC 6902 and we thank the maintainers of the 90 | [JSON Patch Test Suite](https://github.com/json-patch/json-patch-tests), which we have integrated with our CI and pass in full. 
91 | 92 | ```cpp 93 | auto reader = json::reader(); 94 | auto doc = 95 | reader.synthetic(R"json({"foo": {"bar": {"baz": [{"boo": "net"}]}}})json") 96 | .read() 97 | .ast->front(); 98 | auto patch = reader 99 | .synthetic(R"json([ 100 | {"op": "copy", "from": "/foo", "path": "/bak"}, 101 | {"op": "replace", "path": "/foo/bar/baz/0/boo", "value": "qux"} 102 | ])json") 103 | .read() 104 | .ast->front(); 105 | 106 | auto patched = json::patch(doc, patch); 107 | std::cout << "patched: " << json::to_string(patched) << std::endl; 108 | // patched: {"foo":{"bar":{"baz":[{"boo":"qux"}]}},"bak":{"bar":{"baz":[{"boo":"net"}]}}} 109 | -------------------------------------------------------------------------------- /include/trieste/regex.h: -------------------------------------------------------------------------------- 1 | // Copyright Microsoft and Project Verona Contributors. 2 | // SPDX-License-Identifier: MIT 3 | #pragma once 4 | 5 | #include "logging.h" 6 | #include "ast.h" 7 | 8 | #include 9 | 10 | namespace trieste 11 | { 12 | class REMatch 13 | { 14 | friend class REIterator; 15 | 16 | private: 17 | std::vector match; 18 | std::vector locations; 19 | size_t matches = 0; 20 | 21 | bool match_regexp(const RE2& regex, re2::StringPiece& sp, Source& source) 22 | { 23 | matches = regex.NumberOfCapturingGroups() + 1; 24 | 25 | if (match.size() < matches) 26 | match.resize(matches); 27 | 28 | if (locations.size() < matches) 29 | locations.resize(matches); 30 | 31 | auto matched = regex.Match( 32 | sp, 33 | 0, 34 | sp.length(), 35 | re2::RE2::ANCHOR_START, 36 | match.data(), 37 | static_cast(matches)); 38 | 39 | if (!matched) 40 | { 41 | return false; 42 | } 43 | 44 | for (size_t i = 0; i < matches; i++) 45 | { 46 | locations[i] = { 47 | source, 48 | static_cast(match.at(i).data() - source->view().data()), 49 | match.at(i).size()}; 50 | } 51 | 52 | return true; 53 | } 54 | 55 | public: 56 | REMatch(size_t max_capture = 0) 57 | { 58 | match.resize(max_capture + 1); 59 | 
locations.resize(max_capture + 1); 60 | } 61 | 62 | const Location& at(size_t index = 0) const 63 | { 64 | if (index >= matches) 65 | return locations.at(0); 66 | 67 | return locations.at(index); 68 | } 69 | 70 | template 71 | T parse(size_t index = 0) const 72 | { 73 | if (index >= matches) 74 | return T(); 75 | 76 | T t; 77 | RE2::Arg arg(&t); 78 | auto& m = match.at(index); 79 | arg.Parse(m.data(), m.size()); 80 | return t; 81 | } 82 | }; 83 | 84 | class REIterator 85 | { 86 | private: 87 | Source source; 88 | re2::StringPiece sp; 89 | 90 | public: 91 | REIterator(Source source_) : source(source_), sp(source_->view()) {} 92 | 93 | bool empty() 94 | { 95 | return sp.empty(); 96 | } 97 | 98 | bool consume(const RE2& regex, REMatch& m) 99 | { 100 | if (!m.match_regexp(regex, sp, source)) 101 | return false; 102 | 103 | sp.remove_prefix(m.at(0).len); 104 | return true; 105 | } 106 | 107 | Location current() const 108 | { 109 | return { 110 | source, static_cast(sp.data() - source->view().data()), 1}; 111 | } 112 | 113 | void skip(size_t count = 1) 114 | { 115 | sp.remove_prefix(count); 116 | } 117 | }; 118 | 119 | inline Node build_ast(Source source, size_t pos) 120 | { 121 | auto hd = RE2("[[:space:]]*\\([[:space:]]*([^[:space:]\\(\\)]*)"); 122 | auto st = RE2("[[:space:]]*\\{[^\\}]*\\}"); 123 | auto id = RE2("[[:space:]]*([[:digit:]]+):"); 124 | auto tl = RE2("[[:space:]]*\\)"); 125 | 126 | REMatch re_match(2); 127 | REIterator re_iterator(source); 128 | re_iterator.skip(pos); 129 | 130 | Node top; 131 | Node ast; 132 | 133 | while (!re_iterator.empty()) 134 | { 135 | // Find the type of the node. If we didn't find a node, it's an error. 136 | if (!re_iterator.consume(hd, re_match)) 137 | { 138 | auto loc = re_iterator.current(); 139 | logging::Error() << loc.origin_linecol() << ": expected node" 140 | << std::endl 141 | << loc.str() << std::endl; 142 | return {}; 143 | } 144 | 145 | // If we don't have a valid node type, it's an error. 
146 | auto type_loc = re_match.at(1); 147 | auto type = detail::find_token(type_loc.view()); 148 | 149 | if (type == Invalid) 150 | { 151 | logging::Error() << type_loc.origin_linecol() << ": unknown type" 152 | << std::endl 153 | << type_loc.str() << std::endl; 154 | return {}; 155 | } 156 | 157 | // Find the source location of the node as a netstring. 158 | auto ident_loc = type_loc; 159 | 160 | if (re_iterator.consume(id, re_match)) 161 | { 162 | auto len = re_match.parse(1); 163 | ident_loc = 164 | Location(source, re_match.at().pos + re_match.at().len, len); 165 | re_iterator.skip(len); 166 | } 167 | 168 | // Push the node into the AST. 169 | auto node = NodeDef::create(type, ident_loc); 170 | 171 | if (ast) 172 | ast->push_back(node); 173 | else 174 | top = node; 175 | 176 | ast = node; 177 | 178 | // Skip the symbol table. 179 | re_iterator.consume(st, re_match); 180 | 181 | // `)` ends the node. Otherwise, we'll add children to this node. 182 | while (re_iterator.consume(tl, re_match)) 183 | { 184 | auto parent = ast->parent(); 185 | 186 | if (!parent) 187 | return ast; 188 | 189 | ast = parent; 190 | } 191 | } 192 | 193 | // We never finished the AST, so it's an error. 194 | auto loc = re_iterator.current(); 195 | logging::Error() << loc.origin_linecol() << ": incomplete AST" << std::endl 196 | << loc.str() << std::endl; 197 | return {}; 198 | } 199 | } 200 | -------------------------------------------------------------------------------- /parsers/include/trieste/yaml.h: -------------------------------------------------------------------------------- 1 | // Copyright Microsoft and Project Verona Contributors. 
2 | // SPDX-License-Identifier: MIT 3 | #pragma once 4 | 5 | #include "trieste/trieste.h" 6 | 7 | namespace trieste 8 | { 9 | namespace yaml 10 | { 11 | using namespace wf::ops; 12 | 13 | inline const auto Stream = 14 | TokenDef("yaml-stream", flag::symtab | flag::defbeforeuse); 15 | inline const auto Directives = TokenDef("yaml-directives"); 16 | inline const auto UnknownDirective = 17 | TokenDef("yaml-unknowndirective", flag::print); 18 | inline const auto VersionDirective = 19 | TokenDef("yaml-versiondirective", flag::print); 20 | inline const auto TagDirective = 21 | TokenDef("yaml-tagdirective", flag::lookup | flag::shadowing); 22 | inline const auto TagPrefix = TokenDef("yaml-tagprefix", flag::print); 23 | inline const auto TagHandle = TokenDef("yaml-taghandle", flag::print); 24 | inline const auto Documents = TokenDef("yaml-documents"); 25 | inline const auto Document = 26 | TokenDef("yaml-document", flag::symtab | flag::defbeforeuse); 27 | inline const auto DocumentStart = TokenDef("yaml-docstart", flag::print); 28 | inline const auto DocumentEnd = TokenDef("yaml-docend", flag::print); 29 | inline const auto Sequence = TokenDef("yaml-sequence"); 30 | inline const auto Mapping = TokenDef("yaml-mapping"); 31 | inline const auto MappingItem = TokenDef("yaml-mappingitem"); 32 | inline const auto Key = TokenDef("yaml-key"); 33 | inline const auto Value = TokenDef("yaml-value", flag::print); 34 | inline const auto Int = TokenDef("yaml-int", flag::print); 35 | inline const auto Hex = TokenDef("yaml-hex", flag::print); 36 | inline const auto Float = TokenDef("yaml-float", flag::print); 37 | inline const auto Null = TokenDef("yaml-null"); 38 | inline const auto True = TokenDef("yaml-true"); 39 | inline const auto False = TokenDef("yaml-false"); 40 | inline const auto SingleQuote = TokenDef("yaml-singlequote"); 41 | inline const auto DoubleQuote = TokenDef("yaml-doublequote"); 42 | inline const auto BlockLine = TokenDef("yaml-blockline", flag::print); 43 | inline 
const auto EmptyLine = TokenDef("yaml-emptyline"); 44 | inline const auto Literal = TokenDef("yaml-literal"); 45 | inline const auto Folded = TokenDef("yaml-folded"); 46 | inline const auto AbsoluteIndent = 47 | TokenDef("yaml-absoluteindent", flag::print); 48 | inline const auto ChompIndicator = 49 | TokenDef("yaml-chomp-indicator", flag::print); 50 | inline const auto Lines = TokenDef("yaml-lines"); 51 | inline const auto Plain = TokenDef("yaml-plain"); 52 | inline const auto AnchorValue = TokenDef("yaml-anchorvalue", flag::lookup); 53 | inline const auto Anchor = TokenDef("yaml-anchor", flag::print); 54 | inline const auto TagValue = TokenDef("yaml-tagvalue"); 55 | inline const auto TagName = TokenDef("yaml-tagname", flag::print); 56 | inline const auto Alias = TokenDef("yaml-alias", flag::print); 57 | inline const auto Empty = TokenDef("yaml-empty"); 58 | inline const auto FlowMapping = TokenDef("yaml-flowmapping"); 59 | inline const auto FlowMappingItem = TokenDef("yaml-flowmappingitem"); 60 | inline const auto FlowSequence = TokenDef("yaml-flowsequence"); 61 | 62 | inline const auto wf_tokens = Mapping | Sequence | Value | Int | Float | 63 | True | False | Hex | Null | SingleQuote | DoubleQuote | Plain | 64 | AnchorValue | Alias | TagValue | Literal | Folded | Empty | FlowMapping | 65 | FlowSequence; 66 | 67 | inline const auto wf_flow_tokens = 68 | wf_tokens - (Literal | Folded | Mapping | Sequence); 69 | 70 | // clang-format off 71 | inline const auto wf = 72 | (Top <<= Stream) 73 | | (Stream <<= Directives * Documents) 74 | | (Documents <<= Document++) 75 | | (Document <<= Directives * DocumentStart * (Value >>= wf_tokens) * DocumentEnd) 76 | | (Directives <<= (TagDirective | VersionDirective | UnknownDirective)++) 77 | | (TagDirective <<= TagPrefix * TagHandle)[TagPrefix] 78 | | (Mapping <<= MappingItem++[1]) 79 | | (MappingItem <<= (Key >>= wf_tokens) * (Value >>= wf_tokens)) 80 | | (FlowMapping <<= FlowMappingItem++) 81 | | (FlowMappingItem <<= (Key >>= 
wf_flow_tokens) * (Value >>= wf_flow_tokens)) 82 | | (AnchorValue <<= Anchor * (Value >>= wf_tokens))[Anchor] 83 | | (TagValue <<= TagPrefix * TagName * (Value >>= wf_tokens)) 84 | | (Sequence <<= wf_tokens++[1]) 85 | | (FlowSequence <<= wf_flow_tokens++) 86 | | (SingleQuote <<= (BlockLine|EmptyLine)++[1]) 87 | | (DoubleQuote <<= (BlockLine|EmptyLine)++[1]) 88 | | (Literal <<= AbsoluteIndent * ChompIndicator * Lines) 89 | | (Folded <<= AbsoluteIndent * ChompIndicator * Lines) 90 | | (Lines <<= (BlockLine|EmptyLine)++) 91 | | (Plain <<= (BlockLine|EmptyLine)++[1]) 92 | ; 93 | // clang-format on 94 | 95 | Reader reader(); 96 | Writer event_writer( 97 | const std::filesystem::path& path, const std::string& newline = "\n"); 98 | Writer writer( 99 | const std::filesystem::path& path, 100 | const std::string& newline = "\n", 101 | std::size_t indent = 2, 102 | bool canonical = false); 103 | Rewriter to_json(); 104 | std::ostream& block_to_string( 105 | std::ostream& os, const Node& node, bool raw_quotes = false); 106 | std::ostream& quote_to_string( 107 | std::ostream& os, const Node& quote, bool raw_quotes = false); 108 | std::string to_string( 109 | Node yaml, 110 | const std::string& newline = "\n", 111 | std::size_t indent = 2, 112 | bool canonical = false); 113 | } 114 | } 115 | -------------------------------------------------------------------------------- /parsers/json/writer.cc: -------------------------------------------------------------------------------- 1 | #include "internal.h" 2 | #include "trieste/wf.h" 3 | 4 | namespace 5 | { 6 | using namespace trieste; 7 | using namespace trieste::json; 8 | 9 | struct WriteSettings 10 | { 11 | bool prettyprint; 12 | bool sort_keys; 13 | const std::string& indent; 14 | }; 15 | 16 | void write_value( 17 | std::ostream& os, 18 | const WriteSettings& settings, 19 | const std::string& indent, 20 | const Node& value); 21 | 22 | void write_object( 23 | std::ostream& os, 24 | const WriteSettings& settings, 25 | const 
std::string& indent, 26 | const Node& object) 27 | { 28 | if (object->empty()) 29 | { 30 | os << "{}"; 31 | return; 32 | } 33 | 34 | std::string new_indent = indent + settings.indent; 35 | os << "{"; 36 | if (settings.prettyprint) 37 | { 38 | os << std::endl; 39 | } 40 | 41 | std::vector members; 42 | if (settings.sort_keys) 43 | { 44 | std::vector keys; 45 | std::transform( 46 | object->begin(), 47 | object->end(), 48 | std::back_inserter(keys), 49 | [](Node member) { return (member / Key)->location(); }); 50 | std::sort(keys.begin(), keys.end()); 51 | for (const Location& key : keys) 52 | { 53 | Nodes defs = object->lookdown(key); 54 | members.insert(members.end(), defs.begin(), defs.end()); 55 | } 56 | } 57 | else 58 | { 59 | members.insert(members.end(), object->begin(), object->end()); 60 | } 61 | 62 | for (std::size_t i = 0; i < members.size(); ++i) 63 | { 64 | Node member = members[i]; 65 | assert(member == Member); 66 | 67 | if (settings.prettyprint) 68 | { 69 | os << new_indent; 70 | } 71 | 72 | write_value(os, settings, new_indent, member / Key); 73 | os << ":"; 74 | 75 | if (settings.prettyprint) 76 | { 77 | os << " "; 78 | } 79 | 80 | write_value(os, settings, new_indent, member / Value); 81 | 82 | if (i < object->size() - 1) 83 | { 84 | os << ","; 85 | } 86 | 87 | if (settings.prettyprint) 88 | { 89 | os << std::endl; 90 | } 91 | } 92 | 93 | if (settings.prettyprint) 94 | { 95 | os << indent; 96 | } 97 | 98 | os << "}"; 99 | } 100 | 101 | void write_array( 102 | std::ostream& os, 103 | const WriteSettings& settings, 104 | const std::string& indent, 105 | const Node& array) 106 | { 107 | if (array->empty()) 108 | { 109 | os << "[]"; 110 | return; 111 | } 112 | 113 | std::string new_indent = indent + settings.indent; 114 | os << "["; 115 | if (settings.prettyprint) 116 | { 117 | os << std::endl; 118 | } 119 | for (std::size_t i = 0; i < array->size(); ++i) 120 | { 121 | Node element = array->at(i); 122 | if (settings.prettyprint) 123 | { 124 | os << 
new_indent; 125 | } 126 | write_value(os, settings, new_indent, element); 127 | 128 | if (i < array->size() - 1) 129 | { 130 | os << ","; 131 | } 132 | 133 | if (settings.prettyprint) 134 | { 135 | os << std::endl; 136 | } 137 | } 138 | if (settings.prettyprint) 139 | { 140 | os << indent; 141 | } 142 | os << "]"; 143 | } 144 | 145 | void write_value( 146 | std::ostream& os, 147 | const WriteSettings& settings, 148 | const std::string& indent, 149 | const Node& value) 150 | { 151 | if (value->in({Number, String, True, False, Null})) 152 | { 153 | os << value->location().view(); 154 | } 155 | else if (value == Key) 156 | { 157 | os << '"' << value->location().view() << '"'; 158 | } 159 | else if (value == Object) 160 | { 161 | write_object(os, settings, indent, value); 162 | } 163 | else if (value == Array) 164 | { 165 | write_array(os, settings, indent, value); 166 | } 167 | else if (value == Top) 168 | { 169 | write_value(os, settings, indent, value->front()); 170 | } 171 | else 172 | { 173 | std::ostringstream message; 174 | message << "Unexpected node type: " << value->type().str(); 175 | throw std::runtime_error(message.str()); 176 | } 177 | } 178 | 179 | const auto ValueToken = T(Object, Array, String, Number, True, False, Null); 180 | 181 | // clang-format off 182 | inline const auto wf_to_file = 183 | json::wf 184 | | (Top <<= File) 185 | | (File <<= Path * Contents) 186 | | (Contents <<= wf_value_tokens++[1]) 187 | ; 188 | // clang-format on 189 | 190 | PassDef to_file(const std::filesystem::path& path) 191 | { 192 | return { 193 | "to_file", 194 | wf_to_file, 195 | dir::bottomup | dir::once, 196 | { 197 | In(Top) * ValueToken++[Value] >> 198 | [path](Match& _) { 199 | return File << (Path ^ path.string()) << (Contents << _[Value]); 200 | }, 201 | }}; 202 | } 203 | } 204 | 205 | namespace trieste 206 | { 207 | namespace json 208 | { 209 | Writer writer( 210 | const std::filesystem::path& path, 211 | bool prettyprint, 212 | bool sort_keys, 213 | const 
std::string& indent) 214 | { 215 | return Writer( 216 | "json", 217 | {to_file(path)}, 218 | json::wf, 219 | [prettyprint, sort_keys, indent](std::ostream& os, Node contents) { 220 | for (const Node& value : *contents) 221 | { 222 | write_value(os, {prettyprint, sort_keys, indent}, "", value); 223 | os << std::endl; 224 | } 225 | return true; 226 | }); 227 | } 228 | 229 | std::string to_string( 230 | Node json, bool prettyprint, bool sort_keys, const std::string& indent) 231 | { 232 | WFContext context(json::wf); 233 | std::ostringstream os; 234 | write_value(os, {prettyprint, sort_keys, indent}, "", json); 235 | return os.str(); 236 | } 237 | } 238 | } 239 | -------------------------------------------------------------------------------- /samples/shrubbery/reader.cc: -------------------------------------------------------------------------------- 1 | #include "shrubbery.h" 2 | #include "wf.h" 3 | 4 | namespace shrubbery 5 | { 6 | auto err(const NodeRange& r, const std::string& msg) 7 | { 8 | return Error << (ErrorMsg ^ msg) << (ErrorAst << r); 9 | } 10 | 11 | auto err(Node node, const std::string& msg) 12 | { 13 | return Error << (ErrorMsg ^ msg) << (ErrorAst << node); 14 | } 15 | 16 | PassDef check_parsing() 17 | { 18 | return { 19 | "check parsing", 20 | wf_check_parser, 21 | dir::bottomup | dir::once, 22 | { 23 | // An empty block followed by alternatives is ignored 24 | (T(Group) << (!T(Block))++[Terms] * (T(Block) << End)) * (T(Group) << T(Alt))[Alt] >> 25 | [](Match& _) { return Seq << (Group << _[Terms]) 26 | << _(Alt); 27 | }, 28 | 29 | (T(Block) << (T(Group)[Group] << T(Alt))) >> 30 | [](Match& _) { return Seq << *_[Group]; }, 31 | 32 | // An empty group caused by a semicolon is ignored 33 | (In(Semi) * ((T(Group) << End))) >> 34 | [](Match&) { return Seq ^ ""; }, 35 | 36 | // Commas must separate (non-empty) groups 37 | (T(Comma) << End)[Comma] >> 38 | [](Match& _) { return err(_[Comma], "Comma does not separate groups"); }, 39 | 40 | (In(Comma) * 
(T(Group) << End)[Group]) >> 41 | [](Match& _) { return err(_[Group], "Comma does not separate groups"); }, 42 | 43 | // A comma can only appear inside a paren, brace or bracket 44 | ((--In(Paren, Brace, Bracket)) * T(Comma)[Comma]) >> 45 | [](Match& _) { return err(_[Comma], "Commas can only separate groups in parentheses/braces/brackets"); }, 46 | 47 | // Opener-closer pairs must have comma-separated groups 48 | (In(Paren, Brace, Bracket) * Any * Any)[Group] >> 49 | [](Match& _) { return err(_[Group], "Groups in parentheses/braces/brackets must be comma separated"); }, 50 | 51 | // Opener-closer pairs cannot have semicolon-separated groups 52 | (In(Paren, Brace, Bracket, Comma) * T(Semi))[Semi] >> 53 | [](Match& _) { return err(_[Semi], "Semicolons cannot separate groups in parentheses/brackets/braces. Use commas."); }, 54 | 55 | // Blocks cannot be empty, except immediately under opener-closer pairs 56 | // and as the only term in a top-level group 57 | (--(In(Paren, Brace, Bracket, Comma, File))) * ((T(Group) << ((!T(Block))++ * (T(Block)[Block] << End)))) >> 58 | [](Match& _) { return err(_[Block], "Blocks may not be empty"); }, 59 | 60 | In(File) * (T(Group) << (((!T(Block)) * (!T(Block))++ * (T(Block)[Block] << End) * End))) >> 61 | [](Match& _) { return err(_[Block], "Blocks may not be empty"); }, 62 | 63 | // Alternatives cannot be empty 64 | T(Alt)[Alt] << End >> 65 | [](Match& _) { return err(_[Alt], "Alternatives may not be empty"); }, 66 | 67 | } 68 | }; 69 | } 70 | 71 | // Alternatives belong to the preceding Group and keep their contents in 72 | // blocks 73 | PassDef merge_alternatives() 74 | { 75 | return { 76 | "merge alternatives", 77 | wf_alternatives, 78 | dir::bottomup | dir::once, 79 | { 80 | // Alternatives keep their contents in a block 81 | (T(Alt)[Alt] << !T(Block)) >> 82 | [](Match& _) { return Alt << (Block << *_[Alt]); }, 83 | 84 | // Move trailing alternatives into the preceding group but do not 85 | // cross a comma or semi-colon. 
Merge the alternatives into one 86 | (--In(Comma, Semi) * T(Group)[Group] * ((T(Group) << T(Alt)[Alt]) * (T(Group) << T(Alt))++[Terms])) >> 87 | [](Match& _) { 88 | Node alt = _(Alt); 89 | for (auto& node : _[Terms]) { 90 | alt << node->front()->front(); // Group->Alt->Block 91 | } 92 | return _(Group) << alt; 93 | }, 94 | } 95 | }; 96 | } 97 | 98 | // Remove nodes for commas and semicolons and replace them by their children. 99 | PassDef drop_separators() 100 | { 101 | return { 102 | "drop separators", 103 | wf_no_semis_or_commas, 104 | dir::bottomup | dir::once, 105 | { 106 | (T(Comma)[Comma]) >> 107 | [](Match& _) { return Seq << *_[Comma]; }, 108 | 109 | T(Semi)[Semi] >> 110 | [](Match& _) { return Seq << *_[Semi]; }, 111 | } 112 | }; 113 | } 114 | 115 | // Check that groups starting with alternatives only appear immediately under 116 | // braces and brackets 117 | PassDef check_alternatives() 118 | { 119 | return { 120 | "check alternatives", 121 | wf_no_semis_or_commas, 122 | dir::bottomup | dir::once, 123 | { 124 | (--In(Brace, Bracket)) * T(Group) << T(Alt)[Alt] >> 125 | [](Match& _) { return err(_[Alt], "Alternative cannot appear first in a group"); }, 126 | } 127 | }; 128 | } 129 | 130 | // Structure groups so that they contain their atoms in a Terms node, 131 | // followed by an optional Block and an optional Alt (None when absent) 132 | PassDef group_structure() 133 | { 134 | return { 135 | "group structure", 136 | wf, 137 | dir::bottomup | dir::once, 138 | { 139 | In(Group) * Start * (!T(Block, Alt))++[Atom] * ~T(Block)[Block] * ~T(Alt)[Alt] * End >> 140 | [](Match& _) { 141 | return Seq << (Terms << _[Atom]) 142 | << (_(Block)? _(Block): None) 143 | << (_(Alt)? _(Alt): None); 144 | }, 145 | 146 | // Groups cannot be empty 147 | T(Group)[Group] << End >> 148 | [](Match& _) { return err(_[Group], "Groups cannot be empty"); }, 149 | 150 | // Overly permissive wf rules from before allow groups to have 151 | // impossible structure. 
To pass fuzz testing, we add this rule 152 | T(Group)[Group] << !T(Terms) >> 153 | [](Match& _) { return err(_[Group], "Should never happen"); }, 154 | } 155 | }; 156 | } 157 | 158 | 159 | Reader reader() 160 | { 161 | return { 162 | "shrubbery", 163 | { 164 | check_parsing(), 165 | merge_alternatives(), 166 | drop_separators(), 167 | check_alternatives(), 168 | group_structure(), 169 | }, 170 | parser(), 171 | }; 172 | } 173 | } 174 | -------------------------------------------------------------------------------- /notes/regions.md: -------------------------------------------------------------------------------- 1 | # Regions again 2 | 3 | *TODO* 4 | - what if I want to return `(A & in, B & out)`? 5 | - what's the capability of the returned tuple? 6 | - what if a lambda wants to capture an `out` as a free variable? 7 | - what's the capability of the lambda? 8 | - need a `local` or `stack` capability? 9 | - seems painful to write functions that accept `in | local` 10 | 11 | ## Four Capabilities 12 | 13 | `in` 14 | can put things in its fields 15 | can be put in a field 16 | `out` 17 | can't put things in its fields 18 | can't be put in a field 19 | `const` 20 | can't put things in its fields 21 | can be put in a field 22 | 23 | enter `region[T] & in` 24 | get `T & in` in the lambda 25 | ```ts 26 | enter (x: region[T] & in) 27 | { 28 | x: T & in => ... 29 | } 30 | ``` 31 | free variables that are `in` become `out` 32 | region rc++ 33 | 34 | enter `region[T] & out` 35 | get `T & out` in the lambda 36 | ```ts 37 | enter (x: region[T] & out) 38 | { 39 | x: T & out => ... 40 | } 41 | ``` 42 | free variables that are `in` become `out` 43 | region rc++ 44 | *safe even if the target region is `in` in some context* 45 | *doesn't allow any `in` region to coexist with a non-ancestor `out` region* 46 | 47 | lateral-enter `region[T] & out` 48 | get `T & in` in the lambda 49 | ```ts 50 | enter (x: region[T] & out) 51 | { 52 | x: T & in => ... 
// exciting 53 | } 54 | ``` 55 | fails if region rc != 1 56 | *maybe not rc=1? because of cown aliasing* 57 | *there's already an `in` or `out` reference, so we would coexist* 58 | *could be separate from the alias rc* 59 | free variables that are `in` become `out` 60 | region rc++ 61 | 62 | explore `region[T] & (in | out)` 63 | get `T & out` in the lambda 64 | ```ts 65 | explore (x: region[T] & (in | out)) 66 | { 67 | x: T & out => ... 68 | } 69 | ``` 70 | fails if region == open region 71 | *would cause `in` and `out` to coexist* 72 | free variables that are `in` *stay* `in` 73 | region rc++ 74 | 75 | exit 76 | region rc-- 77 | 78 | ```ts 79 | class region[T] 80 | { 81 | var val: in.T 82 | var region_rc: usize = 0 83 | 84 | exit(self) = (ref self.region_rc)-- 85 | 86 | explore(self): out.T | throw AlreadyOpen 87 | { 88 | if ($regions.top == self) {throw AlreadyOpen} 89 | (ref self.region_rc)++ 90 | self.val 91 | } 92 | 93 | freeze(self: in): const.T | throw NotUnique 94 | { 95 | 96 | } 97 | 98 | move(self): (region[T] & in) | throw NotUnique 99 | } 100 | ``` 101 | 102 | ## Viewpoint Adaptation 103 | 104 | It's ok to use `out` in a field position. It means the same thing as `in` due to viewpoint adaptation: `in.(T & out)` = `T & in`, etc. 105 | 106 | Field access on `C & k` gives `k.(ref[C.f] & in)`. This means that references to fields of a `lin` object are `in`, not `lin`. Linearity is for the individual object, not the transitive closure. 107 | 108 | ```ts 109 | k ∈ capability := lin | in | out | const 110 | 111 | // Note that lin.k and in.k are the same. 
112 | lin.k = ∅ if k = lin 113 | in if k = out 114 | k otherwise 115 | in.k = ∅ if k = lin 116 | in if k = out 117 | k otherwise 118 | out.k = out if k ∈ {lin, in} 119 | k otherwise 120 | const.k = const 121 | 122 | k.(T1 & T2) = k.T1 & k.T2 123 | k.(T1 | T2) = k.T1 | k.T2 124 | k.(T1, T2) = k.T1, k.T2 125 | k.C = C 126 | (T1 & T2).T = T1.T & T2.T 127 | (T1 | T2).T = T1.T | T2.T 128 | (T1, T2).T = ∅ 129 | C.T = ∅ 130 | 131 | // K, TP, C, I, A, V, tuple, |, &, list, (), true, false 132 | T*.{C, I, tuple, (), true, false} = rhs 133 | T*.(T1 | T2) = T*.T1 | T*.T2 134 | T*.(T1 & T2) = T*.T1 | T*.T2 135 | T*.(T1...) = (T*.T1)... 136 | T*.A = T*.(def A) 137 | 138 | // rhs = K, TP 139 | 140 | ``` 141 | 142 | ## Ref[T] 143 | 144 | ```ts 145 | 146 | class Ref[T] 147 | { 148 | var val: T 149 | 150 | // we lose any disjunction that is `lin` when loading 151 | load(self): self.T = self.val 152 | 153 | // T is unadapted, so if T is `lin`, we accept and return it as `lin` 154 | // TODO: has to be adapted, eg T: String & out should yield String & in 155 | store(self: in, val: T): T = (self.val = val) 156 | 157 | // fully specified 158 | load[$self: Ref[T] & (in | out | const)](self: $self): $self.T = self.val 159 | 160 | store[$self: Ref[T] & (in | out | count) & in](self: $self, val: $self.T): 161 | $self.T = (self.val = val) 162 | } 163 | ``` 164 | 165 | ## No Capability 166 | 167 | If a disjunction in a type has no capability after being fully worked out, we want it to be an efficient `readonly` type. Adding `& (in | out | const)` to it works (note that we leave out `lin`), but results in expensive dynamic reference counting. 168 | 169 | If, at the end of compilation, all types are replaced with type parameters that have the type as an upper bound, this gives the most efficient way to handle both structural types and `readonly` capabilities. 170 | 171 | ## Example 172 | 173 | ```ts 174 | // no need for `stack`? 
escape analysis for stack allocation 175 | when(a0: cown[A], b0: cown[B]) 176 | { 177 | a1: region[A] & in, b1: region[B] & in => 178 | // we can enter an `in` region, but it's only sendable if it has rc=1. 179 | // if a1 and b1 are aliases, then we can't send them. 180 | enter a1 181 | { 182 | a2: A & in => 183 | 184 | // on enter, all free variables are `out` 185 | b1: region[B] & out 186 | enter b1 187 | { 188 | b1f: B & out => 189 | ... 190 | } 191 | 192 | enter(a2.f: region[AF] & in) 193 | { 194 | // af.rc = 2, one for a2.f, one for enter 195 | af: AF & in => 196 | // a2 is also `out` now 197 | a2: A & out 198 | // if we read ourselves, we would get an `out` region 199 | a2.f: region[T2] & out 200 | } 201 | 202 | // af.rc = 1 203 | let r0: region[AF] & in = a2.f 204 | // af.rc = 2 205 | enter(r0: region[AF] & in) 206 | { 207 | // af.rc = 3 208 | af: AF & in => 209 | enter(b1: region[B] & out) 210 | { 211 | // b.rc = 1, no rc-inc on an out region 212 | // if b is an alias of a, does anything go wrong? 213 | b2: B & out => 214 | enter(b2.f: region[BF] & out) 215 | { 216 | // bf.rc = 1, no rc-inc on an out region 217 | bf: BF & out => 218 | ... 219 | } 220 | } 221 | } 222 | 223 | // could we `out` read an `in` region safely? 
224 | // yes, if the `in` region is `out` - which it is, because all free 225 | // variables are `out` 226 | 227 | // a region is only sendable if it has rc=1 228 | // will get collected when rc=0 229 | } 230 | } 231 | ``` 232 | -------------------------------------------------------------------------------- /CMakeLists.txt: -------------------------------------------------------------------------------- 1 | cmake_minimum_required(VERSION 3.14.0) 2 | project(trieste VERSION 1.0.0 LANGUAGES CXX) 3 | 4 | # ############################################# 5 | # Options 6 | option(TRIESTE_ENABLE_TESTING "Specifies whether to enable Trieste's tests" OFF) 7 | option(TRIESTE_BUILD_SAMPLES "Specifies whether to build the samples" ON) 8 | option(TRIESTE_BUILD_PARSERS "Specifies whether to build the parsers" ON) 9 | option(TRIESTE_BUILD_PARSER_TESTS "Specifies whether to build the parser tests" OFF) 10 | option(TRIESTE_BUILD_PARSER_TOOLS "Specifies whether to build parser tools" OFF) 11 | option(TRIESTE_USE_CXX17 "Specifies whether to target the C++17 standard" OFF) 12 | option(TRIESTE_CLEAN_INSTALL "Specifies whether to delete all files (recursively) from the install prefix before install" OFF) 13 | option(TRIESTE_USE_SNMALLOC "Specifies that new/delete should be overridden with snmalloc" ON) 14 | 15 | set(CMAKE_BUILD_WITH_INSTALL_RPATH ON) 16 | 17 | # ############################################# 18 | # Dependencies 19 | 20 | include(FetchContent) 21 | 22 | if(TRIESTE_USE_CXX17) 23 | set(CMAKE_CXX_STANDARD 17) 24 | else() 25 | set(CMAKE_CXX_STANDARD 20) 26 | endif() 27 | 28 | set(CMAKE_EXPORT_COMPILE_COMMANDS ON) 29 | 30 | set(SNMALLOC_BUILD_TESTING OFF CACHE INTERNAL "Turn off snmalloc tests") 31 | set(SNMALLOC_USE_CXX17 ${TRIESTE_USE_CXX17}) 32 | 33 | set(RE2_BUILD_TESTING OFF CACHE INTERNAL "Turn off RE2 tests") 34 | 35 | set(TRIESTE_SANITIZE "" CACHE STRING "Argument to pass to sanitize (disabled by default)") 36 | 37 | # Used to provide 38 | # 
FetchContent_MakeAvailable_ExcludeFromAll 39 | FetchContent_Declare( 40 | cmake_utils 41 | GIT_REPOSITORY https://github.com/mjp41/cmake_utils 42 | GIT_TAG 2bf98b5773ea7282197c823e205547d8c2e323c0 43 | GIT_SHALLOW FALSE 44 | ) 45 | 46 | FetchContent_MakeAvailable(cmake_utils) 47 | 48 | 49 | FetchContent_Declare( 50 | snmalloc 51 | GIT_REPOSITORY https://github.com/microsoft/snmalloc 52 | GIT_TAG b8e28be14b3fd98e27c2fe87c0296570f6d3990e 53 | # per https://gitlab.kitware.com/cmake/cmake/-/issues/17770, do not use GIT_SHALLOW here 54 | # or it will silently break new builds days later, once the commit hash above no longer 55 | # matches a named branch, like main. 56 | # Tracking a named branch with GIT_SHALLOW enabled is ok, however. 57 | # GIT_SHALLOW TRUE 58 | ) 59 | 60 | FetchContent_MakeAvailable_ExcludeFromAll(snmalloc) 61 | 62 | FetchContent_Declare( 63 | re2 64 | GIT_REPOSITORY https://github.com/google/re2 65 | GIT_TAG 2022-12-01 66 | GIT_SHALLOW TRUE 67 | ) 68 | 69 | FetchContent_MakeAvailable_ExcludeFromAll(re2) 70 | 71 | FetchContent_Declare( 72 | cli11 73 | GIT_REPOSITORY https://github.com/CLIUtils/CLI11 74 | GIT_TAG 4160d259d961cd393fd8d67590a8c7d210207348 75 | GIT_SHALLOW TRUE 76 | ) 77 | 78 | FetchContent_MakeAvailable_ExcludeFromAll(cli11) 79 | 80 | # ############################################# 81 | # Create target and set properties 82 | add_library(trieste INTERFACE) 83 | 84 | # Add an alias so that library can be used inside the build tree, e.g. 
when testing 85 | add_library(trieste::trieste ALIAS trieste) 86 | 87 | # Set target properties 88 | target_include_directories(trieste 89 | INTERFACE $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/include> 90 | $<INSTALL_INTERFACE:include> 91 | ) 92 | 93 | target_link_libraries(trieste 94 | INTERFACE 95 | re2::re2 96 | CLI11::CLI11 97 | snmalloc 98 | ) 99 | 100 | if(TRIESTE_USE_SNMALLOC) 101 | if(TRIESTE_SANITIZE) 102 | message(WARNING "Sanitizers are not supported with snmalloc, disabling snmalloc.") 103 | else() 104 | target_link_libraries(trieste INTERFACE snmalloc-new-override) 105 | endif() 106 | endif() 107 | # cxx_std_NN is a compile feature (a language-standard requirement propagated to consumers), not a preprocessor definition. 108 | if(TRIESTE_USE_CXX17) 109 | target_compile_features(trieste INTERFACE cxx_std_17) 110 | target_compile_definitions(trieste INTERFACE TRIESTE_USE_CXX17) 111 | else() 112 | target_compile_features(trieste INTERFACE cxx_std_20) 113 | endif() 114 | if (TRIESTE_SANITIZE) 115 | target_compile_options(trieste INTERFACE -g -fsanitize=${TRIESTE_SANITIZE} -fno-omit-frame-pointer) 116 | target_link_libraries(trieste INTERFACE -fsanitize=${TRIESTE_SANITIZE}) 117 | endif() 118 | 119 | function(enable_warnings target) 120 | if(MSVC) 121 | target_compile_options(${target} PRIVATE /W4 /WX /wd5030 /bigobj) 122 | else() 123 | target_compile_options(${target} PRIVATE 124 | -Wall -Wextra -Wpedantic -Werror -Wshadow) 125 | endif() 126 | endfunction() 127 | 128 | if(CMAKE_CXX_COMPILER_ID STREQUAL "Clang") 129 | target_compile_options(trieste INTERFACE -Wmismatched-tags -fstandalone-debug) 130 | endif() 131 | 132 | if(TRIESTE_CLEAN_INSTALL) 133 | message("${CMAKE_INSTALL_PREFIX} will be recursively cleaned before install") 134 | # Clear all existing files and folders from the install directory 135 | install(CODE [[ 136 | file(REMOVE_RECURSE ${CMAKE_INSTALL_PREFIX}/.) 
137 | ]]) 138 | endif() 139 | 140 | # ############################################# 141 | # Installation instructions 142 | set(CMAKE_INSTALL_PREFIX ${PROJECT_BINARY_DIR}/dist) 143 | 144 | set(INSTALL_DEPS snmalloc re2 CLI11) 145 | if (TRIESTE_USE_SNMALLOC) 146 | list(APPEND INSTALL_DEPS snmalloc-new-override) 147 | endif() 148 | 149 | install(TARGETS trieste ${INSTALL_DEPS} 150 | EXPORT ${PROJECT_NAME}_Targets 151 | ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR} 152 | LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR} 153 | RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR} 154 | ) 155 | 156 | # Create a ConfigVersion.cmake file 157 | include(CMakePackageConfigHelpers) 158 | write_basic_package_version_file( 159 | ${PROJECT_BINARY_DIR}/${PROJECT_NAME}ConfigVersion.cmake 160 | VERSION ${PROJECT_VERSION} 161 | COMPATIBILITY AnyNewerVersion 162 | ) 163 | 164 | configure_package_config_file(${PROJECT_SOURCE_DIR}/cmake/triesteConfig.cmake.in 165 | ${PROJECT_BINARY_DIR}/triesteConfig.cmake 166 | INSTALL_DESTINATION 167 | ${CMAKE_INSTALL_DATAROOTDIR}/${PROJECT_NAME}/cmake 168 | ) 169 | 170 | install(EXPORT ${PROJECT_NAME}_Targets 171 | FILE ${PROJECT_NAME}Targets.cmake 172 | NAMESPACE ${PROJECT_NAME}:: 173 | DESTINATION ${CMAKE_INSTALL_DATAROOTDIR}/${PROJECT_NAME}/cmake) 174 | 175 | install(FILES ${PROJECT_BINARY_DIR}/${PROJECT_NAME}Config.cmake 176 | ${PROJECT_BINARY_DIR}/${PROJECT_NAME}ConfigVersion.cmake 177 | DESTINATION ${CMAKE_INSTALL_DATAROOTDIR}/${PROJECT_NAME}/cmake) 178 | 179 | install(DIRECTORY ${PROJECT_SOURCE_DIR}/include/trieste DESTINATION include) 180 | 181 | # ############################################# 182 | # # Exporting from the build tree 183 | export(EXPORT ${PROJECT_NAME}_Targets 184 | FILE ${CMAKE_CURRENT_BINARY_DIR}/triesteTargets.cmake 185 | NAMESPACE trieste::) 186 | 187 | # Register package in the User Package Registry 188 | export(PACKAGE trieste) 189 | 190 | # ############################################# 191 | # # Add core Trieste tests 192 | 
if(TRIESTE_ENABLE_TESTING) 193 | enable_testing() 194 | add_subdirectory(test) 195 | endif() 196 | 197 | # ############################################# 198 | # # Add samples 199 | if(TRIESTE_BUILD_SAMPLES) 200 | enable_testing() 201 | add_subdirectory(samples/infix) 202 | add_subdirectory(samples/shrubbery) 203 | endif() 204 | 205 | # ############################################# 206 | # # Add parsers 207 | if(TRIESTE_BUILD_PARSERS) 208 | add_subdirectory(parsers) 209 | endif() 210 | -------------------------------------------------------------------------------- /cmake/testsuite.cmake: -------------------------------------------------------------------------------- 1 | find_program(DIFF_TOOL NAMES 2 | diff) 3 | 4 | set(DIR_OF_TESTSUITE_CMAKE ${CMAKE_CURRENT_LIST_DIR}) 5 | 6 | if (DIFF_TOOL STREQUAL DIFF_TOOL-NOTFOUND) 7 | set(DIFF_TOOL "") 8 | endif() 9 | 10 | # How to use this testsuite system. 11 | # In a directory with the testsuite files, create a CMakeLists.txt file. 12 | # * Include this file. 13 | # * Call the testsuite function with the name of the tool. 14 | # E.g. something like this: 15 | # 16 | # include (${CMAKE_SOURCE_DIR}/cmake/testsuite.cmake) 17 | # testsuite(infix) 18 | # 19 | # The testsuite function will find all adjacent .cmake files they should contain the following: 20 | # * A variable TESTSUITE_REGEX, which specifies which files are to be considered a test. 21 | # This should be a regular expression that matches the test files. E.g. "test_type/.*\\.infix" 22 | # which matches all files with the .infix extension in the test_type directory. 23 | # * A variable TESTSUITE_EXE which is the executable to run for the tests. This can be a generator expression 24 | # to allow for different executables in different configurations. 25 | # * A macro toolinvoke which takes the arguments ARGS, local_build, testfile and outputdir. 26 | # This macro should set ARGS to the command line arguments for the tool. 
27 | # * A function test_output_dir which takes the output directory out and the test file test. 28 | # This function should set out to the output directory for the test. This is relative to 29 | # the current binary directory (for generated output) and the current source directory (for golden files). 30 | # An example of this is in samples/infix/testsuite/infix.cmake. 31 | function(testsuite name) 32 | message(STATUS "Building test suite: ${name}") 33 | # Iterate each tool 34 | set(UPDATE_DUMPS_TARGETS) 35 | # Each test collection has its own cmake file for its configuration. 36 | file (GLOB test_collections CONFIGURE_DEPENDS RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} *.cmake) 37 | file (GLOB_RECURSE all_files CONFIGURE_DEPENDS RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} *) 38 | 39 | foreach(test_collection ${test_collections}) 40 | set (test_set) 41 | 42 | # Grab specific settings for this tool 43 | include(${CMAKE_CURRENT_SOURCE_DIR}/${test_collection}) 44 | 45 | set (tests ${all_files}) 46 | list(FILTER tests 47 | INCLUDE REGEX 48 | ${TESTSUITE_REGEX} 49 | ) 50 | 51 | foreach(test ${tests}) 52 | test_output_dir(output_dir_relative ${test}) 53 | get_filename_component(test_dir ${test} DIRECTORY) 54 | get_filename_component(test_file ${test} NAME) 55 | # Create command to create the output for this test. 56 | set (output_dir ${CMAKE_CURRENT_BINARY_DIR}/${output_dir_relative}) 57 | set (test_output_cmd 58 | ${CMAKE_COMMAND} 59 | -DTESTFILE=${test_file} 60 | -DTEST_EXE=${TESTSUITE_EXE} 61 | -DWORKING_DIR=${CMAKE_CURRENT_SOURCE_DIR}/${test_dir} 62 | -DCOLLECTION=${CMAKE_CURRENT_SOURCE_DIR}/${test_collection} 63 | -DOUTPUT_DIR=${output_dir} 64 | -P ${DIR_OF_TESTSUITE_CMAKE}/runcommand.cmake 65 | ) 66 | 67 | # Add test that rebuilds the compiler output 68 | add_test(NAME ${output_dir_relative} 69 | COMMAND ${test_output_cmd} 70 | ) 71 | 72 | # Add command that rebuilds the compiler output for updating golden files. 
73 | add_custom_command(OUTPUT "${output_dir_relative}_fake" 74 | COMMAND ${test_output_cmd} 75 | ) 76 | set_source_files_properties("${output_dir_relative}_fake" PROPERTIES SYMBOLIC "true") 77 | list(APPEND test_set "${output_dir_relative}_fake") 78 | 79 | # Make json for debugging. 80 | toolinvoke(launch_json_args ${test_file} ${output_dir}) 81 | # Convert to a json format list. 82 | string(REPLACE "\"" "\\\"" launch_json_args "${launch_json_args}") 83 | string(REPLACE ";" "\", \"" launch_json_args "${launch_json_args}") 84 | list(APPEND LAUNCH_JSON 85 | " { 86 | \"name\": \"${output_dir_relative}\", 87 | \"type\": \"cppdbg\", 88 | \"request\": \"launch\", 89 | \"program\": \"${TESTSUITE_EXE}\", 90 | \"args\": [\"${launch_json_args}\"], 91 | \"stopAtEntry\": false, 92 | \"cwd\": \"${CMAKE_CURRENT_SOURCE_DIR}/${test_dir}\", 93 | },") 94 | 95 | # Add output comparison for each golden / output file 96 | set (golden_dir ${CMAKE_CURRENT_SOURCE_DIR}/${output_dir_relative} ) 97 | file (GLOB_RECURSE results CONFIGURE_DEPENDS RELATIVE ${golden_dir} ${golden_dir}/*) 98 | # Check if there are any files to compare for this test. 
99 | list(LENGTH results res_length) 100 | if(res_length EQUAL 0) 101 | message(WARNING "Test does not have results directory: ${golden_dir}\nRun `update-dump` to generate golden files.") 102 | # Add to generate golden output target 103 | add_custom_command(OUTPUT ${output_dir_relative}_fake 104 | COMMAND 105 | ${CMAKE_COMMAND} 106 | -E make_directory 107 | ${golden_dir} 108 | APPEND 109 | ) 110 | add_custom_command(OUTPUT ${output_dir_relative}_fake 111 | COMMAND 112 | ${CMAKE_COMMAND} 113 | -E copy_if_different 114 | ${output_dir}/* 115 | ${golden_dir}/ 116 | APPEND 117 | ) 118 | else() 119 | foreach (result ${results}) 120 | # Check each file is correct as a test target 121 | add_test (NAME ${output_dir_relative}-${result} 122 | COMMAND 123 | ${CMAKE_COMMAND} 124 | -Doriginal_file=${golden_dir}/${result} 125 | -Dnew_file=${output_dir}/${result} 126 | -Ddiff_tool=${DIFF_TOOL} 127 | -P ${DIR_OF_TESTSUITE_CMAKE}/compare.cmake 128 | ) 129 | set_tests_properties(${output_dir_relative}-${result} PROPERTIES DEPENDS ${output_dir_relative}) 130 | 131 | # Override out of date files. 132 | add_custom_command(OUTPUT "${output_dir_relative}_fake" 133 | COMMAND 134 | ${CMAKE_COMMAND} 135 | -E copy_if_different 136 | ${output_dir}/${result} 137 | ${golden_dir}/${result} 138 | APPEND 139 | ) 140 | endforeach() 141 | # All tests require an error_code. 
142 | add_custom_command(OUTPUT "${output_dir_relative}_fake" 143 | COMMAND 144 | ${CMAKE_COMMAND} 145 | -E copy_if_different 146 | ${output_dir}/exit_code.txt 147 | ${golden_dir}/exit_code.txt 148 | APPEND 149 | ) 150 | 151 | endif() 152 | endforeach() 153 | add_custom_target("update-dump-${test_collection}" DEPENDS ${test_set}) 154 | list(APPEND UPDATE_DUMPS_TARGETS "update-dump-${test_collection}") 155 | endforeach() 156 | 157 | string(REPLACE ";" "\n" LAUNCH_JSON2 "${LAUNCH_JSON}") 158 | 159 | if (TRIESTE_GENERATE_LAUNCH_JSON) 160 | file(GENERATE OUTPUT ${CMAKE_SOURCE_DIR}/.vscode/launch.json 161 | CONTENT 162 | "{ 163 | \"version\": \"0.2.0\", 164 | \"configurations\": [ 165 | ${LAUNCH_JSON2} 166 | ] 167 | }") 168 | endif() 169 | 170 | 171 | if (TARGET update-dump) 172 | add_dependencies(update-dump ${UPDATE_DUMPS_TARGETS}) 173 | else() 174 | add_custom_target(update-dump DEPENDS ${UPDATE_DUMPS_TARGETS}) 175 | endif() 176 | endfunction() -------------------------------------------------------------------------------- /notes/control-flow.md: -------------------------------------------------------------------------------- 1 | # Control Flow 2 | 3 | ## Non-local Returns 4 | 5 | The result of every call is checked. If it's a `nonlocal[T]`, then it's immediately returned by the function or lambda. A function first unwraps the non-local value by calling `load` on it, whereas a lambda doesn't. 6 | 7 | This allows `throw[T]` to be implemented as a `nonlocal[throw[T]]`, such that it propagates upwards until it's explicitly caught, while `return[T]` can be implemented as `nonlocal[T]`, such that it causes the calling function to return a value of type `T`. 8 | 9 | If a call is syntactically marked as `try`, then the check for a non-local value is suppressed. 10 | 11 | ## Altering the default behavior 12 | 13 | A function can be made to behave as a lambda with `try`. A lambda can be made to behave like a function as follows: 14 | 15 | ```rust 16 | let f = { x -> ... 
} 17 | let f = { x -> returning { ... } } 18 | 19 | returning[T1, T2](f: ()->(non_local[T1] | T2)): T1 | T2 20 | { 21 | match (try f()) 22 | { 23 | { x: nlr[T1] -> x.load() } 24 | { x: T2 -> x } 25 | } 26 | } 27 | 28 | (return[A] | throw[B]) <: non_local[A | throw[B]] 29 | (return[C] | return[D]) <: non_local[C | D] 30 | 31 | let x = try f() // x: T1 | non_local[T2] 32 | let y = { x1 -> x1 } x // y: T1 33 | 34 | match x 35 | { 36 | { x1: T1 -> ... } 37 | { x2: non_local[T2] -> ... } 38 | } 39 | 40 | ``` 41 | 42 | ## NLRCheck 43 | 44 | ```rust 45 | type non_local[T] = 46 | { 47 | trait_non_local(): () 48 | load(self): Self.T 49 | } 50 | 51 | class return[T]: non_local[T] 52 | { 53 | let value: T 54 | 55 | trait_non_local(): () = () 56 | load(self): Self.T = self.value 57 | 58 | create(): return[()] = return[()]::create(()) 59 | create(value: T): return[T] = new value 60 | up(value: T): return[return[T]] = return[return[T]]::create(new value) 61 | } 62 | 63 | class throw[T]: non_local[throw[T]] 64 | { 65 | let value: T 66 | 67 | trait_non_local(): () = () 68 | load(self): self.throw[T] = self 69 | 70 | create(): throw[()] = throw[()]::create(()) 71 | create(value: T): value.throw[T] = new value 72 | } 73 | 74 | type Break = throw[break & const] 75 | type Continue = throw[continue & const] 76 | 77 | class break 78 | { 79 | create(): Break = throw(new const) 80 | } 81 | 82 | class continue 83 | { 84 | create(): Continue = throw(new const) 85 | } 86 | 87 | f() 88 | { 89 | try 90 | { 91 | throw x // as expected, `try` catches this 92 | return y // unexpected: `try` also catches this 93 | } 94 | 95 | y 96 | 97 | match try 98 | { 99 | ... 
100 | } 101 | { 102 | { x: return[T] -> x } 103 | } 104 | } 105 | 106 | // TODO: establishing `value: T1 & ¬T2` in the `else` branch 107 | catch(value: T1, handlers: T2->T3): (T1 & ¬T2) | T3 108 | { 109 | if value 110 | { 111 | x: T2 -> handler value 112 | } 113 | else 114 | { 115 | value 116 | } 117 | } 118 | 119 | f() 120 | { 121 | for iter 122 | { 123 | subiter -> 124 | for subiter 125 | { 126 | value -> 127 | if something 128 | { 129 | // want to return from f here 130 | // `if` catches it and hands it to the subiter lambda 131 | // the subiter catches it and returns it to `for subiter` 132 | // `for subiter` catches it and hands it to the iter lambda 133 | // the iter catches it and returns it to `for iter` 134 | // `for iter` catches it (explicit) and hands it to f() 135 | // f() catches it, unwraps it, and returns it to the caller 136 | return value 137 | } 138 | 139 | if something 140 | { 141 | throw "fail" 142 | } 143 | 144 | // if we end here, `for` gets a 3 145 | // it probably just continues the loop 146 | // but it could build a list of values, track the last returned value, 147 | // or something else 148 | 3 149 | 150 | // if we end here, f() returns `value` 151 | return value 152 | } 153 | } 154 | 155 | if something 156 | { 157 | if something_else 158 | { 159 | // if we end here, f() returns `value` 160 | return value 161 | } 162 | } 163 | 164 | // immediate return if we end here 165 | value 166 | 167 | // also an immediate return, because the function unwraps it 168 | return value 169 | } 170 | 171 | for[T1, T2, T3](iter: Iterator[T1], body: T1->(non_local[T2] | T3)): 172 | (non_local[T2] & ¬throw[Break] & ¬throw[Continue]) | () 173 | { 174 | try 175 | { 176 | while (iter.has_next) 177 | { 178 | // If this is a non-local, it goes to the `body()` call in `while`, 179 | // which returns it to our `try`, which forwards it to the caller. 
180 | body iter.next 181 | } 182 | } 183 | } 184 | 185 | while[T1, T2](cond: ()->Bool, body: ()->(non_local[T1] | T2)): 186 | (non_local[T1] & ¬throw[Break] & ¬throw[Continue]) | () 187 | { 188 | if (cond()) 189 | { 190 | match try body() 191 | { 192 | { _: throw[Break] -> () } 193 | { _: throw[Continue] -> while cond body } 194 | { r: non_local[T1] -> r } 195 | { _: T2 -> while cond body } 196 | } 197 | } 198 | } 199 | 200 | catch[T, U, V](value: T, body: U->V): T | V 201 | { 202 | if value 203 | { 204 | x: U -> 205 | // Don't unwrap any return[T] 206 | try body x 207 | } 208 | else 209 | { 210 | value 211 | } 212 | } 213 | 214 | ``` 215 | 216 | ## Conditionals 217 | 218 | Two types of conditional are built in. 219 | 220 | ```ts 221 | // boolean conditional 222 | // executes the lambda if `cond` is true 223 | if cond 224 | { 225 | ... 226 | } 227 | 228 | // type conditional 229 | // executes the lambda if `value <: T` 230 | if value 231 | { 232 | x: T -> ... 233 | } 234 | ``` 235 | 236 | ## Pattern Matching 237 | 238 | The result of the match should be: 239 | - A `return[T]` for the result of any successful match. 240 | - A `throw[E]` for an error result of any successful match. 241 | - A `match[T]` if not yet matched. 242 | 243 | What's needed? 244 | - structural pattern 245 | - type pattern 246 | 247 | What about exhaustive matching? 
248 | 249 | ```ts 250 | class match[T] 251 | { 252 | class NoMatch{} 253 | 254 | let value: T 255 | 256 | create(value: T): match[T] & lin = new (value, true) 257 | 258 | |[G: {==(G, T1): Bool}, T1, T2, E]( 259 | self: lin, guard: G, case: T1->(T2 | throw E)): 260 | ((match[T] | match[T \ T1] | matched[T2]) & lin) | throw E 261 | { 262 | if self.value 263 | { 264 | x: T1 -> 265 | if (guard == x) 266 | { 267 | matched(case x) 268 | } 269 | else 270 | { 271 | match(x) 272 | } 273 | } 274 | else 275 | { 276 | x: T \ T1 -> match(x) 277 | } 278 | } 279 | 280 | |[T1, T2, E](self: lin, case: T1->(T2 | throw E)): 281 | ((match[T \ T1] | matched[T2]) & lin) | throw E 282 | { 283 | if self.value 284 | { 285 | x: T1 -> matched(case x) 286 | } 287 | else 288 | { 289 | x: T \ T1 -> match(x) 290 | } 291 | } 292 | 293 | end(self: lin): throw NoMatch 294 | { 295 | throw NoMatch 296 | } 297 | } 298 | 299 | class matched[T] 300 | { 301 | let value: T 302 | 303 | create(value: T): matched[T] & lin = new value 304 | 305 | |[G: {==(G, T1): Bool}, T1, T2](self: lin, guard: G, case: T1->T2): Self = 306 | self 307 | 308 | |[T1, T2](self: lin, case: T1->T2): Self = self 309 | 310 | end(self: lin): T = self.value 311 | } 312 | 313 | // match[type x] | matched[type case1] | matched[type case2] | throw 314 | match x 315 | | true { 0 } 316 | | { a, b -> a + b } 317 | end 318 | ``` 319 | -------------------------------------------------------------------------------- /include/trieste/reader.h: -------------------------------------------------------------------------------- 1 | // Copyright Microsoft and Project Verona Contributors. 
2 | // SPDX-License-Identifier: MIT 3 | #pragma once 4 | 5 | #include "parse.h" 6 | #include "passes.h" 7 | 8 | #include 9 | #include 10 | 11 | namespace trieste 12 | { 13 | class Reader 14 | { 15 | private: 16 | constexpr static auto parse_only = "parse"; 17 | 18 | using InputSpec = 19 | std::optional>; 20 | 21 | std::string language_name_; 22 | std::vector passes_; 23 | Parse parser_; 24 | InputSpec input_{}; 25 | bool debug_enabled_; 26 | bool wf_check_enabled_; 27 | std::filesystem::path debug_path_; 28 | std::string start_pass_; 29 | std::string end_pass_; 30 | std::size_t offset_; 31 | 32 | public: 33 | Reader( 34 | const std::string& language_name, 35 | const std::vector& passes, 36 | const Parse& parser) 37 | : language_name_(language_name), 38 | passes_(passes), 39 | parser_(parser), 40 | debug_enabled_(false), 41 | wf_check_enabled_(false), 42 | debug_path_("."), 43 | start_pass_(""), 44 | end_pass_(""), 45 | offset_(0) 46 | {} 47 | 48 | ProcessResult read() 49 | { 50 | if (!input_) 51 | { 52 | return {false, parse_only, nullptr, {(Error ^ "No source provided")}}; 53 | } 54 | 55 | auto& input = *input_; 56 | PassRange pass_range( 57 | passes_.begin(), passes_.end(), parser_.wf(), parse_only); 58 | 59 | if (!end_pass_.empty()) 60 | { 61 | if (end_pass_ == parse_only) 62 | { 63 | pass_range.disable(); 64 | } 65 | else if (!pass_range.move_end(end_pass_)) 66 | { 67 | return { 68 | false, 69 | parse_only, 70 | nullptr, 71 | {Error ^ ("Unknown pass: " + end_pass_)}}; 72 | } 73 | } 74 | 75 | Node ast; 76 | auto parse_start = std::chrono::high_resolution_clock::now(); 77 | if (!start_pass_.empty()) 78 | { 79 | if (!pass_range.move_start(start_pass_)) 80 | { 81 | return { 82 | false, 83 | parse_only, 84 | nullptr, 85 | {Error ^ ("Unknown pass: " + start_pass_)}}; 86 | } 87 | 88 | Source source; 89 | if (std::holds_alternative(input)) 90 | { 91 | auto& path = std::get(input); 92 | if (std::filesystem::is_directory(path)) 93 | return { 94 | false, 95 | parse_only, 
96 | nullptr, 97 | {Error ^ "Cannot use directory with intermediate pass."}}; 98 | source = SourceDef::load(path); 99 | } 100 | else 101 | { 102 | source = std::get(input); 103 | } 104 | 105 | // Pass range is currently pointing at pass, but the output is the 106 | // dump of that, so advance it one, so we start processing on the 107 | // next pass. 108 | ++pass_range; 109 | 110 | ast = build_ast(source, offset_); 111 | } 112 | else 113 | { 114 | std::visit([&](auto x) { ast = parser_.parse(x); }, input); 115 | } 116 | auto parse_end = std::chrono::high_resolution_clock::now(); 117 | 118 | logging::Info summary; 119 | std::filesystem::path debug_path; 120 | if (debug_enabled_) 121 | { 122 | debug_path = debug_path_; 123 | } 124 | 125 | summary << "---------" << std::endl; 126 | summary << "Parse time (us): " 127 | << std::chrono::duration_cast( 128 | parse_end - parse_start).count() 129 | << std::endl; 130 | 131 | auto result = 132 | Process(pass_range) 133 | .set_check_well_formed(wf_check_enabled_) 134 | .set_default_pass_complete(summary, language_name_, debug_path) 135 | .run(ast); 136 | summary << "---------" << std::endl; 137 | return result; 138 | } 139 | 140 | template 141 | size_t pass_index(const StringLike& name_) const 142 | { 143 | if (name_ == parse_only) 144 | return 0; 145 | 146 | for (size_t i = 0; i < passes_.size(); i++) 147 | { 148 | if (passes_[i]->name() == name_) 149 | return i + 1; 150 | } 151 | 152 | return std::numeric_limits::max(); 153 | } 154 | 155 | std::vector pass_names() const 156 | { 157 | std::vector names; 158 | names.push_back(parse_only); 159 | std::transform( 160 | passes_.begin(), 161 | passes_.end(), 162 | std::back_inserter(names), 163 | [](const auto& p) { return p->name(); }); 164 | return names; 165 | } 166 | 167 | Reader& executable(const std::filesystem::path& path) 168 | { 169 | parser_.executable(path); 170 | return *this; 171 | } 172 | 173 | Reader& language_name(const std::string& name) 174 | { 175 | language_name_ 
= name; 176 | return *this; 177 | } 178 | 179 | const std::string& language_name() const 180 | { 181 | return language_name_; 182 | } 183 | 184 | const std::vector& passes() const 185 | { 186 | return passes_; 187 | } 188 | 189 | const Parse& parser() const 190 | { 191 | return parser_; 192 | } 193 | 194 | Reader& debug_enabled(bool value) 195 | { 196 | debug_enabled_ = value; 197 | return *this; 198 | } 199 | 200 | bool debug_enabled() const 201 | { 202 | return debug_enabled_; 203 | } 204 | 205 | Reader& wf_check_enabled(bool value) 206 | { 207 | wf_check_enabled_ = value; 208 | return *this; 209 | } 210 | 211 | bool wf_check_enabled() const 212 | { 213 | return wf_check_enabled_; 214 | } 215 | 216 | Reader& debug_path(const std::filesystem::path& path) 217 | { 218 | debug_path_ = path; 219 | return *this; 220 | } 221 | 222 | const std::filesystem::path& debug_path() const 223 | { 224 | return debug_path_; 225 | } 226 | 227 | template 228 | Reader& start_pass(const StringLike& pass) 229 | { 230 | start_pass_ = pass; 231 | return *this; 232 | } 233 | 234 | const std::string& start_pass() const 235 | { 236 | return start_pass_; 237 | } 238 | 239 | template 240 | Reader& end_pass(const StringLike& pass) 241 | { 242 | end_pass_ = pass; 243 | return *this; 244 | } 245 | 246 | const std::string& end_pass() const 247 | { 248 | return end_pass_; 249 | } 250 | 251 | Reader& offset(std::size_t pos) 252 | { 253 | offset_ = pos; 254 | return *this; 255 | } 256 | 257 | std::size_t offset() const 258 | { 259 | return offset_; 260 | } 261 | 262 | Reader& source(const Source& s) 263 | { 264 | input_ = s; 265 | return *this; 266 | } 267 | 268 | Reader& file(const std::filesystem::path& path) 269 | { 270 | input_ = path; 271 | return *this; 272 | } 273 | 274 | Reader& synthetic(const std::string& contents, const std::string& origin="") 275 | { 276 | input_ = SourceDef::synthetic(contents, origin); 277 | return *this; 278 | } 279 | 280 | Reader& postparse(Parse::PostF func) 281 | { 
282 | parser_.postparse(func); 283 | return *this; 284 | } 285 | 286 | const wf::Wellformed& output_wf() const 287 | { 288 | return passes_.empty() ? parser_.wf() : passes_.back()->wf(); // a Reader with no passes falls back to the parser's wf rather than calling back() on an empty vector 289 | } 290 | }; 291 | } 292 | -------------------------------------------------------------------------------- /include/trieste/token.h: -------------------------------------------------------------------------------- 1 | // Copyright Microsoft and Project Verona Contributors.
48 | uint32_t default_map_id; 49 | static constexpr size_t DEFAULT_MAP_TABLE_SIZE{128}; 50 | 51 | TokenDef(const char* name_, flag fl_ = 0) : name(name_), fl(fl_) 52 | { 53 | static std::atomic next_id = 0; 54 | default_map_id = (next_id++ % DEFAULT_MAP_TABLE_SIZE) * sizeof(void*); 55 | 56 | detail::register_token(*this); 57 | } 58 | 59 | TokenDef() = delete; 60 | TokenDef(const TokenDef&) = delete; 61 | 62 | operator Node() const; 63 | 64 | bool has(TokenDef::flag f) const 65 | { 66 | return (fl & f) != 0; 67 | } 68 | }; 69 | 70 | struct Token 71 | { 72 | const TokenDef* def; 73 | 74 | Token() : def(nullptr) {} 75 | Token(const TokenDef& def_) : def(&def_) {} 76 | 77 | operator Node() const; 78 | 79 | /** 80 | * Special hash for looking up in tables of size DEFAULT_MAP_TABLE_SIZE with 81 | * elements of size sizeof(void*). 82 | */ 83 | uint32_t default_map_hash() const 84 | { 85 | return def->default_map_id / sizeof(void*); 86 | } 87 | 88 | bool operator&(TokenDef::flag f) const 89 | { 90 | return def->has(f); 91 | } 92 | 93 | bool operator==(const Token& that) const 94 | { 95 | return def == that.def; 96 | } 97 | 98 | bool operator!=(const Token& that) const 99 | { 100 | return def != that.def; 101 | } 102 | 103 | bool operator<(const Token& that) const 104 | { 105 | return def < that.def; 106 | } 107 | 108 | bool operator>(const Token& that) const 109 | { 110 | return def > that.def; 111 | } 112 | 113 | bool operator<=(const Token& that) const 114 | { 115 | return def <= that.def; 116 | } 117 | 118 | bool operator>=(const Token& that) const 119 | { 120 | return def >= that.def; 121 | } 122 | 123 | bool in(const std::initializer_list& list) const 124 | { 125 | return std::find(list.begin(), list.end(), *this) != list.end(); 126 | } 127 | 128 | bool in(const std::vector& list) const 129 | { 130 | return std::find(list.begin(), list.end(), *this) != list.end(); 131 | } 132 | 133 | const char* str() const 134 | { 135 | return def->name; 136 | } 137 | }; 138 | 139 | 
namespace flag 140 | { 141 | constexpr TokenDef::flag none = 0; 142 | 143 | // Print the location when printing an AST node of this type. 144 | constexpr TokenDef::flag print = 1 << 0; 145 | 146 | // Include a symbol table in an AST node of this type. 147 | constexpr TokenDef::flag symtab = 1 << 1; 148 | 149 | // If an AST node of this type has a symbol table, definitions can only be 150 | // found from later in the same source file. 151 | constexpr TokenDef::flag defbeforeuse = 1 << 2; 152 | 153 | // If a definition of this type is in a symbol table, it doesn't recurse into 154 | // parent symbol tables. 155 | constexpr TokenDef::flag shadowing = 1 << 3; 156 | 157 | // If a definition of this type is in a symbol table, it can be found when 158 | // looking up. 159 | constexpr TokenDef::flag lookup = 1 << 4; 160 | 161 | // If a definition of this type is in a symbol table, it can be found when 162 | // looking down. 163 | constexpr TokenDef::flag lookdown = 1 << 5; 164 | 165 | // Used for AST nodes to represent internal Trieste features. Rewriting 166 | // should not occur inside an internal node. 
167 | constexpr TokenDef::flag internal = 1 << 6; 168 | } 169 | 170 | // Built-in grouping 171 | inline const auto Top = TokenDef("top", flag::symtab); 172 | inline const auto Directory = TokenDef("directory"); 173 | inline const auto File = TokenDef("file"); 174 | inline const auto Group = TokenDef("group"); 175 | 176 | // Special tokens for effects 177 | inline const auto Seq = TokenDef("seq", flag::internal); 178 | inline const auto Lift = TokenDef("lift", flag::internal); 179 | inline const auto NoChange = TokenDef("nochange", flag::internal); 180 | inline const auto Reapply = TokenDef("reapply", flag::internal); 181 | 182 | // Special tokens for symbol tables 183 | inline const auto Include = TokenDef("include", flag::internal); 184 | 185 | // Special tokens for error handling 186 | inline const auto Invalid = TokenDef("invalid", flag::internal); 187 | inline const auto Error = TokenDef("error", flag::internal); 188 | inline const auto ErrorMsg = TokenDef("errormsg", flag::print | flag::internal); 189 | inline const auto ErrorAst = TokenDef("errorast", flag::internal); 190 | 191 | // Special tokens for reified patterns 192 | namespace reified 193 | { 194 | inline const auto Token = TokenDef("pattern::token", flag::print); 195 | inline const auto Regex = TokenDef("pattern::regex", flag::print); 196 | inline const auto Any = TokenDef("pattern::anything"); 197 | inline const auto TokenMatch = TokenDef("pattern::tokenmatch"); 198 | inline const auto RegexMatch = TokenDef("pattern::regexmatch"); 199 | inline const auto Cap = TokenDef("pattern::cap"); 200 | inline const auto Opt = TokenDef("pattern::opt"); 201 | inline const auto Rep = TokenDef("pattern::rep"); 202 | inline const auto Not = TokenDef("pattern::not"); 203 | inline const auto Choice = TokenDef("pattern::choice"); 204 | inline const auto Children = TokenDef("pattern::children"); 205 | inline const auto First = TokenDef("pattern::first"); 206 | inline const auto Last = TokenDef("pattern::last"); 207 | 
inline const auto Inside = TokenDef("pattern::inside"); 208 | inline const auto InsideStar = TokenDef("pattern::insidestar"); 209 | inline const auto Pred = TokenDef("pattern::pred"); 210 | inline const auto NegPred = TokenDef("pattern::negpred"); 211 | inline const auto Action = TokenDef("pattern::action"); 212 | } 213 | 214 | namespace detail 215 | { 216 | inline std::map& token_map() 217 | { 218 | static std::map global_map; 219 | return global_map; 220 | } 221 | 222 | inline void register_token(const TokenDef& def) 223 | { 224 | auto& map = token_map(); 225 | auto it = map.find(def.name); 226 | if (it != map.end()) 227 | throw std::runtime_error( 228 | "Duplicate token definition: " + std::string(def.name)); 229 | 230 | Token t = def; 231 | map[t.str()] = t; 232 | } 233 | 234 | inline Token find_token(std::string_view str) 235 | { 236 | auto& map = token_map(); 237 | auto it = map.find(str); 238 | 239 | if (it != map.end()) 240 | return it->second; 241 | 242 | return Invalid; 243 | } 244 | } 245 | } 246 | -------------------------------------------------------------------------------- /samples/infix/reader.cc: -------------------------------------------------------------------------------- 1 | #include "internal.h" 2 | 3 | namespace 4 | { 5 | using namespace trieste::wf::ops; 6 | using namespace infix; 7 | 8 | // | is used to create a Choice between all the elements 9 | // this indicates that literals can be an Int or a Float 10 | 11 | // A <<= B indicates that B is a child of A 12 | // ++ indicates that there are zero or more instances of the token 13 | 14 | inline const auto wf_expressions_tokens = 15 | (wf_parse_tokens - (String | Paren | Print)) | Expression; 16 | 17 | // clang-format off 18 | inline const auto wf_pass_expressions = 19 | (Top <<= Calculation) 20 | | (Calculation <<= (Assign | Output)++) 21 | // [Ident] here indicates that the Ident node is a symbol that should 22 | // be stored in the symbol table 23 | | (Assign <<= Ident * 
Expression)[Ident] 24 | | (Output <<= String * Expression) 25 | // [1] here indicates that there should be at least one token 26 | | (Expression <<= wf_expressions_tokens++[1]) 27 | ; 28 | // clang-format on 29 | 30 | // clang-format off 31 | inline const auto wf_pass_multiply_divide = 32 | wf_pass_expressions 33 | | (Multiply <<= Expression * Expression) 34 | | (Divide <<= Expression * Expression) 35 | ; 36 | // clang-format on 37 | 38 | // clang-format off 39 | inline const auto wf_pass_add_subtract = 40 | wf_pass_multiply_divide 41 | | (Add <<= Expression * Expression) 42 | | (Subtract <<= Expression * Expression) 43 | ; 44 | // clang-format on 45 | 46 | inline const auto wf_operands_tokens = wf_expressions_tokens - Expression; 47 | 48 | // clang-format off 49 | inline const auto wf_pass_trim = 50 | wf_pass_add_subtract 51 | | (Expression <<= wf_operands_tokens) 52 | ; 53 | // clang-format on 54 | 55 | inline const auto wf_check_refs_tokens = (wf_operands_tokens - Ident) | Ref; 56 | 57 | // clang-format off 58 | inline const auto wf_pass_check_refs = 59 | wf_pass_trim 60 | | (Expression <<= wf_check_refs_tokens) 61 | | (Ref <<= Ident) 62 | ; 63 | // clang-format on 64 | 65 | PassDef expressions() 66 | { 67 | return { 68 | "expressions", 69 | wf_pass_expressions, 70 | dir::topdown, 71 | { 72 | // In() indicates this is the root node of the pattern match. 73 | // What we return will replace the nodes we specify after the *. 74 | // The [] gives us a hook in the Match to use for referring to the 75 | // matched entity. Here we're saying that we want to create a 76 | // Calculation node and make all of the values in File (*_[File]) its 77 | // children. 78 | In(Top) * T(File)[File] >> 79 | [](Match& _) { return Calculation << *_[File]; }, 80 | 81 | // This rule selects an Equals node with the right structure, 82 | // i.e. a single ident being assigned. 
We replace it with 83 | // an Assign node that has two children: the Ident and the 84 | // an Expression, which will take the children of the Group. 85 | In(Calculation) * 86 | (T(Equals) << ((T(Group) << T(Ident)[Id]) * T(Group)[Rhs])) >> 87 | [](Match& _) { return Assign << _(Id) << (Expression << *_[Rhs]); }, 88 | 89 | // This rule selects a Group that matches the Output pattern 90 | // of `print `. In this case, Any++ indicates that 91 | // Rhs should contain all the remaining tokens in the group. 92 | // When used here, * means nodes that are children of the In() 93 | // node in the specified order. They can be anywhere inside 94 | // the In() child sequence. 95 | In(Calculation) * 96 | (T(Group) << (T(Print) * T(String)[Lhs] * Any++[Rhs])) >> 97 | [](Match& _) { return Output << _(Lhs) << (Expression << _[Rhs]); }, 98 | 99 | // This node unwraps Groups that are inside Parens, making them 100 | // Expression nodes. 101 | In(Expression) * (T(Paren) << T(Group)[Group]) >> 102 | [](Match& _) { return Expression << *_[Group]; }, 103 | 104 | // errors 105 | 106 | // because rules are matched in order, this catches any 107 | // Paren nodes that had no children (because the rule above 108 | // will have handled those *with* children) 109 | T(Paren)[Paren] >> 110 | [](Match& _) { return err(_(Paren), "Empty paren"); }, 111 | 112 | // Ditto for malformed equals nodes 113 | T(Equals)[Equals] >> 114 | [](Match& _) { return err(_(Equals), "Invalid assign"); }, 115 | 116 | // Orphaned print node will catch bad output statements 117 | T(Print)[Print] >> 118 | [](Match& _) { return err(_(Print), "Invalid output"); }, 119 | 120 | // Our WF definition allows this, so we need to handle it. 121 | T(Expression)[Rhs] << End >> 122 | [](Match& _) { return err(_(Rhs), "Empty expression"); }, 123 | 124 | // Same with this. 
125 | In(Expression) * T(String)[String] >> 126 | [](Match& _) { 127 | return err(_(String), "Expressions cannot contain strings"); 128 | }, 129 | 130 | T(Group)[Group] >> 131 | [](Match& _) { return err(_[Group], "syntax error"); }, 132 | }}; 133 | } 134 | 135 | inline const auto ExpressionArg = T(Expression, Ident) / Number; 136 | 137 | PassDef multiply_divide() 138 | { 139 | return { 140 | "multiply_divide", 141 | wf_pass_multiply_divide, 142 | dir::topdown, 143 | { 144 | // Group multiply and divide operations together. This rule will 145 | // select any triplet of *|/ in an expression list and 146 | // replace it with a single node that has the triplet as 147 | // its children. 148 | In(Expression) * 149 | (ExpressionArg[Lhs] * (T(Multiply, Divide))[Op] * 150 | ExpressionArg[Rhs]) >> 151 | [](Match& _) { 152 | return Expression 153 | << (_(Op) << (Expression << _(Lhs)) << (Expression << _[Rhs])); 154 | }, 155 | (T(Multiply, Divide))[Op] << End >> 156 | [](Match& _) { return err(_(Op), "No arguments"); }, 157 | }}; 158 | } 159 | 160 | PassDef add_subtract() 161 | { 162 | return { 163 | "add_subtract", 164 | wf_pass_add_subtract, 165 | dir::topdown, 166 | { 167 | In(Expression) * 168 | (ExpressionArg[Lhs] * (T(Add, Subtract))[Op] * 169 | ExpressionArg[Rhs]) >> 170 | [](Match& _) { 171 | return Expression 172 | << (_(Op) << (Expression << _(Lhs)) << (Expression << _[Rhs])); 173 | }, 174 | (T(Add, Subtract))[Op] << End >> 175 | [](Match& _) { return err(_(Op), "No arguments"); }, 176 | }}; 177 | } 178 | 179 | PassDef trim() 180 | { 181 | return { 182 | "trim", 183 | wf_pass_trim, 184 | dir::topdown, 185 | { 186 | // End is a special pattern which indicates that there 187 | // are no further nodes. So in this case we are matching 188 | // an Expression which has a single Expression as a 189 | // child. 
190 | T(Expression) << (T(Expression)[Expression] * End) >> 191 | [](Match& _) { return _(Expression); }, 192 | 193 | T(Expression) << (Any * Any[Rhs]) >> 194 | [](Match& _) { 195 | return err(_(Rhs), "Only one value allowed per expression"); 196 | }, 197 | }}; 198 | } 199 | 200 | inline const auto Arg = T(Int) / T(Float) / T(Ident) / T(Expression); 201 | 202 | PassDef check_refs() 203 | { 204 | return { 205 | "check_refs", 206 | wf_pass_check_refs, 207 | dir::topdown, 208 | { 209 | In(Expression) * T(Ident)[Id] >> 210 | [](Match& _) { 211 | auto id = _(Id); // the Node object for the identifier 212 | auto defs = id->lookup(); // a list of matching symbols 213 | if (defs.size() == 0) 214 | { 215 | // there are no symbols with this identifier 216 | return err(id, "undefined"); 217 | } 218 | 219 | return Ref << id; 220 | }, 221 | }}; 222 | } 223 | } 224 | 225 | namespace infix 226 | { 227 | Reader reader() 228 | { 229 | return { 230 | "infix", 231 | {expressions(), multiply_divide(), add_subtract(), trim(), check_refs()}, 232 | parser(), 233 | }; 234 | } 235 | } 236 | -------------------------------------------------------------------------------- /include/trieste/source.h: -------------------------------------------------------------------------------- 1 | // Copyright Microsoft and Project Verona Contributors. 
2 | // SPDX-License-Identifier: MIT 3 | #pragma once 4 | 5 | #include "intrusive_ptr.h" 6 | 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include 12 | #include 13 | #include 14 | #include 15 | 16 | namespace trieste 17 | { 18 | class SourceDef; 19 | struct Location; 20 | 21 | using Source = intrusive_ptr; 22 | 23 | class SourceDef final : public intrusive_refcounted 24 | { 25 | private: 26 | std::string origin_; 27 | std::string contents; 28 | std::vector lines; 29 | 30 | public: 31 | static Source load(const std::filesystem::path& file) 32 | { 33 | std::ifstream f(file, std::ios::binary | std::ios::in | std::ios::ate); 34 | 35 | if (!f) 36 | return {}; 37 | 38 | auto size = f.tellg(); 39 | f.seekg(0, std::ios::beg); 40 | 41 | auto source = Source::make(); 42 | source->origin_ = std::filesystem::relative(file).string(); 43 | source->contents.resize(static_cast(size)); 44 | f.read(&source->contents[0], size); 45 | 46 | if (!f) 47 | return {}; 48 | 49 | source->find_lines(); 50 | return source; 51 | } 52 | 53 | static Source synthetic(const std::string& contents, const std::string& origin="") 54 | { 55 | auto source = Source::make(); 56 | source->contents = contents; 57 | source->origin_ = origin; 58 | source->find_lines(); 59 | return source; 60 | } 61 | 62 | const std::string& origin() const 63 | { 64 | return origin_; 65 | } 66 | 67 | std::string_view view() const 68 | { 69 | return std::string_view(contents); 70 | } 71 | 72 | std::pair linecol(size_t pos) const 73 | { 74 | // Lines and columns are 0-indexed. 75 | auto it = std::lower_bound(lines.begin(), lines.end(), pos); 76 | 77 | auto line = it - lines.begin(); 78 | auto col = pos; 79 | 80 | if (it != lines.begin()) 81 | col -= *(it - 1) + 1; 82 | 83 | return {line, col}; 84 | } 85 | 86 | std::pair linepos(size_t line) const 87 | { 88 | // Lines are 0-indexed. 
89 | if (line > lines.size()) 90 | return {std::string::npos, 0}; 91 | 92 | size_t start = 0; 93 | auto end = contents.size(); 94 | 95 | if (line > 0) 96 | start = lines[line - 1] + 1; 97 | 98 | if (line < lines.size()) 99 | end = lines[line]; 100 | 101 | return {start, end - start}; 102 | } 103 | 104 | private: 105 | // Semantics note: 106 | // The code here only looks for \n and is not intended to be 107 | // platform-sensitive. Effectively, sources operate in binary mode and leave 108 | // encoding issues to the language implementation. There are however some 109 | // cosmetic fixes in error printing, such as in Location::str(), which 110 | // ensure that control characters don't leak into Trieste's output in that 111 | // case. 112 | void find_lines() 113 | { 114 | // Find the lines. 115 | auto pos = contents.find('\n'); 116 | 117 | while (pos != std::string::npos) 118 | { 119 | lines.push_back(pos); 120 | pos = contents.find('\n', pos + 1); 121 | } 122 | } 123 | }; 124 | 125 | struct Location 126 | { 127 | Source source; 128 | size_t pos; 129 | size_t len; 130 | 131 | Location() = default; 132 | 133 | Location(Source source_, size_t pos_, size_t len_) 134 | : source(source_), pos(pos_), len(len_) 135 | {} 136 | 137 | Location(const std::string& s) 138 | : source(SourceDef::synthetic(s)), pos(0), len(s.size()) 139 | {} 140 | 141 | std::string_view view() const 142 | { 143 | if (!source) 144 | return {}; 145 | 146 | return source->view().substr(pos, len); 147 | } 148 | 149 | std::string origin_linecol() const 150 | { 151 | std::stringstream ss; 152 | 153 | if (source && !source->origin().empty()) 154 | { 155 | auto [line, col] = linecol(); 156 | ss << source->origin() << ":" << (line + 1) << ":" << (col + 1); 157 | } 158 | 159 | return ss.str(); 160 | } 161 | 162 | std::string str() const 163 | { 164 | if (!source) 165 | return {}; 166 | 167 | std::stringstream ss; 168 | auto write_chars_skipping_r = [&ss](const std::string_view& str) -> void { 169 | for (char 
ch : str) 170 | { 171 | if (ch != '\r') 172 | { 173 | ss << ch; 174 | } 175 | } 176 | }; 177 | auto write_indexed_skipping_r = 178 | [&ss](const std::string_view& str, auto fn) -> void { 179 | size_t idx = 0; 180 | for (char ch : str) 181 | { 182 | if (ch != '\r') 183 | { 184 | ss << fn(idx); 185 | } 186 | ++idx; 187 | } 188 | }; 189 | 190 | auto [line, col] = linecol(); 191 | auto [linepos, linelen] = source->linepos(line); 192 | 193 | if (view().find_first_of('\n') != std::string::npos) 194 | { 195 | auto line_view_first = source->view().substr(linepos, linelen); 196 | size_t col_last; 197 | std::string_view interim_view; 198 | std::string_view line_view_last; 199 | { 200 | auto [line2, col2] = source->linecol(pos + len); 201 | auto [linepos2, linelen2] = source->linepos(line2); 202 | line_view_last = source->view().substr(linepos2, linelen2); 203 | col_last = col2; 204 | 205 | // Find the lines in between first and last to insert, if there are 206 | // any such lines. If the lines are adjacent, this creates a 1 char 207 | // line view with the new line between the two. 208 | size_t interim_pos = linepos + linelen; 209 | interim_view = 210 | source->view().substr(interim_pos, linepos2 - interim_pos); 211 | } 212 | 213 | write_indexed_skipping_r(line_view_first, [&ccol = col](size_t idx) { 214 | return idx < ccol ? ' ' : '~'; 215 | }); 216 | ss << std::endl; 217 | write_chars_skipping_r(line_view_first); 218 | write_chars_skipping_r(interim_view); 219 | write_chars_skipping_r(line_view_last); 220 | ss << std::endl; 221 | write_indexed_skipping_r( 222 | line_view_last.substr(0, col_last), [&](size_t) { return '~'; }); 223 | ss << std::endl; 224 | } 225 | else 226 | { 227 | auto line_view = source->view().substr(linepos, linelen); 228 | write_chars_skipping_r(line_view); 229 | ss << std::endl; 230 | 231 | assert(pos >= linepos); 232 | write_indexed_skipping_r( 233 | line_view.substr(0, pos - linepos + len), 234 | [&ccol = col](size_t idx) { return idx < ccol ? 
' ' : '~'; }); 235 | ss << std::endl; 236 | } 237 | 238 | return ss.str(); 239 | } 240 | 241 | std::pair linecol() const 242 | { 243 | if (!source) 244 | return {0, 0}; 245 | 246 | return source->linecol(pos); 247 | } 248 | 249 | Location operator*(const Location& that) const 250 | { 251 | if (source != that.source) 252 | return *this; 253 | 254 | auto lo = std::min(pos, that.pos); 255 | auto hi = std::max(pos + len, that.pos + that.len); 256 | return {source, lo, hi - lo}; 257 | } 258 | 259 | Location& operator*=(const Location& that) 260 | { 261 | *this = *this * that; 262 | return *this; 263 | } 264 | 265 | bool operator==(const Location& that) const 266 | { 267 | return view() == that.view(); 268 | } 269 | 270 | bool operator!=(const Location& that) const 271 | { 272 | return !(*this == that); 273 | } 274 | 275 | bool operator<(const Location& that) const 276 | { 277 | return view() < that.view(); 278 | } 279 | 280 | bool operator<=(const Location& that) const 281 | { 282 | return (*this < that) || (*this == that); 283 | } 284 | 285 | bool operator>(const Location& that) const 286 | { 287 | return !(*this <= that); 288 | } 289 | 290 | bool operator>=(const Location& that) const 291 | { 292 | return !(*this < that); 293 | } 294 | }; 295 | } 296 | -------------------------------------------------------------------------------- /include/trieste/writer.h: -------------------------------------------------------------------------------- 1 | // Copyright Microsoft and Project Verona Contributors. 
2 | // SPDX-License-Identifier: MIT 3 | #pragma once 4 | 5 | #include "passes.h" 6 | #include "trieste/intrusive_ptr.h" 7 | #include "trieste/wf.h" 8 | 9 | #include 10 | 11 | namespace trieste 12 | { 13 | class DestinationDef; 14 | using Destination = intrusive_ptr; 15 | 16 | class DestinationDef : public intrusive_refcounted 17 | { 18 | private: 19 | enum class Mode 20 | { 21 | // Files will be written to the file system 22 | FileSystem, 23 | // Files will be output directly to console 24 | Console, 25 | // Files are stored in memory and accessible via dst.files() 26 | Synthetic 27 | }; 28 | 29 | Mode mode_; 30 | std::ofstream fstream_; 31 | std::ostringstream sstream_; 32 | std::map files_; 33 | std::filesystem::path path_; 34 | bool is_open_{false}; // close() reads this from open() and from the destructor, so it must start out false 35 | 36 | public: 37 | ~DestinationDef() 38 | { 39 | close(); 40 | } 41 | 42 | std::ostream& stream() 43 | { 44 | switch (mode_) 45 | { 46 | case Mode::FileSystem: 47 | return fstream_; 48 | 49 | case Mode::Console: 50 | return std::cout; 51 | 52 | case Mode::Synthetic: 53 | return sstream_; 54 | 55 | default: 56 | throw std::runtime_error("Invalid destination mode"); 57 | } 58 | } 59 | 60 | bool open(const std::filesystem::path& path) 61 | { 62 | close(); 63 | path_ = path_ / path; 64 | switch (mode_) 65 | { 66 | case Mode::FileSystem: 67 | if (!path_.parent_path().empty()) 68 | { 69 | std::filesystem::create_directories(path_.parent_path()); 70 | } 71 | fstream_.open(path_); 72 | is_open_ = fstream_.is_open(); if (!is_open_) path_ = path_.parent_path(); return is_open_; // close() only restores path_ while is_open_ is true, so a failed open must undo the path change itself 73 | 74 | case Mode::Console: 75 | std::cout << "OPEN " << path_ << std::endl << std::endl; 76 | return is_open_ = true; 77 | 78 | case Mode::Synthetic: 79 | return is_open_ = true; 80 | 81 | default: 82 | throw std::runtime_error("Invalid destination mode"); 83 | } 84 | } 85 | 86 | void close() 87 | { 88 | if (!is_open_) 89 | { 90 | return; 91 | } 92 | 93 | std::string contents; 94 | switch (mode_) 95 | { 96 | case Mode::FileSystem: 97 | if (fstream_.is_open()) 98 | { 99 | fstream_.close(); 100 | } 101 | 
break; 102 | 103 | case Mode::Console: 104 | std::cout << std::endl << "CLOSE " << path_ << std::endl; 105 | break; 106 | 107 | case Mode::Synthetic: 108 | contents = sstream_.str(); 109 | if (!contents.empty()) 110 | { 111 | files_[path_.string()] = contents; 112 | sstream_ = std::ostringstream(); 113 | } 114 | break; 115 | } 116 | 117 | path_ = path_.parent_path(); 118 | is_open_ = false; 119 | } 120 | 121 | const std::map& files() const 122 | { 123 | return files_; 124 | } 125 | 126 | const std::string& file(const std::filesystem::path& path) const 127 | { 128 | return files_.at(path.string()); 129 | } 130 | 131 | static Destination dir(const std::filesystem::path& path) 132 | { 133 | auto d = Destination::make(); 134 | d->mode_ = Mode::FileSystem; 135 | d->path_ = path; 136 | return d; 137 | } 138 | 139 | static Destination console() 140 | { 141 | auto d = Destination::make(); 142 | d->mode_ = Mode::Console; 143 | d->path_ = "."; 144 | return d; 145 | } 146 | 147 | static Destination synthetic() 148 | { 149 | auto d = Destination::make(); 150 | d->mode_ = Mode::Synthetic; 151 | d->path_ = "."; 152 | return d; 153 | } 154 | }; 155 | 156 | using WriteFile = std::function; 157 | using namespace wf::ops; 158 | 159 | inline const auto Path = TokenDef("path", flag::print); 160 | inline const auto FileSeq = TokenDef("fileseq"); 161 | inline const auto Contents = TokenDef("contents"); 162 | 163 | // clang-format off 164 | inline const auto wf_writer = 165 | (Top <<= Directory | File) 166 | | (Directory <<= Path * FileSeq) 167 | | (FileSeq <<= (Directory | File)++) 168 | | (File <<= Path * Contents) 169 | ; 170 | // clang-format on 171 | 172 | class Writer 173 | { 174 | private: 175 | std::string language_name_; 176 | std::vector passes_; 177 | const wf::Wellformed* wf_; 178 | WriteFile write_file_; 179 | Destination destination_; 180 | bool debug_enabled_; 181 | bool wf_check_enabled_; 182 | std::filesystem::path debug_path_; 183 | 184 | public: 185 | Writer( 186 | 
const std::string& language_name, 187 | const std::vector& passes, 188 | const wf::Wellformed& input_wf, 189 | WriteFile write_file) 190 | : language_name_(language_name), 191 | passes_(passes), 192 | wf_(&input_wf), 193 | write_file_(write_file), 194 | debug_enabled_(false), 195 | wf_check_enabled_(true), 196 | debug_path_(".") 197 | { 198 | console(); 199 | } 200 | 201 | ProcessResult write(Node ast) 202 | { 203 | PassRange pass_range( 204 | passes_.begin(), passes_.end(), *wf_, language_name_); 205 | 206 | logging::Info summary; 207 | std::filesystem::path debug_path; 208 | if (debug_enabled_) 209 | { 210 | debug_path = debug_path_; 211 | } 212 | 213 | summary << "---------" << std::endl; 214 | auto result = 215 | Process(pass_range) 216 | .set_check_well_formed(wf_check_enabled_) 217 | .set_default_pass_complete(summary, language_name_, debug_path) 218 | .run(ast); 219 | summary << "---------" << std::endl; 220 | 221 | if (!result.ok) 222 | { 223 | return result; 224 | } 225 | 226 | Destination dest = destination_; 227 | WFContext context({wf_, &wf_writer}); 228 | 229 | Nodes error_nodes; 230 | std::vector stack; 231 | stack.push_back(ast); 232 | while (!stack.empty()) 233 | { 234 | Node current = stack.back(); 235 | stack.pop_back(); 236 | if (current == Directory) 237 | { 238 | auto files = current / FileSeq; 239 | for (auto& file : *files) 240 | { 241 | stack.push_back(file); 242 | } 243 | } 244 | else if (current == File) 245 | { 246 | try 247 | { 248 | dest->open((current / Path)->location().view()); 249 | write_file_(dest->stream(), current / Contents); 250 | dest->close(); 251 | } 252 | catch (std::exception& e) 253 | { 254 | error_nodes.push_back( 255 | Error << (ErrorMsg ^ e.what()) << (ErrorAst << current->clone())); 256 | } 257 | } 258 | else if (current == Top) 259 | { 260 | stack.insert(stack.end(), current->begin(), current->end()); 261 | } 262 | } 263 | 264 | if (!error_nodes.empty()) 265 | { 266 | result.ok = false; 267 | result.errors = 
error_nodes; 268 | } 269 | 270 | return result; 271 | } 272 | 273 | Writer& debug_enabled(bool value) 274 | { 275 | debug_enabled_ = value; 276 | return *this; 277 | } 278 | 279 | bool debug_enabled() const 280 | { 281 | return debug_enabled_; 282 | } 283 | 284 | Writer& wf_check_enabled(bool value) 285 | { 286 | wf_check_enabled_ = value; 287 | return *this; 288 | } 289 | 290 | bool wf_check_enabled() const 291 | { 292 | return wf_check_enabled_; 293 | } 294 | 295 | Writer& debug_path(const std::filesystem::path& path) 296 | { 297 | debug_path_ = path; 298 | return *this; 299 | } 300 | 301 | const std::filesystem::path& debug_path() const 302 | { 303 | return debug_path_; 304 | } 305 | 306 | Writer& destination(const Destination& destination) 307 | { 308 | destination_ = destination; 309 | return *this; 310 | } 311 | 312 | Writer& dir(const std::filesystem::path& path) 313 | { 314 | destination_ = DestinationDef::dir(path); 315 | return *this; 316 | } 317 | 318 | Writer& console() 319 | { 320 | destination_ = DestinationDef::console(); 321 | return *this; 322 | } 323 | 324 | Writer& synthetic() 325 | { 326 | destination_ = DestinationDef::synthetic(); 327 | return *this; 328 | } 329 | 330 | Destination destination() const 331 | { 332 | return destination_; 333 | } 334 | 335 | const wf::Wellformed& input_wf() const 336 | { 337 | return *wf_; 338 | } 339 | 340 | const std::vector& passes() const 341 | { 342 | return passes_; 343 | } 344 | }; 345 | } 346 | -------------------------------------------------------------------------------- /include/trieste/passes.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "pass.h" 4 | #include "wf.h" 5 | 6 | #include 7 | #include 8 | #include 9 | 10 | namespace trieste 11 | { 12 | template 13 | class PassRange 14 | { 15 | PassIterator start; 16 | PassIterator end; 17 | // Well-formed condition for entry into this Range. 
18 | const wf::Wellformed* wf; 19 | std::string entry_name; 20 | 21 | public: 22 | PassRange( 23 | PassIterator start_, 24 | PassIterator end_, 25 | const wf::Wellformed& wf_, 26 | std::string entry_name_) 27 | : start(start_), end(end_), wf(&wf_), entry_name(entry_name_) 28 | {} 29 | 30 | template 31 | PassRange(Range& range, const wf::Wellformed& wf_, std::string entry_name_) 32 | : start(range.begin()), end(range.end()), wf(&wf_), entry_name(entry_name_) 33 | {} 34 | 35 | template 36 | bool move_start(StringLike name) 37 | { 38 | auto it = std::find_if( 39 | start, end, [&](auto& pass) { return pass->name() == name; }); 40 | if (it == end) 41 | return false; 42 | 43 | wf = &((*it)->wf()); 44 | entry_name = (*it)->name(); 45 | start = it; 46 | return true; 47 | } 48 | 49 | template 50 | bool move_end(StringLike name) 51 | { 52 | auto it = std::find_if( 53 | start, end, [&](auto& pass) { return pass->name() == name; }); 54 | if (it == end) 55 | return false; 56 | end = ++it; 57 | return true; 58 | } 59 | 60 | void disable() 61 | { 62 | start = end; 63 | } 64 | 65 | Pass& operator()() 66 | { 67 | return *start; 68 | } 69 | 70 | void operator++() 71 | { 72 | wf = &((*start)->wf()); 73 | entry_name = (*start)->name(); 74 | start++; 75 | } 76 | 77 | bool has_next() 78 | { 79 | return start != end; 80 | } 81 | 82 | const wf::Wellformed& input_wf() const 83 | { 84 | return *wf; 85 | } 86 | 87 | Pass& last_pass() 88 | { 89 | for (auto it = start; it != end; ++it) 90 | { 91 | if (it + 1 == end) 92 | return *it; 93 | } 94 | throw std::runtime_error("No passes in range"); 95 | } 96 | 97 | std::string entry_pass_name() 98 | { 99 | return entry_name; 100 | } 101 | }; 102 | 103 | // Deduction guide require for constructor of PassRange 104 | template 105 | PassRange(PassIterator, PassIterator, const wf::Wellformed&, std::string) 106 | -> PassRange; 107 | 108 | // Deduction guide require for constructor of PassRange 109 | template 110 | PassRange(Range, const wf::Wellformed&, 
std::string) 111 | -> PassRange; 112 | 113 | struct PassStatistics 114 | { 115 | size_t count; 116 | size_t changes; 117 | std::chrono::microseconds duration; 118 | }; 119 | 120 | struct ProcessResult 121 | { 122 | bool ok; 123 | std::string last_pass; 124 | Node ast; 125 | Nodes errors; 126 | size_t total_changes = 0; 127 | 128 | void print_errors(logging::Log& err) const 129 | { 130 | logging::Sep sep{"----------------"}; 131 | err << "Errors:"; 132 | 133 | size_t count = 0; 134 | 135 | for (auto& error : errors) 136 | { 137 | err << sep << std::endl; 138 | for (auto& child : *error) 139 | { 140 | if (child->type() == ErrorMsg) 141 | err << child->location().view() << std::endl; 142 | else 143 | { 144 | err << "-- " << child->location().origin_linecol() << std::endl 145 | << child->location().str() << std::endl; 146 | } 147 | } 148 | if (count++ > 20) 149 | { 150 | err << "Too many errors, stopping here" << std::endl; 151 | break; 152 | } 153 | } 154 | err << "Pass " << last_pass << " failed with " << errors.size() 155 | << (count > 1 ? " errors!" : " error!") << std::endl; 156 | } 157 | }; 158 | 159 | /** 160 | * @brief Process is used to run a collection of rewrite passes on an Ast. 161 | * It provides a collection of hooks to produce output. 162 | */ 163 | template 164 | class Process 165 | { 166 | PassRange pass_range; 167 | 168 | bool check_well_formed{true}; 169 | 170 | std::function 171 | pass_complete; 172 | 173 | std::function error_pass; 174 | 175 | public: 176 | Process(const PassRange& passes) : pass_range(passes) {} 177 | 178 | /** 179 | * @brief After each pass the supplied function is called with the current 180 | * AST and details of the pass that has just completed. 
181 | */ 182 | Process& set_pass_complete( 183 | std::function f) 184 | { 185 | pass_complete = f; 186 | return *this; 187 | } 188 | 189 | Process& set_default_pass_complete( 190 | logging::Log& summary, 191 | const std::string& language_name = "", 192 | std::filesystem::path output_directory = {}) 193 | { 194 | pass_complete = [output_directory, language_name, &summary]( 195 | Node& ast, 196 | std::string pass_name, 197 | size_t index, 198 | PassStatistics& stats) { 199 | auto [count, changes, duration] = stats; 200 | std::string delim{"\t"}; 201 | if (index == 0) 202 | { 203 | summary << "Pass" << delim << "Iterations" << delim << "Changes" 204 | << delim << "Time (us)" << std::endl; 205 | } 206 | else 207 | { 208 | summary << pass_name << delim << count << delim << changes << delim 209 | << static_cast(duration.count()) << std::endl; 210 | } 211 | 212 | if (output_directory.empty()) 213 | return true; 214 | 215 | // Check if output_directory exists, and if not create it. 216 | if (!std::filesystem::exists(output_directory)) 217 | { 218 | if (!std::filesystem::create_directories(output_directory)) 219 | { 220 | logging::Error() 221 | << "Could not create output directory " << output_directory; 222 | return false; 223 | } 224 | } 225 | 226 | std::filesystem::path output; 227 | if (index < 10) 228 | { 229 | output = output_directory / 230 | ("0" + std::to_string(index) + "_" + pass_name + ".trieste"); 231 | } 232 | else 233 | { 234 | output = output_directory / 235 | (std::to_string(index) + "_" + pass_name + ".trieste"); 236 | } 237 | 238 | std::ofstream f(output, std::ios::binary | std::ios::out); 239 | 240 | if (!f) 241 | { 242 | logging::Error() << "Could not open " << output << " for writing."; 243 | return false; 244 | } 245 | 246 | // Write the AST to the output file. 
247 | f << language_name << std::endl << pass_name << std::endl << ast; 248 | return true; 249 | }; 250 | 251 | return *this; 252 | } 253 | 254 | /** 255 | * @brief Specified is well-formedness should be checked between passes. 256 | */ 257 | Process& set_check_well_formed(bool b) 258 | { 259 | check_well_formed = b; 260 | return *this; 261 | } 262 | 263 | bool validate(Node ast, Nodes& errors) 264 | { 265 | auto wf = pass_range.input_wf(); 266 | auto ok = bool(ast); 267 | 268 | ok = ok && wf.build_st(ast); 269 | 270 | if (ast) 271 | ast->get_errors(errors); 272 | ok = ok && errors.empty(); 273 | 274 | ok = ok && (!check_well_formed || wf.check(ast)); 275 | 276 | return ok; 277 | } 278 | 279 | /** 280 | * @brief Run the supplied passes on the Ast. 281 | * 282 | * Returns the rewritten Ast, or an empty Node if the process failed. 283 | */ 284 | ProcessResult run(Node& ast) 285 | { 286 | size_t index = 1; 287 | size_t total_changes = 0; 288 | 289 | WFContext context(pass_range.input_wf()); 290 | 291 | Nodes errors; 292 | 293 | // Check ast is well-formed before starting. 
294 | auto ok = validate(ast, errors); 295 | 296 | PassStatistics stats; 297 | std::string last_pass = pass_range.entry_pass_name(); 298 | ok = pass_complete(ast, pass_range.entry_pass_name(), 0, stats) && ok; 299 | 300 | for (; ok && pass_range.has_next(); index++) 301 | { 302 | logging::Debug() << "Starting pass: \"" << pass_range()->name() << "\""; 303 | 304 | auto now = std::chrono::high_resolution_clock::now(); 305 | auto& pass = pass_range(); 306 | context.push_back(pass->wf()); 307 | 308 | auto [new_ast, count, changes] = pass->run(ast); 309 | total_changes += changes; 310 | ast = new_ast; 311 | context.pop_front(); 312 | 313 | ++pass_range; 314 | 315 | ok = validate(ast, errors); 316 | 317 | auto then = std::chrono::high_resolution_clock::now(); 318 | stats = { 319 | count, 320 | changes, 321 | std::chrono::duration_cast(then - now)}; 322 | 323 | ok = pass_complete(ast, pass->name(), index, stats) && ok; 324 | 325 | last_pass = pass->name(); 326 | } 327 | 328 | return {ok, last_pass, ast, errors, total_changes}; 329 | } 330 | }; 331 | } // namespace trieste 332 | --------------------------------------------------------------------------------