├── samples
    ├── infix
    │   ├── testsuite
    │   │   ├── examples
    │   │   │   ├── mixed_out
    │   │   │   │   ├── stderr.txt
    │   │   │   │   ├── exit_code.txt
    │   │   │   │   └── stdout.txt
    │   │   │   ├── simple_out
    │   │   │   │   ├── exit_code.txt
    │   │   │   │   ├── stderr.txt
    │   │   │   │   └── stdout.txt
    │   │   │   ├── multi_define_out
    │   │   │   │   ├── stderr.txt
    │   │   │   │   ├── exit_code.txt
    │   │   │   │   └── stdout.txt
    │   │   │   ├── multi_ident_out
    │   │   │   │   ├── exit_code.txt
    │   │   │   │   ├── stderr.txt
    │   │   │   │   └── stdout.txt
    │   │   │   ├── multi_define.infix
    │   │   │   ├── simple.infix
    │   │   │   ├── mixed.infix
    │   │   │   └── multi_ident.infix
    │   │   ├── CMakeLists.txt
    │   │   └── infix.cmake
    │   ├── infix_trieste.cc
    │   ├── CMakeLists.txt
    │   ├── internal.h
    │   ├── infix.h
    │   ├── infix.cc
    │   ├── parse.cc
    │   └── reader.cc
    └── shrubbery
    │   ├── shrubbery.cc
    │   ├── CMakeLists.txt
    │   ├── shrubbery.h
    │   ├── wf.h
    │   └── reader.cc
├── .gitignore
├── cmake
    ├── triesteConfig.cmake.in
    ├── runcommand.cmake
    ├── compare.cmake
    └── testsuite.cmake
├── CODE_OF_CONDUCT.md
├── include
    └── trieste
    │   ├── gen.h
    │   ├── debug.h
    │   ├── rewriter.h
    │   ├── xoroshiro.h
    │   ├── trieste.h
    │   ├── defaultmap.h
    │   ├── regex.h
    │   ├── reader.h
    │   ├── token.h
    │   ├── source.h
    │   ├── writer.h
    │   └── passes.h
├── test
    ├── CMakeLists.txt
    └── intrusive_ptr_test.cc
├── notes
    ├── packages.md
    ├── lookup.md
    ├── arguments.md
    ├── von.md
    ├── dispatch.md
    ├── regions.md
    └── control-flow.md
├── parsers
    ├── CMakeLists.txt
    ├── test
    │   ├── json_checker.cc
    │   ├── json_fuzzer.cc
    │   ├── yaml_checker.cc
    │   ├── yaml_fuzzer.cc
    │   └── CMakeLists.txt
    ├── json
    │   ├── internal.h
    │   ├── CMakeLists.txt
    │   ├── parse.cc
    │   ├── reader.cc
    │   ├── README.md
    │   └── writer.cc
    ├── yaml
    │   ├── CMakeLists.txt
    │   ├── internal.h
    │   ├── yamlc.cc
    │   └── README.md
    ├── README.md
    └── include
    │   └── trieste
    │       └── yaml.h
├── LICENSE
├── CONTRIBUTING.md
├── CMakePresets.json
├── README.md
├── SECURITY.md
├── .github
    └── workflows
    │   └── buildtest.yml
├── .clang-format
└── CMakeLists.txt


/samples/infix/testsuite/examples/mixed_out/stderr.txt:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/samples/infix/testsuite/examples/mixed_out/exit_code.txt:
--------------------------------------------------------------------------------
1 | 0


--------------------------------------------------------------------------------
/samples/infix/testsuite/examples/simple_out/exit_code.txt:
--------------------------------------------------------------------------------
1 | 0


--------------------------------------------------------------------------------
/samples/infix/testsuite/examples/simple_out/stderr.txt:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/samples/infix/testsuite/examples/multi_define_out/stderr.txt:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/samples/infix/testsuite/examples/multi_ident_out/exit_code.txt:
--------------------------------------------------------------------------------
1 | 0


--------------------------------------------------------------------------------
/samples/infix/testsuite/examples/multi_ident_out/stderr.txt:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/samples/infix/testsuite/examples/multi_define.infix:
--------------------------------------------------------------------------------
1 | x = 5;
2 | x = 6;


--------------------------------------------------------------------------------
/samples/infix/testsuite/examples/multi_define_out/exit_code.txt:
--------------------------------------------------------------------------------
1 | 1


--------------------------------------------------------------------------------
/samples/infix/testsuite/examples/multi_ident_out/stdout.txt:
--------------------------------------------------------------------------------
1 | 1 22
2 | 2 10
3 | 


--------------------------------------------------------------------------------
/samples/infix/testsuite/examples/simple_out/stdout.txt:
--------------------------------------------------------------------------------
1 | x 5
2 | 1 + 10 11
3 | 


--------------------------------------------------------------------------------
/samples/infix/testsuite/examples/mixed_out/stdout.txt:
--------------------------------------------------------------------------------
1 | x -11.700000
2 | y -32.440000
3 | 


--------------------------------------------------------------------------------
/samples/infix/testsuite/CMakeLists.txt:
--------------------------------------------------------------------------------
1 | include (../../../cmake/testsuite.cmake)
2 | 
3 | testsuite(infix)


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | build*/
2 | .vscode/
3 | .vs/
4 | **/dump/*
5 | .*.swp
6 | compile_commands.json
7 | notes/
8 | .cache
9 | 


--------------------------------------------------------------------------------
/samples/infix/testsuite/examples/simple.infix:
--------------------------------------------------------------------------------
1 | x = 5;
2 | print "x" x;
3 | y = 2 - 1;
4 | print "1 + 10" 1 + 10;
5 | 


--------------------------------------------------------------------------------
/samples/infix/testsuite/examples/mixed.infix:
--------------------------------------------------------------------------------
1 | x = 1 + 2 * 3 + 5.3 - 4 - 2 / 0.1;
2 | y = 3.2 * x + 5;
3 | print "x" x;
4 | print "y" y;


--------------------------------------------------------------------------------
/samples/infix/testsuite/examples/multi_ident.infix:
--------------------------------------------------------------------------------
1 | x = 5 + 10;
2 | y = 1 - 9 + x;
3 | print "1" x + y;
4 | z = (5 * x) / y;
5 | print "2" z;


--------------------------------------------------------------------------------
/cmake/triesteConfig.cmake.in:
--------------------------------------------------------------------------------
1 | @PACKAGE_INIT@
2 | 
3 | include("${CMAKE_CURRENT_LIST_DIR}/@PROJECT_NAME@Targets.cmake")
4 | check_required_components("@PROJECT_NAME@")


--------------------------------------------------------------------------------
/samples/shrubbery/shrubbery.cc:
--------------------------------------------------------------------------------
1 | #include "shrubbery.h"
2 | 
3 | #include <trieste/driver.h>
4 | 
5 | int main(int argc, char** argv)
6 | {
7 |   return trieste::Driver(shrubbery::reader()).run(argc, argv);
8 | }
9 | 


--------------------------------------------------------------------------------
/samples/infix/testsuite/examples/multi_define_out/stdout.txt:
--------------------------------------------------------------------------------
1 | multi_define.infix:2:1: conflicting definitions of `x`:
2 | x = 5;
3 | ~~~~~
4 | x = 6;
5 | ~~~~~
6 | 
7 | Errors:Pass expressions failed with 0 error!
8 | 
9 | 


--------------------------------------------------------------------------------
/samples/infix/infix_trieste.cc:
--------------------------------------------------------------------------------
 1 | #include "infix.h"
 2 | 
 3 | #include <trieste/driver.h>
 4 | 
 5 | int main(int argc, char** argv)
 6 | {
 7 |   using namespace trieste;
 8 |   Reader read_and_calculate = infix::reader() >>= infix::calculate();
 9 |   return Driver(read_and_calculate).run(argc, argv);
10 | }
11 | 


--------------------------------------------------------------------------------
/samples/shrubbery/CMakeLists.txt:
--------------------------------------------------------------------------------
 1 | add_executable(shrubbery
 2 |   reader.cc
 3 |   parse.cc
 4 |   shrubbery.cc
 5 |   )
 6 | enable_warnings(shrubbery)
 7 | 
 8 | target_link_libraries(shrubbery
 9 |   trieste::trieste
10 | )
11 | 
12 | add_test(NAME shrubbery COMMAND shrubbery test -f)
13 | add_test(NAME shrubbery_check COMMAND shrubbery check -w)
14 | 
15 | install(TARGETS shrubbery RUNTIME DESTINATION shrubbery)
16 | 


--------------------------------------------------------------------------------
/CODE_OF_CONDUCT.md:
--------------------------------------------------------------------------------
 1 | # Microsoft Open Source Code of Conduct
 2 | 
 3 | This project has adopted the [Microsoft Open Source Code of Conduct](https://opensource.microsoft.com/codeofconduct/).
 4 | 
 5 | Resources:
 6 | 
 7 | - [Microsoft Open Source Code of Conduct](https://opensource.microsoft.com/codeofconduct/)
 8 | - [Microsoft Code of Conduct FAQ](https://opensource.microsoft.com/codeofconduct/faq/)
 9 | - Contact [opencode@microsoft.com](mailto:opencode@microsoft.com) with questions or concerns
10 | 


--------------------------------------------------------------------------------
/include/trieste/gen.h:
--------------------------------------------------------------------------------
 1 | // Copyright Microsoft and Project Verona Contributors.
 2 | // SPDX-License-Identifier: MIT
 3 | #pragma once
 4 | 
 5 | #include "ast.h"
 6 | #include "xoroshiro.h"
 7 | 
 8 | #include <functional>
 9 | 
10 | namespace trieste
11 | {
12 |   using Rand = xoroshiro::p128r32;
13 |   using Seed = uint64_t;
14 |   using Result = uint32_t;
15 | 
16 |   using GenLocationF = std::function<std::string(Rand& rnd)>;
17 |   using GenNodeLocationF = std::function<Location(Rand&, Node)>;
18 | }
19 | 


--------------------------------------------------------------------------------
/samples/infix/testsuite/infix.cmake:
--------------------------------------------------------------------------------
 1 | # Arguments for testing infix samples
 2 | macro(toolinvoke ARGS testfile outputdir)
 3 |   set(${ARGS} ${testfile})
 4 | endmacro()
 5 | 
 6 | # Regular expression to match test files
 7 | # This regex matches files with the .infix extension
 8 | set(TESTSUITE_REGEX ".*\\.infix")
 9 | 
10 | set(TESTSUITE_EXE "$<TARGET_FILE:infix>")
11 | 
12 | function (test_output_dir out test)
13 |   # Use get_filename_component to remove the file extension and keep the directory structure
14 |   get_filename_component(test_dir ${test} DIRECTORY)
15 |   get_filename_component(test_name ${test} NAME_WE)
16 |   # Create the output directory relative to the test directory
17 |   set(${out} "${test_dir}/${test_name}_out" PARENT_SCOPE)
18 | endfunction()
19 | 


--------------------------------------------------------------------------------
/samples/infix/CMakeLists.txt:
--------------------------------------------------------------------------------
 1 | add_executable(infix_trieste
 2 |   reader.cc
 3 |   writers.cc
 4 |   infix_trieste.cc
 5 |   parse.cc
 6 |   )
 7 | enable_warnings(infix_trieste)
 8 | 
 9 | target_link_libraries(infix_trieste
10 |   trieste::trieste
11 | )
12 | 
13 | add_test(NAME infix COMMAND infix_trieste test -f)
14 | add_test(NAME invalid_input COMMAND infix ./infix)
15 | set_property(TEST invalid_input PROPERTY WILL_FAIL On)
16 | add_test(NAME infix_check COMMAND infix_trieste check -w)
17 | 
18 | add_executable(infix
19 |   reader.cc
20 |   writers.cc
21 |   infix.cc
22 |   parse.cc
23 | )
24 | enable_warnings(infix)
25 | 
26 | target_link_libraries(infix
27 |   trieste::trieste
28 | )
29 | 
30 | add_subdirectory(testsuite)
31 | 
32 | install(TARGETS infix infix_trieste RUNTIME DESTINATION infix)
33 | install(DIRECTORY testsuite/examples DESTINATION infix)
34 | 


--------------------------------------------------------------------------------
/test/CMakeLists.txt:
--------------------------------------------------------------------------------
 1 | 
 2 | add_executable(trieste_intrusive_ptr_test
 3 |   intrusive_ptr_test.cc
 4 | )
 5 | enable_warnings(trieste_intrusive_ptr_test)
 6 | target_link_libraries(trieste_intrusive_ptr_test trieste::trieste)
 7 | 
 8 | # This test might not make so much sense without asan enabled, but might as well
 9 | # check that the test compiles and doesn't crash on other compilers.
10 | if(CMAKE_CXX_COMPILER_ID STREQUAL "Clang" AND NOT TRIESTE_SANITIZE)
11 |   target_compile_options(trieste_intrusive_ptr_test PUBLIC -g -fsanitize=thread)
12 |   target_link_libraries(trieste_intrusive_ptr_test -fsanitize=thread)
13 | endif()
14 | 
15 | add_test(NAME trieste_intrusive_ptr_test COMMAND trieste_intrusive_ptr_test WORKING_DIRECTORY $<TARGET_FILE_DIR:trieste_intrusive_ptr_test>)
16 | 
17 | add_executable(trieste_source_test
18 |   source_test.cc
19 | )
20 | enable_warnings(trieste_source_test)
21 | target_link_libraries(trieste_source_test trieste::trieste)
22 | 
23 | add_test(NAME trieste_source_test COMMAND trieste_source_test --depth 6 WORKING_DIRECTORY $<TARGET_FILE_DIR:trieste_source_test>)
24 | 


--------------------------------------------------------------------------------
/notes/packages.md:
--------------------------------------------------------------------------------
 1 | # Packages
 2 | 
 3 | The string that represents a package is a resolver.
 4 | 
 5 | Packages go in `Top`, inside a class, where the class name is derived from the resolver. Two resolvers that end up at the same package should get the same class name.
 6 | 
 7 | On disk, packages are directories. The package name is the directory name. Package directories go in your build directory, not in the source directory.
 8 | 
 9 | ## Build Directory
10 | 
11 | ```ts
12 | build
13 | - packages
14 |   - package1
15 |   - ...
16 | - debug
17 |   - program1
18 |   - program2
19 |   - ...
20 | - release
21 |   - program1
22 |   - program2
23 |   - ...
24 | ```
25 | 
26 | ## Resolvers
27 | 
28 | https://cmake.org/cmake/help/latest/module/ExternalProject.html#id1
29 | - URL
30 | - git
31 | - local directory?
32 | 
33 | ## Prologue
34 | 
35 | This could be added as the prologue to every program:
36 | 
37 | ```ts
38 | type std = "std lib resolver"
39 | use std::builtin
40 | 
41 | (TypeAlias
42 |   (Ident std)
43 |   (TypeParams)
44 |   (Type (TypeVar $0))
45 |   (Type
46 |     (Package (String "std lib resolver"))))
47 | ```
48 | 


--------------------------------------------------------------------------------
/cmake/runcommand.cmake:
--------------------------------------------------------------------------------
 1 | # This is used to run a command that can fail.
 2 | # Dumping all the output and error code into a file
 3 | # Also handles timeouts
 4 | 
 5 | file(REMOVE_RECURSE ${OUTPUT_DIR})  # start from a clean output directory
 6 | file(MAKE_DIRECTORY ${OUTPUT_DIR})  # replaces the deprecated make_directory() command
 7 | 
 8 | include(${COLLECTION})
 9 | 
10 | toolinvoke(TOOLINVOKE ${TESTFILE} ${OUTPUT_DIR})
11 | 
12 | list(JOIN TOOLINVOKE " " TOOLINVOKE_SEP)
13 | message ("Running")
14 | message ("   ${TEST_EXE} ${TOOLINVOKE_SEP}")
15 | message ("in working directory")
16 | message ("   ${WORKING_DIR}")
17 | message ("output sent to")
18 | message ("   ${OUTPUT_DIR}")
19 | 
20 | # Run command
21 | execute_process(
22 |     COMMAND ${TEST_EXE} ${TOOLINVOKE}
23 |     WORKING_DIRECTORY ${WORKING_DIR}
24 |     OUTPUT_FILE ${OUTPUT_DIR}/stdout.txt
25 |     ERROR_FILE ${OUTPUT_DIR}/stderr.txt
26 |     TIMEOUT 20   # Timeout at 20 seconds, may need to increase this.
27 |     RESULT_VARIABLE status
28 | )
29 | 
30 | # Push exit code into dump and make sure both stdout and stderr exist
31 | file(WRITE ${OUTPUT_DIR}/exit_code.txt ${status})
32 | file(TOUCH ${OUTPUT_DIR}/stdout.txt)
33 | file(TOUCH ${OUTPUT_DIR}/stderr.txt)
34 | 


--------------------------------------------------------------------------------
/parsers/CMakeLists.txt:
--------------------------------------------------------------------------------
 1 | add_subdirectory(json)
 2 | add_subdirectory(yaml)
 3 | 
 4 | if(TRIESTE_BUILD_PARSER_TESTS)
 5 |     enable_testing()
 6 |     add_subdirectory(test)
 7 | 
 8 |     if(NOT IS_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/JSONTestSuite)
 9 |     execute_process(COMMAND ${GIT_EXECUTABLE} clone --depth=1 https://github.com/nst/JSONTestSuite
10 |                     WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}
11 |                     OUTPUT_QUIET)
12 |     endif()
13 | 
14 |     if(NOT IS_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/json-patch-tests)
15 |     execute_process(COMMAND ${GIT_EXECUTABLE} clone --depth=1 https://github.com/json-patch/json-patch-tests
16 |                     WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}
17 |                     OUTPUT_QUIET)
18 |     endif()
19 | 
20 |     if(NOT IS_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/yaml-test-suite)
21 |     execute_process(COMMAND ${GIT_EXECUTABLE} clone --depth=1 --branch data-2022-01-17 https://github.com/yaml/yaml-test-suite
22 |                     WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}
23 |                     OUTPUT_QUIET)
24 |     endif()
25 | endif()
26 | 


--------------------------------------------------------------------------------
/include/trieste/debug.h:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | 
 3 | #include <version>
 4 | #ifdef __cpp_lib_source_location
 5 | #  include <source_location>
 6 | #endif
 7 | 
 8 | namespace trieste
 9 | {
10 |   namespace detail
11 |   {
12 |     /*
13 |      * Type used to track where a particular value was constructed in the
14 |      * source. This aids in debugging and error reporting.
15 |      */
16 | #ifdef __cpp_lib_source_location
17 |     struct DebugLocation
18 |     {
19 |       std::source_location location;
20 | 
21 |       DebugLocation(std::source_location l = std::source_location::current())
22 |       : location(l)
23 |       {}
24 |     };
25 | #else
26 |     struct DebugLocation
27 |     {
28 |       // Dummy value as we got a UBSan Misaligned Use without this.
 29 |       // I am assuming that the empty struct was triggering some kind of compiler bug. (MJP)
30 |       size_t dummy{0};
31 |       DebugLocation() {}
32 |     };
33 | #endif
34 | 
35 |     template<typename T>
36 |     struct Located
37 |     {
38 |       T value;
39 |       DebugLocation location;
40 | 
41 |       Located(T t, DebugLocation l = {})
42 |       : value(t), location(l)
43 |       {}
44 |     };
45 |   }
46 | }


--------------------------------------------------------------------------------
/parsers/test/json_checker.cc:
--------------------------------------------------------------------------------
 1 | #include "trieste/logging.h"
 2 | 
 3 | #include <CLI/CLI.hpp>
 4 | #include <trieste/checker.h>
 5 | #include <trieste/json.h>
 6 | 
 7 | using namespace trieste;
 8 | 
 9 | int main(int argc, char** argv)
10 | {
11 |   CLI::App app;
12 | 
13 |   app.set_help_all_flag("--help-all", "Expand all help");
14 | 
15 |   bool check_against_wf = false;
16 |   app.add_flag(
17 |     "-w", check_against_wf, "Check pattern against well-formedness rules");
18 | 
19 |   std::vector<std::string> ignored_tokens;
20 |   app.add_option(
21 |     "-i,--ignore_token",
22 |     ignored_tokens,
23 |     "Ignore this token when checking patterns against well-formedness rules.");
24 | 
25 |   try
26 |   {
27 |     app.parse(argc, argv);
28 |   }
29 |   catch (const CLI::ParseError& e)
30 |   {
31 |     return app.exit(e);
32 |   }
33 | 
34 |   Checker reader_checker =
35 |     Checker(json::reader()).check_against_wf(check_against_wf).ignored_tokens(ignored_tokens);
36 |   Checker writer_checker =
37 |     Checker(json::writer("checker")).check_against_wf(check_against_wf).ignored_tokens(ignored_tokens);
38 | 
39 |   return reader_checker.check() + writer_checker.check();
40 | }
41 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 |     MIT License
 2 | 
 3 |     Copyright (c) Microsoft Corporation.
 4 | 
 5 |     Permission is hereby granted, free of charge, to any person obtaining a copy
 6 |     of this software and associated documentation files (the "Software"), to deal
 7 |     in the Software without restriction, including without limitation the rights
 8 |     to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 |     copies of the Software, and to permit persons to whom the Software is
10 |     furnished to do so, subject to the following conditions:
11 | 
12 |     The above copyright notice and this permission notice shall be included in all
13 |     copies or substantial portions of the Software.
14 | 
15 |     THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 |     IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 |     FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 |     AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 |     LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 |     OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 |     SOFTWARE
22 | 


--------------------------------------------------------------------------------
/parsers/json/internal.h:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | 
 3 | #include "json.h"
 4 | 
 5 | namespace trieste
 6 | {
 7 |   namespace json
 8 |   {
 9 |     inline const auto Comma = TokenDef("json-comma");
10 |     inline const auto Colon = TokenDef("json-colon");
11 |     inline const auto Lhs = TokenDef("json-lhs");
12 |     inline const auto Rhs = TokenDef("json-rhs");
13 | 
14 |     Parse parser();
15 | 
16 |     inline const auto wf_parse_tokens =
17 |       Object | Array | String | Number | True | False | Null | Comma | Colon;
18 | 
19 |     // clang-format off
20 |     inline const auto wf_parse =
21 |       (Top <<= File)
22 |       | (File <<= ~Group)
23 |       | (Value <<= Group)
24 |       | (Array <<= Group)
25 |       | (Object <<= Group)
26 |       | (Member <<= Group)
27 |       | (Group <<= wf_parse_tokens++)
28 |       ;
29 |     // clang-format on
30 | 
31 |     template<typename T>
32 |     inline auto err(Node node, const T& msg)
33 |     {
34 |       return Error << (ErrorMsg ^ msg) << (ErrorAst << node->clone());
35 |     }
36 | 
37 |     template<typename T>
38 |     inline Node err(const NodeRange& r, const T& msg)
39 |     {
40 |       return Error << (ErrorMsg ^ msg) << (ErrorAst << r);
41 |     }
42 | 
43 |     template<typename T>
44 |     inline auto err(const T& msg)
45 |     {
46 |       return Error << (ErrorMsg ^ msg);
47 |     }
48 |   }
49 | }
50 | 


--------------------------------------------------------------------------------
/cmake/compare.cmake:
--------------------------------------------------------------------------------
 1 | # This is used to compare two files using CMake.
 2 | # It improves on the default behaviour by printing
 3 | # the files if there is a difference.
 4 | 
 5 | execute_process(
 6 |     COMMAND ${CMAKE_COMMAND} -E compare_files --ignore-eol ${original_file} ${new_file}
 7 |     RESULT_VARIABLE status
 8 | )
 9 | 
10 | if (${status} EQUAL 1)
11 |     message ("Compare ${original_file} with ${new_file}")
12 |     if (diff_tool STREQUAL "")
13 |         file(READ ${original_file} original_text)
14 |         file(READ ${new_file} new_text)
15 |         message("--Original File-----------------------------------------------------------------")
16 |         if (NOT original_text STREQUAL "")
17 |             message("${original_text}")
18 |         endif()
19 |         message("--------------------------------------------------------------------------------")
20 |         message("  ")
21 |         message("--New File----------------------------------------------------------------------")
22 |         if (NOT new_text STREQUAL "")
23 |             message(${new_text})
24 |         endif()
25 |         message("--------------------------------------------------------------------------------")
26 |     else ()
27 |         execute_process(
28 |             COMMAND ${diff_tool} ${original_file} ${new_file}
29 |         )
30 |     endif ()
31 |     message(FATAL_ERROR "Files differ!")
32 | endif ()


--------------------------------------------------------------------------------
/parsers/json/CMakeLists.txt:
--------------------------------------------------------------------------------
 1 | set( SOURCES
 2 | parse.cc
 3 | reader.cc
 4 | writer.cc
 5 | json.cc
 6 | )
 7 | 
 8 | if( BUILD_SHARED_LIBS )
 9 |   add_library(json SHARED ${SOURCES})
10 | else()
11 |   add_library(json STATIC ${SOURCES})
12 | endif()
13 | enable_warnings(json)
14 | 
15 | add_library(trieste::json ALIAS json)
16 | 
17 | target_link_libraries(json
18 |   PUBLIC
19 |     trieste::trieste
20 | )
21 | 
22 | if(MSVC)
23 |   target_compile_options(json PUBLIC "/Zc:__cplusplus")
24 |   target_compile_definitions(json PUBLIC "_SILENCE_CXX17_CODECVT_HEADER_DEPRECATION_WARNING")
25 | endif()
26 | 
27 | if(TRIESTE_USE_CXX17)
28 |   target_compile_features(json PUBLIC cxx_std_17)
29 |   target_compile_definitions(json PUBLIC TRIESTE_USE_CXX17)
30 | else()
31 |   target_compile_features(json PUBLIC cxx_std_20)
32 | endif()
33 | 
34 | target_include_directories( json
35 |   PUBLIC
36 |     $<INSTALL_INTERFACE:include>
37 |     $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/../include>
38 |     $<BUILD_INTERFACE:${CMAKE_CURRENT_BINARY_DIR}>
39 |   PRIVATE
40 |     ${CMAKE_CURRENT_SOURCE_DIR}
41 |     $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/../include/trieste>
42 | )
43 | 
44 | install(TARGETS json
45 |   EXPORT ${PROJECT_NAME}_Targets
46 |   ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR}
47 |   LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR}
48 |   RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR}
49 | )
50 | 
51 | install(FILES ${CMAKE_CURRENT_SOURCE_DIR}/../include/trieste/json.h DESTINATION include/trieste)
52 | 


--------------------------------------------------------------------------------
/samples/infix/internal.h:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | 
 3 | #include "infix.h"
 4 | 
 5 | namespace infix
 6 | {
 7 |   inline const auto Paren = TokenDef("infix-paren");
 8 |   inline const auto Equals = TokenDef("infix-equals");
 9 |   inline const auto Print = TokenDef("infix-print");
10 | 
11 |   inline const auto wf_literal = Int | Float;
12 |   inline const auto wf_parse_tokens = wf_literal | String | Paren | Print |
13 |     Ident | Add | Subtract | Divide | Multiply;
14 | 
15 |   // clang-format off
16 |   inline const auto wf_parser =
17 |       (Top <<= File)
18 |     | (File <<= (Group | Equals)++)
19 |     | (Paren <<= Group++)
20 |     | (Equals <<= Group++)
21 |     | (Group <<= wf_parse_tokens++)
22 |     ;
23 |   // clang-format on
24 | 
25 |   inline const auto Number = T(Int, Float);
26 | 
27 |   // The Error token allows the creation of a special node which we can
28 |   // use to replace the erroneous node. This will then exempt that subtree
29 |   // from the well-formedness check. This is the mechanism by which we can
 30 |   // use the testing system to discover edge cases, i.e. the testing will
31 |   // not proceed to the next pass until all of the invalid subtrees have
32 |   // been marked as `Error`.
33 |   inline auto err(const NodeRange& r, const std::string& msg)
34 |   {
35 |     return Error << (ErrorMsg ^ msg) << (ErrorAst << r);
36 |   }
37 | 
38 |   inline auto err(Node node, const std::string& msg)
39 |   {
40 |     return Error << (ErrorMsg ^ msg) << (ErrorAst << node);
41 |   }
42 | 
43 |   Parse parser();
44 | }
45 | 


--------------------------------------------------------------------------------
/notes/lookup.md:
--------------------------------------------------------------------------------
 1 | # Lookup
 2 | 
 3 | Given a Node that names something, find the Node that defines it.
 4 | 
 5 | - Names may be scoped.
 6 |   - After looking up, we may need to look down.
 7 | - Names may be overloaded.
 8 |   - We may need to return multiple results.
 9 |   - This can come from the same scope or from different scopes.
 10 |   - It can even come from lookup on an algebraic type.
11 | - Names may be imported from other scopes.
12 |   - And those scopes may need additional information, such as type arguments.
13 | 
14 | Lookup may want to return more information than just the definition node. For Verona, we want to return a map of type parameter bindings as well.
15 | 
16 | ```f#
17 | // TODO: `use`, multidef, shadowing, lookdown
18 | // multidef => not shadowing
19 | // not multidef => shadowing
20 | 
21 | // one unidef: done
22 | // unidef with anything else: {}
23 | // all multidef: union with parent
24 | let LU scope name =
25 |   let defs =
26 |     { def |
27 |       def ∈ scope.map name, !def.type.defbeforeuse or (def < name) }
28 |   if ∃def ∈ defs: !def.type.multidef then 
29 |     if |defs| = 1 then
30 |       defs
31 |     else
32 |       {}
33 |   else
34 |     defs ∪ (LU scope.parent name)
35 | 
36 | let LD map elems =
37 |   match elems
38 |   | none -> map
39 |   | elem, elems ->
40 |     // TODO:
41 |     let defs = { def | def ∈ map.def, def.type.exported }
42 | 
43 | let Resolve scope elems =
44 |   match elems
45 |   | none -> {}
46 |   | elem, elems ->
47 |     let maps = LU scope elem.name elem.args
48 |     { LD map elems | map ∈ maps }
49 | ```
50 | 


--------------------------------------------------------------------------------
/samples/shrubbery/shrubbery.h:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | 
 3 | #include <trieste/trieste.h>
 4 | 
 5 | namespace shrubbery
 6 | {
 7 |   using namespace trieste;
 8 | 
 9 |   inline const auto Paren   = TokenDef("shrub-paren");     // ()
10 |   inline const auto Bracket = TokenDef("shrub-bracket");   // []
11 |   inline const auto Brace   = TokenDef("shrub-brace");     // {}
12 |   inline const auto Block   = TokenDef("shrub-block");     // :
13 |   inline const auto Comma   = TokenDef("shrub-comma");     // ,
14 |   inline const auto Semi    = TokenDef("shrub-semicolon"); // ;
15 |   inline const auto Alt     = TokenDef("shrub-alt");       // |
16 |   inline const auto Op      = TokenDef("shrub-op", flag::print); // Operators
17 |   inline const auto Atom    = TokenDef("shrub-atom", flag::print); // Everything else
18 | 
19 |   // Used for final structure
20 |   inline const auto Terms = TokenDef("shrub-terms");
21 |   inline const auto None  = TokenDef("shrub-none");
22 | 
23 |   // Used as identifiers
24 |   inline const auto Id = TokenDef("shrub-id");
25 |   inline const auto Lhs = TokenDef("shrub-lhs");
26 |   inline const auto Rhs = TokenDef("shrub-rhs");
27 | 
28 |   // clang-format off
29 |   inline const auto wf =
30 |     (Top <<= File)
31 |     | (File <<= Group++)
32 |     | (Paren <<= Group++)
33 |     | (Bracket <<= Group++)
34 |     | (Brace <<= Group++)
35 |     | (Block <<= Group++)
36 |     | (Alt <<= Block++[1])
37 |     | (Group <<= Terms * (Block >>= Block | None) * (Alt >>= Alt | None))
38 |     | (Terms <<= (Paren | Bracket | Brace | Op | Atom)++)
39 |     ;
40 |   // clang-format on
41 | 
42 |   Parse parser();
43 |   Reader reader();
44 | }
45 | 


--------------------------------------------------------------------------------
/parsers/test/json_fuzzer.cc:
--------------------------------------------------------------------------------
 1 | #include <CLI/CLI.hpp>
 2 | #include <trieste/fuzzer.h>
 3 | #include <trieste/json.h>
 4 | 
 5 | using namespace trieste;
 6 | 
 7 | int main(int argc, char** argv) // Fuzzing driver for the JSON reader or writer; the exit code is whatever Fuzzer::test() returns.
 8 | {
 9 |   CLI::App app;
10 | 
11 |   app.set_help_all_flag("--help-all", "Expand all help");
12 | 
13 |   std::string transform; // required positional argument, restricted to "reader" or "writer"
14 |   app.add_option("transform", transform, "Transform to test")
15 |     ->check(CLI::IsMember({"reader", "writer"}))
16 |     ->required(true);
17 | 
18 |   uint32_t seed = std::random_device()(); // fresh random seed unless -s/--seed overrides it
19 |   app.add_option("-s,--seed", seed, "Random seed");
20 | 
21 |   uint32_t count = 100; // number of seeds to run unless -c/--count overrides it
22 |   app.add_option("-c,--count", count, "Number of seed to test");
23 | 
24 |   bool failfast = false;
25 |   app.add_flag("-f,--failfast", failfast, "Stop on first failure");
26 | 
27 |   std::string log_level;
28 |   app
29 |     .add_option(
30 |       "-l,--log_level",
31 |       log_level,
32 |       "Set Log Level to one of "
33 |       "Trace, Debug, Info, "
34 |       "Warning, Output, Error, "
35 |       "None")
36 |     ->check(logging::set_log_level_from_string); // used as the validator; presumably it also applies the level -- confirm in logging.h
37 | 
38 |   try
39 |   {
40 |     app.parse(argc, argv);
41 |   }
42 |   catch (const CLI::ParseError& e)
43 |   {
44 |     return app.exit(e); // CLI11 reports the problem and supplies the exit code
45 |   }
46 | 
47 |   logging::Output() << "Testing x" << count << ", seed: " << seed << std::endl;
48 | 
49 |   Fuzzer fuzzer;
50 |   Reader reader = json::reader(); // built in both modes: the writer fuzzer borrows its parser's generators
51 |   if (transform == "reader")
52 |   {
53 |     fuzzer = Fuzzer(reader);
54 |   }
55 |   else // the IsMember check above leaves "writer" as the only other value
56 |   {
57 |     fuzzer = Fuzzer(json::writer("fuzzer"), reader.parser().generators());
58 |   }
59 | 
60 |   return fuzzer.start_seed(seed).seed_count(count).failfast(failfast).test();
61 | }
62 | 


--------------------------------------------------------------------------------
/CONTRIBUTING.md:
--------------------------------------------------------------------------------
 1 | # Contributing
 2 | 
 3 | Most contributions require you to agree to a
 4 | Contributor License Agreement (CLA) declaring that you have the right to, and actually do, grant us
 5 | the rights to use your contribution. For details, visit https://cla.microsoft.com.
 6 | 
 7 | When you submit a pull request, a CLA-bot will automatically determine whether you need to provide
 8 | a CLA and decorate the PR appropriately (e.g., label, comment). Simply follow the instructions
 9 | provided by the bot. You will only need to do this once across all repos using our CLA.
10 | 
11 | This project has adopted the [Microsoft Open Source Code of Conduct](https://opensource.microsoft.com/codeofconduct/).
12 | For more information see the [Code of Conduct FAQ](https://opensource.microsoft.com/codeofconduct/faq/) or
13 | contact [opencode@microsoft.com](mailto:opencode@microsoft.com) with any additional questions or comments.
14 | 
15 | # Bugs and patches
16 | 
17 | We use GitHub [issues](https://github.com/microsoft/Trieste/issues) to track bugs and features. If you find a bug or want to propose a new feature, please file an issue on our project.
18 | 
19 | We use GitHub [pull requests](https://github.com/microsoft/Trieste/pulls) for contributions in code, documents, tests, etc.
20 | 
21 | Every PR must pass the CI builds (which include CLA checks and formatting) and the appropriate set of tests on Windows, Linux (clang & gcc) and Mac, on x86_64. PRs cannot be merged if any of the tests fail.
22 | 
23 | You are not, however, required to run all these tests on your own before submitting the PR. Running on at least one of those above and passing should be fine. We can work out the remaining issues during the review process.
24 | 


--------------------------------------------------------------------------------
/notes/arguments.md:
--------------------------------------------------------------------------------
 1 | # Type Based Dispatch
 2 | 
 3 | TODO: don't know how to do this yet
 4 | 
 5 | # Partial Application
 6 | 
 7 | Given the initial N₀ arguments and a sequence of applications, each of which adds Nᵢ arguments, the number of arguments at a given application j is argsⱼ = N₀ + ∑ᵢ₌₀₋ⱼ Nᵢ. Over j ∈ i..0, select the lowest arity function that takes at least argsⱼ arguments. Absorb j applications into a single call. Any remaining applications are applied to the result of the call.
 8 | 
 9 | # Default Arguments
10 | 
11 | A default argument at the end effectively creates a pair of functions. The implicit function is one arity shorter, and calls the longer one with the default argument.
12 | 
13 | The caller can't select the default argument, because it may depend on virtual dispatch.
14 | 
15 | ```ts
16 | f(a, b, c = 3)
17 | {
18 |   ...
19 | }
20 | 
21 | let x = f(a, b) // default argument, not partial application
22 | ```
23 | 
24 | # Default Field Values
25 | 
26 | If there's no `create` method, implicitly generate one with all the fields as parameters, with each field taking the default value.
27 | 
28 | # Named Arguments
29 | 
30 | Possible format for a named argument:
31 | `(assign (expr dot ident) (expr...))`
32 | 
33 | ```ts
34 | let x = f(.b = 2, .a = 1)
35 | ```
36 | 
37 | # Function Types
38 | 
39 | A function may be:
40 | - `lin`: the function may only be called once.
41 | - `in`: the function may mutate its free variables.
42 | - `const`: the function only closes over `const` variables.
43 | - `?`: the function is sendable, but may mutate its free variables.
44 | 
45 | An `iso` function could be `Region[A->B] & lin`.
46 | 
47 | ```ts
48 | f: A...->B
49 | 
50 | type Fun[A..., B, K] = { apply(Self & K, A...): B } & K
51 | 
52 | ```
53 | 


--------------------------------------------------------------------------------
/parsers/yaml/CMakeLists.txt:
--------------------------------------------------------------------------------
 1 | set( SOURCES # translation units compiled into the yaml library target
 2 | parse.cc
 3 | event_writer.cc
 4 | writer.cc
 5 | reader.cc
 6 | to_json.cc
 7 | yaml.cc
 8 | )
 9 | 
10 | if( BUILD_SHARED_LIBS ) # library kind is chosen explicitly from BUILD_SHARED_LIBS
11 |   add_library(yaml SHARED ${SOURCES})
12 | else()
13 |   add_library(yaml STATIC ${SOURCES})
14 | endif()
15 | enable_warnings(yaml) # helper defined elsewhere in the project
16 | 
17 | add_library(trieste::yaml ALIAS yaml) # the target can also be referenced as trieste::yaml
18 | 
19 | target_link_libraries(yaml
20 |   PUBLIC # propagated to everything that links yaml
21 |     trieste::trieste
22 |     trieste::json
23 | )
24 | 
25 | if(MSVC)
26 |   target_compile_options(yaml PUBLIC "/Zc:__cplusplus") # make MSVC report the real language level in __cplusplus
27 |   target_compile_definitions(yaml PUBLIC "_SILENCE_CXX17_CODECVT_HEADER_DEPRECATION_WARNING")
28 | endif()
29 | 
30 | if(YAML_USE_CXX17) # opt-in C++17 build; the macro of the same name is exported to consumers
31 |   target_compile_features(yaml PUBLIC cxx_std_17)
32 |   target_compile_definitions(yaml PUBLIC YAML_USE_CXX17)
33 | else()
34 |   target_compile_features(yaml PUBLIC cxx_std_20)
35 | endif()
36 | 
37 | target_include_directories( yaml
38 |   PUBLIC # seen by consumers: installed include dir, or the in-tree ../include and the binary dir
39 |     $<INSTALL_INTERFACE:include>
40 |     $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/../include>
41 |     $<BUILD_INTERFACE:${CMAKE_CURRENT_BINARY_DIR}>
42 |   PRIVATE # only for the library's own sources
43 |     ${CMAKE_CURRENT_SOURCE_DIR}
44 |     $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/../include/trieste>
45 | )
46 | 
47 | install(TARGETS yaml
48 |   EXPORT ${PROJECT_NAME}_Targets # adds yaml to the project's exported target set
49 |   ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR}
50 |   LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR}
51 |   RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR}
52 | )
53 | 
54 | install(FILES ${CMAKE_CURRENT_SOURCE_DIR}/../include/trieste/yaml.h DESTINATION include/trieste) # the public header is installed separately
55 | 
56 | if( TRIESTE_BUILD_PARSER_TOOLS ) # the yamlc command-line tool is optional
57 |   add_executable(yamlc yamlc.cc)
58 |   enable_warnings(yamlc)
59 | 
60 |   target_link_libraries(yamlc
61 |     PRIVATE
62 |       yaml
63 |   )
64 | 
65 |   install(TARGETS yamlc RUNTIME DESTINATION parsers)
66 | endif()
67 | 


--------------------------------------------------------------------------------
/parsers/test/yaml_checker.cc:
--------------------------------------------------------------------------------
 1 | #include "trieste/logging.h"
 2 | 
 3 | #include <CLI/CLI.hpp>
 4 | #include <trieste/checker.h>
 5 | #include <trieste/yaml.h>
 6 | 
 7 | using namespace trieste;
 8 | 
 9 | int main(int argc, char** argv) // Runs the pattern checker over the YAML reader, writer, event writer and to_json rewriter.
10 | {
11 |   CLI::App app;
12 | 
13 |   app.set_help_all_flag("--help-all", "Expand all help");
14 | 
15 |   bool check_against_wf = false; // off by default; enabled with -w
16 |   app.add_flag(
17 |     "-w", check_against_wf, "Check pattern against well-formedness rules");
18 | 
19 |   std::vector<std::string> ignored_tokens; // collected from -i/--ignore_token and passed to every checker
20 |   app.add_option(
21 |     "-i,--ignore_token",
22 |     ignored_tokens,
23 |     "Ignore this token when checking patterns against well-formedness rules.");
24 | 
25 |   try
26 |   {
27 |     app.parse(argc, argv);
28 |   }
29 |   catch (const CLI::ParseError& e)
30 |   {
31 |     return app.exit(e); // CLI11 reports the problem and supplies the exit code
32 |   }
33 | 
34 |   logging::Output() << "Checking patterns" << std::endl;
35 | 
36 |   Checker reader_checker = Checker(yaml::reader()) // all four checkers get the same two settings
37 |                              .check_against_wf(check_against_wf)
38 |                              .ignored_tokens(ignored_tokens);
39 |   Checker writer_checker = Checker(yaml::writer("checker"))
40 |                              .check_against_wf(check_against_wf)
41 |                              .ignored_tokens(ignored_tokens);
42 |   Checker event_writer_checker = Checker(yaml::event_writer("checker"))
43 |                                    .check_against_wf(check_against_wf)
44 |                                    .ignored_tokens(ignored_tokens);
45 |   Checker to_json_checker = Checker(yaml::to_json())
46 |                               .check_against_wf(check_against_wf)
47 |                               .ignored_tokens(ignored_tokens);
48 | 
49 |   return reader_checker.check() + writer_checker.check() + // exit code is the sum of the four results; presumably 0 means success -- confirm in checker.h
50 |     event_writer_checker.check() + to_json_checker.check();
51 | }
52 | 


--------------------------------------------------------------------------------
/samples/shrubbery/wf.h:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | 
 3 | #include "shrubbery.h"
 4 | 
 5 | namespace shrubbery // Intermediate well-formedness definitions, each built on top of the previous one.
 6 | {
 7 |   using namespace wf::ops;
 8 | 
 9 |   inline const auto wf_term = Paren | Bracket | Brace | // anything that may sit inside a Group straight after parsing
10 |                               Block | Alt | Op | Atom;
11 | 
12 |   inline const auto wf_grouping_construct = Comma | Semi | Group; // what the bracketing nodes may contain straight after parsing
13 | 
14 |   // clang-format off
15 | 
16 |   // After parsing, commas and semicolons can appear virtually everywhere
17 |   inline const auto wf_parser =
18 |       (Top <<= File)
19 |     | (File <<= wf_grouping_construct++)
20 |     | (Paren <<= wf_grouping_construct++)
21 |     | (Bracket <<= wf_grouping_construct++)
22 |     | (Brace <<= wf_grouping_construct++)
23 |     | (Block <<= wf_grouping_construct++)
24 |     | (Alt <<= wf_grouping_construct++)
25 |     | (Comma <<= (Semi | Group)++)
26 |     | (Semi <<= (Comma | Group)++)
27 |     | (Group <<= wf_term++)
28 |     ;
29 | 
30 |   // The first pass ensures that commas and semi-colons are in the right places
31 |   inline const auto wf_check_parser =
32 |       wf_parser // start from the parser shape; the rules below presumably replace the same-named ones -- confirm operator| semantics
33 |     | (File <<= (Group | Semi)++)
34 |     | (Paren <<= (Group | Comma)++)
35 |     | (Bracket <<= (Group | Comma)++)
36 |     | (Brace <<= (Group | Comma)++)
37 |     | (Block <<= (Group | Semi)++)
38 |     | (Alt <<= (Group | Semi)++[1])
39 |     | (Comma <<= Group++[1])
40 |     | (Semi <<= Group++)
41 |     ;
42 | 
43 |   // Merge alternatives into one node with a sequence of blocks
44 |   inline const auto wf_alternatives =
45 |     wf_check_parser
46 |     | (Alt <<= Block++[1]) // same Alt rule as in shrubbery.h's wf
47 |     ;
48 | 
49 |   // Get rid of commas and semi-colons
50 |   inline const auto wf_no_semis_or_commas =
51 |     wf_alternatives
52 |     | (File <<= Group++) // from here on the bracketing nodes hold Groups only
53 |     | (Paren <<= Group++)
54 |     | (Bracket <<= Group++)
55 |     | (Brace <<= Group++)
56 |     | (Block <<= Group++)
57 |     ;
58 | 
59 |   // clang-format on
60 | }
61 | 


--------------------------------------------------------------------------------
/parsers/test/yaml_fuzzer.cc:
--------------------------------------------------------------------------------
 1 | #include "trieste/logging.h"
 2 | 
 3 | #include <CLI/CLI.hpp>
 4 | #include <trieste/fuzzer.h>
 5 | #include <trieste/yaml.h>
 6 | 
 7 | using namespace trieste;
 8 | 
 9 | int main(int argc, char** argv) // Fuzzing driver for the YAML transforms; the exit code is whatever Fuzzer::test() returns.
10 | {
11 |   CLI::App app;
12 | 
13 |   app.set_help_all_flag("--help-all", "Expand all help");
14 | 
15 |   std::string transform; // required positional argument, restricted to the four names below
16 |   app.add_option("transform", transform, "Transform to test")
17 |     ->check(CLI::IsMember({"reader", "writer", "event_writer", "to_json"}))
18 |     ->required(true);
19 | 
20 |   uint32_t seed = std::random_device()(); // fresh random seed unless -s/--seed overrides it
21 |   app.add_option("-s,--seed", seed, "Random seed");
22 | 
23 |   uint32_t count = 100; // number of seeds to run unless -c/--count overrides it
24 |   app.add_option("-c,--count", count, "Number of seed to test");
25 | 
26 |   bool failfast = false;
27 |   app.add_flag("-f,--failfast", failfast, "Stop on first failure");
28 | 
29 |   std::string log_level;
30 |   app
31 |     .add_option(
32 |       "-l,--log_level",
33 |       log_level,
34 |       "Set Log Level to one of "
35 |       "Trace, Debug, Info, "
36 |       "Warning, Output, Error, "
37 |       "None")
38 |     ->check(logging::set_log_level_from_string); // used as the validator; presumably it also applies the level -- confirm in logging.h
39 | 
40 |   try
41 |   {
42 |     app.parse(argc, argv);
43 |   }
44 |   catch (const CLI::ParseError& e)
45 |   {
46 |     return app.exit(e); // CLI11 reports the problem and supplies the exit code
47 |   }
48 | 
49 |   logging::Output() << "Testing x" << count << ", seed: " << seed << std::endl;
50 | 
51 |   Fuzzer fuzzer;
52 |   Reader reader = yaml::reader(); // built in every mode: the non-reader fuzzers borrow its parser's generators
53 |   if (transform == "reader")
54 |   {
55 |     fuzzer = Fuzzer(reader);
56 |   }
57 |   else if (transform == "writer")
58 |   {
59 |     fuzzer = Fuzzer(yaml::writer("fuzzer"), reader.parser().generators());
60 |   }
61 |   else if (transform == "event_writer")
62 |   {
63 |     fuzzer = Fuzzer(yaml::event_writer("fuzzer"), reader.parser().generators());
64 |   }
65 |   else if (transform == "to_json") // no trailing else: the IsMember check above admits only these four values
66 |   {
67 |     fuzzer = Fuzzer(yaml::to_json(), reader.parser().generators());
68 |   }
69 | 
70 |   return fuzzer.start_seed(seed).seed_count(count).failfast(failfast).test();
71 | }
72 | 


--------------------------------------------------------------------------------
/samples/infix/infix.h:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | 
 3 | #include <trieste/trieste.h>
 4 | 
 5 | namespace infix // Token definitions, the `wf` tree shape and the entry points of the infix sample.
 6 | {
 7 |   using namespace trieste;
 8 | 
 9 |   inline const auto Int = TokenDef("infix-int", flag::print); // Int, Float, String and Ident carry flag::print
10 |   inline const auto Float = TokenDef("infix-float", flag::print);
11 |   inline const auto String = TokenDef("infix-string", flag::print);
12 |   inline const auto Ident = TokenDef("infix-ident", flag::print);
13 | 
14 |   inline const auto Calculation = // root of a program: owns the symbol table (flag::symtab)
15 |     TokenDef("infix-calculation", flag::symtab | flag::defbeforeuse);
16 |   inline const auto Expression = TokenDef("infix-expression");
17 |   inline const auto Assign = // bound under its Ident child, see the [Ident] suffix in `wf`
18 |     TokenDef("infix-assign", flag::lookup | flag::shadowing);
19 |   inline const auto Output = TokenDef("infix-output");
20 |   inline const auto Ref = TokenDef("infix-ref");
21 | 
22 |   inline const auto Add = TokenDef("infix-add"); // the four binary operators share one shape in `wf`
23 |   inline const auto Subtract = TokenDef("infix-subtract");
24 |   inline const auto Multiply = TokenDef("infix-multiply");
25 |   inline const auto Divide = TokenDef("infix-divide");
26 |   inline const auto Literal = TokenDef("infix-literal");
27 | 
28 |   inline const auto Id = TokenDef("infix-id"); // Id, Op, Lhs and Rhs are not referenced by `wf`
29 |   inline const auto Op = TokenDef("infix-op");
30 |   inline const auto Lhs = TokenDef("infix-lhs");
31 |   inline const auto Rhs = TokenDef("infix-rhs");
32 | 
33 |   // clang-format off
34 |   inline const auto wf = // inline like the tokens above, so all translation units share a single object
35 |     (Top <<= Calculation)
36 |     | (Calculation <<= (Assign | Output)++)
37 |     | (Assign <<= Ident * Expression)[Ident]
38 |     | (Output <<= String * Expression)
39 |     | (Expression <<= (Add | Subtract | Multiply | Divide | Ref | Float | Int))
40 |     | (Ref <<= Ident)
41 |     | (Add <<= Expression * Expression)
42 |     | (Subtract <<= Expression * Expression)
43 |     | (Multiply <<= Expression * Expression)
44 |     | (Divide <<= Expression * Expression)
45 |     ;
46 |   // clang-format on
47 | 
48 |   Reader reader(); // declarations only; the definitions are not in this header
49 |   Writer writer(const std::filesystem::path& path = "infix");
50 |   Writer postfix_writer(const std::filesystem::path& path = "postfix");
51 |   Rewriter calculate();
52 | }


--------------------------------------------------------------------------------
/CMakePresets.json:
--------------------------------------------------------------------------------
 1 | {
 2 |     "version": 3,
 3 |     "configurePresets": [
 4 |         {
 5 |             "name": "debug-clang",
 6 |             "displayName": "Debug Build using clang",
 7 |             "description": "Sets up a debug build that uses Clang++",
 8 |             "generator": "Ninja",
 9 |             "cacheVariables": {
10 |                 "CMAKE_BUILD_TYPE": "Debug",
11 |                 "CMAKE_INSTALL_PREFIX": "${sourceDir}/build/dist",
12 |                 "CMAKE_CXX_COMPILER": "clang++",
13 |                 "TRIESTE_BUILD_SAMPLES": "ON",
14 |                 "TRIESTE_CLEAN_INSTALL": "ON"
15 |             }
16 |         },
17 |         {
18 |             "name": "debug",
19 |             "displayName": "Debug Build",
20 |             "description": "Sets up a debug build that uses the default compiler and generator",
21 |             "cacheVariables": {
22 |                 "CMAKE_BUILD_TYPE": "Debug",
23 |                 "CMAKE_INSTALL_PREFIX": "${sourceDir}/build/dist",
24 |                 "TRIESTE_BUILD_SAMPLES": "ON",
25 |                 "TRIESTE_CLEAN_INSTALL": "ON"
26 |             }
27 |         },
28 |         {
29 |             "name": "release-clang",
30 |             "displayName": "Release Build using clang",
31 |             "description": "Sets up a release build that uses Clang++",
32 |             "generator": "Ninja",
33 |             "cacheVariables": {
34 |                 "CMAKE_BUILD_TYPE": "Release",
35 |                 "CMAKE_INSTALL_PREFIX": "${sourceDir}/build/dist",
36 |                 "CMAKE_CXX_COMPILER": "clang++",
37 |                 "TRIESTE_BUILD_SAMPLES": "ON",
38 |                 "TRIESTE_CLEAN_INSTALL": "ON"
39 |             }
40 |         },
41 |         {
42 |             "name": "release",
43 |             "displayName": "Release Build",
44 |             "description": "Sets up a release build that uses the default compiler and generator",
45 |             "cacheVariables": {
46 |                 "CMAKE_BUILD_TYPE": "Release",
47 |                 "CMAKE_INSTALL_PREFIX": "${sourceDir}/build/dist",
48 |                 "TRIESTE_BUILD_SAMPLES": "ON",
49 |                 "TRIESTE_CLEAN_INSTALL": "ON"
50 |             }
51 |         }
52 |     ]
53 | }


--------------------------------------------------------------------------------
/samples/infix/infix.cc:
--------------------------------------------------------------------------------
 1 | #include "infix.h"
 2 | 
 3 | #include <CLI/CLI.hpp>
 4 | 
 5 | using namespace trieste;
 6 | 
 7 | int main(int argc, char** argv) // Command-line front end of the infix sample: evaluates a file or re-emits it as infix/postfix.
 8 | {
 9 |   CLI::App app;
10 | 
11 |   std::filesystem::path input_path; // required positional argument
12 |   app.add_option("input", input_path, "Path to the input file ")->required();
13 | 
14 |   std::filesystem::path output_path; // optional; when left empty the result goes to the console
15 |   app.add_option("output", output_path, "Path to the output file ");
16 | 
17 |   auto modes = {"calculate", "infix", "postfix"};
18 |   std::string mode = "calculate"; // default mode
19 |   app.add_option("-m,--mode", mode, "Output mode.")
20 |     ->transform(CLI::IsMember(modes));
21 | 
22 |   try
23 |   {
24 |     app.parse(argc, argv);
25 |   }
26 |   catch (const CLI::ParseError& e)
27 |   {
28 |     return app.exit(e); // CLI11 reports the problem and supplies the exit code
29 |   }
30 | 
31 |   auto reader = infix::reader().file(input_path);
32 |   Destination dest = // console when no output path was given, otherwise the current directory
33 |     output_path.empty() ? DestinationDef::console() : DestinationDef::dir(".");
34 |   if (output_path.empty())
35 |   {
36 |     output_path = mode; // the writers below still need a path, so the mode name is used
37 |   }
38 | 
39 |   try
40 |   {
41 |     ProcessResult result;
42 |     if (mode == "calculate")
43 |     {
44 |       result = reader >> infix::calculate();
45 |       if (!result.ok)
46 |       {
47 |         logging::Error err;
48 |         result.print_errors(err);
49 |         return 1;
50 |       }
51 | 
52 |       Node calc = result.ast->front(); // first child of the result tree; presumably the Calculation node -- confirm against infix::wf
53 |       for (const Node& output : *calc)
54 |       {
55 |         auto str = output->front()->location().view(); // text of the first child
56 |         auto val = output->back()->location().view(); // text of the last child
57 |         std::cout << str << " " << val << std::endl;
58 |       }
59 | 
60 |       return 0; // calculate mode always prints to stdout and never uses dest
61 |     }
62 |     if (mode == "infix")
63 |     {
64 |       result = reader >> infix::writer(output_path).destination(dest);
65 |     }
66 |     else if (mode == "postfix")
67 |     {
68 |       result = reader >> infix::postfix_writer(output_path).destination(dest);
69 |     }
70 | 
71 |     if (!result.ok)
72 |     {
73 |       logging::Error err;
74 |       result.print_errors(err);
75 |       return 1;
76 |     }
77 |   }
78 |   catch (const std::exception& e)
79 |   {
80 |     std::cerr << e.what() << std::endl; // any exception from the pipeline becomes exit code 1
81 |     return 1;
82 |   }
83 | 
84 |   return 0;
85 | }
86 | 


--------------------------------------------------------------------------------
/include/trieste/rewriter.h:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | 
 3 | #include "passes.h"
 4 | 
 5 | namespace trieste
 6 | {
 7 |   class Rewriter // A named list of passes together with the well-formedness spec their input must satisfy.
 8 |   {
 9 |   private:
10 |     std::string name_;
11 |     std::vector<Pass> passes_;
12 |     const wf::Wellformed* wf_; // non-owning: address of the input_wf given to the constructor
13 |     bool debug_enabled_;
14 |     bool wf_check_enabled_;
15 |     std::filesystem::path debug_path_;
16 | 
17 |   public:
18 |     Rewriter( // input_wf is kept by address, so it must outlive this object
19 |       const std::string& name,
20 |       const std::vector<Pass>& passes,
21 |       const wf::Wellformed& input_wf)
22 |     : name_(name),
23 |       passes_(passes),
24 |       wf_(&input_wf),
25 |       debug_enabled_(false), // debug output is off by default
26 |       wf_check_enabled_(true), // well-formedness checking is on by default
27 |       debug_path_(".")
28 |     {}
29 | 
30 |     ProcessResult rewrite(Node ast) // runs all passes over ast and returns the Process result
31 |     {
32 |       PassRange pass_range(passes_, *wf_, name_);
33 | 
34 |       logging::Info summary;
35 |       std::filesystem::path debug_path; // stays empty unless debug is enabled
36 |       if (debug_enabled_)
37 |       {
38 |         debug_path = debug_path_;
39 |       }
40 | 
41 |       summary << "---------" << std::endl;
42 |       auto result = Process(pass_range)
43 |                       .set_check_well_formed(wf_check_enabled_)
44 |                       .set_default_pass_complete(summary, name_, debug_path)
45 |                       .run(ast);
46 |       summary << "---------" << std::endl;
47 |       return result;
48 |     }
49 | 
50 |     Rewriter& debug_enabled(bool value) // setters return *this so calls can be chained
51 |     {
52 |       debug_enabled_ = value;
53 |       return *this;
54 |     }
55 | 
56 |     bool debug_enabled() const
57 |     {
58 |       return debug_enabled_;
59 |     }
60 | 
61 |     Rewriter& wf_check_enabled(bool value)
62 |     {
63 |       wf_check_enabled_ = value;
64 |       return *this;
65 |     }
66 | 
67 |     bool wf_check_enabled() const
68 |     {
69 |       return wf_check_enabled_;
70 |     }
71 | 
72 |     Rewriter& debug_path(const std::filesystem::path& path) // only forwarded by rewrite() while debug is enabled
73 |     {
74 |       debug_path_ = path;
75 |       return *this;
76 |     }
77 | 
78 |     const std::filesystem::path& debug_path() const
79 |     {
80 |       return debug_path_;
81 |     }
82 | 
83 |     const wf::Wellformed& input_wf() const
84 |     {
85 |       return *wf_;
86 |     }
87 | 
88 |     const wf::Wellformed& output_wf() const // spec of the last pass; with no passes, the input spec
89 |     {
90 |       return passes_.empty() ? *wf_ : passes_.back()->wf(); // back() on an empty vector is undefined behaviour
91 |     }
92 | 
93 |     const std::vector<Pass>& passes() const
94 |     {
95 |       return passes_;
96 |     }
97 |   };
98 | }
99 | 


--------------------------------------------------------------------------------
/include/trieste/xoroshiro.h:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | 
 3 | #include <cstdint>
 4 | #include <cstdlib>
 5 | #include <limits>
 6 | namespace xoroshiro // Small xoroshiro-style generators parameterised over state and result width.
 7 | {
 8 |   namespace detail
 9 |   {
10 |     template<typename STATE, typename RESULT, STATE A, STATE B, STATE C> // A and C are rotate counts, B is a shift count (see operator())
11 |     class XorOshiro
12 |     {
13 |     private:
14 |       static constexpr unsigned STATE_BITS = 8 * sizeof(STATE);
15 |       static constexpr unsigned RESULT_BITS = 8 * sizeof(RESULT);
16 | 
17 |       static_assert(
18 |         STATE_BITS >= RESULT_BITS,
19 |         "STATE must have at least as many bits as RESULT");
20 | 
21 |       STATE x; // the generator state is the pair (x, y)
22 |       STATE y;
23 | 
24 |       static inline STATE rotl(STATE x, STATE k) // rotate x left by k bits; only called with the constants A and C
25 |       {
26 |         return (x << k) | (x >> (STATE_BITS - k));
27 |       }
28 | 
29 |     public:
30 |       using result_type = RESULT;
31 | 
32 |       XorOshiro(STATE x_ = 5489, STATE y_ = 0) : x(x_), y(y_) // seeds the state, then advances once
33 |       {
34 |         // If both zero, then this does not work
35 |         if (x_ == 0 && y_ == 0)
36 |           abort();
37 | 
38 |         (*this)(); // first output is discarded
39 |       }
40 | 
41 |       void set_state(STATE x_, STATE y_ = 0) // re-seeds with the same checks and warm-up step as the constructor
42 |       {
43 |         // If both zero, then this does not work
44 |         if (x_ == 0 && y_ == 0)
45 |           abort();
46 | 
47 |         x = x_;
48 |         y = y_;
49 |         (*this)(); // first output is discarded
50 |       }
51 | 
52 |       constexpr static result_type min() // min()/max() report the full range of result_type
53 |       {
54 |         return std::numeric_limits<result_type>::min();
55 |       }
56 | 
57 |       constexpr static result_type max()
58 |       {
59 |         return std::numeric_limits<result_type>::max();
60 |       }
61 | 
62 |       result_type operator()() // returns the next value and advances the state
63 |       {
64 |         STATE r = x + y; // the output is computed from the state before it is advanced
65 |         y ^= x;
66 |         x = rotl(x, A) ^ y ^ (y << B);
67 |         y = rotl(y, C);
68 |         // If both zero, then this does not work
69 |         if (x == 0 && y == 0)
70 |           abort();
71 |         return r >> (STATE_BITS - RESULT_BITS); // keep the top RESULT_BITS bits of r
72 |       }
73 |     };
74 |   }
75 | 
76 |   using p128r64 = detail::XorOshiro<uint64_t, uint64_t, 55, 14, 36>; // naming: p<bits of state in x and y together>r<bits of result>
77 |   using p128r32 = detail::XorOshiro<uint64_t, uint32_t, 55, 14, 36>;
78 |   using p64r32 = detail::XorOshiro<uint32_t, uint32_t, 27, 7, 20>;
79 |   using p64r16 = detail::XorOshiro<uint32_t, uint16_t, 27, 7, 20>;
80 |   using p32r16 = detail::XorOshiro<uint16_t, uint16_t, 13, 5, 10>;
81 |   using p32r8 = detail::XorOshiro<uint16_t, uint8_t, 13, 5, 10>;
82 |   using p16r8 = detail::XorOshiro<uint8_t, uint8_t, 4, 7, 3>;
83 | }
84 | 


--------------------------------------------------------------------------------
/samples/infix/parse.cc:
--------------------------------------------------------------------------------
 1 | #include "internal.h"
 2 | 
 3 | namespace infix
 4 | {
 5 |   const std::initializer_list<Token> terminators = {Equals}; // passed to every m.term() call below
 6 | 
 7 |   Parse parser() // Builds the infix tokenizer: one "start" mode of regex rules plus random generators for Int and Float.
 8 |   {
 9 |     Parse p(depth::file, wf_parser);
10 |     // The rule actions share no state, so none of the lambdas below captures anything.
11 | 
12 |     p("start", // this indicates the 'mode' these rules are associated with
13 |       {
14 |         // Whitespace between tokens.
15 |         "[[:blank:]]+" >> [](auto&) {}, // no-op
16 | 
17 |         // Equals.
18 |         "=" >> [](auto& m) { m.seq(Equals); },
19 | 
20 |         // Terminator.
21 |         ";[\r\n]*" >> [](auto& m) { m.term(terminators); },
22 | 
23 |         // Parens.
24 |         R"((\()[[:blank:]]*)" >>
25 |           [](auto& m) {
26 |             // we push a Paren node. Subsequent nodes will be added
27 |             // as its children.
28 |             m.push(Paren, 1);
29 |           },
30 | 
31 |         R"(\))" >>
32 |           [](auto& m) {
33 |             // terminate the current group
34 |             m.term(terminators);
35 |             // pop back up out of the Paren
36 |             m.pop(Paren);
37 |           },
38 | 
39 |         // Float.
40 |         R"([[:digit:]]+\.[[:digit:]]+(?:e[+-]?[[:digit:]]+)?\b)" >>
41 |           [](auto& m) { m.add(Float); },
42 | 
43 |         // String.
44 |         R"("[^"]*")" >> [](auto& m) { m.add(String); },
45 | 
46 |         // Int.
47 |         R"([[:digit:]]+\b)" >> [](auto& m) { m.add(Int); },
48 | 
49 |         // Line comment.
50 |         "//[^\r\n]*" >> [](auto&) {}, // another no-op
51 | 
52 |         // Print. Listed before Identifier, whose pattern also matches "print" (presumably rules are tried in order -- confirm).
53 |         R"(print\b)" >> [](auto& m) { m.add(Print); },
54 | 
55 |         // Identifier.
56 |         R"([_[:alpha:]][_[:alnum:]]*\b)" >> [](auto& m) { m.add(Ident); },
57 | 
58 |         // Add ('+' is a reserved RegEx character)
59 |         R"(\+)" >> [](auto& m) { m.add(Add); },
60 | 
61 |         // Subtract
62 |         "-" >> [](auto& m) { m.add(Subtract); },
63 | 
64 |         // Multiply ('*' is a reserved RegEx character)
65 |         R"(\*)" >> [](auto& m) { m.add(Multiply); },
66 | 
67 |         // Divide
68 |         "/" >> [](auto& m) { m.add(Divide); },
69 |       });
70 | 
71 |     p.gen({ // text generators: Int yields 0..99, Float a value drawn from [-10, 10)
72 |       Int >> [](auto& rnd) { return std::to_string(rnd() % 100); },
73 |       Float >>
74 |         [](auto& rnd) {
75 |           std::uniform_real_distribution<> dist(-10.0, 10.0);
76 |           return std::to_string(dist(rnd));
77 |         },
78 |     });
79 | 
80 |     return p;
81 |   }
82 | }
83 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # Project Trieste
 2 | 
 3 | Project Trieste is a term rewriting system designed for rapidly prototyping programming languages.
 4 | Trieste provides three C++ DSLs to enable the rapid prototyping:
 5 | 
 6 | * Parsing - Enables generation of an untyped abstract syntax tree (AST) from one or many input files.
 7 | * Rewriting - Enables the restructuring and analysis of the AST. It can be used to simplify, elaborate to alternative representations, and lower to backend IRs or bytecodes.
 8 | * Well-formedness - Trieste provides a DSL for checking that the current AST conforms to a specification. 
 9 | 
10 | Using the well-formedness definitions, Trieste can rapidly harden a language by automatically checking conformance to the specification.
11 | Additionally, Trieste uses the well-formedness definitions to generate test cases that fuzz each rewriting pass and check that it conforms to its specification.
12 | 
13 | ## Getting Started
14 | 
15 | If you want to dive right into understanding how to use Trieste, take
16 | a look at the [`infix` tutorial language](./samples/infix/README.md),
17 | which will walk you through implementing a simple calculator language
18 | in Trieste.
19 | 
20 | ## Using Trieste
21 | 
22 | Trieste is a header-only C++20 library. To get started, you'll need to define your own `trieste::Driver`, and run it from `main`:
23 | 
24 | ```c++
25 | #include <trieste/driver.h>
26 | 
27 | int main(int argc, char** argv)
28 | {
29 |   // Define your driver...
30 |   trieste::Driver driver(...);
31 |   return driver.run(argc, argv);
32 | }
33 | ```
34 | 
35 | ## Building the Samples
36 | 
37 | Here's an example of how to build the `infix` sample and run the self-tests. Other build systems and compilers may work as well.
38 | 
39 | ```sh
40 | git clone https://github.com/microsoft/trieste
41 | cd trieste
42 | mkdir build
43 | cd build
44 | cmake -G Ninja .. -DCMAKE_BUILD_TYPE=Debug -DCMAKE_CXX_COMPILER=clang++-14
45 | ninja install
46 | ./dist/infix/infix test
47 | ```
48 | 
49 | ## Using Trieste in Your Project
50 | 
51 | You can use Trieste via FetchContent by including the following lines
52 | in your CMake:
53 | 
54 | ``` cmake
55 | FetchContent_Declare(
56 |   trieste
57 |   GIT_REPOSITORY https://github.com/microsoft/Trieste
58 |   GIT_TAG        a2a7fada4ab5250a4f8d1313b749ad336202841b
59 | )
60 | 
61 | FetchContent_MakeAvailable(trieste)
62 | ```
63 | 
64 | And then adding it as a target link library, e.g.
65 | 
66 | ``` cmake
67 | target_link_libraries(verona
68 |   Threads::Threads
69 |   CLI11::CLI11
70 |   trieste::trieste
71 |   )
72 | ```
73 | 
74 | ## Contributing
75 | 
76 | If you are interested in contributing to Trieste, please see our [contributing document](CONTRIBUTING.md).
77 | 


--------------------------------------------------------------------------------
/SECURITY.md:
--------------------------------------------------------------------------------
 1 | <!-- BEGIN MICROSOFT SECURITY.MD V0.0.8 BLOCK -->
 2 | 
 3 | ## Security
 4 | 
 5 | Microsoft takes the security of our software products and services seriously, which includes all source code repositories managed through our GitHub organizations, which include [Microsoft](https://github.com/microsoft), [Azure](https://github.com/Azure), [DotNet](https://github.com/dotnet), [AspNet](https://github.com/aspnet), [Xamarin](https://github.com/xamarin), and [our GitHub organizations](https://opensource.microsoft.com/).
 6 | 
 7 | If you believe you have found a security vulnerability in any Microsoft-owned repository that meets [Microsoft's definition of a security vulnerability](https://aka.ms/opensource/security/definition), please report it to us as described below.
 8 | 
 9 | ## Reporting Security Issues
10 | 
11 | **Please do not report security vulnerabilities through public GitHub issues.**
12 | 
13 | Instead, please report them to the Microsoft Security Response Center (MSRC) at [https://msrc.microsoft.com/create-report](https://aka.ms/opensource/security/create-report).
14 | 
15 | If you prefer to submit without logging in, send email to [secure@microsoft.com](mailto:secure@microsoft.com).  If possible, encrypt your message with our PGP key; please download it from the [Microsoft Security Response Center PGP Key page](https://aka.ms/opensource/security/pgpkey).
16 | 
17 | You should receive a response within 24 hours. If for some reason you do not, please follow up via email to ensure we received your original message. Additional information can be found at [microsoft.com/msrc](https://aka.ms/opensource/security/msrc). 
18 | 
19 | Please include the requested information listed below (as much as you can provide) to help us better understand the nature and scope of the possible issue:
20 | 
21 |   * Type of issue (e.g. buffer overflow, SQL injection, cross-site scripting, etc.)
22 |   * Full paths of source file(s) related to the manifestation of the issue
23 |   * The location of the affected source code (tag/branch/commit or direct URL)
24 |   * Any special configuration required to reproduce the issue
25 |   * Step-by-step instructions to reproduce the issue
26 |   * Proof-of-concept or exploit code (if possible)
27 |   * Impact of the issue, including how an attacker might exploit the issue
28 | 
29 | This information will help us triage your report more quickly.
30 | 
31 | If you are reporting for a bug bounty, more complete reports can contribute to a higher bounty award. Please visit our [Microsoft Bug Bounty Program](https://aka.ms/opensource/security/bounty) page for more details about our active programs.
32 | 
33 | ## Preferred Languages
34 | 
35 | We prefer all communications to be in English.
36 | 
37 | ## Policy
38 | 
39 | Microsoft follows the principle of [Coordinated Vulnerability Disclosure](https://aka.ms/opensource/security/cvd).
40 | 
41 | <!-- END MICROSOFT SECURITY.MD BLOCK -->
42 | 


--------------------------------------------------------------------------------
/include/trieste/trieste.h:
--------------------------------------------------------------------------------
  1 | #pragma once
  2 | 
  3 | #include "parse.h"
  4 | #include "passes.h"
  5 | #include "reader.h"
  6 | #include "rewriter.h"
  7 | #include "writer.h"
  8 | 
  9 | namespace CLI
 10 | {
 11 |   class App; // forward declaration; only used by reference below
 12 | }
 13 | 
 14 | namespace trieste
 15 | {
 16 |   /**
 17 |    * Polymorphic base for objects that configure a CLI::App.
 18 |    *
 19 |    * The default configure() does nothing; derived types override it. The
 20 |    * destructor is virtual so that destroying a derived object through an
 21 |    * Options pointer is well defined. The other special members are defaulted
 22 |    * explicitly so that declaring the destructor does not suppress the
 23 |    * implicit move operations.
 24 |    */
 25 |   struct Options
 26 |   {
 27 |     Options() = default;
 28 |     Options(const Options&) = default;
 29 |     Options(Options&&) = default;
 30 |     Options& operator=(const Options&) = default;
 31 |     Options& operator=(Options&&) = default;
 32 |     virtual ~Options() = default;
 33 | 
 34 |     virtual void configure(CLI::App&) {}
 35 |   };
 36 | 
 37 |   /**
 38 |    * Runs `reader` and, if the read succeeded, hands the resulting AST to
 39 |    * `rewriter`. A failed read result is returned unchanged.
 40 |    */
 41 |   inline ProcessResult operator>>(Reader& reader, Rewriter& rewriter)
 42 |   {
 43 |     ProcessResult result = reader.read();
 44 |     if (result.ok)
 45 |     {
 46 |       return rewriter.rewrite(result.ast);
 47 |     }
 48 | 
 49 |     return result;
 50 |   }
 51 | 
 52 |   /**
 53 |    * Reader >> temporary Rewriter. Inside the function `rewriter` is an
 54 |    * lvalue, so this forwards to the `Rewriter&` overload.
 55 |    */
 56 |   inline ProcessResult operator>>(Reader& reader, Rewriter&& rewriter)
 57 |   {
 58 |     return reader >> rewriter;
 59 |   }
 60 | 
 61 |   /**
 62 |    * Runs `reader` and, if the read succeeded, hands the resulting AST to
 63 |    * `writer`. A failed read result is returned unchanged.
 64 |    */
 65 |   inline ProcessResult operator>>(Reader& reader, Writer& writer)
 66 |   {
 67 |     ProcessResult result = reader.read();
 68 |     if (result.ok)
 69 |     {
 70 |       return writer.write(result.ast);
 71 |     }
 72 | 
 73 |     return result;
 74 |   }
 75 | 
 76 |   /**
 77 |    * Reader >> temporary Writer; forwards to the `Writer&` overload.
 78 |    */
 79 |   inline ProcessResult operator>>(Reader& reader, Writer&& writer)
 80 |   {
 81 |     return reader >> writer;
 82 |   }
 83 | 
 84 |   /**
 85 |    * Continues a pipeline: if `result` is ok, its AST is handed to `rewriter`;
 86 |    * otherwise `result` is returned unchanged.
 87 |    */
 88 |   inline ProcessResult
 89 |   operator>>(const ProcessResult& result, Rewriter& rewriter)
 90 |   {
 91 |     if (result.ok)
 92 |     {
 93 |       return rewriter.rewrite(result.ast);
 94 |     }
 95 | 
 96 |     return result;
 97 |   }
 98 | 
 99 |   /**
100 |    * ProcessResult >> temporary Rewriter; forwards to the `Rewriter&` overload.
101 |    */
102 |   inline ProcessResult
103 |   operator>>(const ProcessResult& result, Rewriter&& rewriter)
104 |   {
105 |     return result >> rewriter;
106 |   }
107 | 
108 |   /**
109 |    * Continues a pipeline: if `result` is ok, its AST is handed to `writer`;
110 |    * otherwise `result` is returned unchanged.
111 |    */
112 |   inline ProcessResult operator>>(const ProcessResult& result, Writer& writer)
113 |   {
114 |     if (result.ok)
115 |     {
116 |       return writer.write(result.ast);
117 |     }
118 | 
119 |     return result;
120 |   }
121 | 
122 |   /**
123 |    * ProcessResult >> temporary Writer; forwards to the `Writer&` overload.
124 |    */
125 |   inline ProcessResult operator>>(const ProcessResult& result, Writer&& writer)
126 |   {
127 |     return result >> writer;
128 |   }
129 | 
130 |   /**
131 |    * Rewrites a clone of `ast`, so the rewriter is not given the caller's node.
132 |    */
133 |   inline ProcessResult operator>>(const Node& ast, Rewriter& rewriter)
134 |   {
135 |     return rewriter.rewrite(ast->clone());
136 |   }
137 | 
138 |   /**
139 |    * Node >> temporary Rewriter; forwards to the `Rewriter&` overload.
140 |    */
141 |   inline ProcessResult operator>>(const Node& ast, Rewriter&& rewriter)
142 |   {
143 |     return ast >> rewriter;
144 |   }
145 | 
146 |   /**
147 |    * Writes a clone of `ast`, so the writer is not given the caller's node.
148 |    */
149 |   inline ProcessResult operator>>(const Node& ast, Writer& writer)
150 |   {
151 |     return writer.write(ast->clone());
152 |   }
153 | 
154 |   /**
155 |    * Node >> temporary Writer; forwards to the `Writer&` overload.
156 |    */
157 |   inline ProcessResult operator>>(const Node& ast, Writer&& writer)
158 |   {
159 |     return ast >> writer;
160 |   }
161 | 
162 |   /**
163 |    * Builds a new Reader with the language name and parser of `reader` whose
164 |    * pass list is the passes of `reader` followed by those of `rewriter`.
165 |    */
166 |   inline Reader operator>>=(Reader&& reader, Rewriter&& rewriter)
167 |   {
168 |     auto reader_passes = reader.passes();
169 |     // Iterate by const reference: each pass is only appended, so a by-value
170 |     // loop variable would make one extra copy per pass.
171 |     for (const auto& pass : rewriter.passes())
172 |     {
173 |       reader_passes.push_back(pass);
174 |     }
175 |     return Reader(
176 |       reader.language_name(),
177 |       reader_passes,
178 |       reader.parser());
179 |   }
180 | 
181 | } // namespace trieste
182 | 


--------------------------------------------------------------------------------
/parsers/json/parse.cc:
--------------------------------------------------------------------------------
  1 | #include "internal.h"
  2 | 
  3 | namespace trieste::json
  4 | {
  5 |   /**
  6 |    * Builds the Parse object that tokenizes JSON text.
  7 |    *
  8 |    * All rules are registered in the single "start" mode: structural
  9 |    * characters open or close Object/Array nodes, literals and scalars are
 10 |    * added as tokens, and any other character is reported as an error.
 11 |    * NOTE(review): the catch-all "." rule is listed last, presumably because
 12 |    * earlier rules take priority - confirm against Parse.
 13 |    */
 14 |   Parse parser()
 15 |   {
 16 |     Parse json_parse(depth::file, wf_parse);
 17 | 
 18 |     // Openers ('{' or '[') that are still unclosed. The vector is held by
 19 |     // shared_ptr so that the rule lambdas and the done() hook, which each
 20 |     // capture a copy of the pointer, all work on the same vector.
 21 |     auto open_delims = std::make_shared<std::vector<char>>();
 22 | 
 23 |     json_parse(
 24 |       "start",
 25 |       {// Whitespace between tokens adds nothing.
 26 |        "[ \r\n\t]+" >> [](auto&) {},
 27 | 
 28 |        ":" >> [](auto& make) { make.add(Colon); },
 29 | 
 30 |        "," >> [](auto& make) { make.add(Comma); },
 31 | 
 32 |        // '{' pushes an Object and a Group, then records the opener.
 33 |        "{" >>
 34 |          [open_delims](auto& make) {
 35 |            make.push(Object);
 36 |            make.push(Group);
 37 |            open_delims->push_back('{');
 38 |          },
 39 | 
 40 |        // '}' is accepted only when the innermost unclosed opener is '{'.
 41 |        "}" >>
 42 |          [open_delims](auto& make) {
 43 |            const bool closes_brace =
 44 |              !open_delims->empty() && open_delims->back() == '{';
 45 |            if (!closes_brace)
 46 |            {
 47 |              make.error("Mismatched braces");
 48 |              return;
 49 |            }
 50 |            open_delims->pop_back();
 51 |            make.term();
 52 |            make.pop(Object);
 53 |          },
 54 | 
 55 |        // '[' pushes an Array and a Group, then records the opener.
 56 |        R"(\[)" >>
 57 |          [open_delims](auto& make) {
 58 |            make.push(Array);
 59 |            make.push(Group);
 60 |            open_delims->push_back('[');
 61 |          },
 62 | 
 63 |        // ']' is accepted only when the innermost unclosed opener is '['.
 64 |        "]" >>
 65 |          [open_delims](auto& make) {
 66 |            const bool closes_bracket =
 67 |              !open_delims->empty() && open_delims->back() == '[';
 68 |            if (!closes_bracket)
 69 |            {
 70 |              make.error("Mismatched brackets");
 71 |              return;
 72 |            }
 73 |            open_delims->pop_back();
 74 |            make.term();
 75 |            make.pop(Array);
 76 |          },
 77 | 
 78 |        "true" >> [](auto& make) { make.add(True); },
 79 | 
 80 |        "false" >> [](auto& make) { make.add(False); },
 81 | 
 82 |        "null" >> [](auto& make) { make.add(Null); },
 83 | 
 84 |        // JSON number, piece by piece:
 85 |        //   -?                    optional minus sign
 86 |        //   (?:0|[1-9][0-9]*)     a lone 0, or a non-zero digit then digits
 87 |        //   (?:\.[0-9]+)?         optional fraction: '.' and one or more digits
 88 |        //   (?:[eE][-+]?[0-9]+)?  optional exponent: e/E, optional sign, digits
 89 |        R"(-?(?:0|[1-9][0-9]*)(?:\.[0-9]+)?(?:[eE][-+]?[0-9]+)?)" >>
 90 |          [](auto& make) { make.add(Number); },
 91 | 
 92 |        // JSON string: an opening '"', then zero or more of
 93 |        //   [^"\\\x00-\x1F]+      characters other than '"', '\' and the
 94 |        //                         control characters 00-1F
 95 |        //   \\["\\\/bfnrt]        '\' followed by one of " \ / b f n r t
 96 |        //   \\u[[:xdigit:]]{4}    '\u' followed by four hex digits
 97 |        // and finally a closing '"'.
 98 |        R"("(?:[^"\\\x00-\x1F]+|\\["\\\/bfnrt]|\\u[[:xdigit:]]{4})*")" >>
 99 |          [](auto& make) { make.add(String); },
100 | 
101 |        // Any other single character is an error.
102 |        "." >> [](auto& make) { make.error("Invalid character"); }});
103 | 
104 |     // done() hook (presumably run once parsing finishes - confirm): an
105 |     // opener still recorded here was never closed. The vector is emptied
106 |     // afterwards either way.
107 |     json_parse.done([open_delims](auto& make) {
108 |       if (!open_delims->empty())
109 |       {
110 |         make.error("Mismatched braces or brackets");
111 |       }
112 |       open_delims->clear();
113 |     });
114 | 
115 |     return json_parse;
116 |   }
117 | }
118 | 


--------------------------------------------------------------------------------
/parsers/test/CMakeLists.txt:
--------------------------------------------------------------------------------
 1 | ## JSON
 2 | # json_fuzzer: built from json_fuzzer.cc and linked against trieste::json.
 3 | add_executable(json_fuzzer json_fuzzer.cc)
 4 | enable_warnings(json_fuzzer)
 5 | 
 6 | target_link_libraries(json_fuzzer
 7 |   PRIVATE
 8 |   trieste::json)
 9 | # json_pattern_checker: built from json_checker.cc (target and file names differ).
10 | add_executable(json_pattern_checker json_checker.cc)
11 | enable_warnings(json_pattern_checker)
12 | 
13 | target_link_libraries(json_pattern_checker
14 |   PRIVATE
15 |   trieste::json)
16 | # json_test: built from json_test.cc and linked against trieste::json.
17 | add_executable(json_test json_test.cc)
18 | enable_warnings(json_test)
19 | 
20 | target_link_libraries(json_test
21 |   PRIVATE
22 |   trieste::json)
23 | # json_patch_test: built from json_patch_test.cc and linked against trieste::json.
24 | add_executable(json_patch_test json_patch_test.cc)
25 | enable_warnings(json_patch_test)
26 | 
27 | target_link_libraries(json_patch_test
28 |   PRIVATE
29 |   trieste::json)
30 | # Test-suite locations, resolved relative to this directory's binary dir.
31 | set(JSON_TEST_SUITE_ROOT ${CMAKE_CURRENT_BINARY_DIR}/../JSONTestSuite/test_parsing)
32 | set(JSON_PATCH_TEST_SUITE_ROOT ${CMAKE_CURRENT_BINARY_DIR}/../json-patch-tests/)
33 | # Each test runs with the directory of its own executable as working directory.
34 | add_test(NAME json_fuzz_reader COMMAND json_fuzzer reader -f WORKING_DIRECTORY $<TARGET_FILE_DIR:json_fuzzer>)
35 | add_test(NAME json_fuzz_writer COMMAND json_fuzzer writer -f WORKING_DIRECTORY $<TARGET_FILE_DIR:json_fuzzer>)
36 | add_test(NAME json_check_patterns COMMAND json_pattern_checker -w -i json-value WORKING_DIRECTORY $<TARGET_FILE_DIR:json_pattern_checker>)
37 | add_test(NAME json_test COMMAND json_test -wf ${JSON_TEST_SUITE_ROOT} WORKING_DIRECTORY $<TARGET_FILE_DIR:json_test>)
38 | add_test(NAME json_test_manual COMMAND json_test -wf -n manual WORKING_DIRECTORY $<TARGET_FILE_DIR:json_test>)
39 | add_test(NAME json_patch_tests COMMAND json_patch_test -f ${JSON_PATCH_TEST_SUITE_ROOT}/tests.json WORKING_DIRECTORY $<TARGET_FILE_DIR:json_patch_test>)
40 | add_test(NAME json_patch_spec_tests COMMAND json_patch_test -f ${JSON_PATCH_TEST_SUITE_ROOT}/spec_tests.json WORKING_DIRECTORY $<TARGET_FILE_DIR:json_patch_test>)
41 | # NOTE(review): json_pattern_checker is built above but not installed here - confirm.
42 | install(TARGETS json_fuzzer json_test json_patch_test RUNTIME DESTINATION parsers)
43 | 
44 | ## YAML
45 | # yaml_fuzzer: built from yaml_fuzzer.cc and linked against trieste::yaml.
46 | add_executable(yaml_fuzzer yaml_fuzzer.cc)
47 | enable_warnings(yaml_fuzzer)
48 | 
49 | target_link_libraries(yaml_fuzzer
50 |   PRIVATE
51 |   trieste::yaml)
52 | # yaml_pattern_checker: built from yaml_checker.cc (target and file names differ).
53 | add_executable(yaml_pattern_checker yaml_checker.cc)
54 | enable_warnings(yaml_pattern_checker)
55 | 
56 | target_link_libraries(yaml_pattern_checker
57 |   PRIVATE
58 |   trieste::yaml)
59 | # yaml_test: built from yaml_test.cc and linked against trieste::yaml.
60 | add_executable(yaml_test yaml_test.cc)
61 | enable_warnings(yaml_test)
62 | 
63 | target_link_libraries(yaml_test
64 |   PRIVATE
65 |   trieste::yaml)
66 | # Test-suite location, resolved relative to this directory's binary dir.
67 | set(YAML_TEST_SUITE_ROOT ${CMAKE_CURRENT_BINARY_DIR}/../yaml-test-suite)
68 | # Each test runs with the directory of its own executable as working directory.
69 | add_test(NAME yaml_fuzz_reader COMMAND yaml_fuzzer reader -f WORKING_DIRECTORY $<TARGET_FILE_DIR:yaml_fuzzer>)
70 | add_test(NAME yaml_fuzz_writer COMMAND yaml_fuzzer writer -f WORKING_DIRECTORY $<TARGET_FILE_DIR:yaml_fuzzer>)
71 | add_test(NAME yaml_fuzz_event_writer COMMAND yaml_fuzzer event_writer -f WORKING_DIRECTORY $<TARGET_FILE_DIR:yaml_fuzzer>)
72 | add_test(NAME yaml_fuzz_to_json COMMAND yaml_fuzzer to_json -f WORKING_DIRECTORY $<TARGET_FILE_DIR:yaml_fuzzer>)
73 | add_test(NAME yaml_check_patterns COMMAND yaml_pattern_checker WORKING_DIRECTORY $<TARGET_FILE_DIR:yaml_pattern_checker>)
74 | add_test(NAME yaml_test COMMAND yaml_test -wf ${YAML_TEST_SUITE_ROOT} WORKING_DIRECTORY $<TARGET_FILE_DIR:yaml_test>)
75 | add_test(NAME yaml_test_crlf COMMAND yaml_test --crlf -wf ${YAML_TEST_SUITE_ROOT} WORKING_DIRECTORY $<TARGET_FILE_DIR:yaml_test>)
76 | # NOTE(review): yaml_pattern_checker is built above but not installed here - confirm.
77 | install(TARGETS yaml_fuzzer yaml_test RUNTIME DESTINATION parsers)
78 | 


--------------------------------------------------------------------------------
/parsers/yaml/internal.h:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | 
 3 | #include "trieste/yaml.h"
 4 | 
 5 | #include <set>
 6 | #include <string>
 7 | #include <string_view>
 8 | 
 9 | namespace trieste
10 | {
11 |   namespace yaml
12 |   {
13 |     Parse parser();
14 | 
15 |     // NOTE(review): presumably the YAML block-scalar chomping modes - confirm.
16 |     enum class Chomp
17 |     {
18 |       Clip,
19 |       Strip,
20 |       Keep,
21 |     };
22 | 
23 |     // String helpers; only declared here, the definitions live elsewhere.
24 |     std::string
25 |     escape_chars(const std::string_view& str, const std::set<char>& to_escape);
26 |     std::string unescape_url_chars(const std::string_view& input);
27 |     std::string replace_all(
28 |       const std::string_view& v,
29 |       const std::string_view& find,
30 |       const std::string_view& replace);
31 | 
32 |     // Parser-internal tokens; every name carries the `yaml-` prefix.
33 |     inline const auto Whitespace = TokenDef("yaml-whitespace", flag::print);
34 |     inline const auto Hyphen = TokenDef("yaml-hyphen");
35 |     inline const auto Colon = TokenDef("yaml-colon");
36 |     inline const auto Comma = TokenDef("yaml-comma");
37 |     inline const auto NewLine = TokenDef("yaml-newline");
38 |     inline const auto Comment = TokenDef("yaml-comment", flag::print);
39 |     inline const auto VerbatimTag = TokenDef("yaml-verbatimtag", flag::print);
40 |     inline const auto ShorthandTag = TokenDef("yaml-shorthandtag", flag::print);
41 |     inline const auto Tag = TokenDef("yaml-tag");
42 |     inline const auto IndentIndicator =
43 |       TokenDef("yaml-indentation-indicator", flag::print);
44 |     inline const auto FlowSequenceStart =
45 |       TokenDef("yaml-flowseqstart", flag::print);
46 |     inline const auto FlowSequenceEnd =
47 |       TokenDef("yaml-flowseqend", flag::print);
48 |     inline const auto FlowMappingStart =
49 |       TokenDef("yaml-flowmapstart", flag::print);
50 |     inline const auto FlowMappingEnd = TokenDef("yaml-flowmapend", flag::print);
51 |     inline const auto MaybeDirective =
52 |       TokenDef("yaml-maybedirective", flag::print);
53 |     inline const auto Block = TokenDef("yaml-block");
54 |     inline const auto WhitespaceLine = TokenDef("yaml-whitespace-line", flag::print);
55 | 
56 |     // Every token type allowed inside a Group (see wf_parse below).
57 |     inline const auto wf_parse_tokens = Stream | Document | Hyphen | NewLine |
58 |       Whitespace | Value | Int | Float | Hex | True | False | Null | Colon |
59 |       TagDirective | Anchor | Alias | SingleQuote | DoubleQuote |
60 |       VersionDirective | UnknownDirective | DocumentStart | DocumentEnd | Tag |
61 |       TagPrefix | ShorthandTag | VerbatimTag | TagHandle | Literal |
62 |       Folded | IndentIndicator | ChompIndicator | Key | FlowMapping |
63 |       FlowMappingStart | FlowMappingEnd | FlowSequence | FlowSequenceStart |
64 |       FlowSequenceEnd | Comma | Comment | MaybeDirective;
65 | 
66 |     // clang-format off
67 |   inline const auto wf_parse =
68 |     (Top <<= File)
69 |     | (File <<= Group)
70 |     | (Stream <<= Group++)
71 |     | (Document <<= Group)
72 |     | (Tag <<= Group)
73 |     | (FlowMapping <<= Group++)
74 |     | (FlowSequence <<= Group++)
75 |     | (TagDirective <<= Group)
76 |     | (Group <<= wf_parse_tokens++[1])
77 |     ;
78 |     // clang-format on
79 | 
80 |     // Error node carrying `msg` and a clone of `node` as its ErrorAst.
81 |     inline auto err(Node node, const std::string& msg)
82 |     {
83 |       return Error << (ErrorMsg ^ msg) << (ErrorAst << node->clone());
84 |     }
85 | 
86 |     // Error node carrying `msg` and the nodes of `r` as its ErrorAst.
87 |     inline Node err(const NodeRange& r, const std::string& msg)
88 |     {
89 |       return Error << (ErrorMsg ^ msg) << (ErrorAst << r);
90 |     }
91 | 
92 |     // Error node carrying only `msg`.
93 |     inline auto err(const std::string& msg)
94 |     {
95 |       return Error << (ErrorMsg ^ msg);
96 |     }
97 |   }
98 | }
99 | 


--------------------------------------------------------------------------------
/include/trieste/defaultmap.h:
--------------------------------------------------------------------------------
  1 | #pragma once
  2 | 
  3 | #include <array>
  4 | #include <cstddef>
  5 | 
  6 | #include "token.h"
  7 | 
  8 | namespace trieste::detail
  9 | {
 10 |   /**
 11 |    * Maps tokens to values, with a modifiable default value.
 12 |    *
 13 |    * This is used by the matching system.  If a rule applies generally, it is
 14 |    * added to all tokens, and if it applies to a specific token, it is added
 15 |    * to that token only.
 16 |    *
 17 |    * Each slot either points at the shared default `def` or owns a
 18 |    * heap-allocated T created by modify() or by copying another map.
 19 |    */
 20 |   template<typename T>
 21 |   class DefaultMap
 22 |   {
 23 |     // The default value for this map. This is returned when a specific value
 24 |     // has not been set for the looked up token.
 25 |     T def{};
 26 | 
 27 |     // The map of specific values for tokens.
 28 |     std::array<T*, TokenDef::DEFAULT_MAP_TABLE_SIZE> map;
 29 | 
 30 |     // If this is true, then the map is empty, and the default value has not
 31 |     // been modified.
 32 |     bool empty_{true};
 33 | 
 34 |     bool is_index_default(size_t index) const
 35 |     {
 36 |       return map[index] == &def;
 37 |     }
 38 | 
 39 |     size_t token_index(const Token& t) const
 40 |     {
 41 |       return t.default_map_hash();
 42 |     }
 43 | 
 44 |     // Deletes every token-specific value and points its slot back at `def`.
 45 |     void release_specific()
 46 |     {
 47 |       for (size_t i = 0; i < map.size(); i++)
 48 |       {
 49 |         if (!is_index_default(i))
 50 |         {
 51 |           delete map[i];
 52 |           map[i] = &def;
 53 |         }
 54 |       }
 55 |     }
 56 | 
 57 |     // Deep-copies the token-specific values of `dm` into this map. Every
 58 |     // slot of this map must point at this map's own `def` before the call.
 59 |     void copy_specific(const DefaultMap& dm)
 60 |     {
 61 |       for (size_t index = 0; index < map.size(); index++)
 62 |       {
 63 |         if (!dm.is_index_default(index))
 64 |           map[index] = new T(*dm.map[index]);
 65 |       }
 66 |     }
 67 | 
 68 |   public:
 69 |     DefaultMap()
 70 |     {
 71 |       map.fill(&def);
 72 |     }
 73 | 
 74 |     DefaultMap(const DefaultMap& dm) : def(dm.def), empty_(dm.empty_)
 75 |     {
 76 |       map.fill(&def);
 77 |       copy_specific(dm);
 78 |     }
 79 | 
 80 |     /**
 81 |      * Copy assignment, needed because the class owns raw pointers. The
 82 |      * implicitly generated operator would copy the slot pointers themselves,
 83 |      * leaking this map's values, leaving slots pointing into `dm`, and
 84 |      * deleting the values of `dm` a second time on destruction.
 85 |      */
 86 |     DefaultMap& operator=(const DefaultMap& dm)
 87 |     {
 88 |       if (this != &dm)
 89 |       {
 90 |         release_specific();
 91 |         def = dm.def;
 92 |         empty_ = dm.empty_;
 93 |         copy_specific(dm);
 94 |       }
 95 |       return *this;
 96 |     }
 97 | 
 98 |     /**
 99 |      *  Modify all values in the map, including the default value.
100 |      *
101 |      *  This is used for adding rules that do not specify an explicit start
102 |      * token, or an explicit parent, so they need to apply generally.
103 |      */
104 |     template<typename F>
105 |     void modify_all(F f)
106 |     {
107 |       empty_ = false;
108 |       for (size_t i = 0; i < map.size(); i++)
109 |         if (!is_index_default(i))
110 |           f(*map[i]);
111 |       f(def);
112 |     }
113 | 
114 |     /**
115 |      * Get a mutable reference to the value for a token.  If this does not have
116 |      * a current value, first fill it with the current default value.
117 |      */
118 |     T& modify(const Token& t)
119 |     {
120 |       auto i = token_index(t);
121 |       empty_ = false;
122 |       // Use existing default set of rules.
123 |       if (is_index_default(i))
124 |         map[i] = new T(def);
125 |       return *map[i];
126 |     }
127 | 
128 |     /**
129 |      * Get the value for a token. If this token has no specific value, return
130 |      * the default value.
131 |      */
132 |     T& get(const Token& t)
133 |     {
134 |       return *map[token_index(t)];
135 |     }
136 | 
137 |     /**
138 |      * Clear all the values in the map, and the default value.
139 |      */
140 |     void clear()
141 |     {
142 |       empty_ = true;
143 |       release_specific();
144 |       def.clear();
145 |     }
146 | 
147 |     ~DefaultMap()
148 |     {
149 |       clear();
150 |     }
151 | 
152 |     /**
153 |      * Returns true if modify has not been called since the last clear.
154 |      */
155 |     bool empty() const
156 |     {
157 |       return empty_;
158 |     }
159 |   };
160 | }
161 | 


--------------------------------------------------------------------------------
/.github/workflows/buildtest.yml:
--------------------------------------------------------------------------------
 1 | name: CMake
 2 | 
 3 | on:
 4 |   push:
 5 |     branches: [ "main" ]
 6 |   pull_request:
 7 |     branches: [ "main" ]
 8 | 
 9 | env:
10 |   global-cmake-flags: -DTRIESTE_ENABLE_TESTING=1
11 | 
12 | jobs:
13 |   build-test:
14 |     strategy:
15 |       matrix:
16 |         platform: [ "ubuntu-22.04", "macos-latest", "windows-latest" ]
17 |         build-type: [ "Release", "Debug" ]
18 |         # Note: cmake-options is missing here on purpose to let it be overridden by includes
19 |         standard: [ "", "-DTRIESTE_USE_CXX17=ON" ]
20 |         compiler: [ "", "clang" ]
21 |         variant: [""]
22 |         
23 |         include:
24 |         # ensures ubuntu-22.04 clang uses Ninja (modifies the matrix entry)
25 |         - platform: "ubuntu-22.04"
26 |           compiler: "clang"
27 |           cmake-options: "-DCMAKE_CXX_COMPILER=clang++-15 -DCMAKE_C_COMPILER=clang-15 -DTRIESTE_BUILD_PARSER_TESTS=1"
28 |           generator: "-G Ninja"
29 |           dependencies: "sudo apt install ninja-build clang-15"
30 | 
31 |         - platform: "windows-latest"
32 |           variant:  "build-parser-tests"
33 |           build-type: "Release"
34 |           cmake-options: "-DTRIESTE_BUILD_PARSER_TESTS=1"
35 |         
36 |         - platform: "macos-latest"
37 |           variant:  "build-parser-tests"
38 |           build-type: "Release"
39 |           cmake-options: "-DTRIESTE_BUILD_PARSER_TESTS=1"
40 | 
41 |         - platform: "ubuntu-22.04"
42 |           variant: "asan"
43 |           build-type: "Release"
44 |           cmake-options: "-DCMAKE_CXX_COMPILER=clang++-15 -DCMAKE_C_COMPILER=clang-15 -DTRIESTE_SANITIZE=address -DTRIESTE_BUILD_PARSER_TESTS=1"
45 |           dependencies: "sudo apt install ninja-build clang-15"
46 | 
47 |         - platform: "macos-latest"
48 |           variant: "asan"
49 |           build-type: "Release"
50 |           cmake-options: "-DTRIESTE_SANITIZE=address -DTRIESTE_BUILD_PARSER_TESTS=1"
51 | 
52 |         - platform: "ubuntu-22.04"
53 |           variant: "ubsan"
54 |           build-type: "Release"
55 |           cmake-options: "-DCMAKE_CXX_COMPILER=clang++-15 -DCMAKE_C_COMPILER=clang-15 -DTRIESTE_SANITIZE=undefined -DTRIESTE_BUILD_PARSER_TESTS=1"
56 |           dependencies: "sudo apt install ninja-build clang-15"
57 | 
58 |         exclude:
59 |         # Mac is already using clang.
60 |         - platform: "macos-latest"
61 |           compiler: "clang"
62 |         # Windows is only using MSVC.
63 |         - platform: "windows-latest"
64 |           compiler: "clang"
65 | 
66 |       # Don't abort runners if a single one fails
67 |       fail-fast: false
68 | 
69 |     runs-on: ${{matrix.platform}}
70 | 
71 |     name: ${{matrix.platform}} ${{matrix.build-type}} ${{matrix.standard}} ${{matrix.compiler}} ${{matrix.variant}}
72 | 
73 |     steps:
74 |     - uses: actions/checkout@v3
75 |     
76 |     - name: Install build dependencies
77 |       run: ${{matrix.dependencies}}
78 |       
79 |     - name: Configure CMake
80 |       run: cmake -B ${{github.workspace}}/build ${{env.global-cmake-flags}} -DCMAKE_BUILD_TYPE=${{matrix.build-type}} ${{matrix.generator}} ${{matrix.standard}} ${{matrix.cmake-options}}
81 | 
82 |     - name: Build
83 |       run: cmake --build ${{github.workspace}}/build --config ${{matrix.build-type}}
84 | 
85 |     - name: Test
86 |       working-directory: ${{github.workspace}}/build
87 |       run: ctest -C ${{matrix.build-type}} --output-on-failure --timeout 400 --interactive-debug-mode 0 
88 |   
89 |   all-checks:
90 |     # Join of all build-test jobs
91 |     needs: [build-test]
92 |     runs-on: ubuntu-22.04
93 |     steps:
94 |     - name: Check all jobs
95 |       run: echo "All jobs passed"
96 | 


--------------------------------------------------------------------------------
/.clang-format:
--------------------------------------------------------------------------------
  1 | ---
  2 | Language:        Cpp
  3 | # BasedOnStyle:  LLVM
  4 | AccessModifierOffset: -2
  5 | AlignAfterOpenBracket: AlwaysBreak
  6 | AlignConsecutiveAssignments: false
  7 | AlignConsecutiveDeclarations: false
  8 | AlignEscapedNewlines: DontAlign
  9 | AlignOperands:   false
 10 | AlignTrailingComments: false
 11 | AllowAllParametersOfDeclarationOnNextLine: true
 12 | AllowShortBlocksOnASingleLine: false
 13 | AllowShortCaseLabelsOnASingleLine: false
 14 | AllowShortFunctionsOnASingleLine: Empty
 15 | AllowShortIfStatementsOnASingleLine: false
 16 | AllowShortLoopsOnASingleLine: false
 17 | AlwaysBreakAfterDefinitionReturnType: None
 18 | AlwaysBreakAfterReturnType: None
 19 | AlwaysBreakBeforeMultilineStrings: true
 20 | AlwaysBreakTemplateDeclarations: true
 21 | BinPackArguments: false
 22 | BinPackParameters: false
 23 | BraceWrapping:
 24 |   AfterCaseLabel:  true
 25 |   AfterClass:      true
 26 |   AfterControlStatement: true
 27 |   AfterEnum:       true
 28 |   AfterFunction:   true
 29 |   AfterNamespace:  true
 30 |   AfterObjCDeclaration: true
 31 |   AfterStruct:     true
 32 |   AfterUnion:      true
 33 |   AfterExternBlock: true
 34 |   BeforeCatch:     true
 35 |   BeforeElse:      true
 36 |   IndentBraces:    false
 37 |   SplitEmptyFunction: false
 38 |   SplitEmptyRecord: false
 39 |   SplitEmptyNamespace: false
 40 | BreakBeforeBinaryOperators: None
 41 | BreakBeforeBraces: Custom
 42 | BreakBeforeInheritanceComma: false
 43 | BreakBeforeTernaryOperators: false
 44 | BreakConstructorInitializersBeforeComma: false
 45 | BreakConstructorInitializers: BeforeColon
 46 | BreakAfterJavaFieldAnnotations: false
 47 | BreakStringLiterals: true
 48 | ColumnLimit:     80
 49 | CommentPragmas:  '^ IWYU pragma:'
 50 | CompactNamespaces: false
 51 | ConstructorInitializerAllOnOneLineOrOnePerLine: true
 52 | ConstructorInitializerIndentWidth: 0
 53 | ContinuationIndentWidth: 2
 54 | Cpp11BracedListStyle: true
 55 | DerivePointerAlignment: false
 56 | DisableFormat:   false
 57 | ExperimentalAutoDetectBinPacking: false
 58 | FixNamespaceComments: false
 59 | ForEachMacros:
 60 |   - Q_FOREACH
 61 |   - BOOST_FOREACH
 62 | IncludeBlocks:   Regroup
 63 | IncludeCategories:
 64 |   - Regex:           '^"(llvm|llvm-c|clang|clang-c)/'
 65 |     Priority:        2
 66 |   - Regex:           '^(<|"(gtest|gmock|isl|json)/)'
 67 |     Priority:        3
 68 |   - Regex:           '.*'
 69 |     Priority:        1
 70 | IncludeIsMainRegex: '(Test)?$'
 71 | IndentCaseLabels: true
 72 | IndentPPDirectives: AfterHash
 73 | IndentWidth:     2
 74 | IndentWrappedFunctionNames: false
 75 | InsertNewlineAtEOF: true
 76 | JavaScriptQuotes: Leave
 77 | JavaScriptWrapImports: true
 78 | KeepEmptyLinesAtTheStartOfBlocks: false
 79 | MacroBlockBegin: ''
 80 | MacroBlockEnd:   ''
 81 | MaxEmptyLinesToKeep: 1
 82 | NamespaceIndentation: All
 83 | ObjCBlockIndentWidth: 2
 84 | ObjCSpaceAfterProperty: false
 85 | ObjCSpaceBeforeProtocolList: true
 86 | PenaltyBreakAssignment: 2
 87 | PenaltyBreakBeforeFirstCallParameter: 19
 88 | PenaltyBreakComment: 300
 89 | PenaltyBreakFirstLessLess: 120
 90 | PenaltyBreakString: 1000
 91 | PenaltyExcessCharacter: 1000000
 92 | PenaltyReturnTypeOnItsOwnLine: 60
 93 | PointerAlignment: Left
 94 | ReflowComments:  true
 95 | SortIncludes:    true
 96 | SortUsingDeclarations: true
 97 | SpaceAfterCStyleCast: false
 98 | SpaceAfterTemplateKeyword: false
 99 | SpaceBeforeAssignmentOperators: true
100 | SpaceBeforeParens: ControlStatements
101 | SpaceInEmptyParentheses: false
102 | SpacesBeforeTrailingComments: 1
103 | SpacesInAngles:  false
104 | SpacesInContainerLiterals: false
105 | SpacesInCStyleCastParentheses: false
106 | SpacesInParentheses: false
107 | SpacesInSquareBrackets: false
108 | Standard:        Cpp11
109 | TabWidth:        2
110 | UseTab:          Never
111 | ...
112 | 
113 | 


--------------------------------------------------------------------------------
/parsers/README.md:
--------------------------------------------------------------------------------
 1 | # Parsers
 2 | 
 3 | In addition to providing the Trieste library, we have also provided reference parsers for JSON and YAML as practical examples. You (the language implementer) can use these language implementations as templates and guides for how to implement your own language toolchain using Trieste. In this document we will highlight the key features we believe a language implementation should have. You can learn more about implementing helper classes in the [`infix` tutorial](../samples/infix/README.md).
 4 | 
 5 | ## WF Definition
 6 | 
 7 | Each of the language implementations exposes various things in their respective namespaces. For example, the JSON implementation exposes this WF definition at `trieste::json::wf`:
 8 | 
 9 | ```c++
10 | inline const auto Object = TokenDef("json-object");
11 | inline const auto Array = TokenDef("json-array");
12 | inline const auto String = TokenDef("json-string", flag::print);
13 | inline const auto Number = TokenDef("json-number", flag::print);
14 | inline const auto True = TokenDef("json-true");
15 | inline const auto False = TokenDef("json-false");
16 | inline const auto Null = TokenDef("json-null");
17 | inline const auto Member = TokenDef("json-member");
18 | 
19 | inline const auto wf_value_tokens =
20 |     Object | Array | String | Number | True | False | Null;
21 | 
22 | // clang-format off
23 | inline const auto wf =
24 |     (Top <<= wf_value_tokens++[1])
25 |     | (Object <<= Member++)
26 |     | (Member <<= String * (Value >>= wf_value_tokens))
27 |     | (Array <<= wf_value_tokens++)
28 |     ;
29 | // clang-format on
30 | ```
31 | 
32 | This is the well-formedness definition which corresponds to the AST of a successfully parsed JSON document. There are two best practices here to keep in mind. The first is the C++ namespace. As a rule, it is a good idea for your language implementation to provide its own namespace, in which the tokens, WF definitions, and helper constructs will be exposed to your users. The second is the token prefixing, *i.e.*:
33 | 
34 | ```c++
35 | inline const auto Object = TokenDef("json-object");
36 | ```
37 | 
38 | Tokens in Trieste must have unique names, and so using a prefix like `json-` ensures that the tokens used do not collide with tokens defined by other language implementations which may be in use in the project.
39 | 
40 | ## Reader
41 | 
42 | Every language implementation should expose a `Reader` helper in its namespace. For example, here is the one for JSON:
43 | 
44 | ```c++
45 | Reader reader(bool allow_multiple = false);
46 | ```
47 | 
48 | Any parsing settings should be passed to this method. For example, here we see a flag saying whether the parser allows there to be multiple JSON values at the top level of a document (a very common variant of the language). As a best practice, these should always be provided with a default value (if possible) so that your users can create a `Reader` object as simply as possible.
49 | 
50 | ## Writer
51 | 
52 | Whether your language implementation exposes a `Writer` depends entirely on the language. In the case of data formats like JSON and YAML, it makes a lot of sense and as such we have included them in our implementations. Naming them like `json::writer()` or `yaml::event_writer()`, that is with `writer` in the name and a prefix indicating if it is not what the "default" writer would be (for example, `yaml::event_writer()` for YAML event files) is considered best practice.
53 | 
54 | ## Rewriter
55 | 
56 | One of the unique affordances given by Trieste as a library is that the workflow to translate from one language to another is the same as everything else: multi-pass AST rewriting. To provide an example of this we expose `yaml::to_json()`, which returns a `Rewriter` that converts a YAML AST to a JSON AST. Best practice for exposing these helpers is as shown here, that is to say `<namespace>::to_<other namespace>`.
57 | 


--------------------------------------------------------------------------------
/parsers/yaml/yamlc.cc:
--------------------------------------------------------------------------------
  1 | #include "trieste/json.h"
  2 | #include "trieste/trieste.h"
  3 | #include "trieste/yaml.h"
  4 | 
  5 | #include <CLI/CLI.hpp>
  6 | 
  7 | using namespace trieste;
  8 | 
  9 | int main(int argc, char** argv)
 10 | {
 11 |   CLI::App app;
 12 | 
 13 |   std::filesystem::path input_path;
 14 |   app.add_option("input", input_path, "Path to the input file ")->required();
 15 | 
 16 |   std::filesystem::path output_path;
 17 |   app.add_option("output", output_path, "Path to the output file");
 18 | 
 19 |   std::filesystem::path debug_path;
 20 |   app.add_option(
 21 |     "-a,--ast",
 22 |     debug_path,
 23 |     "Output the AST (debugging for the reader/rewriter/writer workflows)");
 24 | 
 25 |   bool wf_checks{false};
 26 |   app.add_flag("-w,--wf", wf_checks, "Enable well-formedness checks (slow)");
 27 | 
 28 |   bool prettyprint{false};
 29 |   app.add_flag(
 30 |     "--prettyprint", prettyprint, "Pretty print the output (for JSON)");
 31 | 
 32 |   bool sort_keys{false};
 33 |   app.add_flag(
 34 |     "--sort-keys", sort_keys, "Sort object keys in the output (for JSON)");
 35 | 
 36 |   auto modes = {"event", "json", "yaml"};
 37 |   std::string mode;
 38 |   app.add_option("-m,--mode", mode, "Output mode.")
 39 |     ->transform(CLI::IsMember(modes));
 40 | 
 41 |   try
 42 |   {
 43 |     app.parse(argc, argv);
 44 |   }
 45 |   catch (const CLI::ParseError& e)
 46 |   {
 47 |     return app.exit(e);
 48 |   }
 49 | 
 50 |   if (mode.empty())
 51 |   {
 52 |     if (output_path.extension() == ".event")
 53 |     {
 54 |       mode = "event";
 55 |     }
 56 |     else if (output_path.extension() == ".json")
 57 |     {
 58 |       mode = "json";
 59 |     }
 60 |     else if (output_path.extension() == ".yaml")
 61 |     {
 62 |       mode = "yaml";
 63 |     }
 64 |     else
 65 |     {
 66 |       std::cerr << "Output mode not specified and could not be inferred from "
 67 |                    "the output file extension."
 68 |                 << std::endl;
 69 |       return 1;
 70 |     }
 71 |   }
 72 | 
 73 |   trieste::Reader reader = yaml::reader()
 74 |                              .file(input_path)
 75 |                              .debug_enabled(!debug_path.empty())
 76 |                              .debug_path(debug_path / "inyaml")
 77 |                              .wf_check_enabled(wf_checks);
 78 |   Destination dest = output_path.empty() ?
 79 |     DestinationDef::console() :
 80 |     DestinationDef::dir(output_path.parent_path());
 81 |   if (output_path.empty())
 82 |   {
 83 |     output_path = mode;
 84 |   }
 85 | 
 86 |   ProcessResult result;
 87 |   if (mode == "event")
 88 |   {
 89 |     result = reader >> yaml::event_writer(output_path)
 90 |                          .destination(dest)
 91 |                          .debug_enabled(!debug_path.empty())
 92 |                          .debug_path(debug_path / "event")
 93 |                          .wf_check_enabled(wf_checks);
 94 |     ;
 95 |   }
 96 |   else if (mode == "json")
 97 |   {
 98 |     result = reader >> yaml::to_json()
 99 |                          .debug_enabled(!debug_path.empty())
100 |                          .debug_path(debug_path / "json")
101 |                          .wf_check_enabled(wf_checks) >>
102 |       json::writer(output_path, prettyprint, sort_keys)
103 |         .destination(dest)
104 |         .debug_enabled(!debug_path.empty())
105 |         .debug_path(debug_path)
106 |         .wf_check_enabled(wf_checks);
107 |     ;
108 |   }
109 |   else
110 |   {
111 |     result = reader >> yaml::writer(output_path.filename().string())
112 |                          .destination(dest)
113 |                          .debug_enabled(!debug_path.empty())
114 |                          .debug_path(debug_path / "outyaml")
115 |                          .wf_check_enabled(wf_checks);
116 |     ;
117 |   }
118 | 
119 |   if (!result.ok)
120 |   {
121 |     logging::Error err;
122 |     result.print_errors(err);
123 |     return 1;
124 |   }
125 | 
126 |   return 0;
127 | }
128 | 


--------------------------------------------------------------------------------
/notes/von.md:
--------------------------------------------------------------------------------
 1 | # Verona Object Notation
 2 | 
 3 | Design goals, in order:
 4 | - Efficient zero-copy random access.
 5 | - Typed.
 6 | - Arbitrary graph.
 7 | - Efficient to encode.
 8 | - Possible to read some values with incomplete data (streaming friendly).
 9 | - Bijective human readable format.
10 | 
11 | Non-goals:
12 | - Updateable or versioning. A consumer may understand a different type table (or type table digest) to indicate a new version, but there is no expectation that an unaware consumer can understand a new version.
13 | - Compression. This can be applied during storage or transport, but it isn't part of the format.
14 | 
15 | ## Types
16 | 
17 | The type table for a file is a 64-bit count of the number of types, followed by an entry for each type. This is followed by a 64-bit index into the type table for the root object type.
18 | 
19 | A `name` in a type is a 64-bit length followed by a number of bytes equal to the length. A `name` is a valid UTF-8 string. These are used only for human readable accessors.
20 | 
21 | A type is described as follows:
22 | 
23 | 00000000: void
24 | 00000001: bit
25 | 00000010: byte
26 | 00000011: fixed size vector, followed by a type and 64-bit length
27 | 00000100: variable size vector, followed by a type
28 | 00000101: variant, followed by a 64-bit option count and a number of types equal to the option count
29 | 00000110: record, followed by a 64-bit field count and a number of pairs of `type, name` equal to the field count, where the `name` must not be repeated
30 | 00000111: pointer, followed by a 64-bit index into the type table
31 | 00001000: type index, followed by a 64-bit index into the type table
32 | 001xxxxx: floating point, with a 5 bit size class
33 | 010xxxxx: unsigned integer, with a 5 bit size class
34 | 011xxxxx: integer, with a 5 bit size class
35 | 
36 | ## File Format
37 | 
38 | The file begins with a 1-bit indicator whether it is self-describing or if the consumer is expected to understand the type.
39 | 
40 | Next is a 63-bit file length. All offsets must be less than this length. Offsets are relative to the start of the file, and are encoded differently depending on the size of the file.
41 | 
42 | If the file length is less than 2^16, then all offsets are 16-bit. If the file length is less than 2^32, then all offsets are 32-bit. Otherwise, all offsets are 64-bit. This unsigned integer size will be called a size_t in the rest of this document.
43 | 
44 | If the file is self-describing, next is the type table. Otherwise, next is a SHA-256 digest of the type table. The consumer is expected to be able to map this digest to the type table.
45 | 
46 | The root object follows. This must be a valid object of the root type.
47 | 
48 | ## Values
49 | 
50 | Void is zero length.
51 | 
52 | A bit is a single byte with value 0 or 1.
53 | 
54 | Bytes are encoded directly.
55 | 
56 | Fixed size vectors are encoded as a series of values of the correct type. A bit vector is packed into the minimal byte count.
57 | 
58 | Variable size vectors are encoded as a size_t offset into the file. At that location in the file, there is a size_t length followed by a series of values of the correct type.
59 | 
60 | Variants are encoded as a size_t option index followed by a value of the correct type. This is padded with 0 bits to the size of the largest option. It may be more efficient to encode a variant as a pointer to a variant.
61 | 
62 | Records are encoded as a series of values of the correct type.
63 | 
64 | Floating point numbers are encoded as IEEE 754-2019 bit patterns. The size of the bit pattern is taken from the type.
65 | 
66 | Integers and unsigned integers are directly encoded. The type gives the size of the encoding.
67 | 
68 | Pointers are encoded as size_t offsets into the file. The object at that offset must be of the type specified by the pointer. If the offset is 0, then the pointer is null.
69 | 
70 | ## Human Readable Format
71 | 
72 | ```ts
73 | types
74 | {
75 |   sha256: array 32 byte
76 |   foobar: record { foo: foo; bar: bar }
77 |   foo: variant { i32; string }
78 |   bar: string
79 |   string: array * byte
80 |   opt_foobar: variant { pointer foobar; void }
81 | }
82 | 
83 | $1: foobar = { foo = 0 42; bar = $2 }
84 | $2: string = 13:Hello, World!
85 | ```
86 | 


--------------------------------------------------------------------------------
/parsers/yaml/README.md:
--------------------------------------------------------------------------------
 1 | # YAML
 2 | 
 3 | The files in this directory provide a [YAML 1.2.2](https://yaml.org/spec/1.2.2/) compliant language implementation using Trieste. In addition to parsing YAML, we also provide the capability to output YAML event files, JSON, and canonically formatted YAML.
 4 | 
 5 | We validate our implementation against the official [YAML test suite](https://github.com/yaml/yaml-test-suite).
 6 | 
 7 | ## Getting Started
 8 | 
 9 | To use Trieste YAML in your own codebase you will need to configure your CMake project with the `TRIESTE_BUILD_PARSERS` flag set. Trieste YAML definitions are in the `trieste::yaml` namespace, and to access them you need to include the `trieste/yaml.h` header. The YAML implementation is based around the following well-formedness definition (copied here from the header file):
10 | 
11 | ```c++
12 | inline const auto wf_tokens = Mapping | Sequence | Value | Int | Float |
13 |   True | False | Hex | Null | SingleQuote | DoubleQuote | Plain |
14 |   AnchorValue | Alias | TagValue | Literal | Folded | Empty | FlowMapping |
15 |   FlowSequence;
16 | 
17 | inline const auto wf_flow_tokens = wf_tokens - (Mapping | Sequence);
18 | 
19 | inline const auto wf =
20 |   (Top <<= Stream)
21 |   | (Stream <<= Directives * Documents)
22 |   | (Documents <<= Document++)
23 |   | (Document <<= Directives * DocumentStart * (Value >>= wf_tokens) * DocumentEnd)
24 |   | (Directives <<= (TagDirective | VersionDirective | UnknownDirective)++)
25 |   | (TagDirective <<= TagPrefix * TagHandle)[TagPrefix]
26 |   | (Mapping <<= MappingItem++[1])
27 |   | (MappingItem <<= (Key >>= wf_tokens) * (Value >>= wf_tokens))
28 |   | (FlowMapping <<= FlowMappingItem++)
29 |   | (FlowMappingItem <<= (Key >>= wf_flow_tokens) * (Value >>= wf_flow_tokens))
30 |   | (AnchorValue <<= Anchor * (Value >>= wf_tokens))[Anchor]
31 |   | (TagValue <<= TagPrefix * TagName * (Value >>= wf_tokens))
32 |   | (Sequence <<= wf_tokens++[1])
33 |   | (FlowSequence <<= wf_flow_tokens++)
34 |   | (SingleQuote <<= (BlockLine|EmptyLine)++[1])
35 |   | (DoubleQuote <<= (BlockLine|EmptyLine)++[1])
36 |   | (Literal <<= AbsoluteIndent * ChompIndicator * Lines)
37 |   | (Folded <<= AbsoluteIndent * ChompIndicator * Lines)
38 |   | (Lines <<= (BlockLine|EmptyLine)++)
39 |   | (Plain <<= (BlockLine|EmptyLine)++[1])
40 |   ;
41 | ```
42 | 
43 | The language implementation exposes the following helpers:
44 | 
45 | - `reader()` - `Reader` that parses any valid 1.2.2 YAML file and produces an AST that conforms to `yaml::wf`.
46 | - `writer()` - `Writer` that takes a YAML AST that conforms to `yaml::wf` and produces a YAML file.
47 | - `event_writer()` - `Writer` that takes a YAML AST and produces a YAML event file.
48 | - `to_json()` - `Rewriter` that takes a YAML AST and converts it to a JSON AST that conforms to `json::wf`.
49 | 
50 | ## Using the `yamlc` Executable
51 | 
52 | One of the targets that will be written to the `dist/parsers` directory is an executable named `yamlc` (which will
53 | only be built if the `TRIESTE_BUILD_PARSER_TOOLS` flag is set). It has the following usage information:
54 | 
55 | ```
56 | Usage: parsers/yamlc [OPTIONS] input [output]
57 | 
58 | Positionals:
59 |   input TEXT REQUIRED         Path to the input file 
60 |   output TEXT                 Path to the output file
61 | 
62 | Options:
63 |   -h,--help                   Print this help message and exit
64 |   -a,--ast TEXT               Output the AST (debugging for the reader/rewriter/writer workflows)
65 |   -w,--wf                     Enable well-formedness checks (slow)
66 |   --prettyprint               Pretty print the output (for JSON)
67 |   -m,--mode TEXT:{event,json,yaml}
68 |                               Output mode.
69 | ```
70 | 
71 | It can be used to convert YAML to one of three output formats:
72 | 
73 | - `event` a YAML event file
74 | - `json` a JSON file (if the YAML stream has more than one document, this JSON will have multiple values at the top level)
75 | - `yaml` a canonical YAML file. While there is no formal definition for canonical YAML, we have tried to produce a file that would be easily parseable by the majority of YAML parsers.
76 | 
77 | > **Note**
78 | > Our implementation is CRLF aware and compatible, in that if the input file contains CRLF line endings they
79 | > will be handled correctly and preserved in the output.


--------------------------------------------------------------------------------
/parsers/json/reader.cc:
--------------------------------------------------------------------------------
  1 | #include "internal.h"
  2 | 
  3 | namespace
  4 | {
  5 |   using namespace trieste;
  6 |   using namespace trieste::json;
  7 | 
  8 |   std::size_t
  9 |   invalid_tokens(Node node, const std::map<Token, std::string>& token_messages)
 10 |   {
 11 |     std::size_t changes = 0;
 12 | 
 13 |     node->traverse([&](Node& n) {
 14 |       if (n->type() == Error)
 15 |         return false;
 16 | 
 17 |       for (Node& child : *n)
 18 |       {
 19 |         if (token_messages.count(child->type()) > 0)
 20 |         {
 21 |           n->replace(child, err(child, token_messages.at(child->type())));
 22 |           changes += 1;
 23 |         }
 24 |       }
 25 |       return true;
 26 |     });
 27 | 
 28 |     return changes;
 29 |   }
 30 | 
 31 | 
    |   // Well-formedness definition given to the "groups" pass: Top holds value
    |   // tokens, Object and Array each hold a single ObjectGroup / ArrayGroup, and
    |   // those groups hold any sequence of value tokens and Comma tokens (plus
    |   // Colon tokens for ObjectGroup).
 32 |   // clang-format off
 33 |   inline const auto wf_groups =
 34 |     (Top <<= wf_value_tokens++[1])
 35 |     | (Object <<= ObjectGroup)
 36 |     | (Array <<= ArrayGroup)
 37 |     | (ObjectGroup <<= (wf_value_tokens | Colon | Comma)++)
 38 |     | (ArrayGroup <<= (wf_value_tokens | Comma)++)
 39 |     ;
 40 |   // clang-format on
 41 | 
    |   // Pattern for a single node of any JSON value type; shared by the rewrite
    |   // rules of both passes in this file.
 42 |   const auto ValueToken = T(Object, Array, String, Number, True, False, Null);
 43 | 
    |   /// Builds the "groups" pass, which turns the Group nodes found under Array
    |   /// and Object into ArrayGroup / ObjectGroup nodes and unwraps the File node
    |   /// under Top into its values.
    |   ///
    |   /// @param allow_multiple When false, more than one top-level value is
    |   ///        rewritten to an error.
    |   /// @return The pass definition, declared with `wf_groups` as its
    |   ///         well-formedness definition and a bottom-up direction.
 44 |   PassDef groups(bool allow_multiple)
 45 |   {
 46 |     PassDef groups = {
 47 |       "groups",
 48 |       wf_groups,
 49 |       dir::bottomup,
 50 |       {
    |         // A Group under an Array becomes an ArrayGroup with the same children.
 51 |         In(Array) * T(Group)[Group] >>
 52 |           [](Match& _) { return ArrayGroup << *_[Group]; },
 53 | 
    |         // A Group under an Object becomes an ObjectGroup with the same
    |         // children.
 54 |         In(Object) * T(Group)[Group] >>
 55 |           [](Match& _) { return ObjectGroup << *_[Group]; },
 56 | 
    |         // A File under Top whose only child is a Group made up solely of value
    |         // tokens is replaced by those values, or by an error when there are
    |         // none, or more than one while `allow_multiple` is false.
 57 |         In(Top) *
 58 |             (T(File) << ((T(Group) << (ValueToken++[Value] * End)) * End)) >>
 59 |           [allow_multiple](Match& _) {
 60 |             auto values = _[Value];
 61 |             if (values.empty())
 62 |             {
 63 |               return err("Invalid JSON");
 64 |             }
 65 | 
 66 |             if (values.size() > 1 && !allow_multiple)
 67 |             {
 68 |               return err("Multiple top-level values not allowed");
 69 |             }
 70 | 
 71 |             return Seq << _[Value];
 72 |           },
 73 | 
 74 |         // errors
    |         // Any File under Top that the rule above did not match is invalid.
 75 |         In(Top) * T(File)[File] >>
 76 |           [](Match& _) { return err(_[File], "Invalid JSON"); },
 77 | 
    |         // Colons are only meaningful between an object key and its value.
 78 |         In(ArrayGroup) * T(Colon)[Colon] >>
 79 |           [](Match& _) { return err(_[Colon], "Invalid colon in array"); },
 80 |       }};
 81 | 
 82 |     return groups;
 83 |   }
 84 | 
 85 |   PassDef structure()
 86 |   {
 87 |     PassDef structure = {
 88 |       "structure",
 89 |       json::wf,
 90 |       dir::bottomup,
 91 |       {
 92 |         In(ArrayGroup) * (Start * ValueToken[Value]) >>
 93 |           [](Match& _) { return (Value << _(Value)); },
 94 | 
 95 |         In(ArrayGroup) * (T(Value)[Lhs] * T(Comma) * ValueToken[Rhs]) >>
 96 |           [](Match& _) { return Seq << _(Lhs) << (Value << _(Rhs)); },
 97 | 
 98 |         In(Array) * (T(ArrayGroup) << (T(Value)++[Array] * End)) >>
 99 |           [](Match& _) { return Seq << _[Array]; },
100 | 
101 |         In(Array) * T(Value)[Value] >>
102 |           [](Match& _) { return _(Value)->front(); },
103 | 
104 |         In(ObjectGroup) *
105 |             (Start * T(String)[Lhs] * T(Colon) * ValueToken[Rhs]) >>
106 |           [](Match& _) {
107 |             Location key = _(Lhs)->location();
108 |             key.pos += 1;
109 |             key.len -= 2;
110 |             return (Member << (Key ^ key) << _(Rhs));
111 |           },
112 | 
113 |         In(ObjectGroup) *
114 |             (T(Member)[Member] * T(Comma) * T(String)[Lhs] * T(Colon) *
115 |              ValueToken[Rhs]) >>
116 |           [](Match& _) {
117 |             Location key = _(Lhs)->location();
118 |             key.pos += 1;
119 |             key.len -= 2;
120 |             return Seq << _(Member) << (Member << (Key ^ key) << _(Rhs));
121 |           },
122 | 
123 |         In(Object) * (T(ObjectGroup) << (T(Member)++[Object] * End)) >>
124 |           [](Match& _) { return Seq << _[Object]; },
125 |       }};
126 | 
127 |     structure.post([&](Node n) {
128 |       return invalid_tokens(
129 |         n, {{ObjectGroup, "Invalid object"}, {ArrayGroup, "Invalid array"}});
130 |     });
131 | 
132 |     return structure;
133 |   }
134 | }
135 | 
136 | namespace trieste::json
137 | {
    |   /// Creates the JSON reader: the parser followed by the "groups" and
    |   /// "structure" passes.
    |   ///
    |   /// @param allow_multiple Forwarded to the "groups" pass; when false, more
    |   ///        than one top-level value is an error.
138 |   Reader reader(bool allow_multiple)
139 |   {
140 |     return Reader{"json", {groups(allow_multiple), structure()}, parser()};
141 |   }
142 | }
147 | 


--------------------------------------------------------------------------------
/notes/dispatch.md:
--------------------------------------------------------------------------------
  1 | # Dispatch
  2 | 
  3 | For some `(call (selector f) (args (copy a) (copy b)))`, this could have been:
  4 | - `a.f(b)`
  5 | - `a f b`
  6 | - `f(a, b)`
  7 | 
  8 | We need to discover all possible functions `f` that could be called. This includes:
  9 | - Functions `f` that can be looked up from the source location (static dispatch).
 10 | - Functions `f` defined on the dynamic type of `a` (dynamic dispatch).
 11 | 
 12 | When calling `f`, we need to rank the possible functions and choose one.
 13 | 
 14 | ## Motivation
 15 | 
 16 | The supposition is that using the same syntax for static and dynamic dispatch allows for better EDSLs.
 17 | 
 18 | ## Extending Classes
 19 | 
 20 | For modularity, extensions should be scoped.
 21 | - You can import the extensions with `use <type>`.
 22 | 
 23 | When making a dynamic call:
 24 | - We know all extension methods that are in scope.
 25 |   - For each extended selector, build a map of `type descriptor -> function pointer`.
 26 | - If the selector has been extended:
 27 |   - Look up the function pointer in the selector map.
 28 |   - If there's no entry in the selector map, look up the function pointer in the receiver's type descriptor.
 29 |   - If no `class` that's been extended with this selector is a subtype of the receiver's static type, this check can be elided.
 30 | - Else look up the selector on the receiver's type descriptor. 
 31 | - If the static type of the receiver is concrete, all dynamic dispatch including extension can be turned into static dispatch.
 32 | 
 33 | When making a dynamic call:
 34 | - We know all extension methods that are in scope.
 35 |   - For each extended type, build an alternate local type descriptor.
 36 |   - Keep a local map of `type descriptor -> type descriptor`.
 37 | - If the selector has been extended for any type:
 38 |   - Look up the local type descriptor in the map.
 39 |     - This can be optimised such that for any given object, the local type descriptor is only looked up once.
 40 |   - If there's no entry in the map, use the receiver's type descriptor.
 41 | - Else use the receiver's type descriptor.
 42 | 
 43 | ```rust
 44 | type Eq =
 45 | {
 46 |   ==(self: Self, other: Self): Bool
 47 | }
 48 | 
 49 | class Foo
 50 | {
 51 |   var x: I32
 52 | }
 53 | 
 54 | Foo::==(self: Foo, other: Foo): Bool
 55 | {
 56 |   // No access to private members of Foo unless the enclosing scope has access
 57 |   // to private members of Foo.
 58 |   self.x == other.x
 59 | }
 60 | 
 61 | ```
 62 | 
 63 | ## Ranking
 64 | 
 65 | Static or Dynamic
 66 | - Dot notation and unscoped names are dynamic dispatch.
 67 | - Scoped names are static dispatch.
 68 | - Allow `::name` as a scoped name, meaning lookup only, no lookdown phase.
 69 | 
 70 | Arity
 71 | - Given a call of arity N, select only `f/N`.
 72 | - There must be only one `f/N` for a given `N`.
 73 | - Generate partial application functions.
 74 | - For f/3, generate f/2, f/1, f/0, if they don't exist.
 75 | 
 76 | No static type based overloading.
 77 | 
 78 | ## Type Inference
 79 | 
 80 | Dynamic:
 81 | ```ts
 82 | (bind r T1 (call (selector f/2 (typeargs)) (args a b)))
 83 | ```
 84 | Result:
 85 | - `a: T2`
 86 | - `b: T3`
 87 | - `T2 <: { f/2: (T2, T3)->T1 }`
 88 | 
 89 | Static:
 90 | ```ts
 91 | (bind r T1 (call (functionname f/2 (typeargs)) (args a b)))
 92 | ```
 93 | Result:
 94 | - `a: T2`
 95 | - `b: T3`
 96 | - `f/2: T4`
 97 | - `T4 <: (T2, T3)->T1`
 98 | 
 99 | ## Partial Application
100 | 
101 | ```ts
102 | class A
103 | {
104 |   class $f_0
105 |   {
106 |     create(): $f_0
107 |     {
108 |       new ()
109 |     }
110 | 
111 |     apply(self: $f_0, a: A): R
112 |     {
113 |       $f_1::create(a)
114 |     }
115 | 
116 |     apply(self: $f_0, a: A, b: B): R
117 |     {
118 |       $f_2::create(a, b)
119 |     }
120 | 
121 |     apply(self: $f_0, a: A, b: B, c: C): R
122 |     {
123 |       A::f(a, b, c)
124 |     }
125 |   }
126 | 
127 |   class $f_1
128 |   {
129 |     let a: A
130 | 
131 |     create(a: A): $f_1
132 |     {
133 |       new (a)
134 |     }
135 | 
136 |     apply(self: $f_1, b: B): R
137 |     {
138 |       $f_2::create(self.a, b)
139 |     }
140 | 
141 |     apply(self: $f_1, b: B, c: C): R
142 |     {
143 |       A::f(self.a, b, c)
144 |     }
145 |   }
146 | 
147 |   class $f_2
148 |   {
149 |     let a: A
150 |     let b: B
151 | 
152 |     create(a: A, b: B): $f_2
153 |     {
154 |       new (a, b)
155 |     }
156 | 
157 |     apply(self: $f_2, c: C): R
158 |     {
159 |       A::f(self.a, self.b, c)
160 |     }
161 |   }
162 | 
163 |   f(a: A, b: B, c: C): R
164 |   {
165 |     // ...
166 |   }
167 | 
168 |   f(a: A, b: B): $f_2
169 |   {
170 |     $f_2::create(a, b)
171 |   }
172 | 
173 |   f(a: A): $f_1
174 |   {
175 |     $f_1::create(a)
176 |   }
177 | 
178 |   f(): $f_0
179 |   {
180 |     $f_0::create()
181 |   }
182 | }
183 | ```
184 | 


--------------------------------------------------------------------------------
/test/intrusive_ptr_test.cc:
--------------------------------------------------------------------------------
  1 | #include <algorithm>
  2 | #include <cstdlib>
  3 | #include <iostream>
  4 | #include <thread>
  5 | #include <trieste/intrusive_ptr.h>
  6 | 
  7 | struct Dummy : public trieste::intrusive_refcounted<Dummy>
  8 | {
  9 |   size_t tag;
 10 | 
 11 |   Dummy(size_t tag_) : tag{tag_} {}
 12 | };
 13 | 
    | // intrusive_ptr to a Dummy, and the signature shared by all actions: each
    | // takes a pointer by value and returns the pointer to store back.
 14 | using ptr_t = trieste::intrusive_ptr<Dummy>;
 15 | using ActionFn = ptr_t(ptr_t);
 16 | 
    | // The operations a test thread can apply to one of its pointers;
    | // Behavior::action_idx indexes into this list.
 17 | std::vector<ActionFn*> actions{
    |   // Release: aborts if the pointer is already null, otherwise hands back
    |   // nullptr so the caller's slot is cleared.
 18 |   [](ptr_t ptr) -> ptr_t {
 19 |     if (ptr == nullptr)
 20 |     {
 21 |       std::cout << "Should only be setting to nullptr once per thread!"
 22 |                 << std::endl;
 23 |       std::abort();
 24 |     }
 25 |     return nullptr; // dec_ref on this ptr
 26 |   },
    |   // Move: moves the argument into a local and returns the local.
 27 |   [](ptr_t ptr) {
 28 |     auto tmp = std::move(ptr);
 29 |     return tmp;
 30 |   },
    |   // Copy: makes a short-lived copy of the argument, then returns the
    |   // argument itself.
 31 |   [](ptr_t ptr) {
 32 |     auto tmp = ptr;
 33 |     return ptr;
 34 |   },
    |   // Self-assignment: assigns the argument to itself through a reference,
    |   // then returns it.
 35 |   [](ptr_t ptr) {
 36 |     auto& alias = ptr;
 37 |     alias = ptr;
 38 |     return ptr;
 39 |   },
 40 | };
 41 | 
 42 | struct Behavior
 43 | {
 44 |   size_t action_idx;
 45 |   size_t ptr_idx;
 46 | 
 47 |   bool operator<(const Behavior& other) const
 48 |   {
 49 |     return std::pair{action_idx, ptr_idx} <
 50 |       std::pair{other.action_idx, other.ptr_idx};
 51 |   }
 52 | };
 53 | 
    | /// One test case: `ptr_count` refcounted objects and, for each thread, the
    | /// ordered list of behaviors that thread applies to its own pointers.
 54 | struct Test
 55 | {
    |   // Number of Dummy objects created for the test.
 56 |   size_t ptr_count;
    |   // One behavior list per thread; its size is the number of threads started.
 57 |   std::vector<std::vector<Behavior>> thread_behaviors;
 58 | 
    |   /// Creates the objects, gives every thread its own vector of pointers to
    |   /// them, runs all threads' behaviors concurrently, and aborts the process
    |   /// if any pointer is still non-null after the threads have been joined.
 59 |   void run() const
 60 |   {
 61 |     // Each thread gets its own copy of an array of N pointers, where every
 62 |     // thread shares refcounts with every other thread.
 63 |     std::vector<std::vector<ptr_t>> ptrs_per_thread;
 64 |     ptrs_per_thread.emplace_back();
 65 |     for (size_t i = 0; i < ptr_count; ++i)
 66 |     {
 67 |       ptrs_per_thread.front().push_back(ptr_t::make(i));
 68 |     }
 69 |     while (ptrs_per_thread.size() < thread_behaviors.size())
 70 |     {
 71 |       ptrs_per_thread.push_back(ptrs_per_thread.back());
 72 |     }
 73 | 
    |     // Thread i only touches ptrs_per_thread[i]; the referenced objects are
    |     // what the threads share.
 74 |     std::vector<std::thread> threads;
 75 |     for (size_t i = 0; i < thread_behaviors.size(); ++i)
 76 |     {
 77 |       threads.emplace_back([&, i]() {
 78 |         for (auto& behavior : thread_behaviors.at(i))
 79 |         {
 80 |           auto& ptr = ptrs_per_thread.at(i).at(behavior.ptr_idx);
 81 |           ptr = actions[behavior.action_idx](ptr);
 82 |         }
 83 |       });
 84 |     }
 85 | 
 86 |     for (auto& thread : threads)
 87 |     {
 88 |       thread.join();
 89 |     }
 90 | 
 91 |     // Sanity check: every thread should be setting their ptr to nullptr at some
 92 |     // point
 93 |     for (const auto& ptrs : ptrs_per_thread)
 94 |     {
 95 |       for (const auto& ptr : ptrs)
 96 |       {
 97 |         if (ptr != nullptr)
 98 |         {
 99 |           std::cout << "non-null ptr!" << std::endl;
100 |           std::abort();
101 |         }
102 |       }
103 |     }
104 |   }
105 | };
106 | 
107 | std::vector<Test>
108 | build_tests(size_t ptr_count, size_t thread_count, size_t permutations)
109 | {
110 |   std::vector<Behavior> all_behaviors;
111 |   for (size_t action_idx = 0; action_idx < actions.size(); ++action_idx)
112 |   {
113 |     for (size_t ptr_idx = 0; ptr_idx < ptr_count; ++ptr_idx)
114 |     {
115 |       all_behaviors.push_back({
116 |         action_idx,
117 |         ptr_idx,
118 |       });
119 |     }
120 |   }
121 | 
122 |   std::vector<Test> tests = {{ptr_count, {}}};
123 |   for (size_t i = 0; i < thread_count; ++i)
124 |   {
125 |     std::vector<Test> next_tests;
126 |     for (const auto& test : tests)
127 |     {
128 |       // Allow adding some extra permutations if you think you're stuck at the
129 |       // first few.
130 |       for (size_t permutation_idx = 0; permutation_idx < permutations;
131 |            ++permutation_idx)
132 |       {
133 |         auto mod_test = test;
134 |         mod_test.thread_behaviors.push_back(all_behaviors);
135 |         next_tests.push_back(mod_test);
136 | 
137 |         // Unconditionally permute the behaviors. We're not looking for total
138 |         // coverage, just variety.
139 |         std::next_permutation(all_behaviors.begin(), all_behaviors.end());
140 |       }
141 |     }
142 |     tests = next_tests;
143 |   }
144 |   return tests;
145 | }
146 | 
147 | // The intention of this test is to do a lot of work to refcounts, while under
148 | // some kind of thread sanitizer. Changing the intrusive_ptr implementation to
149 | // use non-atomic refcounting should make Clang's thread sanitizer unhappy, for
150 | // instance, whereas with the current thread-safe implementation everything
151 | // _should_ be fine.
152 | int main()
153 | {
154 |   // Be very careful when increasing these numbers... they can quickly eat up
155 |   // your memory and time.
156 |   auto tests = build_tests(3, 6, 4);
157 |   std::cout << "Found " << tests.size() << " permutations." << std::endl;
158 | 
159 |   for (auto test : tests)
160 |   {
161 |     test.run();
162 |   }
163 | 
164 |   std::cout << "Ran " << tests.size() << " permutations." << std::endl;
165 |   return 0;
166 | }
167 | 


--------------------------------------------------------------------------------
/parsers/json/README.md:
--------------------------------------------------------------------------------
  1 | # JSON
  2 | 
  3 | The files in this directory provide a [RFC 8259](https://www.rfc-editor.org/rfc/rfc8259) compliant JSON language implementation using Trieste. It is able to both read and write JSON files.
  4 | 
  5 | We are grateful to the maintainers of the [JSONTestSuite](https://github.com/nst/JSONTestSuite), which we use to ensure compliance with RFC 8259.
  6 | 
  7 | ## Getting Started
  8 | 
  9 | To use Trieste JSON in your own codebase you will need to configure your CMake project with the `TRIESTE_BUILD_PARSERS` flag set. Trieste JSON definitions are in the `trieste::json` namespace, and to access them you need to include the `trieste/json.h` header. The JSON implementation is based around the following well-formedness definition (copied here from the header file):
 10 | 
 11 | ```cpp
 12 | inline const auto wf_value_tokens =
 13 |   Object | Array | String | Number | True | False | Null;
 14 | 
 15 | // clang-format off
 16 | inline const auto wf =
 17 |   (Top <<= wf_value_tokens++[1])
 18 |   | (Object <<= Member++)
 19 |   | (Member <<= String * (Value >>= wf_value_tokens))
 20 |   | (Array <<= wf_value_tokens++)
 21 |   ;
 22 | // clang-format on
 23 | ```
 24 | 
 25 | The language implementation exposes the following helpers:
 26 | 
 27 | - `reader()` - `Reader` that parses any valid JSON file and produces an AST that conforms to `json::wf`. Optionally, this reader will read non-compliant JSON files which contain more than one JSON value at the top level of the file.
 28 | - `writer()` - `Writer` that takes a JSON AST that conforms to `json::wf` and produces a JSON file.
 29 | 
 30 | ## Creating JSON Nodes
 31 | 
 32 | The public API exposes several methods to help create JSON documents:
 33 | 
 34 | ```cpp
 35 | Node object = json::object(
 36 |     {json::member("key_a_str", "value"),
 37 |      json::member("key_b_number", 42),
 38 |      json::member("key_c_bool", json::boolean(true)),
 39 |      json::member("key_d_null", json::null()),
 40 |      json::member("key_e_array", json::array({json::value(1), json::value(2)})),
 41 |      json::member(
 42 |        "key_f_object",
 43 |        json::object({json::member("key", json::value("value"))}))});
 44 | 
 45 | std::cout << json::to_string(object) << std::endl;
 46 | // {"key_a_str":"value","key_b_number":42,"key_c_bool":true,"key_d_null":null,"key_e_array":[1,2],"key_f_object":{"key":"value"}}
 47 | 
 48 | Nodes elements;
 49 | elements.push_back(json::value(1));
 50 | elements.push_back(json::value("two"));
 51 | elements.push_back(json::boolean(false));
 52 | elements.push_back(json::null());
 53 | Node array = json::array(elements.begin(), elements.end());
 54 | ```
 55 | 
 56 | ## Reading values
 57 | There are also methods to get C++ values back out of nodes, such as:
 58 | 
 59 | - `optional<double> get_number(const Node&)`
 60 | 
 61 |    Attempts to get a number out of a `Node`. If the node is not of type `Number`, or cannot be parsed as a double, returns `nullopt`.
 62 | - `optional<bool> get_boolean(const Node&)`
 63 | 
 64 |    Attempts to get a boolean value out of a `Node`. If the node is not of type `True` or `False`, returns `nullopt`.
 65 | - `optional<Location> get_string(const Node&)`
 66 | 
 67 |    Attempts to get a string out of a `Node`. This will remove the double quotation marks. If the node is not of type `String`, returns `nullopt`.
 68 | 
 69 | You can also use a [JSON Pointer](https://www.rfc-editor.org/rfc/rfc6901) to select nodes out of a document using the `select()` function:
 70 | 
 71 | ```cpp
 72 | std::cout << "c: " << json::select(object, {"/key_c_bool"}).value();
 73 | // c: (json-true)
 74 | 
 75 | std::cout << "a: " << json::select_string(object, {"/key_a_str"}).value() << std::endl;
 76 | // a: value
 77 | 
 78 | std::cout << "e1[1]: " << json::select_number(object, {"/key_e_array/1"}).value() << std::endl;
 79 | // e1[1]: 2
 80 | 
 81 | std::cout << "missingkey: " << json::select(object, {"/missingkey"});
 82 | // missingkey: missing key: (error (errormsg 42:Member does not exist with key: missingkey) (errorast ... )
 83 | ```
 84 | 
 85 | Note the `select_<type>` functions which mirror those above and perform a select and then a `get_<type>` call.
 86 | 
 87 | ## JSON Patch
 88 | The `patch(const Node&, const Node&)` function, which takes the document and the patch (both as JSON nodes), provides support for [JSON Patch](https://www.rfc-editor.org/rfc/rfc6902).
 89 | The implementation is fully compliant with RFC 6902 and we thank the maintainers of the
 90 | [JSON Patch Test Suite](https://github.com/json-patch/json-patch-tests), which we have integrated with our CI and pass in full.
 91 | 
 92 | ```cpp
 93 | auto reader = json::reader();
 94 | auto doc =
 95 |       reader.synthetic(R"json({"foo": {"bar": {"baz": [{"boo": "net"}]}}})json")
 96 |         .read()
 97 |         .ast->front();
 98 | auto patch = reader
 99 |                 .synthetic(R"json([
100 |     {"op": "copy", "from": "/foo", "path": "/bak"},
101 |     {"op": "replace", "path": "/foo/bar/baz/0/boo", "value": "qux"}
102 |   ])json")
103 |                 .read()
104 |                 .ast->front();
105 | 
106 | auto patched = json::patch(doc, patch);
107 | std::cout << "patched: " << json::to_string(patched) << std::endl;
108 | // patched: {"foo":{"bar":{"baz":[{"boo":"qux"}]}},"bak":{"bar":{"baz":[{"boo":"net"}]}}}
109 | 


--------------------------------------------------------------------------------
/include/trieste/regex.h:
--------------------------------------------------------------------------------
  1 | // Copyright Microsoft and Project Verona Contributors.
  2 | // SPDX-License-Identifier: MIT
  3 | #pragma once
  4 | 
  5 | #include "logging.h"
  6 | #include "ast.h"
  7 | 
  8 | #include <re2/re2.h>
  9 | 
 10 | namespace trieste
 11 | {
 12 |   class REMatch // Capture groups of one anchored regex match, kept as StringPieces and as Locations.
 13 |   {
 14 |     friend class REIterator;
 15 | 
 16 |   private:
 17 |     std::vector<re2::StringPiece> match;
 18 |     std::vector<Location> locations;
 19 |     size_t matches = 0; // group count (whole match + captures) of the last regex tried
 20 | 
 21 |     bool match_regexp(const RE2& regex, re2::StringPiece& sp, Source& source)
 22 |     { // Matches regex at the start of sp and records each group's position within source.
 23 |       matches = regex.NumberOfCapturingGroups() + 1;
 24 | 
 25 |       if (match.size() < matches)
 26 |         match.resize(matches);
 27 | 
 28 |       if (locations.size() < matches)
 29 |         locations.resize(matches);
 30 | 
 31 |       auto matched = regex.Match(
 32 |         sp,
 33 |         0,
 34 |         sp.length(),
 35 |         re2::RE2::ANCHOR_START,
 36 |         match.data(),
 37 |         static_cast<int>(matches));
 38 | 
 39 |       if (!matched)
 40 |       {
 41 |         return false;
 42 |       }
 43 | 
 44 |       for (size_t i = 0; i < matches; i++)
 45 |       {
 46 |         locations[i] = {
 47 |           source,
 48 |           static_cast<size_t>(match.at(i).data() - source->view().data()), // byte offset within source
 49 |           match.at(i).size()};
 50 |       }
 51 | 
 52 |       return true;
 53 |     }
 54 | 
 55 |   public:
 56 |     REMatch(size_t max_capture = 0)
 57 |     { // Pre-sizes storage for the whole match plus max_capture capture groups.
 58 |       match.resize(max_capture + 1);
 59 |       locations.resize(max_capture + 1);
 60 |     }
 61 | 
 62 |     const Location& at(size_t index = 0) const
 63 |     { // Location of group index; an out-of-range index falls back to group 0 (the whole match).
 64 |       if (index >= matches)
 65 |         return locations.at(0);
 66 | 
 67 |       return locations.at(index);
 68 |     }
 69 | 
 70 |     template<typename T>
 71 |     T parse(size_t index = 0) const
 72 |     { // Converts the text of group index to a T; returns T() when index is out of range.
 73 |       if (index >= matches)
 74 |         return T();
 75 | 
 76 |       T t{};
 77 |       RE2::Arg arg(&t);
 78 |       auto& m = match.at(index);
 79 |       // A failed parse yields T(), like an out-of-range index, never an indeterminate value.
 80 |       return arg.Parse(m.data(), m.size()) ? t : T();
 81 |     }
 82 |   };
 83 | 
 84 |   class REIterator // Cursor over a Source's text that consumes anchored regex matches from the front.
 85 |   {
 86 |   private:
 87 |     Source source;
 88 |     re2::StringPiece sp; // the not-yet-consumed remainder of source's text
 89 | 
 90 |   public:
 91 |     REIterator(Source source_) : source(source_), sp(source_->view()) {}
 92 | 
 93 |     bool empty() const
 94 |     { // True once all of the text has been consumed or skipped.
 95 |       return sp.empty();
 96 |     }
 97 | 
 98 |     bool consume(const RE2& regex, REMatch& m)
 99 |     { // On a match at the current position, fills m and advances past the whole match.
100 |       if (!m.match_regexp(regex, sp, source))
101 |         return false;
102 | 
103 |       sp.remove_prefix(m.at(0).len);
104 |       return true;
105 |     }
106 | 
107 |     Location current() const
108 |     { // One-character Location at the current position, used for error reporting.
109 |       return {
110 |         source, static_cast<size_t>(sp.data() - source->view().data()), 1};
111 |     }
112 | 
113 |     void skip(size_t count = 1)
114 |     { // Advances by count characters, stopping at the end of the text.
115 |       sp.remove_prefix(count < sp.size() ? count : sp.size()); // clamp: removing more than size() is undefined
116 |     }
117 |   };
118 | 
119 |   inline Node build_ast(Source source, size_t pos)
120 |   { // Parses the textual AST in source starting at offset pos; returns the root node, or an empty Node after logging an error.
121 |     auto hd = RE2("[[:space:]]*\\([[:space:]]*([^[:space:]\\(\\)]*)"); // node head: "(" then the captured type name
122 |     auto st = RE2("[[:space:]]*\\{[^\\}]*\\}"); // symbol table: "{" ... "}" with no "}" inside
123 |     auto id = RE2("[[:space:]]*([[:digit:]]+):"); // netstring length prefix: captured digits then ":"
124 |     auto tl = RE2("[[:space:]]*\\)"); // node tail: ")"
125 | 
126 |     REMatch re_match(2);
127 |     REIterator re_iterator(source);
128 |     re_iterator.skip(pos); // begin parsing at offset pos
129 | 
130 |     Node top;
131 |     Node ast; // node currently being filled with children
132 | 
133 |     while (!re_iterator.empty())
134 |     {
135 |       // Find the type of the node. If we didn't find a node, it's an error.
136 |       if (!re_iterator.consume(hd, re_match))
137 |       {
138 |         auto loc = re_iterator.current();
139 |         logging::Error() << loc.origin_linecol() << ": expected node"
140 |                          << std::endl
141 |                          << loc.str() << std::endl;
142 |         return {};
143 |       }
144 | 
145 |       // If we don't have a valid node type, it's an error.
146 |       auto type_loc = re_match.at(1);
147 |       auto type = detail::find_token(type_loc.view());
148 | 
149 |       if (type == Invalid)
150 |       {
151 |         logging::Error() << type_loc.origin_linecol() << ": unknown type"
152 |                          << std::endl
153 |                          << type_loc.str() << std::endl;
154 |         return {};
155 |       }
156 | 
157 |       // Find the source location of the node as a netstring.
158 |       auto ident_loc = type_loc; // default when no netstring follows the type name
159 | 
160 |       if (re_iterator.consume(id, re_match))
161 |       {
162 |         auto len = re_match.parse<size_t>(1);
163 |         ident_loc =
164 |           Location(source, re_match.at().pos + re_match.at().len, len); // the len characters right after the ":"
165 |         re_iterator.skip(len);
166 |       }
167 | 
168 |       // Push the node into the AST.
169 |       auto node = NodeDef::create(type, ident_loc);
170 | 
171 |       if (ast)
172 |         ast->push_back(node);
173 |       else
174 |         top = node;
175 | 
176 |       ast = node;
177 | 
178 |       // Skip the symbol table.
179 |       re_iterator.consume(st, re_match);
180 | 
181 |       // `)` ends the node. Otherwise, we'll add children to this node.
182 |       while (re_iterator.consume(tl, re_match))
183 |       {
184 |         auto parent = ast->parent();
185 | 
186 |         if (!parent)
187 |           return ast; // closed the node that has no parent: the tree is complete
188 | 
189 |         ast = parent;
190 |       }
191 |     }
192 | 
193 |     // We never finished the AST, so it's an error.
194 |     auto loc = re_iterator.current();
195 |     logging::Error() << loc.origin_linecol() << ": incomplete AST" << std::endl
196 |                      << loc.str() << std::endl;
197 |     return {};
198 |   }
199 | }
200 | 


--------------------------------------------------------------------------------
/parsers/include/trieste/yaml.h:
--------------------------------------------------------------------------------
  1 | // Copyright Microsoft and Project Verona Contributors.
  2 | // SPDX-License-Identifier: MIT
  3 | #pragma once
  4 | 
  5 | #include "trieste/trieste.h"
  6 | 
  7 | namespace trieste
  8 | {
  9 |   namespace yaml
 10 |   {
 11 |     using namespace wf::ops;
 12 | 
 13 |     inline const auto Stream =
 14 |       TokenDef("yaml-stream", flag::symtab | flag::defbeforeuse);
 15 |     inline const auto Directives = TokenDef("yaml-directives");
 16 |     inline const auto UnknownDirective =
 17 |       TokenDef("yaml-unknowndirective", flag::print);
 18 |     inline const auto VersionDirective =
 19 |       TokenDef("yaml-versiondirective", flag::print);
 20 |     inline const auto TagDirective =
 21 |       TokenDef("yaml-tagdirective", flag::lookup | flag::shadowing);
 22 |     inline const auto TagPrefix = TokenDef("yaml-tagprefix", flag::print);
 23 |     inline const auto TagHandle = TokenDef("yaml-taghandle", flag::print);
 24 |     inline const auto Documents = TokenDef("yaml-documents");
 25 |     inline const auto Document =
 26 |       TokenDef("yaml-document", flag::symtab | flag::defbeforeuse);
 27 |     inline const auto DocumentStart = TokenDef("yaml-docstart", flag::print);
 28 |     inline const auto DocumentEnd = TokenDef("yaml-docend", flag::print);
 29 |     inline const auto Sequence = TokenDef("yaml-sequence");
 30 |     inline const auto Mapping = TokenDef("yaml-mapping");
 31 |     inline const auto MappingItem = TokenDef("yaml-mappingitem");
 32 |     inline const auto Key = TokenDef("yaml-key");
 33 |     inline const auto Value = TokenDef("yaml-value", flag::print);
 34 |     inline const auto Int = TokenDef("yaml-int", flag::print);
 35 |     inline const auto Hex = TokenDef("yaml-hex", flag::print);
 36 |     inline const auto Float = TokenDef("yaml-float", flag::print);
 37 |     inline const auto Null = TokenDef("yaml-null");
 38 |     inline const auto True = TokenDef("yaml-true");
 39 |     inline const auto False = TokenDef("yaml-false");
 40 |     inline const auto SingleQuote = TokenDef("yaml-singlequote");
 41 |     inline const auto DoubleQuote = TokenDef("yaml-doublequote");
 42 |     inline const auto BlockLine = TokenDef("yaml-blockline", flag::print);
 43 |     inline const auto EmptyLine = TokenDef("yaml-emptyline");
 44 |     inline const auto Literal = TokenDef("yaml-literal");
 45 |     inline const auto Folded = TokenDef("yaml-folded");
 46 |     inline const auto AbsoluteIndent =
 47 |       TokenDef("yaml-absoluteindent", flag::print);
 48 |     inline const auto ChompIndicator =
 49 |       TokenDef("yaml-chomp-indicator", flag::print);
 50 |     inline const auto Lines = TokenDef("yaml-lines");
 51 |     inline const auto Plain = TokenDef("yaml-plain");
 52 |     inline const auto AnchorValue = TokenDef("yaml-anchorvalue", flag::lookup);
 53 |     inline const auto Anchor = TokenDef("yaml-anchor", flag::print);
 54 |     inline const auto TagValue = TokenDef("yaml-tagvalue");
 55 |     inline const auto TagName = TokenDef("yaml-tagname", flag::print);
 56 |     inline const auto Alias = TokenDef("yaml-alias", flag::print);
 57 |     inline const auto Empty = TokenDef("yaml-empty");
 58 |     inline const auto FlowMapping = TokenDef("yaml-flowmapping");
 59 |     inline const auto FlowMappingItem = TokenDef("yaml-flowmappingitem");
 60 |     inline const auto FlowSequence = TokenDef("yaml-flowsequence");
 61 | 
 62 |     inline const auto wf_tokens = Mapping | Sequence | Value | Int | Float |
 63 |       True | False | Hex | Null | SingleQuote | DoubleQuote | Plain |
 64 |       AnchorValue | Alias | TagValue | Literal | Folded | Empty | FlowMapping |
 65 |       FlowSequence;
 66 | 
 67 |     inline const auto wf_flow_tokens =
 68 |       wf_tokens - (Literal | Folded | Mapping | Sequence);
 69 | 
 70 |     // clang-format off
 71 |     inline const auto wf =
 72 |       (Top <<= Stream)
 73 |       | (Stream <<= Directives * Documents)
 74 |       | (Documents <<= Document++)
 75 |       | (Document <<= Directives * DocumentStart * (Value >>= wf_tokens) * DocumentEnd)
 76 |       | (Directives <<= (TagDirective | VersionDirective | UnknownDirective)++)
 77 |       | (TagDirective <<= TagPrefix * TagHandle)[TagPrefix]
 78 |       | (Mapping <<= MappingItem++[1])
 79 |       | (MappingItem <<= (Key >>= wf_tokens) * (Value >>= wf_tokens))
 80 |       | (FlowMapping <<= FlowMappingItem++)
 81 |       | (FlowMappingItem <<= (Key >>= wf_flow_tokens) * (Value >>= wf_flow_tokens))
 82 |       | (AnchorValue <<= Anchor * (Value >>= wf_tokens))[Anchor]
 83 |       | (TagValue <<= TagPrefix * TagName * (Value >>= wf_tokens))
 84 |       | (Sequence <<= wf_tokens++[1])
 85 |       | (FlowSequence <<= wf_flow_tokens++)
 86 |       | (SingleQuote <<= (BlockLine|EmptyLine)++[1])
 87 |       | (DoubleQuote <<= (BlockLine|EmptyLine)++[1])
 88 |       | (Literal <<= AbsoluteIndent * ChompIndicator * Lines)
 89 |       | (Folded <<= AbsoluteIndent * ChompIndicator * Lines)
 90 |       | (Lines <<= (BlockLine|EmptyLine)++)
 91 |       | (Plain <<= (BlockLine|EmptyLine)++[1])
 92 |       ;
 93 |     // clang-format on
 94 | 
 95 |     Reader reader();
 96 |     Writer event_writer(
 97 |       const std::filesystem::path& path, const std::string& newline = "\n");
 98 |     Writer writer(
 99 |       const std::filesystem::path& path,
100 |       const std::string& newline = "\n",
101 |       std::size_t indent = 2,
102 |       bool canonical = false);
103 |     Rewriter to_json();
104 |     std::ostream& block_to_string(
105 |       std::ostream& os, const Node& node, bool raw_quotes = false);
106 |     std::ostream& quote_to_string(
107 |       std::ostream& os, const Node& quote, bool raw_quotes = false);
108 |     std::string to_string(
109 |       Node yaml,
110 |       const std::string& newline = "\n",
111 |       std::size_t indent = 2,
112 |       bool canonical = false);
113 |   }
114 | }
115 | 


--------------------------------------------------------------------------------
/parsers/json/writer.cc:
--------------------------------------------------------------------------------
  1 | #include "internal.h"
  2 | #include "trieste/wf.h"
  3 | 
  4 | namespace
  5 | {
  6 |   using namespace trieste;
  7 |   using namespace trieste::json;
  8 | 
  9 |   struct WriteSettings // Formatting options shared by the write_* helpers below.
 10 |   {
 11 |     bool prettyprint; // emit newlines, indentation and a space after ':'
 12 |     bool sort_keys; // write object members in sorted key order
 13 |     const std::string& indent; // text appended to the indentation once per nesting level
 14 |   };
 15 | 
 16 |   void write_value(
 17 |     std::ostream& os,
 18 |     const WriteSettings& settings,
 19 |     const std::string& indent,
 20 |     const Node& value);
 21 | 
 22 |   void write_object(
 23 |     std::ostream& os,
 24 |     const WriteSettings& settings,
 25 |     const std::string& indent,
 26 |     const Node& object)
 27 |   { // Writes object as JSON text to os; indent prefixes the closing brace, members go one level deeper.
 28 |     if (object->empty())
 29 |     {
 30 |       os << "{}";
 31 |       return;
 32 |     }
 33 | 
 34 |     std::string new_indent = indent + settings.indent;
 35 |     os << "{";
 36 |     if (settings.prettyprint)
 37 |     {
 38 |       os << std::endl;
 39 |     }
 40 | 
 41 |     std::vector<Node> members; // the members to write, in output order
 42 |     if (settings.sort_keys)
 43 |     {
 44 |       std::vector<Location> keys;
 45 |       std::transform(
 46 |         object->begin(),
 47 |         object->end(),
 48 |         std::back_inserter(keys),
 49 |         [](Node member) { return (member / Key)->location(); });
 50 |       std::sort(keys.begin(), keys.end());
 51 |       for (const Location& key : keys)
 52 |       {
 53 |         Nodes defs = object->lookdown(key); // NOTE(review): a repeated key is looked up once per occurrence - confirm its members are not emitted more than once
 54 |         members.insert(members.end(), defs.begin(), defs.end());
 55 |       }
 56 |     }
 57 |     else
 58 |     {
 59 |       members.insert(members.end(), object->begin(), object->end());
 60 |     }
 61 | 
 62 |     for (std::size_t i = 0; i < members.size(); ++i)
 63 |     {
 64 |       Node member = members[i];
 65 |       assert(member == Member);
 66 | 
 67 |       if (settings.prettyprint)
 68 |       {
 69 |         os << new_indent;
 70 |       }
 71 | 
 72 |       write_value(os, settings, new_indent, member / Key);
 73 |       os << ":";
 74 | 
 75 |       if (settings.prettyprint)
 76 |       {
 77 |         os << " ";
 78 |       }
 79 | 
 80 |       write_value(os, settings, new_indent, member / Value);
 81 | 
 82 |       if (i < members.size() - 1) // separator count must follow what is actually written, not object->size()
 83 |       {
 84 |         os << ",";
 85 |       }
 86 | 
 87 |       if (settings.prettyprint)
 88 |       {
 89 |         os << std::endl;
 90 |       }
 91 |     }
 92 | 
 93 |     if (settings.prettyprint)
 94 |     {
 95 |       os << indent;
 96 |     }
 97 | 
 98 |     os << "}";
 99 |   }
100 | 
101 |   void write_array(
102 |     std::ostream& os,
103 |     const WriteSettings& settings,
104 |     const std::string& indent,
105 |     const Node& array)
106 |   { // Writes array as JSON text to os; indent prefixes the closing bracket, elements go one level deeper.
107 |     if (array->empty())
108 |     {
109 |       os << "[]";
110 |       return;
111 |     }
112 | 
113 |     std::string new_indent = indent + settings.indent;
114 |     os << "[";
115 |     if (settings.prettyprint)
116 |     {
117 |       os << std::endl;
118 |     }
119 |     for (std::size_t i = 0; i < array->size(); ++i)
120 |     {
121 |       Node element = array->at(i);
122 |       if (settings.prettyprint)
123 |       {
124 |         os << new_indent;
125 |       }
126 |       write_value(os, settings, new_indent, element);
127 | 
128 |       if (i < array->size() - 1) // comma after every element except the last
129 |       {
130 |         os << ",";
131 |       }
132 | 
133 |       if (settings.prettyprint)
134 |       {
135 |         os << std::endl;
136 |       }
137 |     }
138 |     if (settings.prettyprint)
139 |     {
140 |       os << indent;
141 |     }
142 |     os << "]";
143 |   }
144 | 
145 |   void write_value(
146 |     std::ostream& os,
147 |     const WriteSettings& settings,
148 |     const std::string& indent,
149 |     const Node& value)
150 |   { // Writes one node to os, dispatching on its type; throws std::runtime_error for an unexpected type.
151 |     if (value->in({Number, String, True, False, Null}))
152 |     {
153 |       os << value->location().view(); // scalars are written exactly as their location text
154 |     }
155 |     else if (value == Key)
156 |     {
157 |       os << '"' << value->location().view() << '"'; // key text is wrapped in double quotes as-is
158 |     }
159 |     else if (value == Object)
160 |     {
161 |       write_object(os, settings, indent, value);
162 |     }
163 |     else if (value == Array)
164 |     {
165 |       write_array(os, settings, indent, value);
166 |     }
167 |     else if (value == Top)
168 |     {
169 |       write_value(os, settings, indent, value->front()); // only the first child of Top is written
170 |     }
171 |     else
172 |     {
173 |       std::ostringstream message;
174 |       message << "Unexpected node type: " << value->type().str();
175 |       throw std::runtime_error(message.str());
176 |     }
177 |   }
178 | 
179 |   const auto ValueToken = T(Object, Array, String, Number, True, False, Null);
180 | 
181 |   // clang-format off
182 |   inline const auto wf_to_file =
183 |     json::wf
184 |     | (Top <<= File)
185 |     | (File <<= Path * Contents)
186 |     | (Contents <<= wf_value_tokens++[1])
187 |     ;
188 |   // clang-format on
189 | 
190 |   PassDef to_file(const std::filesystem::path& path)
191 |   { // Builds the "to_file" pass: wraps the values under Top in a File node carrying path and Contents.
192 |     return {
193 |       "to_file",
194 |       wf_to_file,
195 |       dir::bottomup | dir::once,
196 |       {
197 |         In(Top) * ValueToken++[Value] >>
198 |           [path](Match& _) { // path is captured by value, so the rule does not refer back to the caller's argument
199 |             return File << (Path ^ path.string()) << (Contents << _[Value]);
200 |           },
201 |       }};
202 |   }
203 | }
204 | 
205 | namespace trieste
206 | {
207 |   namespace json
208 |   {
209 |     Writer writer(
210 |       const std::filesystem::path& path,
211 |       bool prettyprint,
212 |       bool sort_keys,
213 |       const std::string& indent)
214 |     { // Builds the "json" Writer: runs the to_file pass for path, then writes each value of the contents node.
215 |       return Writer(
216 |         "json",
217 |         {to_file(path)},
218 |         json::wf,
219 |         [prettyprint, sort_keys, indent](std::ostream& os, Node contents) { // settings are captured by value
220 |           for (const Node& value : *contents)
221 |           {
222 |             write_value(os, {prettyprint, sort_keys, indent}, "", value);
223 |             os << std::endl; // each value ends with a newline
224 |           }
225 |           return true;
226 |         });
227 |     }
228 | 
229 |     std::string to_string(
230 |       Node json, bool prettyprint, bool sort_keys, const std::string& indent)
231 |     { // Renders json to a string using the given formatting settings, starting with no indentation.
232 |       WFContext context(json::wf); // NOTE(review): presumably scopes json::wf for the node lookups done while writing - confirm
233 |       std::ostringstream os;
234 |       write_value(os, {prettyprint, sort_keys, indent}, "", json);
235 |       return os.str();
236 |     }
237 |   }
238 | }
239 | 


--------------------------------------------------------------------------------
/samples/shrubbery/reader.cc:
--------------------------------------------------------------------------------
  1 | #include "shrubbery.h"
  2 | #include "wf.h"
  3 | 
  4 | namespace shrubbery
  5 | {
  6 |   auto err(const NodeRange& r, const std::string& msg)
  7 |   {
  8 |     return Error << (ErrorMsg ^ msg) << (ErrorAst << r);
  9 |   }
 10 | 
 11 |   auto err(Node node, const std::string& msg)
 12 |   {
 13 |     return Error << (ErrorMsg ^ msg) << (ErrorAst << node);
 14 |   }
 15 | 
 16 |   PassDef check_parsing()
 17 |   {
 18 |     return {
 19 |       "check parsing",
 20 |       wf_check_parser,
 21 |       dir::bottomup | dir::once,
 22 |       {
 23 |         // An empty block followed by alternatives is ignored
 24 |         (T(Group) << (!T(Block))++[Terms] * (T(Block) << End)) * (T(Group) << T(Alt))[Alt] >>
 25 |           [](Match& _) { return Seq << (Group << _[Terms])
 26 |                                     << _(Alt);
 27 |                        },
 28 | 
 29 |         (T(Block) << (T(Group)[Group] << T(Alt))) >>
 30 |           [](Match& _) { return Seq << *_[Group]; },
 31 | 
 32 |         // An empty group caused by a semicolon is ignored
 33 |         (In(Semi) * ((T(Group) << End))) >>
 34 |           [](Match&) { return Seq ^ ""; },
 35 | 
 36 |         // Commas must separate (non-empty) groups
 37 |         (T(Comma) << End)[Comma] >>
 38 |           [](Match& _) { return err(_[Comma], "Comma does not separate groups"); },
 39 | 
 40 |         (In(Comma) * (T(Group) << End)[Group]) >>
 41 |           [](Match& _) { return err(_[Group], "Comma does not separate groups"); },
 42 | 
 43 |         // A comma can only appear inside a paren, brace or bracket
 44 |         ((--In(Paren, Brace, Bracket)) * T(Comma)[Comma]) >>
 45 |           [](Match& _) { return err(_[Comma], "Commas can only separate groups in parentheses/braces/brackets"); },
 46 | 
 47 |         // Opener-closer pairs must have comma-separated groups
 48 |         (In(Paren, Brace, Bracket) * Any * Any)[Group] >>
 49 |           [](Match& _) { return err(_[Group], "Groups in parentheses/braces/brackets must be comma separated"); },
 50 | 
 51 |         // Opener-closer pairs cannot have semicolon-separated groups
 52 |         (In(Paren, Brace, Bracket, Comma) * T(Semi))[Semi] >>
 53 |           [](Match& _) { return err(_[Semi], "Semicolons cannot separate groups in parentheses/brackets/braces. Use commas."); },
 54 | 
 55 |         // Blocks cannot be empty, except immediately under opener-closer pairs
 56 |         // and as the only term in a top-level group
 57 |         (--(In(Paren, Brace, Bracket, Comma, File))) * ((T(Group) << ((!T(Block))++ * (T(Block)[Block] << End)))) >>
 58 |           [](Match& _) { return err(_[Block], "Blocks may not be empty"); },
 59 | 
 60 |         In(File) * (T(Group) << (((!T(Block)) * (!T(Block))++ * (T(Block)[Block] << End) * End))) >>
 61 |           [](Match& _) { return err(_[Block], "Blocks may not be empty"); },
 62 | 
 63 |         // Alternatives cannot be empty
 64 |         T(Alt)[Alt] << End >>
 65 |           [](Match& _) { return err(_[Alt], "Alternatives may not be empty"); },
 66 | 
 67 |       }
 68 |     };
 69 |   }
 70 | 
 71 |   // Alternatives belong to the preceding Group and keep their contents in
 72 |   // blocks
 73 |   PassDef merge_alternatives()
 74 |   {
 75 |     return {
 76 |       "merge alternatives",
 77 |       wf_alternatives,
 78 |       dir::bottomup | dir::once,
 79 |       {
 80 |         // Alternatives keep their contents in a block
 81 |         (T(Alt)[Alt] << !T(Block)) >>
 82 |           [](Match& _) { return Alt << (Block << *_[Alt]); },
 83 | 
 84 |         // Move trailing alternatives into the preceding group but do not
 85 |         // cross a comma or semi-colon. Merge the alternatives into one
 86 |         (--In(Comma, Semi) * T(Group)[Group] * ((T(Group) << T(Alt)[Alt]) * (T(Group) << T(Alt))++[Terms])) >>
 87 |           [](Match& _) {
 88 |             Node alt = _(Alt);
 89 |             for (auto& node : _[Terms]) {
 90 |               alt << node->front()->front(); // Group->Alt->Block
 91 |             }
 92 |             return _(Group) << alt;
 93 |           },
 94 |       }
 95 |     };
 96 |   }
 97 | 
 98 |   // Remove nodes for commas and semicolons and replace them by their children.
 99 |   PassDef drop_separators()
100 |   {
101 |     return {
102 |       "drop separators",
103 |       wf_no_semis_or_commas,
104 |       dir::bottomup | dir::once,
105 |       {
106 |         (T(Comma)[Comma]) >>
107 |           [](Match& _) { return Seq << *_[Comma]; },
108 | 
109 |         T(Semi)[Semi] >>
110 |           [](Match& _) { return Seq << *_[Semi]; },
111 |       }
112 |     };
113 |   }
114 | 
115 |   // Check that groups starting with alternatives only appear immediately under
116 |   // braces and brackets
117 |   PassDef check_alternatives()
118 |   {
119 |     return {
120 |       "check alternatives",
121 |       wf_no_semis_or_commas,
122 |       dir::bottomup | dir::once,
123 |       {
124 |         (--In(Brace, Bracket)) * T(Group) << T(Alt)[Alt] >>
125 |           [](Match& _) { return err(_[Alt], "Alternative cannot appear first in a group"); },
126 |       }
127 |     };
128 |   }
129 | 
130 |   // Structure groups so that they contain their atoms in a Terms node,
131 |   // followed by an optional Block and an optional Alt (None when absent).
132 |   PassDef group_structure()
133 |   {
134 |     return {
135 |       "group structure",
136 |       wf,
137 |       dir::bottomup | dir::once,
138 |       {
139 |         In(Group) * Start * (!T(Block, Alt))++[Atom] * ~T(Block)[Block] * ~T(Alt)[Alt] * End >>
140 |           [](Match& _) {
141 |             return Seq << (Terms << _[Atom])
142 |                        << (_(Block)? _(Block): None)
143 |                        << (_(Alt)? _(Alt): None);
144 |           },
145 | 
146 |         // Groups cannot be empty
147 |         T(Group)[Group] << End >>
148 |           [](Match& _) { return err(_[Group], "Groups cannot be empty"); },
149 | 
150 |         // Overly permissive wf rules from before allows groups to have
151 |         // impossible structure. To pass fuzz testing, we add this rule
152 |         T(Group)[Group] << !T(Terms) >>
153 |           [](Match& _) { return err(_[Group], "Should never happen"); },
154 |       }
155 |     };
156 |   }
157 | 
158 | 
159 |   Reader reader()
160 |   {
161 |     return {
162 |       "shrubbery",
163 |       {
164 |         check_parsing(),
165 |         merge_alternatives(),
166 |         drop_separators(),
167 |         check_alternatives(),
168 |         group_structure(),
169 |       },
170 |       parser(),
171 |     };
172 |   }
173 | }
174 | 


--------------------------------------------------------------------------------
/notes/regions.md:
--------------------------------------------------------------------------------
  1 | # Regions again
  2 | 
  3 | *TODO*
  4 | - what if I want to return `(A & in, B & out)`?
  5 |   - what's the capability of the returned tuple?
  6 | - what if a lambda wants to capture an `out` as a free variable?
  7 |   - what's the capability of the lambda?
  8 | - need a `local` or `stack` capability?
  9 |   - seems painful to write functions that accept `in | local`
 10 | 
 11 | ## Four Capabilities
 12 | 
 13 | `in`
 14 |   can put things in its fields
 15 |   can be put in a field
 16 | `out`
 17 |   can't put things in its fields
 18 |   can't be put in a field
 19 | `const`
 20 |   can't put things in its fields
 21 |   can be put in a field
 22 | 
 23 | enter `region[T] & in`
 24 |   get `T & in` in the lambda
 25 |   ```ts
 26 |   enter (x: region[T] & in)
 27 |   {
 28 |     x: T & in => ...
 29 |   }
 30 |   ```
 31 |   free variables that are `in` become `out`
 32 |   region rc++
 33 | 
 34 | enter `region[T] & out`
 35 |   get `T & out` in the lambda
 36 |   ```ts
 37 |   enter (x: region[T] & out)
 38 |   {
 39 |     x: T & out => ...
 40 |   }
 41 |   ```
 42 |   free variables that are `in` become `out`
 43 |   region rc++
 44 |   *safe even if the target region is `in` in some context*
 45 |   *doesn't allow any `in` region to coexist with a non-ancestor `out` region*
 46 | 
 47 | lateral-enter `region[T] & out`
 48 |   get `T & in` in the lambda
 49 |   ```ts
 50 |   enter (x: region[T] & out)
 51 |   {
 52 |     x: T & in => ... // exciting
 53 |   }
 54 |   ```
 55 |   fails if region rc != 1
 56 |   *maybe not rc=1? because of cown aliasing*
 57 |   *there's already an `in` or `out` reference, so we would coexist*
 58 |   *could be separate from the alias rc*
 59 |   free variables that are `in` become `out`
 60 |   region rc++
 61 | 
 62 | explore `region[T] & (in | out)`
 63 |   get `T & out` in the lambda
 64 |   ```ts
 65 |   explore (x: region[T] & (in | out))
 66 |   {
 67 |     x: T & out => ...
 68 |   }
 69 |   ```
 70 |   fails if region == open region
 71 |   *would cause `in` and `out` to coexist*
 72 |   free variables that are `in` *stay* `in`
 73 |   region rc++
 74 | 
 75 | exit
 76 |   region rc--
 77 | 
 78 | ```ts
 79 | class region[T]
 80 | {
 81 |   var val: in.T
 82 |   var region_rc: usize = 0
 83 | 
 84 |   exit(self) = (ref self.region_rc)--
 85 | 
 86 |   explore(self): out.T | throw AlreadyOpen
 87 |   {
 88 |     if ($regions.top == self) {throw AlreadyOpen}
 89 |     (ref self.region_rc)++
 90 |     self.val
 91 |   }
 92 | 
 93 |   freeze(self: in): const.T | throw NotUnique
 94 |   {
 95 | 
 96 |   }
 97 | 
 98 |   move(self): (region[T] & in) | throw NotUnique
 99 | }
100 | ```
101 | 
102 | ## Viewpoint Adaptation
103 | 
104 | It's ok to use `out` in a field position. It means the same thing as `in` due to viewpoint adaptation: `in.(T & out)` = `T & in`, etc.
105 | 
106 | Field access on `C & k` gives `k.(ref[C.f] & in)`. This means that references to fields of a `lin` object are `in`, not `lin`. Linearity is for the individual object, not the transitive closure.
107 | 
108 | ```ts
109 | k ∈ capability := lin | in | out | const
110 | 
111 | // Note that lin.k and in.k are the same.
112 | lin.k   = ∅ if k = lin
113 |           in if k = out
114 |           k otherwise
115 | in.k    = ∅ if k = lin
116 |           in if k = out
117 |           k otherwise
118 | out.k   = out if k ∈ {lin, in}
119 |           k otherwise
120 | const.k = const
121 | 
122 | k.(T1 & T2) = k.T1 & k.T2
123 | k.(T1 | T2) = k.T1 | k.T2
124 | k.(T1, T2) = k.T1, k.T2
125 | k.C = C
126 | (T1 & T2).T = T1.T & T2.T
127 | (T1 | T2).T = T1.T | T2.T
128 | (T1, T2).T = ∅
129 | C.T = ∅
130 | 
131 | // K, TP, C, I, A, V, tuple, |, &, list, (), true, false
132 | T*.{C, I, tuple, (), true, false} = rhs
133 | T*.(T1 | T2) = T*.T1 | T*.T2
134 | T*.(T1 & T2) = T*.T1 | T*.T2
135 | T*.(T1...) = (T*.T1)...
136 | T*.A = T*.(def A)
137 | 
138 | // rhs = K, TP
139 | 
140 | ```
141 | 
142 | ## Ref[T]
143 | 
144 | ```ts
145 | 
146 | class Ref[T]
147 | {
148 |   var val: T
149 | 
150 |   // we lose any disjunction that is `lin` when loading
151 |   load(self): self.T = self.val
152 | 
153 |   // T is unadapted, so if T is `lin`, we accept and return it as `lin`
154 |   // TODO: has to be adapted, eg T: String & out should yield String & in
155 |   store(self: in, val: T): T = (self.val = val)
156 | 
157 |   // fully specified
158 |   load[$self: Ref[T] & (in | out | const)](self: $self): $self.T = self.val
159 | 
160 |   store[$self: Ref[T] & (in | out | const) & in](self: $self, val: $self.T):
161 |     $self.T = (self.val = val)
162 | }
163 | ```
164 | 
165 | ## No Capability
166 | 
167 | If a disjunction in a type has no capability after being fully worked out, we want it to be an efficient `readonly` type. Adding `& (in | out | const)` to it works (note that we leave out `lin`), but results in expensive dynamic reference counting.
168 | 
169 | If, at the end of compilation, all types are replaced with type parameters that have the type as an upper bound, this gives the most efficient way to handle both structural types and `readonly` capabilities.
170 | 
171 | ## Example
172 | 
173 | ```ts
174 | // no need for `stack`? escape analysis for stack allocation
175 | when(a0: cown[A], b0: cown[B])
176 | {
177 |   a1: region[A] & in, b1: region[B] & in =>
178 |   // we can enter an `in` region, but it's only sendable if it has rc=1.
179 |   // if a1 and b1 are aliases, then we can't send them.
180 |   enter a1
181 |   {
182 |     a2: A & in =>
183 | 
184 |     // on enter, all free variables are `out`
185 |     b1: region[B] & out
186 |     enter b1
187 |     {
188 |       b1f: B & out =>
189 |       ...
190 |     }
191 | 
192 |     enter(a2.f: region[AF] & in)
193 |     {
194 |       // af.rc = 2, one for a2.f, one for enter
195 |       af: AF & in =>
196 |       // a2 is also `out` now
197 |       a2: A & out
198 |       // if we read ourselves, we would get an `out` region
199 |       a2.f: region[T2] & out
200 |     }
201 | 
202 |     // af.rc = 1
203 |     let r0: region[AF] & in = a2.f
204 |     // af.rc = 2
205 |     enter(r0: region[AF] & in)
206 |     {
207 |       // af.rc = 3
208 |       af: AF & in =>
209 |       enter(b1: region[B] & out)
210 |       {
211 |         // b.rc = 1, no rc-inc on an out region
212 |         // if b is an alias of a, does anything go wrong?
213 |         b2: B & out =>
214 |         enter(b2.f: region[BF] & out)
215 |         {
216 |           // bf.rc = 1, no rc-inc on an out region
217 |           bf: BF & out =>
218 |           ...
219 |         }
220 |       }
221 |     }
222 | 
223 |     // could we `out` read an `in` region safely?
224 |     // yes, if the `in` region is `out` - which it is, because all free 
225 |     // variables are `out`
226 | 
227 |     // a region is only sendable if it has rc=1
228 |     // will get collected when rc=0
229 |   }
230 | }
231 | ```
232 | 


--------------------------------------------------------------------------------
/CMakeLists.txt:
--------------------------------------------------------------------------------
  1 | cmake_minimum_required(VERSION 3.14.0)
  2 | project(trieste VERSION 1.0.0 LANGUAGES CXX)
  3 | 
  4 | # #############################################
  5 | # Options
  6 | option(TRIESTE_ENABLE_TESTING "Specifies whether to enable Trieste's tests" OFF)
  7 | option(TRIESTE_BUILD_SAMPLES "Specifies whether to build the samples" ON)
  8 | option(TRIESTE_BUILD_PARSERS "Specifies whether to build the parsers" ON)
  9 | option(TRIESTE_BUILD_PARSER_TESTS "Specifies whether to build the parser tests" OFF)
 10 | option(TRIESTE_BUILD_PARSER_TOOLS "Specifies whether to build parser tools" OFF)
 11 | option(TRIESTE_USE_CXX17 "Specifies whether to target the C++17 standard" OFF)
 12 | option(TRIESTE_CLEAN_INSTALL "Specifies whether to delete all files (recursively) from the install prefix before install" OFF)
 13 | option(TRIESTE_USE_SNMALLOC "Specifies that new/delete should be overridden with snmalloc" ON)
 14 | 
 15 | set(CMAKE_BUILD_WITH_INSTALL_RPATH ON)
 16 | 
 17 | # #############################################
 18 | # Dependencies
 19 | 
 20 | include(FetchContent)
 21 | 
 22 | if(TRIESTE_USE_CXX17)
 23 |   set(CMAKE_CXX_STANDARD 17)
 24 | else()
 25 |   set(CMAKE_CXX_STANDARD 20)
 26 | endif()
 27 | 
 28 | set(CMAKE_EXPORT_COMPILE_COMMANDS ON)
 29 | 
 30 | set(SNMALLOC_BUILD_TESTING OFF CACHE INTERNAL "Turn off snmalloc tests")
 31 | set(SNMALLOC_USE_CXX17 ${TRIESTE_USE_CXX17})
 32 | 
 33 | set(RE2_BUILD_TESTING OFF CACHE INTERNAL "Turn off RE2 tests")
 34 | 
 35 | set(TRIESTE_SANITIZE "" CACHE STRING "Argument to pass to sanitize (disabled by default)")
 36 | 
 37 | # Used to provide
 38 | #  FetchContent_MakeAvailable_ExcludeFromAll
 39 | FetchContent_Declare(
 40 |     cmake_utils
 41 |     GIT_REPOSITORY https://github.com/mjp41/cmake_utils
 42 |     GIT_TAG 2bf98b5773ea7282197c823e205547d8c2e323c0
 43 |     GIT_SHALLOW FALSE
 44 | )
 45 | 
 46 | FetchContent_MakeAvailable(cmake_utils)
 47 | 
 48 | 
 49 | FetchContent_Declare(
 50 |   snmalloc
 51 |   GIT_REPOSITORY https://github.com/microsoft/snmalloc
 52 |   GIT_TAG b8e28be14b3fd98e27c2fe87c0296570f6d3990e
 53 |   # per https://gitlab.kitware.com/cmake/cmake/-/issues/17770, do not use GIT_SHALLOW here
 54 |   # or it will silently break new builds days later, once the commit hash above no longer
 55 |   # matches a named branch, like main.
 56 |   # Tracking a named branch with GIT_SHALLOW enabled is ok, however.
 57 |   # GIT_SHALLOW TRUE
 58 | )
 59 | 
 60 | FetchContent_MakeAvailable_ExcludeFromAll(snmalloc)
 61 | 
 62 | FetchContent_Declare(
 63 |   re2
 64 |   GIT_REPOSITORY https://github.com/google/re2
 65 |   GIT_TAG 2022-12-01
 66 |   GIT_SHALLOW TRUE
 67 | )
 68 | 
 69 | FetchContent_MakeAvailable_ExcludeFromAll(re2)
 70 | 
 71 | FetchContent_Declare(
 72 |   cli11
 73 |   GIT_REPOSITORY https://github.com/CLIUtils/CLI11
 74 |   GIT_TAG 4160d259d961cd393fd8d67590a8c7d210207348
 75 |   GIT_SHALLOW TRUE
 76 | )
 77 | 
 78 | FetchContent_MakeAvailable_ExcludeFromAll(cli11)
 79 | 
 80 | # #############################################
 81 | # Create target and set properties
 82 | add_library(trieste INTERFACE)
 83 | 
 84 | # Add an alias so that library can be used inside the build tree, e.g. when testing
 85 | add_library(trieste::trieste ALIAS trieste)
 86 | 
 87 | # Set target properties
 88 | target_include_directories(trieste
 89 |   INTERFACE $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/include>
 90 |   $<INSTALL_INTERFACE:${CMAKE_INSTALL_INCLUDEDIR}>
 91 | )
 92 | 
 93 | target_link_libraries(trieste
 94 |   INTERFACE
 95 |   re2::re2
 96 |   CLI11::CLI11
 97 |   snmalloc
 98 | )
 99 | 
100 | if(TRIESTE_USE_SNMALLOC)
101 |   if(TRIESTE_SANITIZE)
102 |     message(WARNING "Sanitizers are not supported with snmalloc, disabling snmalloc.")
103 |   else()
104 |     target_link_libraries(trieste INTERFACE snmalloc-new-override)
105 |   endif()
106 | endif()
107 | 
108 | if(TRIESTE_USE_CXX17)
    |   # The language level is a compile-feature requirement, not a preprocessor
    |   # definition; TRIESTE_USE_CXX17 is the only actual macro defined here.
109 |   target_compile_features(trieste INTERFACE cxx_std_17)
    |   target_compile_definitions(trieste INTERFACE TRIESTE_USE_CXX17)
110 | else()
111 |   target_compile_features(trieste INTERFACE cxx_std_20)
112 | endif()
113 | 
114 | if (TRIESTE_SANITIZE)
115 |   target_compile_options(trieste INTERFACE -g -fsanitize=${TRIESTE_SANITIZE} -fno-omit-frame-pointer)
116 |   target_link_libraries(trieste INTERFACE -fsanitize=${TRIESTE_SANITIZE})
117 | endif()
118 | 
119 | function(enable_warnings target) # Adds strict warning options, with warnings treated as errors, to ${target} as PRIVATE compile options.
120 |   if(MSVC)
121 |     target_compile_options(${target} PRIVATE /W4 /WX /wd5030 /bigobj) # /WX: warnings are errors; /wd5030: disable warning C5030
122 |   else()
123 |     target_compile_options(${target} PRIVATE
124 |       -Wall -Wextra -Wpedantic -Werror -Wshadow) # -Werror: warnings are errors
125 |   endif()
126 | endfunction()
127 | 
128 | if(CMAKE_CXX_COMPILER_ID STREQUAL "Clang")
129 |   target_compile_options(trieste INTERFACE -Wmismatched-tags -fstandalone-debug)
130 | endif()
131 | 
132 | if(TRIESTE_CLEAN_INSTALL)
133 |   message("${CMAKE_INSTALL_PREFIX} will be recursively cleaned before install")
134 |   # Clear all existing files and folders from the install directory
135 |   install(CODE [[
136 |     file(REMOVE_RECURSE ${CMAKE_INSTALL_PREFIX}/.)
137 |     ]])
138 | endif()
139 | 
140 | # #############################################
141 | # Installation instructions
142 | set(CMAKE_INSTALL_PREFIX ${PROJECT_BINARY_DIR}/dist)
143 | 
144 | set(INSTALL_DEPS snmalloc re2 CLI11)
145 | if (TRIESTE_USE_SNMALLOC)
146 |   list(APPEND INSTALL_DEPS snmalloc-new-override)
147 | endif()
148 | 
149 | install(TARGETS trieste ${INSTALL_DEPS}
150 |   EXPORT ${PROJECT_NAME}_Targets
151 |   ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR}
152 |   LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR}
153 |   RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR}
154 | )
155 | 
156 | # Create a ConfigVersion.cmake file
157 | include(CMakePackageConfigHelpers)
158 | write_basic_package_version_file(
159 |   ${PROJECT_BINARY_DIR}/${PROJECT_NAME}ConfigVersion.cmake
160 |   VERSION ${PROJECT_VERSION}
161 |   COMPATIBILITY AnyNewerVersion
162 | )
163 | 
164 | configure_package_config_file(${PROJECT_SOURCE_DIR}/cmake/triesteConfig.cmake.in
165 |   ${PROJECT_BINARY_DIR}/triesteConfig.cmake
166 |   INSTALL_DESTINATION
167 |   ${CMAKE_INSTALL_DATAROOTDIR}/${PROJECT_NAME}/cmake
168 | )
169 | 
170 | install(EXPORT ${PROJECT_NAME}_Targets
171 |   FILE ${PROJECT_NAME}Targets.cmake
172 |   NAMESPACE ${PROJECT_NAME}::
173 |   DESTINATION ${CMAKE_INSTALL_DATAROOTDIR}/${PROJECT_NAME}/cmake)
174 | 
175 | install(FILES ${PROJECT_BINARY_DIR}/${PROJECT_NAME}Config.cmake
176 |   ${PROJECT_BINARY_DIR}/${PROJECT_NAME}ConfigVersion.cmake
177 |   DESTINATION ${CMAKE_INSTALL_DATAROOTDIR}/${PROJECT_NAME}/cmake)
178 | 
179 | install(DIRECTORY ${PROJECT_SOURCE_DIR}/include/trieste DESTINATION include)
180 | 
181 | # #############################################
182 | # # Exporting from the build tree
183 | export(EXPORT ${PROJECT_NAME}_Targets
184 |   FILE ${CMAKE_CURRENT_BINARY_DIR}/triesteTargets.cmake
185 |   NAMESPACE trieste::)
186 | 
187 | # Register package in the User Package Registry
188 | export(PACKAGE trieste)
189 | 
190 | # #############################################
191 | # # Add core Trieste tests
192 | if(TRIESTE_ENABLE_TESTING)
193 |   enable_testing()
194 |   add_subdirectory(test)
195 | endif()
196 | 
197 | # #############################################
198 | # # Add samples
199 | if(TRIESTE_BUILD_SAMPLES)
200 |   enable_testing()
201 |   add_subdirectory(samples/infix)
202 |   add_subdirectory(samples/shrubbery)
203 | endif()
204 | 
205 | # #############################################
206 | # # Add parsers
207 | if(TRIESTE_BUILD_PARSERS)
208 |   add_subdirectory(parsers)
209 | endif()
210 | 


--------------------------------------------------------------------------------
/cmake/testsuite.cmake:
--------------------------------------------------------------------------------
  1 | find_program(DIFF_TOOL NAMES
  2 |   diff)
  3 | 
  4 | set(DIR_OF_TESTSUITE_CMAKE ${CMAKE_CURRENT_LIST_DIR})
  5 | 
  6 | if (DIFF_TOOL STREQUAL DIFF_TOOL-NOTFOUND)
  7 |   set(DIFF_TOOL "")
  8 | endif()
  9 | 
 10 | # How to use this testsuite system.
 11 | #  In a directory with the testsuite files, create a CMakeLists.txt file.
 12 | #  * Include this file.
 13 | #  * Call the testsuite function with the name of the tool.
 14 | # E.g. something like this:
 15 | #
 16 | #   include (${CMAKE_SOURCE_DIR}/cmake/testsuite.cmake)
 17 | #   testsuite(infix)
 18 | #
 19 | #  The testsuite function will find all adjacent .cmake files; each of them should contain the following:
 20 | #  *  A variable TESTSUITE_REGEX, which specifies which files are to be considered a test.
 21 | #     This should be a regular expression that matches the test files. E.g. "test_type/.*\\.infix"
 22 | #     which matches all files with the .infix extension in the test_type directory.
 23 | #  *  A variable TESTSUITE_EXE which is the executable to run for the tests.  This can be a generator expression
 24 | #     to allow for different executables in different configurations.
 25 | #  *  A macro toolinvoke which takes the arguments ARGS, local_build, testfile and outputdir.
 26 | #     This macro should set ARGS to the command line arguments for the tool.
 27 | #  *  A function test_output_dir which takes the output directory out and the test file test.
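 28 | #     This function should set out to the output directory for the test. This is relative to the current source directory (for golden files) and the current binary directory (for generated output).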
 29 | #
 30 | # An example of this is in samples/infix/testsuite/infix.cmake.
 31 | function(testsuite name) # Registers run/compare tests and update-dump targets for every *.cmake test collection in the current source directory.
 32 |   message(STATUS "Building test suite: ${name}")
 33 |   # Collects the per-collection update-dump targets created below.
 34 |   set(UPDATE_DUMPS_TARGETS)
 35 |   # Each test collection has its own cmake file for its configuration.
 36 |   file (GLOB test_collections CONFIGURE_DEPENDS RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} *.cmake)
 37 |   file (GLOB_RECURSE all_files CONFIGURE_DEPENDS RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} *)
 38 | 
 39 |   foreach(test_collection ${test_collections})
 40 |     set (test_set)
 41 |     
 42 |     # Load this collection's settings (TESTSUITE_REGEX, TESTSUITE_EXE, toolinvoke, test_output_dir).
 43 |     include(${CMAKE_CURRENT_SOURCE_DIR}/${test_collection})
 44 | 
 45 |     set (tests ${all_files})
 46 |     list(FILTER tests
 47 |       INCLUDE REGEX
 48 |       ${TESTSUITE_REGEX}
 49 |     )
 50 | 
 51 |     foreach(test ${tests})
 52 |       test_output_dir(output_dir_relative ${test})
 53 |       get_filename_component(test_dir ${test} DIRECTORY)
 54 |       get_filename_component(test_file ${test} NAME)
 55 |       # Build the command line that generates the output for this test.
 56 |       set (output_dir ${CMAKE_CURRENT_BINARY_DIR}/${output_dir_relative})
 57 |       set (test_output_cmd 
 58 |         ${CMAKE_COMMAND}
 59 |           -DTESTFILE=${test_file}
 60 |           -DTEST_EXE=${TESTSUITE_EXE}
 61 |           -DWORKING_DIR=${CMAKE_CURRENT_SOURCE_DIR}/${test_dir}
 62 |           -DCOLLECTION=${CMAKE_CURRENT_SOURCE_DIR}/${test_collection}
 63 |           -DOUTPUT_DIR=${output_dir}
 64 |           -P ${DIR_OF_TESTSUITE_CMAKE}/runcommand.cmake
 65 |       )
 66 | 
 67 |       # Add a test that regenerates the tool output for this test file.
 68 |       add_test(NAME ${output_dir_relative}
 69 |         COMMAND ${test_output_cmd}
 70 |       )
 71 | 
 72 |       # Add a command that rebuilds the tool output when updating golden files.
 73 |       add_custom_command(OUTPUT "${output_dir_relative}_fake"
 74 |         COMMAND ${test_output_cmd}
 75 |       )
 76 |       set_source_files_properties("${output_dir_relative}_fake" PROPERTIES SYMBOLIC "true") # the "_fake" output is never written to disk
 77 |       list(APPEND test_set "${output_dir_relative}_fake")
 78 | 
 79 |       # Build a launch.json entry for debugging this test.
 80 |       toolinvoke(launch_json_args ${test_file} ${output_dir})
 81 |       # Escape quotes and turn the CMake list into a JSON string list.
 82 |       string(REPLACE "\"" "\\\"" launch_json_args "${launch_json_args}")
 83 |       string(REPLACE ";" "\", \"" launch_json_args "${launch_json_args}")
 84 |       list(APPEND LAUNCH_JSON
 85 |   "    {
 86 |         \"name\": \"${output_dir_relative}\",
 87 |         \"type\": \"cppdbg\",
 88 |         \"request\": \"launch\",
 89 |         \"program\": \"${TESTSUITE_EXE}\",
 90 |         \"args\": [\"${launch_json_args}\"],
 91 |         \"stopAtEntry\": false,
 92 |         \"cwd\": \"${CMAKE_CURRENT_SOURCE_DIR}/${test_dir}\",
 93 |       },")
 94 | 
 95 |       # Add an output comparison for each golden / output file pair.
 96 |       set (golden_dir  ${CMAKE_CURRENT_SOURCE_DIR}/${output_dir_relative} )
 97 |       file (GLOB_RECURSE results CONFIGURE_DEPENDS RELATIVE ${golden_dir} ${golden_dir}/*)
 98 |       # Check if there are any files to compare for this test.
 99 |       list(LENGTH results res_length)
100 |       if(res_length EQUAL 0)
101 |         message(WARNING "Test does not have results directory: ${golden_dir}\nRun `update-dump` to generate golden files.")
102 |         # Extend the update command to create the golden directory and copy the generated output into it.
103 |         add_custom_command(OUTPUT ${output_dir_relative}_fake
104 |           COMMAND
105 |             ${CMAKE_COMMAND}
106 |             -E make_directory
107 |             ${golden_dir}
108 |           APPEND
109 |         )
110 |         add_custom_command(OUTPUT ${output_dir_relative}_fake
111 |           COMMAND
112 |             ${CMAKE_COMMAND}
113 |             -E copy_if_different
114 |             ${output_dir}/*
115 |             ${golden_dir}/
116 |           APPEND
117 |         )
118 |       else()
119 |         foreach (result ${results})
120 |           # Check each file is correct as a test target
121 |           add_test (NAME ${output_dir_relative}-${result}
122 |             COMMAND 
123 |               ${CMAKE_COMMAND}
124 |                 -Doriginal_file=${golden_dir}/${result} 
125 |                 -Dnew_file=${output_dir}/${result}
126 |                 -Ddiff_tool=${DIFF_TOOL}
127 |                 -P ${DIR_OF_TESTSUITE_CMAKE}/compare.cmake
128 |           )
129 |           set_tests_properties(${output_dir_relative}-${result} PROPERTIES DEPENDS ${output_dir_relative}) # compare only after the output has been regenerated
130 | 
131 |           # Overwrite out-of-date golden files.
132 |           add_custom_command(OUTPUT "${output_dir_relative}_fake"
133 |             COMMAND
134 |               ${CMAKE_COMMAND}
135 |               -E copy_if_different
136 |               ${output_dir}/${result}
137 |               ${golden_dir}/${result}
138 |             APPEND
139 |           )
140 |         endforeach()
141 |         # All tests require an exit_code.txt, so always copy it.
142 |         add_custom_command(OUTPUT "${output_dir_relative}_fake"
143 |           COMMAND
144 |             ${CMAKE_COMMAND}
145 |             -E copy_if_different
146 |             ${output_dir}/exit_code.txt
147 |             ${golden_dir}/exit_code.txt
148 |           APPEND
149 |         )
150 | 
151 |       endif()
152 |     endforeach()
153 |     add_custom_target("update-dump-${test_collection}" DEPENDS ${test_set})
154 |     list(APPEND UPDATE_DUMPS_TARGETS "update-dump-${test_collection}")
155 |   endforeach()
156 | 
157 |   string(REPLACE ";" "\n" LAUNCH_JSON2 "${LAUNCH_JSON}") # one launch entry per line
158 | 
159 |   if (TRIESTE_GENERATE_LAUNCH_JSON)
160 |     file(GENERATE OUTPUT ${CMAKE_SOURCE_DIR}/.vscode/launch.json
161 |       CONTENT
162 |   "{
163 |     \"version\": \"0.2.0\",
164 |     \"configurations\": [
165 |       ${LAUNCH_JSON2}
166 |     ]
167 |   }")
168 |   endif()
169 | 
170 | 
171 |   if (TARGET update-dump) # the aggregate target already exists: extend it instead of redefining it
172 |     add_dependencies(update-dump ${UPDATE_DUMPS_TARGETS})
173 |   else()
174 |     add_custom_target(update-dump DEPENDS ${UPDATE_DUMPS_TARGETS})
175 |   endif()
176 | endfunction()


--------------------------------------------------------------------------------
/notes/control-flow.md:
--------------------------------------------------------------------------------
  1 | # Control Flow
  2 | 
  3 | ## Non-local Returns
  4 | 
  5 | The result of every call is checked. If it's a `nonlocal[T]`, then it's immediately returned by the function or lambda. A function first unwraps the non-local value by calling `load` on it, whereas a lambda doesn't.
  6 | 
  7 | This allows `throw[T]` to be implemented as a `nonlocal[throw[T]]`, such that it propagates upwards until it's explicitly caught, while `return[T]` can be implemented as `nonlocal[T]`, such that it causes the calling function to return a value of type `T`.
  8 | 
  9 | If a call is syntactically marked as `try`, then the check for a non-local value is suppressed.
 10 | 
 11 | ## Altering the default behavior
 12 | 
 13 | A function can be made to behave as a lambda with `try`. A lambda can be made to behave like a function as follows:
 14 | 
 15 | ```rust
 16 | let f = { x -> ... }
 17 | let f = { x -> returning { ... } }
 18 | 
 19 | returning[T1, T2](f: ()->(non_local[T1] | T2)): T1 | T2
 20 | {
 21 |   match (try f())
 22 |   {
 23 |     { x: non_local[T1] -> x.load() }
 24 |     { x: T2 -> x }
 25 |   }
 26 | }
 27 | 
 28 | (return[A] | throw[B]) <: non_local[A | throw[B]]
 29 | (return[C] | return[D]) <: non_local[C | D]
 30 | 
 31 | let x = try f() // x: T1 | non_local[T2]
 32 | let y = { x1 -> x1 } x // y: T1
 33 | 
 34 | match x
 35 | {
 36 |   { x1: T1 -> ... }
 37 |   { x2: non_local[T2] -> ... }
 38 | }
 39 | 
 40 | ```
 41 | 
 42 | ## NLRCheck
 43 | 
 44 | ```rust
 45 | type non_local[T] =
 46 | {
 47 |   trait_non_local(): ()
 48 |   load(self): Self.T
 49 | }
 50 | 
 51 | class return[T]: non_local[T]
 52 | {
 53 |   let value: T
 54 | 
 55 |   trait_non_local(): () = ()
 56 |   load(self): Self.T = self.value
 57 | 
 58 |   create(): return[()] = return[()]::create(())
 59 |   create(value: T): return[T] = new value
 60 |   up(value: T): return[return[T]] = return[return[T]]::create(new value)
 61 | }
 62 | 
 63 | class throw[T]: non_local[throw[T]]
 64 | {
 65 |   let value: T
 66 | 
 67 |   trait_non_local(): () = ()
 68 |   load(self): self.throw[T] = self
 69 | 
 70 |   create(): throw[()] = throw[()]::create(())
 71 |   create(value: T): value.throw[T] = new value
 72 | }
 73 | 
 74 | type Break = throw[break & const]
 75 | type Continue = throw[continue & const]
 76 | 
 77 | class break
 78 | {
 79 |   create(): Break = throw(new const)
 80 | }
 81 | 
 82 | class continue
 83 | {
 84 |   create(): Continue = throw(new const)
 85 | }
 86 | 
 87 | f()
 88 | {
 89 |   try
 90 |   {
 91 |     throw x // as expected, `try` catches this
 92 |     return y // unexpected: `try` also catches this
 93 |   }
 94 | 
 95 |   y
 96 | 
 97 |   match try
 98 |   {
 99 |     ...
100 |   }
101 |   {
102 |     { x: return[T] -> x }
103 |   }
104 | }
105 | 
106 | // TODO: establishing `value: T1 & ¬T2` in the `else` branch
107 | catch(value: T1, handlers: T2->T3): (T1 & ¬T2) | T3
108 | {
109 |   if value
110 |   {
111 |     x: T2 -> handler value
112 |   }
113 |   else
114 |   {
115 |     value
116 |   }
117 | }
118 | 
119 | f()
120 | {
121 |   for iter
122 |   {
123 |     subiter ->
124 |     for subiter
125 |     {
126 |       value ->
127 |       if something
128 |       {
129 |         // want to return from f here
130 |         // `if` catches it and hands it to the subiter lambda
131 |         // the subiter catches it and returns it to `for subiter`
132 |         // `for subiter` catches it and hands it to the iter lambda
133 |         // the iter catches it and returns it to `for iter`
134 |         // `for iter` catches it (explicit) and hands it to f()
135 |         // f() catches it, unwraps it, and returns it to the caller
136 |         return value
137 |       }
138 | 
139 |       if something
140 |       {
141 |         throw "fail"
142 |       }
143 | 
144 |       // if we end here, `for` gets a 3
145 |       // it probably just continues the loop
146 |       // but it could build a list of values, track the last returned value,
147 |       // or something else
148 |       3
149 | 
150 |       // if we end here, f() returns `value`
151 |       return value
152 |     }
153 |   }
154 | 
155 |   if something
156 |   {
157 |     if something_else
158 |     {
159 |       // if we end here, f() returns `value`
160 |       return value
161 |     }
162 |   }
163 | 
164 |   // immediate return if we end here
165 |   value
166 | 
167 |   // also an immediate return, because the function unwraps it
168 |   return value
169 | }
170 | 
171 | for[T1, T2, T3](iter: Iterator[T1], body: T1->(non_local[T2] | T3)):
172 |   (non_local[T2] & ¬throw[Break] & ¬throw[Continue]) | ()
173 | {
174 |   try
175 |   {
176 |     while (iter.has_next)
177 |     {
178 |       // If this is a non-local, it goes to the `body()` call in `while`,
179 |       // which returns it to our `try`, which forwards it to the caller.
180 |       body iter.next
181 |     }
182 |   }
183 | }
184 | 
185 | while[T1, T2](cond: ()->Bool, body: ()->(non_local[T1] | T2)):
186 |   (non_local[T1] & ¬throw[Break] & ¬throw[Continue]) | ()
187 | {
188 |   if (cond())
189 |   {
190 |     match try body()
191 |     {
192 |       { _: throw[Break] -> () }
193 |       { _: throw[Continue] -> while cond body }
194 |       { r: non_local[T1] -> r }
195 |       { _: T2 -> while cond body }
196 |     }
197 |   }
198 | }
199 | 
200 | catch[T, U, V](value: T, body: U->V): T | V
201 | {
202 |   if value
203 |   {
204 |     x: U ->
205 |     // Don't unwrap any return[T]
206 |     try body x
207 |   }
208 |   else
209 |   {
210 |     value
211 |   }
212 | }
213 | 
214 | ```
215 | 
216 | ## Conditionals
217 | 
218 | Two types of conditional are built in.
219 | 
220 | ```ts
221 | // boolean conditional
222 | // executes the lambda if `cond` is true
223 | if cond
224 | {
225 |   ...
226 | }
227 | 
228 | // type conditional
229 | // executes the lambda if `value <: T`
230 | if value
231 | {
232 |   x: T -> ...
233 | }
234 | ```
235 | 
236 | ## Pattern Matching
237 | 
238 | The result of the match should be:
239 | - A `return[T]` for the result of any successful match.
240 | - A `throw[E]` for an error result of any successful match.
241 | - A `match[T]` if not yet matched.
242 | 
243 | What's needed?
244 | - structural pattern
245 | - type pattern
246 | 
247 | What about exhaustive matching?
248 | 
249 | ```ts
250 | class match[T]
251 | {
252 |   class NoMatch{}
253 | 
254 |   let value: T
255 | 
256 |   create(value: T): match[T] & lin = new (value, true)
257 | 
258 |   |[G: {==(G, T1): Bool}, T1, T2, E](
259 |     self: lin, guard: G, case: T1->(T2 | throw E)):
260 |     ((match[T] | match[T \ T1] | matched[T2]) & lin) | throw E
261 |   {
262 |     if self.value
263 |     {
264 |       x: T1 ->
265 |       if (guard == x)
266 |       {
267 |         matched(case x)
268 |       }
269 |       else
270 |       {
271 |         match(x)
272 |       }
273 |     }
274 |     else
275 |     {
276 |       x: T \ T1 -> match(x)
277 |     }
278 |   }
279 | 
280 |   |[T1, T2, E](self: lin, case: T1->(T2 | throw E)):
281 |     ((match[T \ T1] | matched[T2]) & lin) | throw E
282 |   {
283 |     if self.value
284 |     {
285 |       x: T1 -> matched(case x)
286 |     }
287 |     else
288 |     {
289 |       x: T \ T1 -> match(x)
290 |     }
291 |   }
292 | 
293 |   end(self: lin): throw NoMatch
294 |   {
295 |     throw NoMatch
296 |   }
297 | }
298 | 
299 | class matched[T]
300 | {
301 |   let value: T
302 | 
303 |   create(value: T): matched[T] & lin = new value
304 | 
305 |   |[G: {==(G, T1): Bool}, T1, T2](self: lin, guard: G, case: T1->T2): Self =
306 |     self
307 | 
308 |   |[T1, T2](self: lin, case: T1->T2): Self = self
309 | 
310 |   end(self: lin): T = self.value
311 | }
312 | 
313 | // match[type x] | matched[type case1] | matched[type case2] | throw <E>
314 | match x
315 |   | true { 0 }
316 |   | { a, b -> a + b }
317 |   end
318 | ```
319 | 


--------------------------------------------------------------------------------
/include/trieste/reader.h:
--------------------------------------------------------------------------------
  1 | // Copyright Microsoft and Project Verona Contributors.
  2 | // SPDX-License-Identifier: MIT
  3 | #pragma once
  4 | 
  5 | #include "parse.h"
  6 | #include "passes.h"
  7 | 
  8 | #include <optional>
  9 | #include <variant>
 10 | 
 11 | namespace trieste
 12 | {
 13 |   class Reader
 14 |   {
 15 |   private:
 16 |     constexpr static auto parse_only = "parse";
 17 | 
 18 |     using InputSpec =
 19 |       std::optional<std::variant<std::filesystem::path, Source>>;
 20 | 
 21 |     std::string language_name_;
 22 |     std::vector<Pass> passes_;
 23 |     Parse parser_;
 24 |     InputSpec input_{};
 25 |     bool debug_enabled_;
 26 |     bool wf_check_enabled_;
 27 |     std::filesystem::path debug_path_;
 28 |     std::string start_pass_;
 29 |     std::string end_pass_;
 30 |     std::size_t offset_;
 31 | 
 32 |   public:
 33 |     Reader(
 34 |       const std::string& language_name, // stored as the language name
 35 |       const std::vector<Pass>& passes, // copied into the reader's own pass list
 36 |       const Parse& parser) // copied into the reader's own parser
 37 |     : language_name_(language_name),
 38 |       passes_(passes),
 39 |       parser_(parser),
 40 |       debug_enabled_(false),
 41 |       wf_check_enabled_(false),
 42 |       debug_path_("."), // debug path defaults to the current directory
 43 |       start_pass_(""), // empty: no start pass selected
 44 |       end_pass_(""), // empty: no end pass selected
 45 |       offset_(0)
 46 |     {} // input_ keeps its default (no input) until one is supplied
 47 | 
 48 |     ProcessResult read()
 49 |     {
 50 |       if (!input_)
 51 |       {
 52 |         return {false, parse_only, nullptr, {(Error ^ "No source provided")}};
 53 |       }
 54 | 
 55 |       auto& input = *input_;
 56 |       PassRange pass_range(
 57 |         passes_.begin(), passes_.end(), parser_.wf(), parse_only);
 58 | 
 59 |       if (!end_pass_.empty())
 60 |       {
 61 |         if (end_pass_ == parse_only)
 62 |         {
 63 |           pass_range.disable();
 64 |         }
 65 |         else if (!pass_range.move_end(end_pass_))
 66 |         {
 67 |           return {
 68 |             false,
 69 |             parse_only,
 70 |             nullptr,
 71 |             {Error ^ ("Unknown pass: " + end_pass_)}};
 72 |         }
 73 |       }
 74 | 
 75 |       Node ast;
 76 |       auto parse_start = std::chrono::high_resolution_clock::now();
 77 |       if (!start_pass_.empty())
 78 |       {
 79 |         if (!pass_range.move_start(start_pass_))
 80 |         {
 81 |           return {
 82 |             false,
 83 |             parse_only,
 84 |             nullptr,
 85 |             {Error ^ ("Unknown pass: " + start_pass_)}};
 86 |         }
 87 | 
 88 |         Source source;
 89 |         if (std::holds_alternative<std::filesystem::path>(input))
 90 |         {
 91 |           auto& path = std::get<std::filesystem::path>(input);
 92 |           if (std::filesystem::is_directory(path))
 93 |             return {
 94 |               false,
 95 |               parse_only,
 96 |               nullptr,
 97 |               {Error ^ "Cannot use directory with intermediate pass."}};
 98 |           source = SourceDef::load(path);
 99 |         }
100 |         else
101 |         {
102 |           source = std::get<Source>(input);
103 |         }
104 | 
105 |         // Pass range is currently pointing at pass, but the output is the
106 |         // dump of that, so advance it one, so we start processing on the
107 |         // next pass.
108 |         ++pass_range;
109 | 
110 |         ast = build_ast(source, offset_);
111 |       }
112 |       else
113 |       {
114 |         std::visit([&](auto x) { ast = parser_.parse(x); }, input);
115 |       }
116 |       auto parse_end = std::chrono::high_resolution_clock::now();
117 | 
118 |       logging::Info summary;
119 |       std::filesystem::path debug_path;
120 |       if (debug_enabled_)
121 |       {
122 |         debug_path = debug_path_;
123 |       }
124 | 
125 |       summary << "---------" << std::endl;
126 |       summary << "Parse time (us): "
127 |               << std::chrono::duration_cast<std::chrono::microseconds>(
128 |                    parse_end - parse_start).count()
129 |               << std::endl;
130 | 
131 |       auto result =
132 |         Process(pass_range)
133 |           .set_check_well_formed(wf_check_enabled_)
134 |           .set_default_pass_complete(summary, language_name_, debug_path)
135 |           .run(ast);
136 |       summary << "---------" << std::endl;
137 |       return result;
138 |     }
139 | 
140 |     template<typename StringLike>
141 |     size_t pass_index(const StringLike& name_) const
142 |     {
143 |       if (name_ == parse_only)
144 |         return 0;
145 | 
146 |       for (size_t i = 0; i < passes_.size(); i++)
147 |       {
148 |         if (passes_[i]->name() == name_)
149 |           return i + 1;
150 |       }
151 | 
152 |       return std::numeric_limits<size_t>::max();
153 |     }
154 | 
155 |     std::vector<std::string> pass_names() const
156 |     {
157 |       std::vector<std::string> names;
158 |       names.push_back(parse_only);
159 |       std::transform(
160 |         passes_.begin(),
161 |         passes_.end(),
162 |         std::back_inserter(names),
163 |         [](const auto& p) { return p->name(); });
164 |       return names;
165 |     }
166 | 
167 |     Reader& executable(const std::filesystem::path& path)
168 |     {
169 |       parser_.executable(path);
170 |       return *this;
171 |     }
172 | 
173 |     Reader& language_name(const std::string& name)
174 |     {
175 |       language_name_ = name;
176 |       return *this;
177 |     }
178 | 
179 |     const std::string& language_name() const
180 |     {
181 |       return language_name_;
182 |     }
183 | 
184 |     const std::vector<Pass>& passes() const
185 |     {
186 |       return passes_;
187 |     }
188 | 
189 |     const Parse& parser() const
190 |     {
191 |       return parser_;
192 |     }
193 | 
194 |     Reader& debug_enabled(bool value)
195 |     {
196 |       debug_enabled_ = value;
197 |       return *this;
198 |     }
199 | 
200 |     bool debug_enabled() const
201 |     {
202 |       return debug_enabled_;
203 |     }
204 | 
205 |     Reader& wf_check_enabled(bool value)
206 |     {
207 |       wf_check_enabled_ = value;
208 |       return *this;
209 |     }
210 | 
211 |     bool wf_check_enabled() const
212 |     {
213 |       return wf_check_enabled_;
214 |     }
215 | 
216 |     Reader& debug_path(const std::filesystem::path& path)
217 |     {
218 |       debug_path_ = path;
219 |       return *this;
220 |     }
221 | 
222 |     const std::filesystem::path& debug_path() const
223 |     {
224 |       return debug_path_;
225 |     }
226 | 
227 |     template<typename StringLike>
228 |     Reader& start_pass(const StringLike& pass)
229 |     {
230 |       start_pass_ = pass;
231 |       return *this;
232 |     }
233 | 
234 |     const std::string& start_pass() const
235 |     {
236 |       return start_pass_;
237 |     }
238 | 
239 |     template<typename StringLike>
240 |     Reader& end_pass(const StringLike& pass)
241 |     {
242 |       end_pass_ = pass;
243 |       return *this;
244 |     }
245 | 
246 |     const std::string& end_pass() const
247 |     {
248 |       return end_pass_;
249 |     }
250 | 
251 |     Reader& offset(std::size_t pos)
252 |     {
253 |       offset_ = pos;
254 |       return *this;
255 |     }
256 | 
257 |     std::size_t offset() const
258 |     {
259 |       return offset_;
260 |     }
261 | 
262 |     Reader& source(const Source& s)
263 |     {
264 |       input_ = s;
265 |       return *this;
266 |     }
267 | 
268 |     Reader& file(const std::filesystem::path& path)
269 |     {
270 |       input_ = path;
271 |       return *this;
272 |     }
273 | 
274 |     Reader& synthetic(const std::string& contents, const std::string& origin="")
275 |     {
276 |       input_ = SourceDef::synthetic(contents, origin);
277 |       return *this;
278 |     }
279 | 
280 |     Reader& postparse(Parse::PostF func)
281 |     {
282 |       parser_.postparse(func);
283 |       return *this;
284 |     }
285 | 
286 |     const wf::Wellformed& output_wf() const
287 |     {
288 |       return passes_.back()->wf();
289 |     }
290 |   };
291 | }
292 | 


--------------------------------------------------------------------------------
/include/trieste/token.h:
--------------------------------------------------------------------------------
  1 | // Copyright Microsoft and Project Verona Contributors.
  2 | // SPDX-License-Identifier: MIT
  3 | #pragma once
  4 | 
#include "logging.h"
#include "source.h"

#include <atomic>
#include <functional>
#include <map>
 10 | 
 11 | namespace trieste
 12 | {
 13 |   class NodeDef;
 14 | 
  // Certain uses of the Node alias before the full definition of NodeDef can
  // cause incomplete type errors, so this manually relocates the problematic
  // code to after NodeDef is fully defined. See the docs on the specialized
  // trait for details.
  //
  // Note: this is only needed by our C++17 implementation of NodeRange (in
  // ast.h). If we stop supporting C++17, this can be deleted.
  template<>
  struct intrusive_refcounted_traits<NodeDef>
  {
    // Both hooks are only declared here; their definitions are not part of
    // this header section and have to be provided once NodeDef is complete.
    static constexpr void intrusive_inc_ref(NodeDef*);
    inline static void intrusive_dec_ref(NodeDef*);
  };
 28 | 
 29 |   using Node = intrusive_ptr<NodeDef>;
 30 | 
 31 |   struct TokenDef;
 32 |   struct Token;
 33 | 
 34 |   namespace detail
 35 |   {
 36 |     void register_token(const TokenDef& def);
 37 |   }
 38 | 
 39 |   struct TokenDef
 40 |   {
 41 |     using flag = uint32_t;
 42 |     const char* name;
 43 |     flag fl;
 44 | 
 45 |     // Hash id for this token.  This is used to determine the hash function for
 46 |     // the default map for the main rewrite loop.  This is not a general purpose
 47 |     // hash function.
 48 |     uint32_t default_map_id;
 49 |     static constexpr size_t DEFAULT_MAP_TABLE_SIZE{128};
 50 | 
 51 |     TokenDef(const char* name_, flag fl_ = 0) : name(name_), fl(fl_)
 52 |     {
 53 |       static std::atomic<uint32_t> next_id = 0;
 54 |       default_map_id = (next_id++ % DEFAULT_MAP_TABLE_SIZE) * sizeof(void*);
 55 | 
 56 |       detail::register_token(*this);
 57 |     }
 58 | 
 59 |     TokenDef() = delete;
 60 |     TokenDef(const TokenDef&) = delete;
 61 | 
 62 |     operator Node() const;
 63 | 
 64 |     bool has(TokenDef::flag f) const
 65 |     {
 66 |       return (fl & f) != 0;
 67 |     }
 68 |   };
 69 | 
 70 |   struct Token
 71 |   {
 72 |     const TokenDef* def;
 73 | 
 74 |     Token() : def(nullptr) {}
 75 |     Token(const TokenDef& def_) : def(&def_) {}
 76 | 
 77 |     operator Node() const;
 78 | 
 79 |     /**
 80 |      * Special hash for looking up in tables of size DEFAULT_MAP_TABLE_SIZE with
 81 |      * elements of size sizeof(void*).
 82 |      */
 83 |     uint32_t default_map_hash() const
 84 |     {
 85 |       return def->default_map_id / sizeof(void*);
 86 |     }
 87 | 
 88 |     bool operator&(TokenDef::flag f) const
 89 |     {
 90 |       return def->has(f);
 91 |     }
 92 | 
 93 |     bool operator==(const Token& that) const
 94 |     {
 95 |       return def == that.def;
 96 |     }
 97 | 
 98 |     bool operator!=(const Token& that) const
 99 |     {
100 |       return def != that.def;
101 |     }
102 | 
103 |     bool operator<(const Token& that) const
104 |     {
105 |       return def < that.def;
106 |     }
107 | 
108 |     bool operator>(const Token& that) const
109 |     {
110 |       return def > that.def;
111 |     }
112 | 
113 |     bool operator<=(const Token& that) const
114 |     {
115 |       return def <= that.def;
116 |     }
117 | 
118 |     bool operator>=(const Token& that) const
119 |     {
120 |       return def >= that.def;
121 |     }
122 | 
123 |     bool in(const std::initializer_list<Token>& list) const
124 |     {
125 |       return std::find(list.begin(), list.end(), *this) != list.end();
126 |     }
127 | 
128 |     bool in(const std::vector<Token>& list) const
129 |     {
130 |       return std::find(list.begin(), list.end(), *this) != list.end();
131 |     }
132 | 
133 |     const char* str() const
134 |     {
135 |       return def->name;
136 |     }
137 |   };
138 | 
  // Bit flags that can be combined with | and passed to the TokenDef
  // constructor; each constant occupies its own bit.
  namespace flag
  {
    constexpr TokenDef::flag none = 0;

    // Print the location when printing an AST node of this type.
    constexpr TokenDef::flag print = 1 << 0;

    // Include a symbol table in an AST node of this type.
    constexpr TokenDef::flag symtab = 1 << 1;

    // If an AST node of this type has a symbol table, definitions can only be
    // found from later in the same source file.
    constexpr TokenDef::flag defbeforeuse = 1 << 2;

    // If a definition of this type is in a symbol table, lookup doesn't
    // recurse into parent symbol tables.
    constexpr TokenDef::flag shadowing = 1 << 3;

    // If a definition of this type is in a symbol table, it can be found when
    // looking up.
    constexpr TokenDef::flag lookup = 1 << 4;

    // If a definition of this type is in a symbol table, it can be found when
    // looking down.
    constexpr TokenDef::flag lookdown = 1 << 5;

    // Used for AST nodes to represent internal Trieste features.  Rewriting
    // should not occur inside an internal node.
    constexpr TokenDef::flag internal = 1 << 6;
  }
169 | 
170 |   // Built-in grouping
171 |   inline const auto Top = TokenDef("top", flag::symtab);
172 |   inline const auto Directory = TokenDef("directory");
173 |   inline const auto File = TokenDef("file");
174 |   inline const auto Group = TokenDef("group");
175 | 
176 |   // Special tokens for effects
177 |   inline const auto Seq = TokenDef("seq", flag::internal);
178 |   inline const auto Lift = TokenDef("lift", flag::internal);
179 |   inline const auto NoChange = TokenDef("nochange", flag::internal);
180 |   inline const auto Reapply = TokenDef("reapply", flag::internal);
181 | 
182 |   // Special tokens for symbol tables
183 |   inline const auto Include = TokenDef("include", flag::internal);
184 | 
185 |   // Special tokens for error handling
186 |   inline const auto Invalid = TokenDef("invalid", flag::internal);
187 |   inline const auto Error = TokenDef("error", flag::internal);
188 |   inline const auto ErrorMsg = TokenDef("errormsg", flag::print | flag::internal);
189 |   inline const auto ErrorAst = TokenDef("errorast", flag::internal);
190 | 
191 |   // Special tokens for reified patterns
192 |   namespace reified
193 |   {
194 |     inline const auto Token = TokenDef("pattern::token", flag::print);
195 |     inline const auto Regex = TokenDef("pattern::regex", flag::print);
196 |     inline const auto Any = TokenDef("pattern::anything");
197 |     inline const auto TokenMatch = TokenDef("pattern::tokenmatch");
198 |     inline const auto RegexMatch = TokenDef("pattern::regexmatch");
199 |     inline const auto Cap = TokenDef("pattern::cap");
200 |     inline const auto Opt = TokenDef("pattern::opt");
201 |     inline const auto Rep = TokenDef("pattern::rep");
202 |     inline const auto Not = TokenDef("pattern::not");
203 |     inline const auto Choice = TokenDef("pattern::choice");
204 |     inline const auto Children = TokenDef("pattern::children");
205 |     inline const auto First = TokenDef("pattern::first");
206 |     inline const auto Last = TokenDef("pattern::last");
207 |     inline const auto Inside = TokenDef("pattern::inside");
208 |     inline const auto InsideStar = TokenDef("pattern::insidestar");
209 |     inline const auto Pred = TokenDef("pattern::pred");
210 |     inline const auto NegPred = TokenDef("pattern::negpred");
211 |     inline const auto Action = TokenDef("pattern::action");
212 |   }
213 | 
214 |   namespace detail
215 |   {
216 |     inline std::map<std::string_view, Token>& token_map()
217 |     {
218 |       static std::map<std::string_view, Token> global_map;
219 |       return global_map;
220 |     }
221 | 
222 |     inline void register_token(const TokenDef& def)
223 |     {
224 |       auto& map = token_map();
225 |       auto it = map.find(def.name);
226 |       if (it != map.end())
227 |         throw std::runtime_error(
228 |           "Duplicate token definition: " + std::string(def.name));
229 | 
230 |       Token t = def;
231 |       map[t.str()] = t;
232 |     }
233 | 
234 |     inline Token find_token(std::string_view str)
235 |     {
236 |       auto& map = token_map();
237 |       auto it = map.find(str);
238 | 
239 |       if (it != map.end())
240 |         return it->second;
241 | 
242 |       return Invalid;
243 |     }
244 |   }
245 | }
246 | 


--------------------------------------------------------------------------------
/samples/infix/reader.cc:
--------------------------------------------------------------------------------
  1 | #include "internal.h"
  2 | 
  3 | namespace
  4 | {
  5 |   using namespace trieste::wf::ops;
  6 |   using namespace infix;
  7 | 
  // | is used to create a Choice between all the elements
  // this indicates that literals can be an Int or a Float

  // A <<= B indicates that B is a child of A
  // ++ indicates that there are zero or more instances of the token

  // Tokens that may appear inside an Expression after the expressions pass:
  // the parser's token set with String, Paren and Print taken out and
  // Expression added. (wf_parse_tokens is defined outside this file.)
  inline const auto wf_expressions_tokens =
    (wf_parse_tokens - (String | Paren | Print)) | Expression;

  // Tree shape after the expressions pass.
  // clang-format off
  inline const auto wf_pass_expressions =
      (Top <<= Calculation)
    | (Calculation <<= (Assign | Output)++)
    // [Ident] here indicates that the Ident node is a symbol that should
    // be stored in the symbol table
    | (Assign <<= Ident * Expression)[Ident]
    | (Output <<= String * Expression)
    // [1] here indicates that there should be at least one token
    | (Expression <<= wf_expressions_tokens++[1])
    ;
  // clang-format on

  // Tree shape after the multiply_divide pass: as before, plus Multiply and
  // Divide nodes with two Expression children each.
  // clang-format off
  inline const auto wf_pass_multiply_divide =
    wf_pass_expressions
    | (Multiply <<= Expression * Expression)
    | (Divide <<= Expression * Expression)
    ;
  // clang-format on

  // Tree shape after the add_subtract pass: as before, plus Add and Subtract
  // nodes with two Expression children each.
  // clang-format off
  inline const auto wf_pass_add_subtract =
    wf_pass_multiply_divide
    | (Add <<= Expression * Expression)
    | (Subtract <<= Expression * Expression)
    ;
  // clang-format on

  // Operand tokens: the expression tokens without Expression itself.
  inline const auto wf_operands_tokens = wf_expressions_tokens - Expression;

  // Tree shape after the trim pass: an Expression holds exactly one operand
  // token.
  // clang-format off
  inline const auto wf_pass_trim =
    wf_pass_add_subtract
    | (Expression <<= wf_operands_tokens)
    ;
  // clang-format on

  // Tokens allowed in an Expression after check_refs: Ident is replaced by
  // Ref.
  inline const auto wf_check_refs_tokens = (wf_operands_tokens - Ident) | Ref;

  // Tree shape after the check_refs pass: identifiers inside expressions are
  // wrapped in Ref nodes.
  // clang-format off
  inline const auto wf_pass_check_refs =
    wf_pass_trim
    | (Expression <<= wf_check_refs_tokens)
    | (Ref <<= Ident)
    ;
  // clang-format on
 64 | 
  // First pass ("expressions"): turns the parsed File/Group/Equals/Paren
  // structure into Calculation, Assign, Output and Expression nodes and
  // reports malformed input as errors. Runs top-down and produces a tree
  // described by wf_pass_expressions.
  PassDef expressions()
  {
    return {
      "expressions",
      wf_pass_expressions,
      dir::topdown,
      {
        // In() indicates this is the root node of the pattern match.
        // What we return will replace the nodes we specify after the *.
        // The [] gives us a hook in the Match to use for referring to the
        // matched entity. Here we're saying that we want to create a
        // Calculation node and make all of the values in File (*_[File]) its
        // children.
        In(Top) * T(File)[File] >>
          [](Match& _) { return Calculation << *_[File]; },

        // This rule selects an Equals node with the right structure,
        // i.e. a single ident being assigned. We replace it with
        // an Assign node that has two children: the Ident and an
        // Expression, which will take the children of the Group.
        In(Calculation) *
            (T(Equals) << ((T(Group) << T(Ident)[Id]) * T(Group)[Rhs])) >>
          [](Match& _) { return Assign << _(Id) << (Expression << *_[Rhs]); },

        // This rule selects a Group that matches the Output pattern
        // of `print <string> <expression>`. In this case, Any++ indicates that
        // Rhs should contain all the remaining tokens in the group.
        // When used here, * means nodes that are children of the In()
        // node in the specified order. They can be anywhere inside
        // the In() child sequence.
        In(Calculation) *
            (T(Group) << (T(Print) * T(String)[Lhs] * Any++[Rhs])) >>
          [](Match& _) { return Output << _(Lhs) << (Expression << _[Rhs]); },

        // This rule unwraps Groups that are inside Parens, making them
        // Expression nodes.
        In(Expression) * (T(Paren) << T(Group)[Group]) >>
          [](Match& _) { return Expression << *_[Group]; },

        // errors

        // because rules are matched in order, this catches any
        // Paren nodes that had no children (because the rule above
        // will have handled those *with* children)
        T(Paren)[Paren] >>
          [](Match& _) { return err(_(Paren), "Empty paren"); },

        // Ditto for malformed equals nodes
        T(Equals)[Equals] >>
          [](Match& _) { return err(_(Equals), "Invalid assign"); },

        // Orphaned print node will catch bad output statements
        T(Print)[Print] >>
          [](Match& _) { return err(_(Print), "Invalid output"); },

        // Our WF definition allows this, so we need to handle it.
        T(Expression)[Rhs] << End >>
          [](Match& _) { return err(_(Rhs), "Empty expression"); },

        // Same with this.
        In(Expression) * T(String)[String] >>
          [](Match& _) {
            return err(_(String), "Expressions cannot contain strings");
          },

        // Any Group that none of the rules above consumed is reported as a
        // syntax error.
        T(Group)[Group] >>
          [](Match& _) { return err(_[Group], "syntax error"); },
      }};
  }
134 | 
  // Pattern for one operand of a binary operator, shared by the
  // multiply_divide and add_subtract passes: an Expression or Ident node, or
  // whatever `Number` matches (Number is defined outside this file;
  // presumably `/` expresses an alternative between the two patterns).
  inline const auto ExpressionArg = T(Expression, Ident) / Number;
136 | 
  // Second pass ("multiply_divide"): nests multiplication and division
  // operands under their operator node. Runs top-down and produces a tree
  // described by wf_pass_multiply_divide.
  PassDef multiply_divide()
  {
    return {
      "multiply_divide",
      wf_pass_multiply_divide,
      dir::topdown,
      {
        // Group multiply and divide operations together. This rule will
        // select any triplet of <arg> *|/ <arg> in an expression list and
        // replace it with a single <expr> node that has the triplet as
        // its children.
        In(Expression) *
            (ExpressionArg[Lhs] * (T(Multiply, Divide))[Op] *
             ExpressionArg[Rhs]) >>
          [](Match& _) {
            return Expression
              << (_(Op) << (Expression << _(Lhs)) << (Expression << _[Rhs]));
          },
        // An operator that still has no children at this point did not have
        // an operand on both sides, so it is reported as an error.
        (T(Multiply, Divide))[Op] << End >>
          [](Match& _) { return err(_(Op), "No arguments"); },
      }};
  }
159 | 
  // Third pass ("add_subtract"): same restructuring as multiply_divide, but
  // for Add and Subtract. Running it after multiply_divide means that
  // products and quotients have already been grouped when sums are built.
  // Runs top-down and produces a tree described by wf_pass_add_subtract.
  PassDef add_subtract()
  {
    return {
      "add_subtract",
      wf_pass_add_subtract,
      dir::topdown,
      {
        // Replace an <arg> +|- <arg> triplet in an expression list with a
        // single Expression that holds the operator node, whose children are
        // the two operands wrapped in Expression nodes.
        In(Expression) *
            (ExpressionArg[Lhs] * (T(Add, Subtract))[Op] *
             ExpressionArg[Rhs]) >>
          [](Match& _) {
            return Expression
              << (_(Op) << (Expression << _(Lhs)) << (Expression << _[Rhs]));
          },
        // An operator that still has no children did not have an operand on
        // both sides.
        (T(Add, Subtract))[Op] << End >>
          [](Match& _) { return err(_(Op), "No arguments"); },
      }};
  }
178 | 
  // Fourth pass ("trim"): collapses redundant Expression nesting and rejects
  // expressions that still contain more than one value. Runs top-down and
  // produces a tree described by wf_pass_trim.
  PassDef trim()
  {
    return {
      "trim",
      wf_pass_trim,
      dir::topdown,
      {
        // End is a special pattern which indicates that there
        // are no further nodes. So in this case we are matching
        // an Expression which has a single Expression as a
        // child.
        T(Expression) << (T(Expression)[Expression] * End) >>
          [](Match& _) { return _(Expression); },

        // An Expression with two or more children is an error; the error is
        // attached to the second child.
        T(Expression) << (Any * Any[Rhs]) >>
          [](Match& _) {
            return err(_(Rhs), "Only one value allowed per expression");
          },
      }};
  }
199 | 
  // Pattern matching an Int, Float, Ident or Expression node.
  // NOTE(review): nothing in this file refers to Arg, and it lives in an
  // anonymous namespace, so it looks unused - confirm before removing.
  inline const auto Arg = T(Int) / T(Float) / T(Ident) / T(Expression);
201 | 
  // Fifth pass ("check_refs"): resolves every identifier used inside an
  // expression. Identifiers with at least one definition are wrapped in a Ref
  // node; the others become "undefined" errors. Runs top-down and produces a
  // tree described by wf_pass_check_refs.
  PassDef check_refs()
  {
    return {
      "check_refs",
      wf_pass_check_refs,
      dir::topdown,
      {
        In(Expression) * T(Ident)[Id] >>
          [](Match& _) {
            auto id = _(Id); // the Node object for the identifier
            auto defs = id->lookup(); // a list of matching symbols
            if (defs.size() == 0)
            {
              // there are no symbols with this identifier
              return err(id, "undefined");
            }

            return Ref << id;
          },
      }};
  }
223 | }
224 | 
namespace infix
{
  // Builds the Reader for the infix language: the language name "infix", the
  // five rewrite passes in the order they must run (expressions,
  // multiply_divide, add_subtract, trim, check_refs) and the parser returned
  // by parser().
  Reader reader()
  {
    return {
      "infix",
      {expressions(), multiply_divide(), add_subtract(), trim(), check_refs()},
      parser(),
    };
  }
}
236 | 


--------------------------------------------------------------------------------
/include/trieste/source.h:
--------------------------------------------------------------------------------
  1 | // Copyright Microsoft and Project Verona Contributors.
  2 | // SPDX-License-Identifier: MIT
  3 | #pragma once
  4 | 
  5 | #include "intrusive_ptr.h"
  6 | 
  7 | #include <algorithm>
  8 | #include <cassert>
  9 | #include <filesystem>
 10 | #include <fstream>
 11 | #include <iterator>
 12 | #include <sstream>
 13 | #include <string>
 14 | #include <vector>
 15 | 
 16 | namespace trieste
 17 | {
 18 |   class SourceDef;
 19 |   struct Location;
 20 | 
 21 |   using Source = intrusive_ptr<SourceDef>;
 22 | 
 23 |   class SourceDef final : public intrusive_refcounted<SourceDef>
 24 |   {
 25 |   private:
 26 |     std::string origin_;
 27 |     std::string contents;
 28 |     std::vector<size_t> lines;
 29 | 
 30 |   public:
 31 |     static Source load(const std::filesystem::path& file)
 32 |     {
 33 |       std::ifstream f(file, std::ios::binary | std::ios::in | std::ios::ate);
 34 | 
 35 |       if (!f)
 36 |         return {};
 37 | 
 38 |       auto size = f.tellg();
 39 |       f.seekg(0, std::ios::beg);
 40 | 
 41 |       auto source = Source::make();
 42 |       source->origin_ = std::filesystem::relative(file).string();
 43 |       source->contents.resize(static_cast<std::size_t>(size));
 44 |       f.read(&source->contents[0], size);
 45 | 
 46 |       if (!f)
 47 |         return {};
 48 | 
 49 |       source->find_lines();
 50 |       return source;
 51 |     }
 52 | 
 53 |     static Source synthetic(const std::string& contents, const std::string& origin="")
 54 |     {
 55 |       auto source = Source::make();
 56 |       source->contents = contents;
 57 |       source->origin_ = origin;
 58 |       source->find_lines();
 59 |       return source;
 60 |     }
 61 | 
 62 |     const std::string& origin() const
 63 |     {
 64 |       return origin_;
 65 |     }
 66 | 
 67 |     std::string_view view() const
 68 |     {
 69 |       return std::string_view(contents);
 70 |     }
 71 | 
 72 |     std::pair<size_t, size_t> linecol(size_t pos) const
 73 |     {
 74 |       // Lines and columns are 0-indexed.
 75 |       auto it = std::lower_bound(lines.begin(), lines.end(), pos);
 76 | 
 77 |       auto line = it - lines.begin();
 78 |       auto col = pos;
 79 | 
 80 |       if (it != lines.begin())
 81 |         col -= *(it - 1) + 1;
 82 | 
 83 |       return {line, col};
 84 |     }
 85 | 
 86 |     std::pair<size_t, size_t> linepos(size_t line) const
 87 |     {
 88 |       // Lines are 0-indexed.
 89 |       if (line > lines.size())
 90 |         return {std::string::npos, 0};
 91 | 
 92 |       size_t start = 0;
 93 |       auto end = contents.size();
 94 | 
 95 |       if (line > 0)
 96 |         start = lines[line - 1] + 1;
 97 | 
 98 |       if (line < lines.size())
 99 |         end = lines[line];
100 | 
101 |       return {start, end - start};
102 |     }
103 | 
104 |   private:
105 |     // Semantics note:
106 |     // The code here only looks for \n and is not intended to be
107 |     // platform-sensitive. Effectively, sources operate in binary mode and leave
108 |     // encoding issues to the language implementation. There are however some
109 |     // cosmetic fixes in error printing, such as in Location::str(), which
110 |     // ensure that control characters don't leak into Trieste's output in that
111 |     // case.
112 |     void find_lines()
113 |     {
114 |       // Find the lines.
115 |       auto pos = contents.find('\n');
116 | 
117 |       while (pos != std::string::npos)
118 |       {
119 |         lines.push_back(pos);
120 |         pos = contents.find('\n', pos + 1);
121 |       }
122 |     }
123 |   };
124 | 
125 |   struct Location
126 |   {
127 |     Source source;
128 |     size_t pos;
129 |     size_t len;
130 | 
131 |     Location() = default;
132 | 
133 |     Location(Source source_, size_t pos_, size_t len_)
134 |     : source(source_), pos(pos_), len(len_)
135 |     {}
136 | 
137 |     Location(const std::string& s)
138 |     : source(SourceDef::synthetic(s)), pos(0), len(s.size())
139 |     {}
140 | 
141 |     std::string_view view() const
142 |     {
143 |       if (!source)
144 |         return {};
145 | 
146 |       return source->view().substr(pos, len);
147 |     }
148 | 
149 |     std::string origin_linecol() const
150 |     {
151 |       std::stringstream ss;
152 | 
153 |       if (source && !source->origin().empty())
154 |       {
155 |         auto [line, col] = linecol();
156 |         ss << source->origin() << ":" << (line + 1) << ":" << (col + 1);
157 |       }
158 | 
159 |       return ss.str();
160 |     }
161 | 
162 |     std::string str() const
163 |     {
164 |       if (!source)
165 |         return {};
166 | 
167 |       std::stringstream ss;
168 |       auto write_chars_skipping_r = [&ss](const std::string_view& str) -> void {
169 |         for (char ch : str)
170 |         {
171 |           if (ch != '\r')
172 |           {
173 |             ss << ch;
174 |           }
175 |         }
176 |       };
177 |       auto write_indexed_skipping_r =
178 |         [&ss](const std::string_view& str, auto fn) -> void {
179 |         size_t idx = 0;
180 |         for (char ch : str)
181 |         {
182 |           if (ch != '\r')
183 |           {
184 |             ss << fn(idx);
185 |           }
186 |           ++idx;
187 |         }
188 |       };
189 | 
190 |       auto [line, col] = linecol();
191 |       auto [linepos, linelen] = source->linepos(line);
192 | 
193 |       if (view().find_first_of('\n') != std::string::npos)
194 |       {
195 |         auto line_view_first = source->view().substr(linepos, linelen);
196 |         size_t col_last;
197 |         std::string_view interim_view;
198 |         std::string_view line_view_last;
199 |         {
200 |           auto [line2, col2] = source->linecol(pos + len);
201 |           auto [linepos2, linelen2] = source->linepos(line2);
202 |           line_view_last = source->view().substr(linepos2, linelen2);
203 |           col_last = col2;
204 | 
205 |           // Find the lines in between first and last to insert, if there are
206 |           // any such lines. If the lines are adjacent, this creates a 1 char
207 |           // line view with the new line between the two.
208 |           size_t interim_pos = linepos + linelen;
209 |           interim_view =
210 |             source->view().substr(interim_pos, linepos2 - interim_pos);
211 |         }
212 | 
213 |         write_indexed_skipping_r(line_view_first, [&ccol = col](size_t idx) {
214 |           return idx < ccol ? ' ' : '~';
215 |         });
216 |         ss << std::endl;
217 |         write_chars_skipping_r(line_view_first);
218 |         write_chars_skipping_r(interim_view);
219 |         write_chars_skipping_r(line_view_last);
220 |         ss << std::endl;
221 |         write_indexed_skipping_r(
222 |           line_view_last.substr(0, col_last), [&](size_t) { return '~'; });
223 |         ss << std::endl;
224 |       }
225 |       else
226 |       {
227 |         auto line_view = source->view().substr(linepos, linelen);
228 |         write_chars_skipping_r(line_view);
229 |         ss << std::endl;
230 | 
231 |         assert(pos >= linepos);
232 |         write_indexed_skipping_r(
233 |           line_view.substr(0, pos - linepos + len),
234 |           [&ccol = col](size_t idx) { return idx < ccol ? ' ' : '~'; });
235 |         ss << std::endl;
236 |       }
237 | 
238 |       return ss.str();
239 |     }
240 | 
241 |     std::pair<size_t, size_t> linecol() const
242 |     {
243 |       if (!source)
244 |         return {0, 0};
245 | 
246 |       return source->linecol(pos);
247 |     }
248 | 
249 |     Location operator*(const Location& that) const
250 |     {
251 |       if (source != that.source)
252 |         return *this;
253 | 
254 |       auto lo = std::min(pos, that.pos);
255 |       auto hi = std::max(pos + len, that.pos + that.len);
256 |       return {source, lo, hi - lo};
257 |     }
258 | 
259 |     Location& operator*=(const Location& that)
260 |     {
261 |       *this = *this * that;
262 |       return *this;
263 |     }
264 | 
265 |     bool operator==(const Location& that) const
266 |     {
267 |       return view() == that.view();
268 |     }
269 | 
270 |     bool operator!=(const Location& that) const
271 |     {
272 |       return !(*this == that);
273 |     }
274 | 
275 |     bool operator<(const Location& that) const
276 |     {
277 |       return view() < that.view();
278 |     }
279 | 
280 |     bool operator<=(const Location& that) const
281 |     {
282 |       return (*this < that) || (*this == that);
283 |     }
284 | 
285 |     bool operator>(const Location& that) const
286 |     {
287 |       return !(*this <= that);
288 |     }
289 | 
290 |     bool operator>=(const Location& that) const
291 |     {
292 |       return !(*this < that);
293 |     }
294 |   };
295 | }
296 | 


--------------------------------------------------------------------------------
/include/trieste/writer.h:
--------------------------------------------------------------------------------
  1 | // Copyright Microsoft and Project Verona Contributors.
  2 | // SPDX-License-Identifier: MIT
  3 | #pragma once
  4 | 
  5 | #include "passes.h"
  6 | #include "trieste/intrusive_ptr.h"
  7 | #include "trieste/wf.h"
  8 | 
  9 | #include <filesystem>
 10 | 
 11 | namespace trieste
 12 | {
 13 |   class DestinationDef;
 14 |   using Destination = intrusive_ptr<DestinationDef>;
 15 | 
 16 |   class DestinationDef : public intrusive_refcounted<DestinationDef>
 17 |   {
 18 |   private:
 19 |     enum class Mode
 20 |     {
 21 |       // Files will be written to the file system
 22 |       FileSystem,
 23 |       // Files will be output directly to console
 24 |       Console,
 25 |       // Files are stored in memory and accessible via dst.files()
 26 |       Synthetic
 27 |     };
 28 | 
 29 |     Mode mode_;
 30 |     std::ofstream fstream_;
 31 |     std::ostringstream sstream_;
 32 |     std::map<std::string, std::string> files_;
 33 |     std::filesystem::path path_;
 34 |     bool is_open_;
 35 | 
 36 |   public:
 37 |     ~DestinationDef()
 38 |     {
 39 |       close();
 40 |     }
 41 | 
 42 |     std::ostream& stream()
 43 |     {
 44 |       switch (mode_)
 45 |       {
 46 |         case Mode::FileSystem:
 47 |           return fstream_;
 48 | 
 49 |         case Mode::Console:
 50 |           return std::cout;
 51 | 
 52 |         case Mode::Synthetic:
 53 |           return sstream_;
 54 | 
 55 |         default:
 56 |           throw std::runtime_error("Invalid destination mode");
 57 |       }
 58 |     }
 59 | 
 60 |     bool open(const std::filesystem::path& path)
 61 |     {
 62 |       close();
 63 |       path_ = path_ / path;
 64 |       switch (mode_)
 65 |       {
 66 |         case Mode::FileSystem:
 67 |           if (!path_.parent_path().empty())
 68 |           {
 69 |             std::filesystem::create_directories(path_.parent_path());
 70 |           }
 71 |           fstream_.open(path_);
 72 |           return is_open_ = fstream_.is_open();
 73 | 
 74 |         case Mode::Console:
 75 |           std::cout << "OPEN " << path_ << std::endl << std::endl;
 76 |           return is_open_ = true;
 77 | 
 78 |         case Mode::Synthetic:
 79 |           return is_open_ = true;
 80 | 
 81 |         default:
 82 |           throw std::runtime_error("Invalid destination mode");
 83 |       }
 84 |     }
 85 | 
 86 |     void close()
 87 |     {
 88 |       if (!is_open_)
 89 |       {
 90 |         return;
 91 |       }
 92 | 
 93 |       std::string contents;
 94 |       switch (mode_)
 95 |       {
 96 |         case Mode::FileSystem:
 97 |           if (fstream_.is_open())
 98 |           {
 99 |             fstream_.close();
100 |           }
101 |           break;
102 | 
103 |         case Mode::Console:
104 |           std::cout << std::endl << "CLOSE " << path_ << std::endl;
105 |           break;
106 | 
107 |         case Mode::Synthetic:
108 |           contents = sstream_.str();
109 |           if (!contents.empty())
110 |           {
111 |             files_[path_.string()] = contents;
112 |             sstream_ = std::ostringstream();
113 |           }
114 |           break;
115 |       }
116 | 
117 |       path_ = path_.parent_path();
118 |       is_open_ = false;
119 |     }
120 | 
121 |     const std::map<std::string, std::string>& files() const
122 |     {
123 |       return files_;
124 |     }
125 | 
126 |     const std::string& file(const std::filesystem::path& path) const
127 |     {
128 |       return files_.at(path.string());
129 |     }
130 | 
131 |     static Destination dir(const std::filesystem::path& path)
132 |     {
133 |       auto d = Destination::make();
134 |       d->mode_ = Mode::FileSystem;
135 |       d->path_ = path;
136 |       return d;
137 |     }
138 | 
139 |     static Destination console()
140 |     {
141 |       auto d = Destination::make();
142 |       d->mode_ = Mode::Console;
143 |       d->path_ = ".";
144 |       return d;
145 |     }
146 | 
147 |     static Destination synthetic()
148 |     {
149 |       auto d = Destination::make();
150 |       d->mode_ = Mode::Synthetic;
151 |       d->path_ = ".";
152 |       return d;
153 |     }
154 |   };
155 | 
  // Callback used by Writer::write to emit one file: it receives the
  // destination stream and the Contents node of the File being written.
  // NOTE(review): the bool result is presumably a success flag, but nothing in
  // this header inspects it - confirm the intended contract.
  using WriteFile = std::function<bool(std::ostream&, Node)>;
  using namespace wf::ops;

  // Tokens for the file tree handled by Writer. Path is declared with
  // flag::print; FileSeq and Contents use the default flags.
  inline const auto Path = TokenDef("path", flag::print);
  inline const auto FileSeq = TokenDef("fileseq");
  inline const auto Contents = TokenDef("contents");

  // Well-formedness definition for the file tree that Writer::write walks.
  // Writer::write relies on Directory having a FileSeq child and on File
  // having Path and Contents children.
  // clang-format off
  inline const auto wf_writer =
    (Top <<= Directory | File)
    | (Directory <<= Path * FileSeq)
    | (FileSeq <<= (Directory | File)++)
    | (File <<= Path * Contents)
    ;
170 |   // clang-format on
171 | 
172 |   class Writer
173 |   {
174 |   private:
175 |     std::string language_name_;
176 |     std::vector<Pass> passes_;
177 |     const wf::Wellformed* wf_;
178 |     WriteFile write_file_;
179 |     Destination destination_;
180 |     bool debug_enabled_;
181 |     bool wf_check_enabled_;
182 |     std::filesystem::path debug_path_;
183 | 
184 |   public:
185 |     Writer(
186 |       const std::string& language_name,
187 |       const std::vector<Pass>& passes,
188 |       const wf::Wellformed& input_wf,
189 |       WriteFile write_file)
190 |     : language_name_(language_name),
191 |       passes_(passes),
192 |       wf_(&input_wf),
193 |       write_file_(write_file),
194 |       debug_enabled_(false),
195 |       wf_check_enabled_(true),
196 |       debug_path_(".")
197 |     {
198 |       console();
199 |     }
200 | 
201 |     ProcessResult write(Node ast)
202 |     {
203 |       PassRange pass_range(
204 |         passes_.begin(), passes_.end(), *wf_, language_name_);
205 | 
206 |       logging::Info summary;
207 |       std::filesystem::path debug_path;
208 |       if (debug_enabled_)
209 |       {
210 |         debug_path = debug_path_;
211 |       }
212 | 
213 |       summary << "---------" << std::endl;
214 |       auto result =
215 |         Process(pass_range)
216 |           .set_check_well_formed(wf_check_enabled_)
217 |           .set_default_pass_complete(summary, language_name_, debug_path)
218 |           .run(ast);
219 |       summary << "---------" << std::endl;
220 | 
221 |       if (!result.ok)
222 |       {
223 |         return result;
224 |       }
225 | 
226 |       Destination dest = destination_;
227 |       WFContext context({wf_, &wf_writer});
228 | 
229 |       Nodes error_nodes;
230 |       std::vector<Node> stack;
231 |       stack.push_back(ast);
232 |       while (!stack.empty())
233 |       {
234 |         Node current = stack.back();
235 |         stack.pop_back();
236 |         if (current == Directory)
237 |         {
238 |           auto files = current / FileSeq;
239 |           for (auto& file : *files)
240 |           {
241 |             stack.push_back(file);
242 |           }
243 |         }
244 |         else if (current == File)
245 |         {
246 |           try
247 |           {
248 |             dest->open((current / Path)->location().view());
249 |             write_file_(dest->stream(), current / Contents);
250 |             dest->close();
251 |           }
252 |           catch (std::exception& e)
253 |           {
254 |             error_nodes.push_back(
255 |               Error << (ErrorMsg ^ e.what()) << (ErrorAst << current->clone()));
256 |           }
257 |         }
258 |         else if (current == Top)
259 |         {
260 |           stack.insert(stack.end(), current->begin(), current->end());
261 |         }
262 |       }
263 | 
264 |       if (!error_nodes.empty())
265 |       {
266 |         result.ok = false;
267 |         result.errors = error_nodes;
268 |       }
269 | 
270 |       return result;
271 |     }
272 | 
273 |     Writer& debug_enabled(bool value)
274 |     {
275 |       debug_enabled_ = value;
276 |       return *this;
277 |     }
278 | 
279 |     bool debug_enabled() const
280 |     {
281 |       return debug_enabled_;
282 |     }
283 | 
284 |     Writer& wf_check_enabled(bool value)
285 |     {
286 |       wf_check_enabled_ = value;
287 |       return *this;
288 |     }
289 | 
290 |     bool wf_check_enabled() const
291 |     {
292 |       return wf_check_enabled_;
293 |     }
294 | 
295 |     Writer& debug_path(const std::filesystem::path& path)
296 |     {
297 |       debug_path_ = path;
298 |       return *this;
299 |     }
300 | 
301 |     const std::filesystem::path& debug_path() const
302 |     {
303 |       return debug_path_;
304 |     }
305 | 
306 |     Writer& destination(const Destination& destination)
307 |     {
308 |       destination_ = destination;
309 |       return *this;
310 |     }
311 | 
312 |     Writer& dir(const std::filesystem::path& path)
313 |     {
314 |       destination_ = DestinationDef::dir(path);
315 |       return *this;
316 |     }
317 | 
318 |     Writer& console()
319 |     {
320 |       destination_ = DestinationDef::console();
321 |       return *this;
322 |     }
323 | 
324 |     Writer& synthetic()
325 |     {
326 |       destination_ = DestinationDef::synthetic();
327 |       return *this;
328 |     }
329 | 
330 |     Destination destination() const
331 |     {
332 |       return destination_;
333 |     }
334 | 
335 |     const wf::Wellformed& input_wf() const
336 |     {
337 |       return *wf_;
338 |     }
339 | 
340 |     const std::vector<Pass>& passes() const
341 |     {
342 |       return passes_;
343 |     }
344 |   };
345 | }
346 | 


--------------------------------------------------------------------------------
/include/trieste/passes.h:
--------------------------------------------------------------------------------
  1 | #pragma once
  2 | 
  3 | #include "pass.h"
  4 | #include "wf.h"
  5 | 
  6 | #include <chrono>
  7 | #include <filesystem>
  8 | #include <fstream>
  9 | 
 10 | namespace trieste
 11 | {
 12 |   template<typename PassIterator>
 13 |   class PassRange
 14 |   {
 15 |     PassIterator start;
 16 |     PassIterator end;
 17 |     // Well-formed condition for entry into this Range.
 18 |     const wf::Wellformed* wf;
 19 |     std::string entry_name;
 20 | 
 21 |   public:
 22 |     PassRange(
 23 |       PassIterator start_,
 24 |       PassIterator end_,
 25 |       const wf::Wellformed& wf_,
 26 |       std::string entry_name_)
 27 |     : start(start_), end(end_), wf(&wf_), entry_name(entry_name_)
 28 |     {}
 29 | 
 30 |     template<typename Range>
 31 |     PassRange(Range& range, const wf::Wellformed& wf_, std::string entry_name_)
 32 |     : start(range.begin()), end(range.end()), wf(&wf_), entry_name(entry_name_)
 33 |     {}
 34 | 
 35 |     template<typename StringLike>
 36 |     bool move_start(StringLike name)
 37 |     {
 38 |       auto it = std::find_if(
 39 |         start, end, [&](auto& pass) { return pass->name() == name; });
 40 |       if (it == end)
 41 |         return false;
 42 | 
 43 |       wf = &((*it)->wf());
 44 |       entry_name = (*it)->name();
 45 |       start = it;
 46 |       return true;
 47 |     }
 48 | 
 49 |     template<typename StringLike>
 50 |     bool move_end(StringLike name)
 51 |     {
 52 |       auto it = std::find_if(
 53 |         start, end, [&](auto& pass) { return pass->name() == name; });
 54 |       if (it == end)
 55 |         return false;
 56 |       end = ++it;
 57 |       return true;
 58 |     }
 59 | 
 60 |     void disable()
 61 |     {
 62 |       start = end;
 63 |     }
 64 | 
 65 |     Pass& operator()()
 66 |     {
 67 |       return *start;
 68 |     }
 69 | 
 70 |     void operator++()
 71 |     {
 72 |       wf = &((*start)->wf());
 73 |       entry_name = (*start)->name();
 74 |       start++;
 75 |     }
 76 | 
 77 |     bool has_next()
 78 |     {
 79 |       return start != end;
 80 |     }
 81 | 
 82 |     const wf::Wellformed& input_wf() const
 83 |     {
 84 |       return *wf;
 85 |     }
 86 | 
 87 |     Pass& last_pass()
 88 |     {
 89 |       for (auto it = start; it != end; ++it)
 90 |       {
 91 |         if (it + 1 == end)
 92 |           return *it;
 93 |       }
 94 |       throw std::runtime_error("No passes in range");
 95 |     }
 96 | 
 97 |     std::string entry_pass_name()
 98 |     {
 99 |       return entry_name;
100 |     }
101 |   };
102 | 
  // Deduction guide required for the iterator-pair constructor of PassRange:
  // lets `PassRange(begin, end, wf, name)` be written without spelling out
  // the iterator type.
  template<typename PassIterator>
  PassRange(PassIterator, PassIterator, const wf::Wellformed&, std::string)
    -> PassRange<PassIterator>;

  // Deduction guide required for the range constructor of PassRange: the
  // iterator type is taken from the range's nested `iterator` type.
  template<typename Range>
  PassRange(Range, const wf::Wellformed&, std::string)
    -> PassRange<typename Range::iterator>;
112 | 
113 |   struct PassStatistics
114 |   {
115 |     size_t count;
116 |     size_t changes;
117 |     std::chrono::microseconds duration;
118 |   };
119 | 
120 |   struct ProcessResult
121 |   {
122 |     bool ok;
123 |     std::string last_pass;
124 |     Node ast;
125 |     Nodes errors;
126 |     size_t total_changes = 0;
127 | 
128 |     void print_errors(logging::Log& err) const
129 |     {
130 |       logging::Sep sep{"----------------"};
131 |       err << "Errors:";
132 | 
133 |       size_t count = 0;
134 | 
135 |       for (auto& error : errors)
136 |       {
137 |         err << sep << std::endl;
138 |         for (auto& child : *error)
139 |         {
140 |           if (child->type() == ErrorMsg)
141 |             err << child->location().view() << std::endl;
142 |           else
143 |           {
144 |             err << "-- " << child->location().origin_linecol() << std::endl
145 |                 << child->location().str() << std::endl;
146 |           }
147 |         }
148 |         if (count++ > 20)
149 |         {
150 |           err << "Too many errors, stopping here" << std::endl;
151 |           break;
152 |         }
153 |       }
154 |       err << "Pass " << last_pass << " failed with " << errors.size()
155 |           << (count > 1 ? " errors!" : " error!") << std::endl;
156 |     }
157 |   };
158 | 
159 |   /**
160 |    * @brief Process is used to run a collection of rewrite passes on an Ast.
161 |    * It provides a collection of hooks to produce output.
162 |    */
163 |   template<typename PassIterator>
164 |   class Process
165 |   {
166 |     PassRange<PassIterator> pass_range;
167 | 
168 |     bool check_well_formed{true};
169 | 
170 |     std::function<bool(Node&, std::string, size_t index, PassStatistics&)>
171 |       pass_complete;
172 | 
173 |     std::function<Nodes(Nodes&, std::string)> error_pass;
174 | 
175 |   public:
176 |     Process(const PassRange<PassIterator>& passes) : pass_range(passes) {}
177 | 
178 |     /**
179 |      * @brief After each pass the supplied function is called with the current
180 |      * AST and details of the pass that has just completed.
181 |      */
182 |     Process& set_pass_complete(
183 |       std::function<bool(Node&, std::string, size_t, PassStatistics&)> f)
184 |     {
185 |       pass_complete = f;
186 |       return *this;
187 |     }
188 | 
189 |     Process& set_default_pass_complete(
190 |       logging::Log& summary,
191 |       const std::string& language_name = "",
192 |       std::filesystem::path output_directory = {})
193 |     {
194 |       pass_complete = [output_directory, language_name, &summary](
195 |                         Node& ast,
196 |                         std::string pass_name,
197 |                         size_t index,
198 |                         PassStatistics& stats) {
199 |         auto [count, changes, duration] = stats;
200 |         std::string delim{"\t"};
201 |         if (index == 0)
202 |         {
203 |           summary << "Pass" << delim << "Iterations" << delim << "Changes"
204 |                   << delim << "Time (us)" << std::endl;
205 |         }
206 |         else
207 |         {
208 |           summary << pass_name << delim << count << delim << changes << delim
209 |                   << static_cast<size_t>(duration.count()) << std::endl;
210 |         }
211 | 
212 |         if (output_directory.empty())
213 |           return true;
214 | 
215 |         // Check if output_directory exists, and if not create it.
216 |         if (!std::filesystem::exists(output_directory))
217 |         {
218 |           if (!std::filesystem::create_directories(output_directory))
219 |           {
220 |             logging::Error()
221 |               << "Could not create output directory " << output_directory;
222 |             return false;
223 |           }
224 |         }
225 | 
226 |         std::filesystem::path output;
227 |         if (index < 10)
228 |         {
229 |           output = output_directory /
230 |             ("0" + std::to_string(index) + "_" + pass_name + ".trieste");
231 |         }
232 |         else
233 |         {
234 |           output = output_directory /
235 |             (std::to_string(index) + "_" + pass_name + ".trieste");
236 |         }
237 | 
238 |         std::ofstream f(output, std::ios::binary | std::ios::out);
239 | 
240 |         if (!f)
241 |         {
242 |           logging::Error() << "Could not open " << output << " for writing.";
243 |           return false;
244 |         }
245 | 
246 |         // Write the AST to the output file.
247 |         f << language_name << std::endl << pass_name << std::endl << ast;
248 |         return true;
249 |       };
250 | 
251 |       return *this;
252 |     }
253 | 
254 |     /**
255 |      * @brief Specified is well-formedness should be checked between passes.
256 |      */
257 |     Process& set_check_well_formed(bool b)
258 |     {
259 |       check_well_formed = b;
260 |       return *this;
261 |     }
262 | 
263 |     bool validate(Node ast, Nodes& errors)
264 |     {
265 |       auto wf = pass_range.input_wf();
266 |       auto ok = bool(ast);
267 | 
268 |       ok = ok && wf.build_st(ast);
269 | 
270 |       if (ast)
271 |         ast->get_errors(errors);
272 |       ok = ok && errors.empty();
273 | 
274 |       ok = ok && (!check_well_formed || wf.check(ast));
275 | 
276 |       return ok;
277 |     }
278 | 
279 |     /**
280 |      * @brief Run the supplied passes on the Ast.
281 |      *
282 |      * Returns the rewritten Ast, or an empty Node if the process failed.
283 |      */
284 |     ProcessResult run(Node& ast)
285 |     {
286 |       size_t index = 1;
287 |       size_t total_changes = 0;
288 | 
289 |       WFContext context(pass_range.input_wf());
290 | 
291 |       Nodes errors;
292 | 
293 |       // Check ast is well-formed before starting.
294 |       auto ok = validate(ast, errors);
295 | 
296 |       PassStatistics stats;
297 |       std::string last_pass = pass_range.entry_pass_name();
298 |       ok = pass_complete(ast, pass_range.entry_pass_name(), 0, stats) && ok;
299 | 
300 |       for (; ok && pass_range.has_next(); index++)
301 |       {
302 |         logging::Debug() << "Starting pass: \"" << pass_range()->name() << "\"";
303 | 
304 |         auto now = std::chrono::high_resolution_clock::now();
305 |         auto& pass = pass_range();
306 |         context.push_back(pass->wf());
307 | 
308 |         auto [new_ast, count, changes] = pass->run(ast);
309 |         total_changes += changes;
310 |         ast = new_ast;
311 |         context.pop_front();
312 | 
313 |         ++pass_range;
314 | 
315 |         ok = validate(ast, errors);
316 | 
317 |         auto then = std::chrono::high_resolution_clock::now();
318 |         stats = {
319 |           count,
320 |           changes,
321 |           std::chrono::duration_cast<std::chrono::microseconds>(then - now)};
322 | 
323 |         ok = pass_complete(ast, pass->name(), index, stats) && ok;
324 | 
325 |         last_pass = pass->name();
326 |       }
327 | 
328 |       return {ok, last_pass, ast, errors, total_changes};
329 |     }
330 |   };
331 | } // namespace trieste
332 | 


--------------------------------------------------------------------------------