├── .github └── workflows │ └── c-cpp.yml ├── .gitignore ├── .vscode ├── launch.json └── tasks.json ├── LICENSE ├── Matcheroni.code-workspace ├── README.md ├── build.hancho ├── data ├── Cargo.toml ├── canada.json ├── citm_catalog.json ├── conformance │ ├── i_number_double_huge_neg_exp.json │ ├── i_number_huge_exp.json │ ├── i_number_neg_int_huge_exp.json │ ├── i_number_pos_double_huge_exp.json │ ├── i_number_real_neg_overflow.json │ ├── i_number_real_pos_overflow.json │ ├── i_number_real_underflow.json │ ├── i_number_too_big_neg_int.json │ ├── i_number_too_big_pos_int.json │ ├── i_number_very_big_negative_int.json │ ├── i_object_key_lone_2nd_surrogate.json │ ├── i_string_1st_surrogate_but_2nd_missing.json │ ├── i_string_1st_valid_surrogate_2nd_invalid.json │ ├── i_string_UTF-16LE_with_BOM.json │ ├── i_string_UTF-8_invalid_sequence.json │ ├── i_string_UTF8_surrogate_U+D800.json │ ├── i_string_incomplete_surrogate_and_escape_valid.json │ ├── i_string_incomplete_surrogate_pair.json │ ├── i_string_incomplete_surrogates_escape_valid.json │ ├── i_string_invalid_lonely_surrogate.json │ ├── i_string_invalid_surrogate.json │ ├── i_string_invalid_utf-8.json │ ├── i_string_inverted_surrogates_U+1D11E.json │ ├── i_string_iso_latin_1.json │ ├── i_string_lone_second_surrogate.json │ ├── i_string_lone_utf8_continuation_byte.json │ ├── i_string_not_in_unicode_range.json │ ├── i_string_overlong_sequence_2_bytes.json │ ├── i_string_overlong_sequence_6_bytes.json │ ├── i_string_overlong_sequence_6_bytes_null.json │ ├── i_string_truncated-utf-8.json │ ├── i_string_utf16BE_no_BOM.json │ ├── i_string_utf16LE_no_BOM.json │ ├── i_structure_500_nested_arrays.json │ ├── i_structure_UTF-8_BOM_empty_object.json │ ├── n_array_1_true_without_comma.json │ ├── n_array_a_invalid_utf8.json │ ├── n_array_colon_instead_of_comma.json │ ├── n_array_comma_after_close.json │ ├── n_array_comma_and_number.json │ ├── n_array_double_comma.json │ ├── n_array_double_extra_comma.json │ ├── 
n_array_extra_close.json │ ├── n_array_extra_comma.json │ ├── n_array_incomplete.json │ ├── n_array_incomplete_invalid_value.json │ ├── n_array_inner_array_no_comma.json │ ├── n_array_invalid_utf8.json │ ├── n_array_items_separated_by_semicolon.json │ ├── n_array_just_comma.json │ ├── n_array_just_minus.json │ ├── n_array_missing_value.json │ ├── n_array_newlines_unclosed.json │ ├── n_array_number_and_comma.json │ ├── n_array_number_and_several_commas.json │ ├── n_array_spaces_vertical_tab_formfeed.json │ ├── n_array_star_inside.json │ ├── n_array_unclosed.json │ ├── n_array_unclosed_trailing_comma.json │ ├── n_array_unclosed_with_new_lines.json │ ├── n_array_unclosed_with_object_inside.json │ ├── n_incomplete_false.json │ ├── n_incomplete_null.json │ ├── n_incomplete_true.json │ ├── n_multidigit_number_then_00.json │ ├── n_number_++.json │ ├── n_number_+1.json │ ├── n_number_+Inf.json │ ├── n_number_-01.json │ ├── n_number_-1.0..json │ ├── n_number_-2..json │ ├── n_number_-NaN.json │ ├── n_number_.-1.json │ ├── n_number_.2e-3.json │ ├── n_number_0.1.2.json │ ├── n_number_0.3e+.json │ ├── n_number_0.3e.json │ ├── n_number_0.e1.json │ ├── n_number_0_capital_E+.json │ ├── n_number_0_capital_E.json │ ├── n_number_0e+.json │ ├── n_number_0e.json │ ├── n_number_1.0e+.json │ ├── n_number_1.0e-.json │ ├── n_number_1.0e.json │ ├── n_number_1_000.json │ ├── n_number_1eE2.json │ ├── n_number_2.e+3.json │ ├── n_number_2.e-3.json │ ├── n_number_2.e3.json │ ├── n_number_9.e+.json │ ├── n_number_Inf.json │ ├── n_number_NaN.json │ ├── n_number_U+FF11_fullwidth_digit_one.json │ ├── n_number_expression.json │ ├── n_number_hex_1_digit.json │ ├── n_number_hex_2_digits.json │ ├── n_number_infinity.json │ ├── n_number_invalid+-.json │ ├── n_number_invalid-negative-real.json │ ├── n_number_invalid-utf-8-in-bigger-int.json │ ├── n_number_invalid-utf-8-in-exponent.json │ ├── n_number_invalid-utf-8-in-int.json │ ├── n_number_minus_infinity.json │ ├── 
n_number_minus_sign_with_trailing_garbage.json │ ├── n_number_minus_space_1.json │ ├── n_number_neg_int_starting_with_zero.json │ ├── n_number_neg_real_without_int_part.json │ ├── n_number_neg_with_garbage_at_end.json │ ├── n_number_real_garbage_after_e.json │ ├── n_number_real_with_invalid_utf8_after_e.json │ ├── n_number_real_without_fractional_part.json │ ├── n_number_starting_with_dot.json │ ├── n_number_with_alpha.json │ ├── n_number_with_alpha_char.json │ ├── n_number_with_leading_zero.json │ ├── n_object_bad_value.json │ ├── n_object_bracket_key.json │ ├── n_object_comma_instead_of_colon.json │ ├── n_object_double_colon.json │ ├── n_object_emoji.json │ ├── n_object_garbage_at_end.json │ ├── n_object_key_with_single_quotes.json │ ├── n_object_lone_continuation_byte_in_key_and_trailing_comma.json │ ├── n_object_missing_colon.json │ ├── n_object_missing_key.json │ ├── n_object_missing_semicolon.json │ ├── n_object_missing_value.json │ ├── n_object_no-colon.json │ ├── n_object_non_string_key.json │ ├── n_object_non_string_key_but_huge_number_instead.json │ ├── n_object_repeated_null_null.json │ ├── n_object_several_trailing_commas.json │ ├── n_object_single_quote.json │ ├── n_object_trailing_comma.json │ ├── n_object_trailing_comment.json │ ├── n_object_trailing_comment_open.json │ ├── n_object_trailing_comment_slash_open.json │ ├── n_object_trailing_comment_slash_open_incomplete.json │ ├── n_object_two_commas_in_a_row.json │ ├── n_object_unquoted_key.json │ ├── n_object_unterminated-value.json │ ├── n_object_with_single_string.json │ ├── n_object_with_trailing_garbage.json │ ├── n_single_space.json │ ├── n_string_1_surrogate_then_escape.json │ ├── n_string_1_surrogate_then_escape_u.json │ ├── n_string_1_surrogate_then_escape_u1.json │ ├── n_string_1_surrogate_then_escape_u1x.json │ ├── n_string_accentuated_char_no_quotes.json │ ├── n_string_backslash_00.json │ ├── n_string_escape_x.json │ ├── n_string_escaped_backslash_bad.json │ ├── 
n_string_escaped_ctrl_char_tab.json │ ├── n_string_escaped_emoji.json │ ├── n_string_incomplete_escape.json │ ├── n_string_incomplete_escaped_character.json │ ├── n_string_incomplete_surrogate.json │ ├── n_string_incomplete_surrogate_escape_invalid.json │ ├── n_string_invalid-utf-8-in-escape.json │ ├── n_string_invalid_backslash_esc.json │ ├── n_string_invalid_unicode_escape.json │ ├── n_string_invalid_utf8_after_escape.json │ ├── n_string_leading_uescaped_thinspace.json │ ├── n_string_no_quotes_with_bad_escape.json │ ├── n_string_single_doublequote.json │ ├── n_string_single_quote.json │ ├── n_string_single_string_no_double_quotes.json │ ├── n_string_start_escape_unclosed.json │ ├── n_string_unescaped_ctrl_char.json │ ├── n_string_unescaped_newline.json │ ├── n_string_unescaped_tab.json │ ├── n_string_unicode_CapitalU.json │ ├── n_string_with_trailing_garbage.json │ ├── n_structure_100000_opening_arrays.json │ ├── n_structure_U+2060_word_joined.json │ ├── n_structure_UTF8_BOM_no_data.json │ ├── n_structure_angle_bracket_..json │ ├── n_structure_angle_bracket_null.json │ ├── n_structure_array_trailing_garbage.json │ ├── n_structure_array_with_extra_array_close.json │ ├── n_structure_array_with_unclosed_string.json │ ├── n_structure_ascii-unicode-identifier.json │ ├── n_structure_capitalized_True.json │ ├── n_structure_close_unopened_array.json │ ├── n_structure_comma_instead_of_closing_brace.json │ ├── n_structure_double_array.json │ ├── n_structure_end_array.json │ ├── n_structure_incomplete_UTF8_BOM.json │ ├── n_structure_lone-invalid-utf-8.json │ ├── n_structure_lone-open-bracket.json │ ├── n_structure_no_data.json │ ├── n_structure_null-byte-outside-string.json │ ├── n_structure_number_with_trailing_garbage.json │ ├── n_structure_object_followed_by_closing_object.json │ ├── n_structure_object_unclosed_no_value.json │ ├── n_structure_object_with_comment.json │ ├── n_structure_object_with_trailing_garbage.json │ ├── n_structure_open_array_apostrophe.json │ ├── 
n_structure_open_array_comma.json │ ├── n_structure_open_array_object.json │ ├── n_structure_open_array_open_object.json │ ├── n_structure_open_array_open_string.json │ ├── n_structure_open_array_string.json │ ├── n_structure_open_object.json │ ├── n_structure_open_object_close_array.json │ ├── n_structure_open_object_comma.json │ ├── n_structure_open_object_open_array.json │ ├── n_structure_open_object_open_string.json │ ├── n_structure_open_object_string_with_apostrophes.json │ ├── n_structure_open_open.json │ ├── n_structure_single_eacute.json │ ├── n_structure_single_star.json │ ├── n_structure_trailing_#.json │ ├── n_structure_uescaped_LF_before_string.json │ ├── n_structure_unclosed_array.json │ ├── n_structure_unclosed_array_partial_null.json │ ├── n_structure_unclosed_array_unfinished_false.json │ ├── n_structure_unclosed_array_unfinished_true.json │ ├── n_structure_unclosed_object.json │ ├── n_structure_unicode-identifier.json │ ├── n_structure_whitespace_U+2060_word_joiner.json │ ├── n_structure_whitespace_formfeed.json │ ├── readme.txt │ ├── y_array_arraysWithSpaces.json │ ├── y_array_empty-string.json │ ├── y_array_empty.json │ ├── y_array_ending_with_newline.json │ ├── y_array_false.json │ ├── y_array_heterogeneous.json │ ├── y_array_null.json │ ├── y_array_with_1_and_newline.json │ ├── y_array_with_leading_space.json │ ├── y_array_with_several_null.json │ ├── y_array_with_trailing_space.json │ ├── y_number.json │ ├── y_number_0e+1.json │ ├── y_number_0e1.json │ ├── y_number_after_space.json │ ├── y_number_double_close_to_zero.json │ ├── y_number_int_with_exp.json │ ├── y_number_minus_zero.json │ ├── y_number_negative_int.json │ ├── y_number_negative_one.json │ ├── y_number_negative_zero.json │ ├── y_number_real_capital_e.json │ ├── y_number_real_capital_e_neg_exp.json │ ├── y_number_real_capital_e_pos_exp.json │ ├── y_number_real_exponent.json │ ├── y_number_real_fraction_exponent.json │ ├── y_number_real_neg_exp.json │ ├── 
y_number_real_pos_exponent.json │ ├── y_number_simple_int.json │ ├── y_number_simple_real.json │ ├── y_object.json │ ├── y_object_basic.json │ ├── y_object_duplicated_key.json │ ├── y_object_duplicated_key_and_value.json │ ├── y_object_empty.json │ ├── y_object_empty_key.json │ ├── y_object_escaped_null_in_key.json │ ├── y_object_extreme_numbers.json │ ├── y_object_long_strings.json │ ├── y_object_simple.json │ ├── y_object_string_unicode.json │ ├── y_object_with_newlines.json │ ├── y_string_1_2_3_bytes_UTF-8_sequences.json │ ├── y_string_accepted_surrogate_pair.json │ ├── y_string_accepted_surrogate_pairs.json │ ├── y_string_allowed_escapes.json │ ├── y_string_backslash_and_u_escaped_zero.json │ ├── y_string_backslash_doublequotes.json │ ├── y_string_comments.json │ ├── y_string_double_escape_a.json │ ├── y_string_double_escape_n.json │ ├── y_string_escaped_control_character.json │ ├── y_string_escaped_noncharacter.json │ ├── y_string_in_array.json │ ├── y_string_in_array_with_leading_space.json │ ├── y_string_last_surrogates_1_and_2.json │ ├── y_string_nbsp_uescaped.json │ ├── y_string_nonCharacterInUTF-8_U+10FFFF.json │ ├── y_string_nonCharacterInUTF-8_U+FFFF.json │ ├── y_string_null_escape.json │ ├── y_string_one-byte-utf-8.json │ ├── y_string_pi.json │ ├── y_string_reservedCharacterInUTF-8_U+1BFFF.json │ ├── y_string_simple_ascii.json │ ├── y_string_space.json │ ├── y_string_surrogates_U+1D11E_MUSICAL_SYMBOL_G_CLEF.json │ ├── y_string_three-byte-utf-8.json │ ├── y_string_two-byte-utf-8.json │ ├── y_string_u+2028_line_sep.json │ ├── y_string_u+2029_par_sep.json │ ├── y_string_uEscape.json │ ├── y_string_uescaped_newline.json │ ├── y_string_unescaped_char_delete.json │ ├── y_string_unicode.json │ ├── y_string_unicodeEscapedBackslash.json │ ├── y_string_unicode_2.json │ ├── y_string_unicode_U+10FFFE_nonchar.json │ ├── y_string_unicode_U+1FFFE_nonchar.json │ ├── y_string_unicode_U+200B_ZERO_WIDTH_SPACE.json │ ├── y_string_unicode_U+2064_invisible_plus.json │ ├── 
y_string_unicode_U+FDD0_nonchar.json │ ├── y_string_unicode_U+FFFE_nonchar.json │ ├── y_string_unicode_escaped_double_quote.json │ ├── y_string_utf8.json │ ├── y_string_with_del_character.json │ ├── y_structure_lonely_false.json │ ├── y_structure_lonely_int.json │ ├── y_structure_lonely_negative_real.json │ ├── y_structure_lonely_null.json │ ├── y_structure_lonely_string.json │ ├── y_structure_lonely_true.json │ ├── y_structure_string_empty.json │ ├── y_structure_trailing_newline.json │ ├── y_structure_true_in_array.json │ └── y_structure_whitespace_array.json ├── input-text.txt ├── invalid.json ├── json_demo.json ├── rapidjson_sample.json ├── regex_demo.txt └── twitter.json ├── docs ├── assets │ ├── ansi_up.js │ ├── cheese_m.jpg │ ├── codejar.min.js │ ├── drawdown.js │ ├── highlight.min.js │ └── vs2015.min.css ├── index.html ├── index.md ├── junk.html ├── size_notes.txt ├── top.css └── tutorial │ ├── index.html │ ├── json_tut0a.js │ ├── json_tut0a.wasm │ ├── json_tut1a.js │ ├── json_tut1a.wasm │ ├── json_tut1b.js │ ├── json_tut1b.wasm │ ├── json_tut1c.js │ ├── json_tut1c.wasm │ ├── json_tut2a.js │ ├── json_tut2a.wasm │ ├── json_tut2b.js │ ├── json_tut2b.wasm │ ├── tiny_c_parser.js │ ├── tiny_c_parser.wasm │ ├── tutorial.css │ └── tutorial.js ├── examples ├── SST.hpp ├── c_lexer │ ├── CLexer.cpp │ ├── CLexer.hpp │ ├── CToken.cpp │ ├── CToken.hpp │ ├── build.hancho │ ├── c_lexer_benchmark.cpp │ ├── c_lexer_test.cpp │ └── test.hancho ├── c_parser │ ├── CContext.cpp │ ├── CContext.hpp │ ├── CNode.cpp │ ├── CNode.hpp │ ├── CScope.cpp │ ├── CScope.hpp │ ├── build.hancho │ ├── c_constants.hpp │ ├── c_parse_nodes.hpp │ ├── c_parser_benchmark.cpp │ ├── c_parser_test.cpp │ ├── c_reference.cpp.hax │ ├── c_reference_hax.cpp.hax │ ├── cgen.cpp │ └── test.hancho ├── ini │ ├── build.hancho │ └── ini_parser.cpp ├── json │ ├── build.hancho │ ├── json.hpp │ ├── json_benchmark.cpp │ ├── json_conformance.cpp │ ├── json_demo.cpp │ ├── json_matcher.cpp │ ├── json_parser.cpp │ ├── 
json_test.cpp │ └── test.hancho ├── junk ├── live_demo │ ├── bench1.sh │ ├── bench2.sh │ ├── bench3.sh │ ├── canada.json │ ├── citm_catalog.json │ ├── data.json │ ├── live_bench.cpp │ ├── live_demo.cpp │ ├── live_parser0.cpp │ ├── live_parser1.cpp │ ├── live_parser2.cpp │ ├── live_parser3.cpp │ ├── notes │ │ ├── benchmark │ │ ├── grammar.txt │ │ └── valgrind │ ├── rapidjson_sample.json │ ├── run1.sh │ ├── run2.sh │ ├── run3.sh │ └── twitter.json ├── regex │ ├── build.hancho │ ├── regex_benchmark.cpp │ ├── regex_demo.cpp │ ├── regex_parser.cpp │ ├── regex_test.cpp │ └── test.hancho ├── toml │ ├── build.hancho │ ├── test.hancho │ ├── toml_parser.cpp │ └── toml_test.cpp └── tutorial │ ├── json_tut0a.cpp │ ├── json_tut0a.input │ ├── json_tut1a.cpp │ ├── json_tut1a.input │ ├── json_tut1b.cpp │ ├── json_tut1b.input │ ├── json_tut1c.cpp │ ├── json_tut1c.input │ ├── json_tut2a.cpp │ ├── json_tut2a.input │ ├── json_tut2b.cpp │ ├── json_tut2b.input │ ├── json_tutorial.md │ ├── main1.cpp │ ├── main2.cpp │ ├── main3.cpp │ ├── test.hancho │ ├── tiny_c_parser.cpp │ └── tiny_c_parser.input ├── matcheroni ├── Cookbook.hpp ├── Matcheroni.hpp ├── Parseroni.hpp ├── Printeroni.hpp ├── Utilities.hpp ├── __init__.py ├── dump.hpp └── matcheroni.py ├── test.hancho └── tests ├── dummy.h ├── matcheroni_test.cpp ├── parseroni_test.cpp └── scratch.cpp /.github/workflows/c-cpp.yml: -------------------------------------------------------------------------------- 1 | name: C/C++ CI 2 | on: 3 | push: 4 | branches: [ "main" ] 5 | pull_request: 6 | branches: [ "main" ] 7 | jobs: 8 | build: 9 | runs-on: ubuntu-latest 10 | steps: 11 | - uses: actions/checkout@v3 12 | - name: Get Boost 13 | run: sudo apt install libboost-regex-dev libboost-system-dev 14 | - name: Get Ninja 15 | run: sudo apt install ninja-build 16 | - name: Run Ninja 17 | run: ninja 18 | -------------------------------------------------------------------------------- /.gitignore: 
-------------------------------------------------------------------------------- 1 | build 2 | .vscode/settings.json 3 | .ninja_deps 4 | .ninja_log 5 | platform.info 6 | csmith_files 7 | test_files 8 | __pycache__ 9 | hancho.py 10 | -------------------------------------------------------------------------------- /.vscode/tasks.json: -------------------------------------------------------------------------------- 1 | { 2 | // See https://go.microsoft.com/fwlink/?LinkId=733558 3 | // for the documentation about the tasks.json format 4 | "version": "2.0.0", 5 | "tasks": [ 6 | { 7 | "type": "shell", 8 | "label": "wipe_build", 9 | "command": "rm -rf obj bin gen build", 10 | }, 11 | { 12 | "type": "shell", 13 | "label": "build_matcheroni", 14 | "command": "ninja", 15 | "problemMatcher": "$gcc", 16 | "group": { 17 | "kind": "build", 18 | "isDefault": true 19 | }, 20 | }, 21 | { 22 | "type": "shell", 23 | "label": "build_matcheroni_test", 24 | "command": "ninja bin/tests/matcheroni_test", 25 | "problemMatcher": "$gcc", 26 | "group": { 27 | "kind": "build", 28 | "isDefault": false 29 | }, 30 | }, 31 | { 32 | "type": "shell", 33 | "label": "build_parseroni_test", 34 | "command": "ninja bin/tests/parseroni_test", 35 | "problemMatcher": "$gcc", 36 | "group": { 37 | "kind": "build", 38 | "isDefault": false 39 | }, 40 | }, 41 | ] 42 | } 43 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2023 aappleby 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, 
subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /Matcheroni.code-workspace: -------------------------------------------------------------------------------- 1 | { 2 | "folders": [ 3 | { 4 | "path": "." 5 | } 6 | ], 7 | "settings": { 8 | "files.associations": { 9 | "chrono": "cpp", 10 | "*.tcc": "cpp", 11 | "unordered_map": "cpp", 12 | "fstream": "cpp", 13 | "istream": "cpp", 14 | "numeric": "cpp", 15 | "ostream": "cpp", 16 | "sstream": "cpp", 17 | "charconv": "cpp", 18 | "string": "cpp", 19 | "array": "cpp", 20 | "compare": "cpp", 21 | "functional": "cpp", 22 | "ratio": "cpp", 23 | "tuple": "cpp", 24 | "type_traits": "cpp", 25 | "utility": "cpp", 26 | "variant": "cpp", 27 | "deque": "cpp", 28 | "list": "cpp", 29 | "vector": "cpp", 30 | "string_view": "cpp", 31 | "initializer_list": "cpp", 32 | "regex": "cpp", 33 | "cctype": "cpp", 34 | "clocale": "cpp", 35 | "cmath": "cpp", 36 | "cstdarg": "cpp", 37 | "cstddef": "cpp", 38 | "cstdio": "cpp", 39 | "cstdlib": "cpp", 40 | "cstring": "cpp", 41 | "ctime": "cpp", 42 | "cwchar": "cpp", 43 | "cwctype": "cpp", 44 | "atomic": "cpp", 45 | "bit": "cpp", 46 | "bitset": "cpp", 47 | "codecvt": "cpp", 48 | "complex": "cpp", 49 | "concepts": "cpp", 50 | "condition_variable": "cpp", 51 | "cstdint": "cpp", 52 | "map": "cpp", 53 | 
"set": "cpp", 54 | "exception": "cpp", 55 | "algorithm": "cpp", 56 | "iterator": "cpp", 57 | "memory": "cpp", 58 | "memory_resource": "cpp", 59 | "optional": "cpp", 60 | "random": "cpp", 61 | "system_error": "cpp", 62 | "iomanip": "cpp", 63 | "iosfwd": "cpp", 64 | "limits": "cpp", 65 | "mutex": "cpp", 66 | "new": "cpp", 67 | "numbers": "cpp", 68 | "semaphore": "cpp", 69 | "stdexcept": "cpp", 70 | "stop_token": "cpp", 71 | "streambuf": "cpp", 72 | "thread": "cpp", 73 | "cinttypes": "cpp", 74 | "typeindex": "cpp", 75 | "typeinfo": "cpp", 76 | "span": "cpp" 77 | } 78 | } 79 | } 80 | -------------------------------------------------------------------------------- /build.hancho: -------------------------------------------------------------------------------- 1 | # Nothing here, Matcheroni is header-only -------------------------------------------------------------------------------- /data/conformance/i_number_double_huge_neg_exp.json: -------------------------------------------------------------------------------- 1 | [123.456e-789] -------------------------------------------------------------------------------- /data/conformance/i_number_huge_exp.json: -------------------------------------------------------------------------------- 1 | [0.4e00669999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999969999999006] -------------------------------------------------------------------------------- /data/conformance/i_number_neg_int_huge_exp.json: -------------------------------------------------------------------------------- 1 | [-1e+9999] -------------------------------------------------------------------------------- /data/conformance/i_number_pos_double_huge_exp.json: -------------------------------------------------------------------------------- 1 | [1.5e+9999] -------------------------------------------------------------------------------- /data/conformance/i_number_real_neg_overflow.json: 
-------------------------------------------------------------------------------- 1 | [-123123e100000] -------------------------------------------------------------------------------- /data/conformance/i_number_real_pos_overflow.json: -------------------------------------------------------------------------------- 1 | [123123e100000] -------------------------------------------------------------------------------- /data/conformance/i_number_real_underflow.json: -------------------------------------------------------------------------------- 1 | [123e-10000000] -------------------------------------------------------------------------------- /data/conformance/i_number_too_big_neg_int.json: -------------------------------------------------------------------------------- 1 | [-123123123123123123123123123123] -------------------------------------------------------------------------------- /data/conformance/i_number_too_big_pos_int.json: -------------------------------------------------------------------------------- 1 | [100000000000000000000] -------------------------------------------------------------------------------- /data/conformance/i_number_very_big_negative_int.json: -------------------------------------------------------------------------------- 1 | [-237462374673276894279832749832423479823246327846] -------------------------------------------------------------------------------- /data/conformance/i_object_key_lone_2nd_surrogate.json: -------------------------------------------------------------------------------- 1 | {"\uDFAA":0} -------------------------------------------------------------------------------- /data/conformance/i_string_1st_surrogate_but_2nd_missing.json: -------------------------------------------------------------------------------- 1 | ["\uDADA"] -------------------------------------------------------------------------------- /data/conformance/i_string_1st_valid_surrogate_2nd_invalid.json: 
-------------------------------------------------------------------------------- 1 | ["\uD888\u1234"] -------------------------------------------------------------------------------- /data/conformance/i_string_UTF-16LE_with_BOM.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aappleby/matcheroni/4c22d81e0ddc8e1a58db16a883636911318f4162/data/conformance/i_string_UTF-16LE_with_BOM.json -------------------------------------------------------------------------------- /data/conformance/i_string_UTF-8_invalid_sequence.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aappleby/matcheroni/4c22d81e0ddc8e1a58db16a883636911318f4162/data/conformance/i_string_UTF-8_invalid_sequence.json -------------------------------------------------------------------------------- /data/conformance/i_string_UTF8_surrogate_U+D800.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aappleby/matcheroni/4c22d81e0ddc8e1a58db16a883636911318f4162/data/conformance/i_string_UTF8_surrogate_U+D800.json -------------------------------------------------------------------------------- /data/conformance/i_string_incomplete_surrogate_and_escape_valid.json: -------------------------------------------------------------------------------- 1 | ["\uD800\n"] -------------------------------------------------------------------------------- /data/conformance/i_string_incomplete_surrogate_pair.json: -------------------------------------------------------------------------------- 1 | ["\uDd1ea"] -------------------------------------------------------------------------------- /data/conformance/i_string_incomplete_surrogates_escape_valid.json: -------------------------------------------------------------------------------- 1 | ["\uD800\uD800\n"] 
-------------------------------------------------------------------------------- /data/conformance/i_string_invalid_lonely_surrogate.json: -------------------------------------------------------------------------------- 1 | ["\ud800"] -------------------------------------------------------------------------------- /data/conformance/i_string_invalid_surrogate.json: -------------------------------------------------------------------------------- 1 | ["\ud800abc"] -------------------------------------------------------------------------------- /data/conformance/i_string_invalid_utf-8.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aappleby/matcheroni/4c22d81e0ddc8e1a58db16a883636911318f4162/data/conformance/i_string_invalid_utf-8.json -------------------------------------------------------------------------------- /data/conformance/i_string_inverted_surrogates_U+1D11E.json: -------------------------------------------------------------------------------- 1 | ["\uDd1e\uD834"] -------------------------------------------------------------------------------- /data/conformance/i_string_iso_latin_1.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aappleby/matcheroni/4c22d81e0ddc8e1a58db16a883636911318f4162/data/conformance/i_string_iso_latin_1.json -------------------------------------------------------------------------------- /data/conformance/i_string_lone_second_surrogate.json: -------------------------------------------------------------------------------- 1 | ["\uDFAA"] -------------------------------------------------------------------------------- /data/conformance/i_string_lone_utf8_continuation_byte.json: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/aappleby/matcheroni/4c22d81e0ddc8e1a58db16a883636911318f4162/data/conformance/i_string_lone_utf8_continuation_byte.json -------------------------------------------------------------------------------- /data/conformance/i_string_not_in_unicode_range.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aappleby/matcheroni/4c22d81e0ddc8e1a58db16a883636911318f4162/data/conformance/i_string_not_in_unicode_range.json -------------------------------------------------------------------------------- /data/conformance/i_string_overlong_sequence_2_bytes.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aappleby/matcheroni/4c22d81e0ddc8e1a58db16a883636911318f4162/data/conformance/i_string_overlong_sequence_2_bytes.json -------------------------------------------------------------------------------- /data/conformance/i_string_overlong_sequence_6_bytes.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aappleby/matcheroni/4c22d81e0ddc8e1a58db16a883636911318f4162/data/conformance/i_string_overlong_sequence_6_bytes.json -------------------------------------------------------------------------------- /data/conformance/i_string_overlong_sequence_6_bytes_null.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aappleby/matcheroni/4c22d81e0ddc8e1a58db16a883636911318f4162/data/conformance/i_string_overlong_sequence_6_bytes_null.json -------------------------------------------------------------------------------- /data/conformance/i_string_truncated-utf-8.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aappleby/matcheroni/4c22d81e0ddc8e1a58db16a883636911318f4162/data/conformance/i_string_truncated-utf-8.json 
-------------------------------------------------------------------------------- /data/conformance/i_string_utf16BE_no_BOM.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aappleby/matcheroni/4c22d81e0ddc8e1a58db16a883636911318f4162/data/conformance/i_string_utf16BE_no_BOM.json -------------------------------------------------------------------------------- /data/conformance/i_string_utf16LE_no_BOM.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aappleby/matcheroni/4c22d81e0ddc8e1a58db16a883636911318f4162/data/conformance/i_string_utf16LE_no_BOM.json -------------------------------------------------------------------------------- /data/conformance/i_structure_500_nested_arrays.json: -------------------------------------------------------------------------------- 1 | [[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]] 
-------------------------------------------------------------------------------- /data/conformance/i_structure_UTF-8_BOM_empty_object.json: -------------------------------------------------------------------------------- 1 | {} -------------------------------------------------------------------------------- /data/conformance/n_array_1_true_without_comma.json: -------------------------------------------------------------------------------- 1 | [1 true] -------------------------------------------------------------------------------- /data/conformance/n_array_a_invalid_utf8.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aappleby/matcheroni/4c22d81e0ddc8e1a58db16a883636911318f4162/data/conformance/n_array_a_invalid_utf8.json -------------------------------------------------------------------------------- /data/conformance/n_array_colon_instead_of_comma.json: -------------------------------------------------------------------------------- 1 | ["": 1] -------------------------------------------------------------------------------- /data/conformance/n_array_comma_after_close.json: -------------------------------------------------------------------------------- 1 | [""], -------------------------------------------------------------------------------- /data/conformance/n_array_comma_and_number.json: -------------------------------------------------------------------------------- 1 | [,1] -------------------------------------------------------------------------------- /data/conformance/n_array_double_comma.json: -------------------------------------------------------------------------------- 1 | [1,,2] -------------------------------------------------------------------------------- /data/conformance/n_array_double_extra_comma.json: -------------------------------------------------------------------------------- 1 | ["x",,] -------------------------------------------------------------------------------- 
/data/conformance/n_array_extra_close.json: -------------------------------------------------------------------------------- 1 | ["x"]] -------------------------------------------------------------------------------- /data/conformance/n_array_extra_comma.json: -------------------------------------------------------------------------------- 1 | ["",] -------------------------------------------------------------------------------- /data/conformance/n_array_incomplete.json: -------------------------------------------------------------------------------- 1 | ["x" -------------------------------------------------------------------------------- /data/conformance/n_array_incomplete_invalid_value.json: -------------------------------------------------------------------------------- 1 | [x -------------------------------------------------------------------------------- /data/conformance/n_array_inner_array_no_comma.json: -------------------------------------------------------------------------------- 1 | [3[4]] -------------------------------------------------------------------------------- /data/conformance/n_array_invalid_utf8.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aappleby/matcheroni/4c22d81e0ddc8e1a58db16a883636911318f4162/data/conformance/n_array_invalid_utf8.json -------------------------------------------------------------------------------- /data/conformance/n_array_items_separated_by_semicolon.json: -------------------------------------------------------------------------------- 1 | [1:2] -------------------------------------------------------------------------------- /data/conformance/n_array_just_comma.json: -------------------------------------------------------------------------------- 1 | [,] -------------------------------------------------------------------------------- /data/conformance/n_array_just_minus.json: 
-------------------------------------------------------------------------------- 1 | [-] -------------------------------------------------------------------------------- /data/conformance/n_array_missing_value.json: -------------------------------------------------------------------------------- 1 | [ , ""] -------------------------------------------------------------------------------- /data/conformance/n_array_newlines_unclosed.json: -------------------------------------------------------------------------------- 1 | ["a", 2 | 4 3 | ,1, -------------------------------------------------------------------------------- /data/conformance/n_array_number_and_comma.json: -------------------------------------------------------------------------------- 1 | [1,] -------------------------------------------------------------------------------- /data/conformance/n_array_number_and_several_commas.json: -------------------------------------------------------------------------------- 1 | [1,,] -------------------------------------------------------------------------------- /data/conformance/n_array_spaces_vertical_tab_formfeed.json: -------------------------------------------------------------------------------- 1 | [" a"\f] -------------------------------------------------------------------------------- /data/conformance/n_array_star_inside.json: -------------------------------------------------------------------------------- 1 | [*] -------------------------------------------------------------------------------- /data/conformance/n_array_unclosed.json: -------------------------------------------------------------------------------- 1 | ["" -------------------------------------------------------------------------------- /data/conformance/n_array_unclosed_trailing_comma.json: -------------------------------------------------------------------------------- 1 | [1, -------------------------------------------------------------------------------- 
/data/conformance/n_array_unclosed_with_new_lines.json: -------------------------------------------------------------------------------- 1 | [1, 2 | 1 3 | ,1 -------------------------------------------------------------------------------- /data/conformance/n_array_unclosed_with_object_inside.json: -------------------------------------------------------------------------------- 1 | [{} -------------------------------------------------------------------------------- /data/conformance/n_incomplete_false.json: -------------------------------------------------------------------------------- 1 | [fals] -------------------------------------------------------------------------------- /data/conformance/n_incomplete_null.json: -------------------------------------------------------------------------------- 1 | [nul] -------------------------------------------------------------------------------- /data/conformance/n_incomplete_true.json: -------------------------------------------------------------------------------- 1 | [tru] -------------------------------------------------------------------------------- /data/conformance/n_multidigit_number_then_00.json: -------------------------------------------------------------------------------- 1 | 123 -------------------------------------------------------------------------------- /data/conformance/n_number_++.json: -------------------------------------------------------------------------------- 1 | [++1234] -------------------------------------------------------------------------------- /data/conformance/n_number_+1.json: -------------------------------------------------------------------------------- 1 | [+1] -------------------------------------------------------------------------------- /data/conformance/n_number_+Inf.json: -------------------------------------------------------------------------------- 1 | [+Inf] -------------------------------------------------------------------------------- 
/data/conformance/n_number_-01.json: -------------------------------------------------------------------------------- 1 | [-01] -------------------------------------------------------------------------------- /data/conformance/n_number_-1.0..json: -------------------------------------------------------------------------------- 1 | [-1.0.] -------------------------------------------------------------------------------- /data/conformance/n_number_-2..json: -------------------------------------------------------------------------------- 1 | [-2.] -------------------------------------------------------------------------------- /data/conformance/n_number_-NaN.json: -------------------------------------------------------------------------------- 1 | [-NaN] -------------------------------------------------------------------------------- /data/conformance/n_number_.-1.json: -------------------------------------------------------------------------------- 1 | [.-1] -------------------------------------------------------------------------------- /data/conformance/n_number_.2e-3.json: -------------------------------------------------------------------------------- 1 | [.2e-3] -------------------------------------------------------------------------------- /data/conformance/n_number_0.1.2.json: -------------------------------------------------------------------------------- 1 | [0.1.2] -------------------------------------------------------------------------------- /data/conformance/n_number_0.3e+.json: -------------------------------------------------------------------------------- 1 | [0.3e+] -------------------------------------------------------------------------------- /data/conformance/n_number_0.3e.json: -------------------------------------------------------------------------------- 1 | [0.3e] -------------------------------------------------------------------------------- /data/conformance/n_number_0.e1.json: 
-------------------------------------------------------------------------------- 1 | [0.e1] -------------------------------------------------------------------------------- /data/conformance/n_number_0_capital_E+.json: -------------------------------------------------------------------------------- 1 | [0E+] -------------------------------------------------------------------------------- /data/conformance/n_number_0_capital_E.json: -------------------------------------------------------------------------------- 1 | [0E] -------------------------------------------------------------------------------- /data/conformance/n_number_0e+.json: -------------------------------------------------------------------------------- 1 | [0e+] -------------------------------------------------------------------------------- /data/conformance/n_number_0e.json: -------------------------------------------------------------------------------- 1 | [0e] -------------------------------------------------------------------------------- /data/conformance/n_number_1.0e+.json: -------------------------------------------------------------------------------- 1 | [1.0e+] -------------------------------------------------------------------------------- /data/conformance/n_number_1.0e-.json: -------------------------------------------------------------------------------- 1 | [1.0e-] -------------------------------------------------------------------------------- /data/conformance/n_number_1.0e.json: -------------------------------------------------------------------------------- 1 | [1.0e] -------------------------------------------------------------------------------- /data/conformance/n_number_1_000.json: -------------------------------------------------------------------------------- 1 | [1 000.0] -------------------------------------------------------------------------------- /data/conformance/n_number_1eE2.json: -------------------------------------------------------------------------------- 1 | 
[1eE2] -------------------------------------------------------------------------------- /data/conformance/n_number_2.e+3.json: -------------------------------------------------------------------------------- 1 | [2.e+3] -------------------------------------------------------------------------------- /data/conformance/n_number_2.e-3.json: -------------------------------------------------------------------------------- 1 | [2.e-3] -------------------------------------------------------------------------------- /data/conformance/n_number_2.e3.json: -------------------------------------------------------------------------------- 1 | [2.e3] -------------------------------------------------------------------------------- /data/conformance/n_number_9.e+.json: -------------------------------------------------------------------------------- 1 | [9.e+] -------------------------------------------------------------------------------- /data/conformance/n_number_Inf.json: -------------------------------------------------------------------------------- 1 | [Inf] -------------------------------------------------------------------------------- /data/conformance/n_number_NaN.json: -------------------------------------------------------------------------------- 1 | [NaN] -------------------------------------------------------------------------------- /data/conformance/n_number_U+FF11_fullwidth_digit_one.json: -------------------------------------------------------------------------------- 1 | [1] -------------------------------------------------------------------------------- /data/conformance/n_number_expression.json: -------------------------------------------------------------------------------- 1 | [1+2] -------------------------------------------------------------------------------- /data/conformance/n_number_hex_1_digit.json: -------------------------------------------------------------------------------- 1 | [0x1] 
-------------------------------------------------------------------------------- /data/conformance/n_number_hex_2_digits.json: -------------------------------------------------------------------------------- 1 | [0x42] -------------------------------------------------------------------------------- /data/conformance/n_number_infinity.json: -------------------------------------------------------------------------------- 1 | [Infinity] -------------------------------------------------------------------------------- /data/conformance/n_number_invalid+-.json: -------------------------------------------------------------------------------- 1 | [0e+-1] -------------------------------------------------------------------------------- /data/conformance/n_number_invalid-negative-real.json: -------------------------------------------------------------------------------- 1 | [-123.123foo] -------------------------------------------------------------------------------- /data/conformance/n_number_invalid-utf-8-in-bigger-int.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aappleby/matcheroni/4c22d81e0ddc8e1a58db16a883636911318f4162/data/conformance/n_number_invalid-utf-8-in-bigger-int.json -------------------------------------------------------------------------------- /data/conformance/n_number_invalid-utf-8-in-exponent.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aappleby/matcheroni/4c22d81e0ddc8e1a58db16a883636911318f4162/data/conformance/n_number_invalid-utf-8-in-exponent.json -------------------------------------------------------------------------------- /data/conformance/n_number_invalid-utf-8-in-int.json: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/aappleby/matcheroni/4c22d81e0ddc8e1a58db16a883636911318f4162/data/conformance/n_number_invalid-utf-8-in-int.json -------------------------------------------------------------------------------- /data/conformance/n_number_minus_infinity.json: -------------------------------------------------------------------------------- 1 | [-Infinity] -------------------------------------------------------------------------------- /data/conformance/n_number_minus_sign_with_trailing_garbage.json: -------------------------------------------------------------------------------- 1 | [-foo] -------------------------------------------------------------------------------- /data/conformance/n_number_minus_space_1.json: -------------------------------------------------------------------------------- 1 | [- 1] -------------------------------------------------------------------------------- /data/conformance/n_number_neg_int_starting_with_zero.json: -------------------------------------------------------------------------------- 1 | [-012] -------------------------------------------------------------------------------- /data/conformance/n_number_neg_real_without_int_part.json: -------------------------------------------------------------------------------- 1 | [-.123] -------------------------------------------------------------------------------- /data/conformance/n_number_neg_with_garbage_at_end.json: -------------------------------------------------------------------------------- 1 | [-1x] -------------------------------------------------------------------------------- /data/conformance/n_number_real_garbage_after_e.json: -------------------------------------------------------------------------------- 1 | [1ea] -------------------------------------------------------------------------------- /data/conformance/n_number_real_with_invalid_utf8_after_e.json: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/aappleby/matcheroni/4c22d81e0ddc8e1a58db16a883636911318f4162/data/conformance/n_number_real_with_invalid_utf8_after_e.json -------------------------------------------------------------------------------- /data/conformance/n_number_real_without_fractional_part.json: -------------------------------------------------------------------------------- 1 | [1.] -------------------------------------------------------------------------------- /data/conformance/n_number_starting_with_dot.json: -------------------------------------------------------------------------------- 1 | [.123] -------------------------------------------------------------------------------- /data/conformance/n_number_with_alpha.json: -------------------------------------------------------------------------------- 1 | [1.2a-3] -------------------------------------------------------------------------------- /data/conformance/n_number_with_alpha_char.json: -------------------------------------------------------------------------------- 1 | [1.8011670033376514H-308] -------------------------------------------------------------------------------- /data/conformance/n_number_with_leading_zero.json: -------------------------------------------------------------------------------- 1 | [012] -------------------------------------------------------------------------------- /data/conformance/n_object_bad_value.json: -------------------------------------------------------------------------------- 1 | ["x", truth] -------------------------------------------------------------------------------- /data/conformance/n_object_bracket_key.json: -------------------------------------------------------------------------------- 1 | {[: "x"} 2 | -------------------------------------------------------------------------------- /data/conformance/n_object_comma_instead_of_colon.json: -------------------------------------------------------------------------------- 1 | {"x", null} 
-------------------------------------------------------------------------------- /data/conformance/n_object_double_colon.json: -------------------------------------------------------------------------------- 1 | {"x"::"b"} -------------------------------------------------------------------------------- /data/conformance/n_object_emoji.json: -------------------------------------------------------------------------------- 1 | {🇨🇭} -------------------------------------------------------------------------------- /data/conformance/n_object_garbage_at_end.json: -------------------------------------------------------------------------------- 1 | {"a":"a" 123} -------------------------------------------------------------------------------- /data/conformance/n_object_key_with_single_quotes.json: -------------------------------------------------------------------------------- 1 | {key: 'value'} -------------------------------------------------------------------------------- /data/conformance/n_object_lone_continuation_byte_in_key_and_trailing_comma.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aappleby/matcheroni/4c22d81e0ddc8e1a58db16a883636911318f4162/data/conformance/n_object_lone_continuation_byte_in_key_and_trailing_comma.json -------------------------------------------------------------------------------- /data/conformance/n_object_missing_colon.json: -------------------------------------------------------------------------------- 1 | {"a" b} -------------------------------------------------------------------------------- /data/conformance/n_object_missing_key.json: -------------------------------------------------------------------------------- 1 | {:"b"} -------------------------------------------------------------------------------- /data/conformance/n_object_missing_semicolon.json: -------------------------------------------------------------------------------- 1 | {"a" "b"} 
-------------------------------------------------------------------------------- /data/conformance/n_object_missing_value.json: -------------------------------------------------------------------------------- 1 | {"a": -------------------------------------------------------------------------------- /data/conformance/n_object_no-colon.json: -------------------------------------------------------------------------------- 1 | {"a" -------------------------------------------------------------------------------- /data/conformance/n_object_non_string_key.json: -------------------------------------------------------------------------------- 1 | {1:1} -------------------------------------------------------------------------------- /data/conformance/n_object_non_string_key_but_huge_number_instead.json: -------------------------------------------------------------------------------- 1 | {9999E9999:1} -------------------------------------------------------------------------------- /data/conformance/n_object_repeated_null_null.json: -------------------------------------------------------------------------------- 1 | {null:null,null:null} -------------------------------------------------------------------------------- /data/conformance/n_object_several_trailing_commas.json: -------------------------------------------------------------------------------- 1 | {"id":0,,,,,} -------------------------------------------------------------------------------- /data/conformance/n_object_single_quote.json: -------------------------------------------------------------------------------- 1 | {'a':0} -------------------------------------------------------------------------------- /data/conformance/n_object_trailing_comma.json: -------------------------------------------------------------------------------- 1 | {"id":0,} -------------------------------------------------------------------------------- /data/conformance/n_object_trailing_comment.json: 
-------------------------------------------------------------------------------- 1 | {"a":"b"}/**/ -------------------------------------------------------------------------------- /data/conformance/n_object_trailing_comment_open.json: -------------------------------------------------------------------------------- 1 | {"a":"b"}/**// -------------------------------------------------------------------------------- /data/conformance/n_object_trailing_comment_slash_open.json: -------------------------------------------------------------------------------- 1 | {"a":"b"}// -------------------------------------------------------------------------------- /data/conformance/n_object_trailing_comment_slash_open_incomplete.json: -------------------------------------------------------------------------------- 1 | {"a":"b"}/ -------------------------------------------------------------------------------- /data/conformance/n_object_two_commas_in_a_row.json: -------------------------------------------------------------------------------- 1 | {"a":"b",,"c":"d"} -------------------------------------------------------------------------------- /data/conformance/n_object_unquoted_key.json: -------------------------------------------------------------------------------- 1 | {a: "b"} -------------------------------------------------------------------------------- /data/conformance/n_object_unterminated-value.json: -------------------------------------------------------------------------------- 1 | {"a":"a -------------------------------------------------------------------------------- /data/conformance/n_object_with_single_string.json: -------------------------------------------------------------------------------- 1 | { "foo" : "bar", "a" } -------------------------------------------------------------------------------- /data/conformance/n_object_with_trailing_garbage.json: -------------------------------------------------------------------------------- 1 | {"a":"b"}# 
-------------------------------------------------------------------------------- /data/conformance/n_single_space.json: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /data/conformance/n_string_1_surrogate_then_escape.json: -------------------------------------------------------------------------------- 1 | ["\uD800\"] -------------------------------------------------------------------------------- /data/conformance/n_string_1_surrogate_then_escape_u.json: -------------------------------------------------------------------------------- 1 | ["\uD800\u"] -------------------------------------------------------------------------------- /data/conformance/n_string_1_surrogate_then_escape_u1.json: -------------------------------------------------------------------------------- 1 | ["\uD800\u1"] -------------------------------------------------------------------------------- /data/conformance/n_string_1_surrogate_then_escape_u1x.json: -------------------------------------------------------------------------------- 1 | ["\uD800\u1x"] -------------------------------------------------------------------------------- /data/conformance/n_string_accentuated_char_no_quotes.json: -------------------------------------------------------------------------------- 1 | [é] -------------------------------------------------------------------------------- /data/conformance/n_string_backslash_00.json: -------------------------------------------------------------------------------- 1 | ["\"] -------------------------------------------------------------------------------- /data/conformance/n_string_escape_x.json: -------------------------------------------------------------------------------- 1 | ["\x00"] -------------------------------------------------------------------------------- /data/conformance/n_string_escaped_backslash_bad.json: 
-------------------------------------------------------------------------------- 1 | ["\\\"] -------------------------------------------------------------------------------- /data/conformance/n_string_escaped_ctrl_char_tab.json: -------------------------------------------------------------------------------- 1 | ["\ "] -------------------------------------------------------------------------------- /data/conformance/n_string_escaped_emoji.json: -------------------------------------------------------------------------------- 1 | ["\🌀"] -------------------------------------------------------------------------------- /data/conformance/n_string_incomplete_escape.json: -------------------------------------------------------------------------------- 1 | ["\"] -------------------------------------------------------------------------------- /data/conformance/n_string_incomplete_escaped_character.json: -------------------------------------------------------------------------------- 1 | ["\u00A"] -------------------------------------------------------------------------------- /data/conformance/n_string_incomplete_surrogate.json: -------------------------------------------------------------------------------- 1 | ["\uD834\uDd"] -------------------------------------------------------------------------------- /data/conformance/n_string_incomplete_surrogate_escape_invalid.json: -------------------------------------------------------------------------------- 1 | ["\uD800\uD800\x"] -------------------------------------------------------------------------------- /data/conformance/n_string_invalid-utf-8-in-escape.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aappleby/matcheroni/4c22d81e0ddc8e1a58db16a883636911318f4162/data/conformance/n_string_invalid-utf-8-in-escape.json -------------------------------------------------------------------------------- /data/conformance/n_string_invalid_backslash_esc.json: 
-------------------------------------------------------------------------------- 1 | ["\a"] -------------------------------------------------------------------------------- /data/conformance/n_string_invalid_unicode_escape.json: -------------------------------------------------------------------------------- 1 | ["\uqqqq"] -------------------------------------------------------------------------------- /data/conformance/n_string_invalid_utf8_after_escape.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aappleby/matcheroni/4c22d81e0ddc8e1a58db16a883636911318f4162/data/conformance/n_string_invalid_utf8_after_escape.json -------------------------------------------------------------------------------- /data/conformance/n_string_leading_uescaped_thinspace.json: -------------------------------------------------------------------------------- 1 | [\u0020"asd"] -------------------------------------------------------------------------------- /data/conformance/n_string_no_quotes_with_bad_escape.json: -------------------------------------------------------------------------------- 1 | [\n] -------------------------------------------------------------------------------- /data/conformance/n_string_single_doublequote.json: -------------------------------------------------------------------------------- 1 | " -------------------------------------------------------------------------------- /data/conformance/n_string_single_quote.json: -------------------------------------------------------------------------------- 1 | ['single quote'] -------------------------------------------------------------------------------- /data/conformance/n_string_single_string_no_double_quotes.json: -------------------------------------------------------------------------------- 1 | abc -------------------------------------------------------------------------------- /data/conformance/n_string_start_escape_unclosed.json: 
-------------------------------------------------------------------------------- 1 | ["\ -------------------------------------------------------------------------------- /data/conformance/n_string_unescaped_ctrl_char.json: -------------------------------------------------------------------------------- 1 | ["aa"] -------------------------------------------------------------------------------- /data/conformance/n_string_unescaped_newline.json: -------------------------------------------------------------------------------- 1 | ["new 2 | line"] -------------------------------------------------------------------------------- /data/conformance/n_string_unescaped_tab.json: -------------------------------------------------------------------------------- 1 | [" "] -------------------------------------------------------------------------------- /data/conformance/n_string_unicode_CapitalU.json: -------------------------------------------------------------------------------- 1 | "\UA66D" -------------------------------------------------------------------------------- /data/conformance/n_string_with_trailing_garbage.json: -------------------------------------------------------------------------------- 1 | ""x -------------------------------------------------------------------------------- /data/conformance/n_structure_U+2060_word_joined.json: -------------------------------------------------------------------------------- 1 | [⁠] -------------------------------------------------------------------------------- /data/conformance/n_structure_UTF8_BOM_no_data.json: -------------------------------------------------------------------------------- 1 |  -------------------------------------------------------------------------------- /data/conformance/n_structure_angle_bracket_..json: -------------------------------------------------------------------------------- 1 | <.> -------------------------------------------------------------------------------- 
/data/conformance/n_structure_angle_bracket_null.json: -------------------------------------------------------------------------------- 1 | [] -------------------------------------------------------------------------------- /data/conformance/n_structure_array_trailing_garbage.json: -------------------------------------------------------------------------------- 1 | [1]x -------------------------------------------------------------------------------- /data/conformance/n_structure_array_with_extra_array_close.json: -------------------------------------------------------------------------------- 1 | [1]] -------------------------------------------------------------------------------- /data/conformance/n_structure_array_with_unclosed_string.json: -------------------------------------------------------------------------------- 1 | ["asd] -------------------------------------------------------------------------------- /data/conformance/n_structure_ascii-unicode-identifier.json: -------------------------------------------------------------------------------- 1 | aå -------------------------------------------------------------------------------- /data/conformance/n_structure_capitalized_True.json: -------------------------------------------------------------------------------- 1 | [True] -------------------------------------------------------------------------------- /data/conformance/n_structure_close_unopened_array.json: -------------------------------------------------------------------------------- 1 | 1] -------------------------------------------------------------------------------- /data/conformance/n_structure_comma_instead_of_closing_brace.json: -------------------------------------------------------------------------------- 1 | {"x": true, -------------------------------------------------------------------------------- /data/conformance/n_structure_double_array.json: -------------------------------------------------------------------------------- 1 | [][] 
-------------------------------------------------------------------------------- /data/conformance/n_structure_end_array.json: -------------------------------------------------------------------------------- 1 | ] -------------------------------------------------------------------------------- /data/conformance/n_structure_incomplete_UTF8_BOM.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aappleby/matcheroni/4c22d81e0ddc8e1a58db16a883636911318f4162/data/conformance/n_structure_incomplete_UTF8_BOM.json -------------------------------------------------------------------------------- /data/conformance/n_structure_lone-invalid-utf-8.json: -------------------------------------------------------------------------------- 1 | � -------------------------------------------------------------------------------- /data/conformance/n_structure_lone-open-bracket.json: -------------------------------------------------------------------------------- 1 | [ -------------------------------------------------------------------------------- /data/conformance/n_structure_no_data.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aappleby/matcheroni/4c22d81e0ddc8e1a58db16a883636911318f4162/data/conformance/n_structure_no_data.json -------------------------------------------------------------------------------- /data/conformance/n_structure_null-byte-outside-string.json: -------------------------------------------------------------------------------- 1 | [] -------------------------------------------------------------------------------- /data/conformance/n_structure_number_with_trailing_garbage.json: -------------------------------------------------------------------------------- 1 | 2@ -------------------------------------------------------------------------------- /data/conformance/n_structure_object_followed_by_closing_object.json: 
-------------------------------------------------------------------------------- 1 | {}} -------------------------------------------------------------------------------- /data/conformance/n_structure_object_unclosed_no_value.json: -------------------------------------------------------------------------------- 1 | {"": -------------------------------------------------------------------------------- /data/conformance/n_structure_object_with_comment.json: -------------------------------------------------------------------------------- 1 | {"a":/*comment*/"b"} -------------------------------------------------------------------------------- /data/conformance/n_structure_object_with_trailing_garbage.json: -------------------------------------------------------------------------------- 1 | {"a": true} "x" -------------------------------------------------------------------------------- /data/conformance/n_structure_open_array_apostrophe.json: -------------------------------------------------------------------------------- 1 | [' -------------------------------------------------------------------------------- /data/conformance/n_structure_open_array_comma.json: -------------------------------------------------------------------------------- 1 | [, -------------------------------------------------------------------------------- /data/conformance/n_structure_open_array_open_object.json: -------------------------------------------------------------------------------- 1 | [{ -------------------------------------------------------------------------------- /data/conformance/n_structure_open_array_open_string.json: -------------------------------------------------------------------------------- 1 | ["a -------------------------------------------------------------------------------- /data/conformance/n_structure_open_array_string.json: -------------------------------------------------------------------------------- 1 | ["a" 
-------------------------------------------------------------------------------- /data/conformance/n_structure_open_object.json: -------------------------------------------------------------------------------- 1 | { -------------------------------------------------------------------------------- /data/conformance/n_structure_open_object_close_array.json: -------------------------------------------------------------------------------- 1 | {] -------------------------------------------------------------------------------- /data/conformance/n_structure_open_object_comma.json: -------------------------------------------------------------------------------- 1 | {, -------------------------------------------------------------------------------- /data/conformance/n_structure_open_object_open_array.json: -------------------------------------------------------------------------------- 1 | {[ -------------------------------------------------------------------------------- /data/conformance/n_structure_open_object_open_string.json: -------------------------------------------------------------------------------- 1 | {"a -------------------------------------------------------------------------------- /data/conformance/n_structure_open_object_string_with_apostrophes.json: -------------------------------------------------------------------------------- 1 | {'a' -------------------------------------------------------------------------------- /data/conformance/n_structure_open_open.json: -------------------------------------------------------------------------------- 1 | ["\{["\{["\{["\{ -------------------------------------------------------------------------------- /data/conformance/n_structure_single_eacute.json: -------------------------------------------------------------------------------- 1 | � -------------------------------------------------------------------------------- /data/conformance/n_structure_single_star.json: 
-------------------------------------------------------------------------------- 1 | * -------------------------------------------------------------------------------- /data/conformance/n_structure_trailing_#.json: -------------------------------------------------------------------------------- 1 | {"a":"b"}#{} -------------------------------------------------------------------------------- /data/conformance/n_structure_uescaped_LF_before_string.json: -------------------------------------------------------------------------------- 1 | [\u000A""] -------------------------------------------------------------------------------- /data/conformance/n_structure_unclosed_array.json: -------------------------------------------------------------------------------- 1 | [1 -------------------------------------------------------------------------------- /data/conformance/n_structure_unclosed_array_partial_null.json: -------------------------------------------------------------------------------- 1 | [ false, nul -------------------------------------------------------------------------------- /data/conformance/n_structure_unclosed_array_unfinished_false.json: -------------------------------------------------------------------------------- 1 | [ true, fals -------------------------------------------------------------------------------- /data/conformance/n_structure_unclosed_array_unfinished_true.json: -------------------------------------------------------------------------------- 1 | [ false, tru -------------------------------------------------------------------------------- /data/conformance/n_structure_unclosed_object.json: -------------------------------------------------------------------------------- 1 | {"asd":"asd" -------------------------------------------------------------------------------- /data/conformance/n_structure_unicode-identifier.json: -------------------------------------------------------------------------------- 1 | å 
-------------------------------------------------------------------------------- /data/conformance/n_structure_whitespace_U+2060_word_joiner.json: -------------------------------------------------------------------------------- 1 | [⁠] -------------------------------------------------------------------------------- /data/conformance/n_structure_whitespace_formfeed.json: -------------------------------------------------------------------------------- 1 | [ ] -------------------------------------------------------------------------------- /data/conformance/readme.txt: -------------------------------------------------------------------------------- 1 | Conformance tests from https://github.com/nst/JSONTestSuite 2 | -------------------------------------------------------------------------------- /data/conformance/y_array_arraysWithSpaces.json: -------------------------------------------------------------------------------- 1 | [[] ] -------------------------------------------------------------------------------- /data/conformance/y_array_empty-string.json: -------------------------------------------------------------------------------- 1 | [""] -------------------------------------------------------------------------------- /data/conformance/y_array_empty.json: -------------------------------------------------------------------------------- 1 | [] -------------------------------------------------------------------------------- /data/conformance/y_array_ending_with_newline.json: -------------------------------------------------------------------------------- 1 | ["a"] -------------------------------------------------------------------------------- /data/conformance/y_array_false.json: -------------------------------------------------------------------------------- 1 | [false] -------------------------------------------------------------------------------- /data/conformance/y_array_heterogeneous.json: 
-------------------------------------------------------------------------------- 1 | [null, 1, "1", {}] -------------------------------------------------------------------------------- /data/conformance/y_array_null.json: -------------------------------------------------------------------------------- 1 | [null] -------------------------------------------------------------------------------- /data/conformance/y_array_with_1_and_newline.json: -------------------------------------------------------------------------------- 1 | [1 2 | ] -------------------------------------------------------------------------------- /data/conformance/y_array_with_leading_space.json: -------------------------------------------------------------------------------- 1 | [1] -------------------------------------------------------------------------------- /data/conformance/y_array_with_several_null.json: -------------------------------------------------------------------------------- 1 | [1,null,null,null,2] -------------------------------------------------------------------------------- /data/conformance/y_array_with_trailing_space.json: -------------------------------------------------------------------------------- 1 | [2] -------------------------------------------------------------------------------- /data/conformance/y_number.json: -------------------------------------------------------------------------------- 1 | [123e65] -------------------------------------------------------------------------------- /data/conformance/y_number_0e+1.json: -------------------------------------------------------------------------------- 1 | [0e+1] -------------------------------------------------------------------------------- /data/conformance/y_number_0e1.json: -------------------------------------------------------------------------------- 1 | [0e1] -------------------------------------------------------------------------------- /data/conformance/y_number_after_space.json: 
-------------------------------------------------------------------------------- 1 | [ 4] -------------------------------------------------------------------------------- /data/conformance/y_number_double_close_to_zero.json: -------------------------------------------------------------------------------- 1 | [-0.000000000000000000000000000000000000000000000000000000000000000000000000000001] 2 | -------------------------------------------------------------------------------- /data/conformance/y_number_int_with_exp.json: -------------------------------------------------------------------------------- 1 | [20e1] -------------------------------------------------------------------------------- /data/conformance/y_number_minus_zero.json: -------------------------------------------------------------------------------- 1 | [-0] -------------------------------------------------------------------------------- /data/conformance/y_number_negative_int.json: -------------------------------------------------------------------------------- 1 | [-123] -------------------------------------------------------------------------------- /data/conformance/y_number_negative_one.json: -------------------------------------------------------------------------------- 1 | [-1] -------------------------------------------------------------------------------- /data/conformance/y_number_negative_zero.json: -------------------------------------------------------------------------------- 1 | [-0] -------------------------------------------------------------------------------- /data/conformance/y_number_real_capital_e.json: -------------------------------------------------------------------------------- 1 | [1E22] -------------------------------------------------------------------------------- /data/conformance/y_number_real_capital_e_neg_exp.json: -------------------------------------------------------------------------------- 1 | [1E-2] 
-------------------------------------------------------------------------------- /data/conformance/y_number_real_capital_e_pos_exp.json: -------------------------------------------------------------------------------- 1 | [1E+2] -------------------------------------------------------------------------------- /data/conformance/y_number_real_exponent.json: -------------------------------------------------------------------------------- 1 | [123e45] -------------------------------------------------------------------------------- /data/conformance/y_number_real_fraction_exponent.json: -------------------------------------------------------------------------------- 1 | [123.456e78] -------------------------------------------------------------------------------- /data/conformance/y_number_real_neg_exp.json: -------------------------------------------------------------------------------- 1 | [1e-2] -------------------------------------------------------------------------------- /data/conformance/y_number_real_pos_exponent.json: -------------------------------------------------------------------------------- 1 | [1e+2] -------------------------------------------------------------------------------- /data/conformance/y_number_simple_int.json: -------------------------------------------------------------------------------- 1 | [123] -------------------------------------------------------------------------------- /data/conformance/y_number_simple_real.json: -------------------------------------------------------------------------------- 1 | [123.456789] -------------------------------------------------------------------------------- /data/conformance/y_object.json: -------------------------------------------------------------------------------- 1 | {"asd":"sdf", "dfg":"fgh"} -------------------------------------------------------------------------------- /data/conformance/y_object_basic.json: -------------------------------------------------------------------------------- 1 | 
{"asd":"sdf"} -------------------------------------------------------------------------------- /data/conformance/y_object_duplicated_key.json: -------------------------------------------------------------------------------- 1 | {"a":"b","a":"c"} -------------------------------------------------------------------------------- /data/conformance/y_object_duplicated_key_and_value.json: -------------------------------------------------------------------------------- 1 | {"a":"b","a":"b"} -------------------------------------------------------------------------------- /data/conformance/y_object_empty.json: -------------------------------------------------------------------------------- 1 | {} -------------------------------------------------------------------------------- /data/conformance/y_object_empty_key.json: -------------------------------------------------------------------------------- 1 | {"":0} -------------------------------------------------------------------------------- /data/conformance/y_object_escaped_null_in_key.json: -------------------------------------------------------------------------------- 1 | {"foo\u0000bar": 42} -------------------------------------------------------------------------------- /data/conformance/y_object_extreme_numbers.json: -------------------------------------------------------------------------------- 1 | { "min": -1.0e+28, "max": 1.0e+28 } -------------------------------------------------------------------------------- /data/conformance/y_object_long_strings.json: -------------------------------------------------------------------------------- 1 | {"x":[{"id": "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx"}], "id": "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx"} -------------------------------------------------------------------------------- /data/conformance/y_object_simple.json: -------------------------------------------------------------------------------- 1 | {"a":[]} 
-------------------------------------------------------------------------------- /data/conformance/y_object_string_unicode.json: -------------------------------------------------------------------------------- 1 | {"title":"\u041f\u043e\u043b\u0442\u043e\u0440\u0430 \u0417\u0435\u043c\u043b\u0435\u043a\u043e\u043f\u0430" } -------------------------------------------------------------------------------- /data/conformance/y_object_with_newlines.json: -------------------------------------------------------------------------------- 1 | { 2 | "a": "b" 3 | } -------------------------------------------------------------------------------- /data/conformance/y_string_1_2_3_bytes_UTF-8_sequences.json: -------------------------------------------------------------------------------- 1 | ["\u0060\u012a\u12AB"] -------------------------------------------------------------------------------- /data/conformance/y_string_accepted_surrogate_pair.json: -------------------------------------------------------------------------------- 1 | ["\uD801\udc37"] -------------------------------------------------------------------------------- /data/conformance/y_string_accepted_surrogate_pairs.json: -------------------------------------------------------------------------------- 1 | ["\ud83d\ude39\ud83d\udc8d"] -------------------------------------------------------------------------------- /data/conformance/y_string_allowed_escapes.json: -------------------------------------------------------------------------------- 1 | ["\"\\\/\b\f\n\r\t"] -------------------------------------------------------------------------------- /data/conformance/y_string_backslash_and_u_escaped_zero.json: -------------------------------------------------------------------------------- 1 | ["\\u0000"] -------------------------------------------------------------------------------- /data/conformance/y_string_backslash_doublequotes.json: -------------------------------------------------------------------------------- 1 | 
["\""] -------------------------------------------------------------------------------- /data/conformance/y_string_comments.json: -------------------------------------------------------------------------------- 1 | ["a/*b*/c/*d//e"] -------------------------------------------------------------------------------- /data/conformance/y_string_double_escape_a.json: -------------------------------------------------------------------------------- 1 | ["\\a"] -------------------------------------------------------------------------------- /data/conformance/y_string_double_escape_n.json: -------------------------------------------------------------------------------- 1 | ["\\n"] -------------------------------------------------------------------------------- /data/conformance/y_string_escaped_control_character.json: -------------------------------------------------------------------------------- 1 | ["\u0012"] -------------------------------------------------------------------------------- /data/conformance/y_string_escaped_noncharacter.json: -------------------------------------------------------------------------------- 1 | ["\uFFFF"] -------------------------------------------------------------------------------- /data/conformance/y_string_in_array.json: -------------------------------------------------------------------------------- 1 | ["asd"] -------------------------------------------------------------------------------- /data/conformance/y_string_in_array_with_leading_space.json: -------------------------------------------------------------------------------- 1 | [ "asd"] -------------------------------------------------------------------------------- /data/conformance/y_string_last_surrogates_1_and_2.json: -------------------------------------------------------------------------------- 1 | ["\uDBFF\uDFFF"] -------------------------------------------------------------------------------- /data/conformance/y_string_nbsp_uescaped.json: 
-------------------------------------------------------------------------------- 1 | ["new\u00A0line"] -------------------------------------------------------------------------------- /data/conformance/y_string_nonCharacterInUTF-8_U+10FFFF.json: -------------------------------------------------------------------------------- 1 | ["􏿿"] -------------------------------------------------------------------------------- /data/conformance/y_string_nonCharacterInUTF-8_U+FFFF.json: -------------------------------------------------------------------------------- 1 | ["￿"] -------------------------------------------------------------------------------- /data/conformance/y_string_null_escape.json: -------------------------------------------------------------------------------- 1 | ["\u0000"] -------------------------------------------------------------------------------- /data/conformance/y_string_one-byte-utf-8.json: -------------------------------------------------------------------------------- 1 | ["\u002c"] -------------------------------------------------------------------------------- /data/conformance/y_string_pi.json: -------------------------------------------------------------------------------- 1 | ["π"] -------------------------------------------------------------------------------- /data/conformance/y_string_reservedCharacterInUTF-8_U+1BFFF.json: -------------------------------------------------------------------------------- 1 | ["𛿿"] -------------------------------------------------------------------------------- /data/conformance/y_string_simple_ascii.json: -------------------------------------------------------------------------------- 1 | ["asd "] -------------------------------------------------------------------------------- /data/conformance/y_string_space.json: -------------------------------------------------------------------------------- 1 | " " -------------------------------------------------------------------------------- 
/data/conformance/y_string_surrogates_U+1D11E_MUSICAL_SYMBOL_G_CLEF.json: -------------------------------------------------------------------------------- 1 | ["\uD834\uDd1e"] -------------------------------------------------------------------------------- /data/conformance/y_string_three-byte-utf-8.json: -------------------------------------------------------------------------------- 1 | ["\u0821"] -------------------------------------------------------------------------------- /data/conformance/y_string_two-byte-utf-8.json: -------------------------------------------------------------------------------- 1 | ["\u0123"] -------------------------------------------------------------------------------- /data/conformance/y_string_u+2028_line_sep.json: -------------------------------------------------------------------------------- 1 | ["
"] -------------------------------------------------------------------------------- /data/conformance/y_string_u+2029_par_sep.json: -------------------------------------------------------------------------------- 1 | ["
"] -------------------------------------------------------------------------------- /data/conformance/y_string_uEscape.json: -------------------------------------------------------------------------------- 1 | ["\u0061\u30af\u30EA\u30b9"] -------------------------------------------------------------------------------- /data/conformance/y_string_uescaped_newline.json: -------------------------------------------------------------------------------- 1 | ["new\u000Aline"] -------------------------------------------------------------------------------- /data/conformance/y_string_unescaped_char_delete.json: -------------------------------------------------------------------------------- 1 | [""] -------------------------------------------------------------------------------- /data/conformance/y_string_unicode.json: -------------------------------------------------------------------------------- 1 | ["\uA66D"] -------------------------------------------------------------------------------- /data/conformance/y_string_unicodeEscapedBackslash.json: -------------------------------------------------------------------------------- 1 | ["\u005C"] -------------------------------------------------------------------------------- /data/conformance/y_string_unicode_2.json: -------------------------------------------------------------------------------- 1 | ["⍂㈴⍂"] -------------------------------------------------------------------------------- /data/conformance/y_string_unicode_U+10FFFE_nonchar.json: -------------------------------------------------------------------------------- 1 | ["\uDBFF\uDFFE"] -------------------------------------------------------------------------------- /data/conformance/y_string_unicode_U+1FFFE_nonchar.json: -------------------------------------------------------------------------------- 1 | ["\uD83F\uDFFE"] -------------------------------------------------------------------------------- /data/conformance/y_string_unicode_U+200B_ZERO_WIDTH_SPACE.json: 
-------------------------------------------------------------------------------- 1 | ["\u200B"] -------------------------------------------------------------------------------- /data/conformance/y_string_unicode_U+2064_invisible_plus.json: -------------------------------------------------------------------------------- 1 | ["\u2064"] -------------------------------------------------------------------------------- /data/conformance/y_string_unicode_U+FDD0_nonchar.json: -------------------------------------------------------------------------------- 1 | ["\uFDD0"] -------------------------------------------------------------------------------- /data/conformance/y_string_unicode_U+FFFE_nonchar.json: -------------------------------------------------------------------------------- 1 | ["\uFFFE"] -------------------------------------------------------------------------------- /data/conformance/y_string_unicode_escaped_double_quote.json: -------------------------------------------------------------------------------- 1 | ["\u0022"] -------------------------------------------------------------------------------- /data/conformance/y_string_utf8.json: -------------------------------------------------------------------------------- 1 | ["€𝄞"] -------------------------------------------------------------------------------- /data/conformance/y_string_with_del_character.json: -------------------------------------------------------------------------------- 1 | ["aa"] -------------------------------------------------------------------------------- /data/conformance/y_structure_lonely_false.json: -------------------------------------------------------------------------------- 1 | false -------------------------------------------------------------------------------- /data/conformance/y_structure_lonely_int.json: -------------------------------------------------------------------------------- 1 | 42 -------------------------------------------------------------------------------- 
/data/conformance/y_structure_lonely_negative_real.json: -------------------------------------------------------------------------------- 1 | -0.1 -------------------------------------------------------------------------------- /data/conformance/y_structure_lonely_null.json: -------------------------------------------------------------------------------- 1 | null -------------------------------------------------------------------------------- /data/conformance/y_structure_lonely_string.json: -------------------------------------------------------------------------------- 1 | "asd" -------------------------------------------------------------------------------- /data/conformance/y_structure_lonely_true.json: -------------------------------------------------------------------------------- 1 | true -------------------------------------------------------------------------------- /data/conformance/y_structure_string_empty.json: -------------------------------------------------------------------------------- 1 | "" -------------------------------------------------------------------------------- /data/conformance/y_structure_trailing_newline.json: -------------------------------------------------------------------------------- 1 | ["a"] 2 | -------------------------------------------------------------------------------- /data/conformance/y_structure_true_in_array.json: -------------------------------------------------------------------------------- 1 | [true] -------------------------------------------------------------------------------- /data/conformance/y_structure_whitespace_array.json: -------------------------------------------------------------------------------- 1 | [] -------------------------------------------------------------------------------- /data/invalid.json: -------------------------------------------------------------------------------- 1 | { 2 | "foo" : "bar", 3 | "baz" : [1, 2, 3, -5.238492834e-123, ajlksdjfs] 4 | } 5 | 
-------------------------------------------------------------------------------- /data/json_demo.json: -------------------------------------------------------------------------------- 1 | { 2 | "foo" : "bar", 3 | "baz" : [1, 2, 3, -5.238492834e-123], 4 | "bar" : { "a": 1, "b":2}, 5 | "blep" : true, 6 | "blap" : false, 7 | "blop" : null 8 | } 9 | -------------------------------------------------------------------------------- /data/regex_demo.txt: -------------------------------------------------------------------------------- 1 | Vext cwm zing jabs fly kurd qoph. 2 | Sphinx of black quartz, judge my vow. 3 | Jackdaws love my big sphinx of quartz. 4 | Pack my box with five dozen liquor jugs. 5 | The quick brown fox jumps over the lazy dog. 6 | -------------------------------------------------------------------------------- /docs/assets/cheese_m.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aappleby/matcheroni/4c22d81e0ddc8e1a58db16a883636911318f4162/docs/assets/cheese_m.jpg -------------------------------------------------------------------------------- /docs/assets/codejar.min.js: -------------------------------------------------------------------------------- 1 | /** 2 | * Minified by jsDelivr using Terser v5.10.0. 3 | * Original file: /npm/codejar@3.6.0/codejar.js 4 | * 5 | * Do NOT use SRI with dynamically generated files! 
More information: https://www.jsdelivr.com/using-sri-with-dynamic-files 6 | */ 7 | const globalWindow=window;export function CodeJar(t,e,n={}){const o=Object.assign({tab:"\t",indentOn:/[({\[]$/,moveToNewLine:/^[)}\]]/,spellcheck:!1,catchTab:!0,preserveIdent:!0,addClosing:!0,history:!0,window:globalWindow},n),r=o.window,i=r.document;let s,d,l=[],a=[],c=-1,f=!1;t.setAttribute("contenteditable","plaintext-only"),t.setAttribute("spellcheck",o.spellcheck?"true":"false"),t.style.outline="none",t.style.overflowWrap="break-word",t.style.overflowY="auto",t.style.whiteSpace="pre-wrap";let u=!1;e(t),"plaintext-only"!==t.contentEditable&&(u=!0),u&&t.setAttribute("contenteditable","true");const p=L((()=>{const n=T();e(t,n),b(n)}),30);let h=!1;const g=t=>!O(t)&&!x(t)&&"Meta"!==t.key&&"Control"!==t.key&&"Alt"!==t.key&&!t.key.startsWith("Arrow"),y=L((t=>{g(t)&&(C(),h=!1)}),300),N=(e,n)=>{l.push([e,n]),t.addEventListener(e,n)};function T(){const e=K(),n={start:0,end:0,dir:void 0};let{anchorNode:o,anchorOffset:r,focusNode:s,focusOffset:d}=e;if(!o||!s)throw"error1";if(o.nodeType===Node.ELEMENT_NODE){const t=i.createTextNode("");o.insertBefore(t,o.childNodes[r]),o=t,r=0}if(s.nodeType===Node.ELEMENT_NODE){const t=i.createTextNode("");s.insertBefore(t,s.childNodes[d]),s=t,d=0}return v(t,(t=>{if(t===o&&t===s)return n.start+=r,n.end+=d,n.dir=r<=d?"->":"<-","stop";if(t===o){if(n.start+=r,n.dir)return"stop";n.dir="->"}else if(t===s){if(n.end+=d,n.dir)return"stop";n.dir="<-"}t.nodeType===Node.TEXT_NODE&&("->"!=n.dir&&(n.start+=t.nodeValue.length),"<-"!=n.dir&&(n.end+=t.nodeValue.length))})),t.normalize(),n}function b(e){const n=K();let o,r,i=0,s=0;if(e.dir||(e.dir="->"),e.start<0&&(e.start=0),e.end<0&&(e.end=0),"<-"==e.dir){const{start:t,end:n}=e;e.start=n,e.end=t}let d=0;v(t,(t=>{if(t.nodeType!==Node.TEXT_NODE)return;const n=(t.nodeValue||"").length;if(d+n>e.start&&(o||(o=t,i=e.start-d),d+n>e.end))return 
r=t,s=e.end-d,"stop";d+=n})),o||(o=t,i=t.childNodes.length),r||(r=t,s=t.childNodes.length),"<-"==e.dir&&([o,i,r,s]=[r,s,o,i]),n.setBaseAndExtent(o,i,r,s)}function E(){const e=K().getRangeAt(0),n=i.createRange();return n.selectNodeContents(t),n.setEnd(e.startContainer,e.startOffset),n.toString()}function m(){const e=K().getRangeAt(0),n=i.createRange();return n.selectNodeContents(t),n.setStart(e.endContainer,e.endOffset),n.toString()}function k(t){if(u&&"Enter"===t.key)if(D(t),t.stopPropagation(),""==m()){M("\n ");const t=T();t.start=--t.end,b(t)}else M("\n")}function C(){if(!f)return;const e=t.innerHTML,n=T(),o=a[c];if(o&&o.html===e&&o.pos.start===n.start&&o.pos.end===n.end)return;c++,a[c]={html:e,pos:n},a.splice(c+1);c>300&&(c=300,a.splice(0,1))}function v(t,e){const n=[];t.firstChild&&n.push(t.firstChild);let o=n.pop();for(;o&&"stop"!==e(o);)o.nextSibling&&n.push(o.nextSibling),o.firstChild&&n.push(o.firstChild),o=n.pop()}function w(t){return t.metaKey||t.ctrlKey}function O(t){return w(t)&&!t.shiftKey&&"KeyZ"===t.code}function x(t){return w(t)&&t.shiftKey&&"KeyZ"===t.code}function M(t){t=t.replace(/&/g,"&").replace(//g,">").replace(/"/g,""").replace(/'/g,"'"),i.execCommand("insertHTML",!1,t)}function L(t,e){let n=0;return(...o)=>{clearTimeout(n),n=r.setTimeout((()=>t(...o)),e)}}function S(t){let e=t.length-1;for(;e>=0&&"\n"!==t[e];)e--;e++;let n=e;for(;n{e.defaultPrevented||(d=A(),o.preserveIdent?function(t){if("Enter"===t.key){const e=E(),n=m();let[r]=S(e),i=r;if(o.indentOn.test(e)&&(i+=o.tab),i.length>0?(D(t),t.stopPropagation(),M("\n"+i)):k(t),i!==r&&o.moveToNewLine.test(n)){const t=T();M("\n"+r),b(t)}}}(e):k(e),o.catchTab&&function(t){if("Tab"===t.key)if(D(t),t.shiftKey){const t=E();let[e,n]=S(t);if(e.length>0){const t=T(),r=Math.min(o.tab.length,e.length);b({start:n,end:n+r}),i.execCommand("delete"),t.start-=r,t.end-=r,b(t)}}else M(o.tab)}(e),o.addClosing&&function(t){const 
e="([{'\"",n=")]}'\"",o=m(),r=E(),i="\\"===r.substr(r.length-1),s=o.substr(0,1);if(n.includes(t.key)&&!i&&s===t.key){const e=T();D(t),e.start=++e.end,b(e)}else if(e.includes(t.key)&&!i&&("\"'".includes(t.key)||[""," ","\n"].includes(s))){D(t);const o=T(),r=o.start==o.end?"":K().toString();M(t.key+r+n[e.indexOf(t.key)]),o.start++,o.end++,b(o)}}(e),o.history&&(!function(e){if(O(e)){D(e),c--;const n=a[c];n&&(t.innerHTML=n.html,b(n.pos)),c<0&&(c=0)}if(x(e)){D(e),c++;const n=a[c];n&&(t.innerHTML=n.html,b(n.pos)),c>=a.length&&c--}}(e),g(e)&&!h&&(C(),h=!0)),u&&b(T()))})),N("keyup",(t=>{t.defaultPrevented||t.isComposing||(d!==A()&&p(),y(t),s&&s(A()))})),N("focus",(t=>{f=!0})),N("blur",(t=>{f=!1})),N("paste",(n=>{C(),function(n){D(n);const o=(n.originalEvent||n).clipboardData.getData("text/plain").replace(/\r/g,""),r=T();M(o),e(t),b({start:Math.min(r.start,r.end)+o.length,end:Math.min(r.start,r.end)+o.length,dir:"<-"})}(n),C(),s&&s(A())})),{updateOptions(t){Object.assign(o,t)},updateCode(n){t.textContent=n,e(t)},onUpdate(t){s=t},toString:A,save:T,restore:b,recordHistory:C,destroy(){for(let[e,n]of l)t.removeEventListener(e,n)}}} 8 | //# sourceMappingURL=/sm/96f81945d5ecf71ee083a671465d6081d317ddc1afb05de3cce12b31422fa964.map -------------------------------------------------------------------------------- /docs/assets/drawdown.js: -------------------------------------------------------------------------------- 1 | /** 2 | * drawdown.js 3 | * (c) Adam Leggett 4 | */ 5 | 6 | 7 | ;function markdown(src) { 8 | 9 | var rx_lt = //g; 11 | var rx_space = /\t|\r|\uf8ff/g; 12 | var rx_escape = /\\([\\\|`*_{}\[\]()#+\-~])/g; 13 | var rx_hr = /^([*\-=_] *){3,}$/gm; 14 | var rx_blockquote = /\n *> *([^]*?)(?=(\n|$){2})/g; 15 | var rx_list = /\n( *)(?:[*\-+]|((\d+)|([a-z])|[A-Z])[.)]) +([^]*?)(?=(\n|$){2})/g; 16 | var rx_listjoin = /<\/(ol|ul)>\n\n<\1>/g; 17 | var rx_highlight = /(^|[^A-Za-z\d\\])(([*_])|(~)|(\^)|(--)|(\+\+)|`)(\2?)([^<]*?)\2\8(?!\2)(?=\W|_|$)/g; 18 | var rx_code = 
/\n((```|~~~).*\n?([^]*?)\n?\2|(( .*?\n)+))/g; 19 | var rx_link = /((!?)\[(.*?)\]\((.*?)( ".*")?\)|\\([\\`*_{}\[\]()#+\-.!~]))/g; 20 | var rx_table = /\n(( *\|.*?\| *\n)+)/g; 21 | var rx_thead = /^.*\n( *\|( *\:?-+\:?-+\:? *\|)* *\n|)/; 22 | var rx_row = /.*\n/g; 23 | var rx_cell = /\||(.*?[^\\])\|/g; 24 | var rx_heading = /(?=^|>|\n)([>\s]*?)(#{1,6}) (.*?)( #*)? *(?=\n|$)/g; 25 | var rx_para = /(?=^|>|\n)\s*\n+([^<]+?)\n+\s*(?=\n|<|$)/g; 26 | var rx_stash = /-\d+\uf8ff/g; 27 | 28 | function replace(rex, fn) { 29 | src = src.replace(rex, fn); 30 | } 31 | 32 | function element(tag, content) { 33 | return '<' + tag + '>' + content + ''; 34 | } 35 | 36 | function blockquote(src) { 37 | return src.replace(rx_blockquote, function(all, content) { 38 | return element('blockquote', blockquote(highlight(content.replace(/^ *> */gm, '')))); 39 | }); 40 | } 41 | 42 | function list(src) { 43 | return src.replace(rx_list, function(all, ind, ol, num, low, content) { 44 | var entry = element('li', highlight(content.split( 45 | RegExp('\n ?' + ind + '(?:(?:\\d+|[a-zA-Z])[.)]|[*\\-+]) +', 'g')).map(list).join('
  • '))); 46 | 47 | return '\n' + (ol 48 | ? '
      ' 50 | : parseInt(ol,36) - 9 + '" style="list-style-type:' + (low ? 'low' : 'upp') + 'er-alpha">') + entry + '
    ' 51 | : element('ul', entry)); 52 | }); 53 | } 54 | 55 | function highlight(src) { 56 | return src.replace(rx_highlight, function(all, _, p1, emp, sub, sup, small, big, p2, content) { 57 | return _ + element( 58 | emp ? (p2 ? 'strong' : 'em') 59 | : sub ? (p2 ? 's' : 'sub') 60 | : sup ? 'sup' 61 | : small ? 'small' 62 | : big ? 'big' 63 | : 'code', 64 | highlight(content)); 65 | }); 66 | } 67 | 68 | function unesc(str) { 69 | return str.replace(rx_escape, '$1'); 70 | } 71 | 72 | var stash = []; 73 | var si = 0; 74 | 75 | src = '\n' + src + '\n'; 76 | 77 | replace(rx_lt, '<'); 78 | replace(rx_gt, '>'); 79 | replace(rx_space, ' '); 80 | 81 | // blockquote 82 | src = blockquote(src); 83 | 84 | // horizontal rule 85 | replace(rx_hr, '
    '); 86 | 87 | // list 88 | src = list(src); 89 | replace(rx_listjoin, ''); 90 | 91 | // code 92 | replace(rx_code, function(all, p1, p2, p3, p4) { 93 | stash[--si] = element('pre', element('code', p3||p4.replace(/^ /gm, ''))); 94 | return si + '\uf8ff'; 95 | }); 96 | 97 | // link or image 98 | replace(rx_link, function(all, p1, p2, p3, p4, p5, p6) { 99 | stash[--si] = p4 100 | ? p2 101 | ? '' + p3 + '' 102 | : '' + unesc(highlight(p3)) + '' 103 | : p6; 104 | return si + '\uf8ff'; 105 | }); 106 | 107 | // table 108 | replace(rx_table, function(all, table) { 109 | var sep = table.match(rx_thead)[1]; 110 | return '\n' + element('table', 111 | table.replace(rx_row, function(row, ri) { 112 | return row == sep ? '' : element('tr', row.replace(rx_cell, function(all, cell, ci) { 113 | return ci ? element(sep && !ri ? 'th' : 'td', unesc(highlight(cell || ''))) : '' 114 | })) 115 | }) 116 | ) 117 | }); 118 | 119 | // heading 120 | replace(rx_heading, function(all, _, p1, p2) { return _ + element('h' + p1.length, unesc(highlight(p2))) }); 121 | 122 | // paragraph 123 | replace(rx_para, function(all, content) { return element('p', unesc(highlight(content))) }); 124 | 125 | // stash 126 | replace(rx_stash, function(all) { return stash[parseInt(all)] }); 127 | 128 | return src.trim(); 129 | }; 130 | -------------------------------------------------------------------------------- /docs/assets/vs2015.min.css: -------------------------------------------------------------------------------- 1 | .hljs { 2 | display: block; 3 | overflow-x: auto; 4 | /*padding: .5em;*/ 5 | background: #1E1E1E; 6 | color: #DCDCDC 7 | } 8 | 9 | .hljs-keyword, 10 | .hljs-literal, 11 | .hljs-symbol, 12 | .hljs-name { 13 | color: #569CD6 14 | } 15 | 16 | .hljs-link { 17 | color: #569CD6; 18 | text-decoration: underline 19 | } 20 | 21 | .hljs-built_in, 22 | .hljs-type { 23 | color: #4EC9B0 24 | } 25 | 26 | .hljs-number, 27 | .hljs-class { 28 | color: #B8D7A3 29 | } 30 | 31 | .hljs-string, 32 | 
.hljs-meta-string { 33 | color: #D69D85 34 | } 35 | 36 | .hljs-regexp, 37 | .hljs-template-tag { 38 | color: #9A5334 39 | } 40 | 41 | .hljs-subst, 42 | .hljs-function, 43 | .hljs-title, 44 | .hljs-params, 45 | .hljs-formula { 46 | color: #DCDCDC 47 | } 48 | 49 | .hljs-comment, 50 | .hljs-quote { 51 | color: #57A64A; 52 | font-style: italic 53 | } 54 | 55 | .hljs-doctag { 56 | color: #608B4E 57 | } 58 | 59 | .hljs-meta, 60 | .hljs-meta-keyword, 61 | .hljs-tag { 62 | color: #9B9B9B 63 | } 64 | 65 | .hljs-variable, 66 | .hljs-template-variable { 67 | color: #BD63C5 68 | } 69 | 70 | .hljs-attr, 71 | .hljs-attribute, 72 | .hljs-builtin-name { 73 | color: #9CDCFE 74 | } 75 | 76 | .hljs-section { 77 | color: gold 78 | } 79 | 80 | .hljs-emphasis { 81 | font-style: italic 82 | } 83 | 84 | .hljs-strong { 85 | font-weight: bold 86 | } 87 | 88 | .hljs-bullet, 89 | .hljs-selector-tag, 90 | .hljs-selector-id, 91 | .hljs-selector-class, 92 | .hljs-selector-attr, 93 | .hljs-selector-pseudo { 94 | color: #D7BA7D 95 | } 96 | 97 | .hljs-addition { 98 | background-color: #144212; 99 | display: inline-block; 100 | width: 100% 101 | } 102 | 103 | .hljs-deletion { 104 | background-color: #600; 105 | display: inline-block; 106 | width: 100% 107 | } 108 | -------------------------------------------------------------------------------- /docs/index.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | Matcheroni & Parseroni 13 | 14 | 15 | 16 | 17 | 18 | 19 |
    20 | 21 | 63 | 64 | 65 | -------------------------------------------------------------------------------- /docs/size_notes.txt: -------------------------------------------------------------------------------- 1 | Matcheroni JSON matching/parsing benchmark 2 | 3 | ---------------------------------------- 4 | Byte total 4609770.000000 5 | Line total 65959.000000 6 | Match time 3.492900 7 | Parse time 4.609600 8 | Match byte rate 1319.754359 megabytes per second 9 | Match line rate 18.883736 megalines per second 10 | Parse byte rate 1000.036880 megabytes per second 11 | Parse line rate 14.309051 megalines per second 12 | 13 | 14 | optimized build of json_benchmark with only matching turned on produces a single 15 | match function that is 3524 bytes 16 | 17 | json_benchmark with only parse - 18 | 1644 recycle() 19 | 1836 some match 20 | 5787 some match 21 | 22 | 23 | -O3 -flto + size bin + minimized ifdef ranges: 24 | both 22165 25 | match 12429 26 | parse 18747 27 | none 8628 28 | 29 | parse 9736 30 | match 3418 31 | common 383 32 | 33 | 34 | 35 | both 22726 36 | match 12818 37 | parse 19024 38 | none 8628 39 | 40 | parse 9908 41 | match 3702 42 | common 488 43 | 44 | 45 | 46 | 47 | -O3 -flto + size bin 48 | both = 22695 49 | match = 11469 50 | parse = 17682 51 | none = 4193 52 | 53 | parse = 11226 54 | match = 5013 55 | common = 2263 56 | 57 | -Os -flto + size bin 58 | 59 | both = 15345 60 | match = 10673 61 | parse = 12373 62 | none = 4556 63 | 64 | parse = 4672 65 | match = 2972 66 | common = 3145 67 | 68 | 69 | 70 | 71 | 72 | bah these are ls -al not size bin 73 | 74 | -O3 -flto 75 | 76 | match + parse = 38768 77 | only match = 22288 78 | only parse = 32720 79 | neither = 16280 80 | 81 | parse = 16540 82 | match = 6048 83 | neither+match+parse = 38868 84 | 85 | -Os -flto 86 | 87 | match + parse = 33536 88 | only match = 24360 89 | only parse = 25864 90 | neither = 16632 91 | parse = 9176 92 | match = 7672 93 | neither+match+parse = 33480 94 | 95 | -O3 -noflto 
96 | neither 34720 97 | match 39648 98 | parse 39648 99 | both 39648 100 | -------------------------------------------------------------------------------- /docs/top.css: -------------------------------------------------------------------------------- 1 | html { 2 | background-color: #444; 3 | color: #CCC; 4 | font-family: monospace; 5 | font-size: 16px; 6 | } 7 | 8 | body { 9 | background-color:#282828; 10 | margin:auto; 11 | padding: 20px; 12 | width: fit-content; 13 | max-width: 1200px; 14 | } 15 | 16 | /* unvisited link */ 17 | a:link { 18 | color: #CCF; 19 | } 20 | 21 | /* visited link */ 22 | a:visited { 23 | color: #DCF; 24 | } 25 | 26 | pre { 27 | max-width:80ch; 28 | margin:auto; 29 | } 30 | 31 | table tr td { font-size: 14px; } 32 | 33 | p { 34 | color: #CCC; 35 | /*text-indent:30px;*/ 36 | margin-left:auto; 37 | margin-right:auto; 38 | /*max-width:800px;*/ 39 | max-width:80ch; 40 | } 41 | 42 | table { 43 | width: 100%; 44 | } 45 | 46 | td { 47 | text-align:center; 48 | } 49 | 50 | .highlight { 51 | color: #FFFFBB; 52 | } 53 | 54 | /******************************************************************************/ 55 | 56 | hr { 57 | color: #444; 58 | background-color: #444; 59 | border-color: #6D6D6D; 60 | border-width: 2px; 61 | } 62 | 63 | pre { 64 | background-color: #222; 65 | padding: 6px; 66 | } 67 | 68 | table, th, td { 69 | border: 1px solid; 70 | border-collapse: collapse; 71 | background-color: #222; 72 | } 73 | 74 | ul li { 75 | width: 80ch; 76 | margin:auto; 77 | } 78 | 79 | th, td { 80 | padding: 6px; 81 | } 82 | 83 | h1, h2, h3, h4 { 84 | color: #CCE; 85 | /*max-width:704px;*/ 86 | width:fit-content; 87 | margin:auto; 88 | } 89 | -------------------------------------------------------------------------------- /docs/tutorial/json_tut0a.wasm: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aappleby/matcheroni/4c22d81e0ddc8e1a58db16a883636911318f4162/docs/tutorial/json_tut0a.wasm 
-------------------------------------------------------------------------------- /docs/tutorial/json_tut1a.wasm: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aappleby/matcheroni/4c22d81e0ddc8e1a58db16a883636911318f4162/docs/tutorial/json_tut1a.wasm -------------------------------------------------------------------------------- /docs/tutorial/json_tut1b.wasm: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aappleby/matcheroni/4c22d81e0ddc8e1a58db16a883636911318f4162/docs/tutorial/json_tut1b.wasm -------------------------------------------------------------------------------- /docs/tutorial/json_tut1c.wasm: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aappleby/matcheroni/4c22d81e0ddc8e1a58db16a883636911318f4162/docs/tutorial/json_tut1c.wasm -------------------------------------------------------------------------------- /docs/tutorial/json_tut2a.wasm: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aappleby/matcheroni/4c22d81e0ddc8e1a58db16a883636911318f4162/docs/tutorial/json_tut2a.wasm -------------------------------------------------------------------------------- /docs/tutorial/json_tut2b.wasm: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aappleby/matcheroni/4c22d81e0ddc8e1a58db16a883636911318f4162/docs/tutorial/json_tut2b.wasm -------------------------------------------------------------------------------- /docs/tutorial/tiny_c_parser.wasm: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aappleby/matcheroni/4c22d81e0ddc8e1a58db16a883636911318f4162/docs/tutorial/tiny_c_parser.wasm 
-------------------------------------------------------------------------------- /docs/tutorial/tutorial.css: -------------------------------------------------------------------------------- 1 | html { 2 | background-color: #444; 3 | color: #CCC; 4 | font-family: monospace; 5 | font-size: 15px; 6 | } 7 | 8 | body { 9 | background-color: #222; 10 | margin:auto; 11 | padding:20px; 12 | width:fit-content; 13 | max-width: 1600px; 14 | } 15 | 16 | /* unvisited link */ 17 | a:link { 18 | color: #CCF; 19 | } 20 | 21 | /* visited link */ 22 | a:visited { 23 | color: #DCF; 24 | } 25 | 26 | p { 27 | color: #CCC; 28 | text-indent:30px; 29 | margin-left:auto; 30 | margin-right:auto; 31 | max-width:800px; 32 | } 33 | 34 | .highlight { 35 | color: #FFFFBB; 36 | } 37 | 38 | /******************************************************************************/ 39 | 40 | .topbar { 41 | display: flex; 42 | flex-direction: row; 43 | gap: 20px; 44 | justify-content:space-between; 45 | } 46 | 47 | .topbar_title { 48 | display:flex; 49 | align-items: center; 50 | flex-basis:auto; 51 | font-size: 20px; 52 | color: #CBF; 53 | margin:8px; 54 | } 55 | 56 | .topbar_spacer { 57 | display:flex; 58 | align-items: center; 59 | flex-basis:250px; 60 | } 61 | 62 | /******************************************************************************/ 63 | 64 | .divider { 65 | margin:auto; 66 | padding:10px; 67 | background-color: #3B3344; 68 | border: solid #664466 1px; 69 | text-align:center; 70 | font-size:20px; 71 | max-width:800px; 72 | } 73 | 74 | /******************************************************************************/ 75 | 76 | .question { 77 | color: #FAA; 78 | margin:auto; 79 | padding:5px; 80 | padding-left:40px; 81 | padding-right:40px; 82 | background-color: #333333; 83 | border: solid #111 1px; 84 | text-align:center; 85 | font-size:16px; 86 | width:fit-content; 87 | } 88 | 89 | /******************************************************************************/ 90 | 91 | .text_box { 92 | color: 
#CCC; 93 | display: flex; 94 | background-color: #1e1e1e; 95 | border: solid #444444 1px; 96 | margin:0px; 97 | margin-left: auto; 98 | margin-right: auto; 99 | padding:8px; 100 | width:80ch; 101 | flex-grow: 1; 102 | } 103 | 104 | .code_box { 105 | display: flex; 106 | background-color: #1e1e1e; 107 | border: solid #444444 1px; 108 | margin:0px; 109 | margin-left: auto; 110 | margin-right: auto; 111 | padding:8px; 112 | width:80ch; 113 | flex-grow: 1; 114 | } 115 | 116 | /******************************************************************************/ 117 | 118 | .live_code { 119 | display: flex; 120 | flex-direction: row; 121 | flex-wrap: wrap; 122 | gap: 20px; 123 | justify-content: center; 124 | } 125 | 126 | .left_panel { 127 | max-height:900px; 128 | flex-basis: 1; 129 | flex-grow: 0; 130 | align-items: stretch; 131 | border: solid #444444 1px; 132 | 133 | display: flex; 134 | flex-direction: column; 135 | min-width:85ch; 136 | max-width:85ch; 137 | } 138 | 139 | .right_panel { 140 | max-height:900px; 141 | flex-basis: 1; 142 | flex-grow: 0; 143 | border: solid #444444 1px; 144 | 145 | display: flex; 146 | flex-direction: column; 147 | min-width:85ch; 148 | max-width:85ch; 149 | } 150 | 151 | .header_bar { 152 | flex-basis: 1; 153 | flex-grow: 0; 154 | background-color: #333333; 155 | padding: 8px; 156 | } 157 | 158 | .source_pane { 159 | flex-basis: auto; 160 | flex-grow: 1; 161 | padding:8px; 162 | background-color: #1E1E1E; 163 | /*overflow:scroll;*/ 164 | } 165 | 166 | .spacer { 167 | height: 20px; 168 | } 169 | 170 | .input_pane { 171 | flex-grow: 0; 172 | flex-shrink: 0; 173 | background-color: #1E1E1E; 174 | white-space: pre; 175 | padding:8px; 176 | overflow-y:scroll; 177 | scrollbar-color: #007 #bada55; 178 | max-height:300px; 179 | } 180 | 181 | .output_pane { 182 | flex-grow: 1; 183 | flex-shrink:1; 184 | background-color: #1E1E1E; 185 | white-space: pre; 186 | padding:8px; 187 | overflow-y:scroll; 188 | } 189 | 
-------------------------------------------------------------------------------- /docs/tutorial/tutorial.js: -------------------------------------------------------------------------------- 1 | console.log("tutorial.js load @ " + performance.now()); 2 | 3 | import { CodeJar } from "../assets/codejar.min.js"; 4 | 5 | const highlight = (editor) => { 6 | editor.textContent = editor.textContent 7 | hljs.highlightElement(editor) 8 | } 9 | 10 | //------------------------------------------------------------------------------ 11 | 12 | class LeftPanel { 13 | constructor(pane_div) { 14 | this.mod = null; 15 | this.panel_div = pane_div; 16 | this.source_header = pane_div.querySelector(".header_bar"); 17 | this.source_pane = pane_div.querySelector(".source_pane"); 18 | this.source_jar = new CodeJar(this.source_pane, highlight, {tab:" "}); 19 | } 20 | 21 | set_mod(mod) { 22 | this.mod = mod; 23 | } 24 | 25 | reload() { 26 | try { 27 | let filename = this.source_header.innerText; 28 | let src_contents = new TextDecoder().decode(this.mod.FS.readFile(filename)); 29 | this.source_jar.updateCode(src_contents); 30 | } catch {} 31 | } 32 | 33 | }; 34 | 35 | //------------------------------------------------------------------------------ 36 | 37 | class RightPanel { 38 | constructor(panel) { 39 | this.mod = null; 40 | this.panel_div = panel; 41 | this.input_header = panel.querySelector(".input_header"); 42 | this.input_pane = panel.querySelector(".input_pane"); 43 | this.input_jar = new CodeJar(this.input_pane, highlight, {tab:" "}); 44 | this.output_header = panel.querySelector(".output_header"); 45 | this.output_pane = panel.querySelector(".output_pane"); 46 | this.fitAddon = null; 47 | } 48 | 49 | set_mod(mod) { 50 | this.mod = mod; 51 | } 52 | 53 | reload() { 54 | try { 55 | let filename = this.input_header.innerText; 56 | let input_contents = new TextDecoder().decode(this.mod.FS.readFile(filename)); 57 | this.input_jar.updateCode(input_contents); 58 | //this.input_pane.innerText 
= input_contents; 59 | } catch {} 60 | } 61 | }; 62 | 63 | //------------------------------------------------------------------------------ 64 | 65 | window.tutorials = []; 66 | 67 | class Tutorial { 68 | constructor(div) { 69 | this.mod = null; 70 | this.div = div; 71 | this.stdout = ""; 72 | this.stderr = ""; 73 | this.ansi_up = new AnsiUp; 74 | 75 | this.left_panel = new LeftPanel(div.querySelector(".left_panel")); 76 | this.right_panel = new RightPanel(div.querySelector(".right_panel")); 77 | this.compile_timeout = null; 78 | window.tutorials.push(this); 79 | } 80 | 81 | set_mod(mod) { 82 | this.mod = mod; 83 | 84 | this.left_panel.set_mod(mod); 85 | this.right_panel.set_mod(mod); 86 | 87 | this.right_panel.input_pane.oninput = ()=> { 88 | let filename = this.right_panel.input_header.innerText; 89 | let contents = this.right_panel.input_pane.innerText; 90 | this.mod.FS.writeFile(filename, contents); 91 | this.convert(); 92 | }; 93 | 94 | this.left_panel.reload(); 95 | this.right_panel.reload(); 96 | 97 | this.convert(); 98 | } 99 | 100 | convert() { 101 | this.stdout = ""; 102 | this.stderr = ""; 103 | 104 | let args = [this.right_panel.input_header.innerText]; 105 | let ret = this.mod.callMain(args); 106 | 107 | this.right_panel.output_header.style.backgroundColor = ret == 0 ? 
"#353" : "#533"; 108 | this.right_panel.output_pane.innerHTML = this.ansi_up.ansi_to_html(this.stdout); 109 | } 110 | } 111 | 112 | //------------------------------------------------------------------------------ 113 | 114 | function load_tutorial(name) { 115 | let tutorial = new Tutorial(document.querySelector("#" + name)); 116 | 117 | console.log("Importing " + name + " @ " + performance.now()); 118 | import("./" + name + ".js").then((tutorial_mod) => { 119 | console.log("Initializing " + name + "_mod @ " + performance.now()); 120 | var mod_options = {}; 121 | mod_options.noInitialRun = true; 122 | mod_options.thisProgram = name; 123 | mod_options.print = (text) => { tutorial.stdout = tutorial.stdout + text + "\n"; } 124 | mod_options.printErr = (text) => { tutorial.stderr = tutorial.stderr + text + "\n"; } 125 | mod_options.onRuntimeInitialized = function() { 126 | console.log(name + ".onRuntimeInitialized " + performance.now()); 127 | } 128 | tutorial_mod.default(mod_options).then((mod) => { 129 | console.log("Initializing " + name + " @ " + performance.now()); 130 | tutorial.set_mod(mod); 131 | }); 132 | }); 133 | } 134 | 135 | //------------------------------------------------------------------------------ 136 | 137 | load_tutorial("json_tut0a"); 138 | load_tutorial("json_tut1a"); 139 | load_tutorial("json_tut1b"); 140 | load_tutorial("json_tut1c"); 141 | load_tutorial("json_tut2a"); 142 | load_tutorial("json_tut2b"); 143 | load_tutorial("tiny_c_parser"); 144 | 145 | //------------------------------------------------------------------------------ 146 | 147 | let code_boxes = document.querySelectorAll(".code_box"); 148 | let code_box_jars = []; 149 | 150 | console.log(code_boxes); 151 | 152 | for(let c of code_boxes) { 153 | let jar = new CodeJar(c, highlight, {tab:" "}); 154 | code_box_jars.push(jar); 155 | } 156 | 157 | //------------------------------------------------------------------------------ 158 | 
-------------------------------------------------------------------------------- /examples/SST.hpp: -------------------------------------------------------------------------------- 1 | // SPDX-FileCopyrightText: 2023 Austin Appleby 2 | // SPDX-License-Identifier: MIT License 3 | 4 | #pragma once 5 | 6 | #include 7 | 8 | //------------------------------------------------------------------------------ 9 | // Sorted string table matcher thing. 10 | 11 | template 12 | struct SST; 13 | 14 | template& table> 15 | struct SST { 16 | 17 | constexpr static size_t top_bit(size_t x) { 18 | for (size_t b = 31; b >= 0; b--) { 19 | if (x & (1 << b)) return (1 << b); 20 | } 21 | return 0; 22 | } 23 | 24 | constexpr static bool contains(const char* text) { 25 | for (auto table_entry : table) { 26 | if (__builtin_strcmp(table_entry, text) == 0) return true; 27 | } 28 | return false; 29 | } 30 | 31 | inline static int strcmp_span(const char* a, const char* b, const char* lit) { 32 | while (1) { 33 | auto ca = a == b ? 
0 : *a; 34 | auto cb = *lit; 35 | if (ca != cb || ca == 0) return ca - cb; 36 | a++; 37 | lit++; 38 | } 39 | } 40 | 41 | static const char* match(const char* a, const char* b) { 42 | if (!a || *a == 0) return nullptr; 43 | size_t bit = top_bit(N); 44 | size_t index = 0; 45 | 46 | // I'm not actually sure if 8 is the best tradeoff but it seems OK 47 | if (N > 8) { 48 | // Binary search for large tables 49 | while(1) { 50 | size_t new_index = index | bit; 51 | if (new_index < N) { 52 | auto lit = table[new_index]; 53 | auto c = strcmp_span(a, b, lit); 54 | if (c == 0) return lit; 55 | if (c > 0) index = new_index; 56 | } 57 | if (bit == 0) return nullptr; 58 | bit >>= 1; 59 | } 60 | } 61 | else { 62 | // Linear scan for small tables 63 | for (auto lit : table) { 64 | if (strcmp_span(a, b, lit) == 0) return lit; 65 | } 66 | } 67 | 68 | return nullptr; 69 | } 70 | }; 71 | 72 | //------------------------------------------------------------------------------ 73 | -------------------------------------------------------------------------------- /examples/c_lexer/CLexer.hpp: -------------------------------------------------------------------------------- 1 | // SPDX-FileCopyrightText: 2023 Austin Appleby 2 | // SPDX-License-Identifier: MIT License 3 | 4 | #pragma once 5 | 6 | #include 7 | #include 8 | #include "examples/c_lexer/CToken.hpp" 9 | #include "matcheroni/Matcheroni.hpp" 10 | 11 | //------------------------------------------------------------------------------ 12 | 13 | struct CLexer { 14 | CLexer(); 15 | void reset(); 16 | bool lex(matcheroni::TextSpan text); 17 | 18 | std::vector tokens; 19 | }; 20 | 21 | CToken next_lexeme(matcheroni::TextMatchContext& ctx, matcheroni::TextSpan body); 22 | 23 | //------------------------------------------------------------------------------ 24 | -------------------------------------------------------------------------------- /examples/c_lexer/CToken.cpp: 
-------------------------------------------------------------------------------- 1 | // SPDX-FileCopyrightText: 2023 Austin Appleby 2 | // SPDX-License-Identifier: MIT License 3 | 4 | #include "examples/c_lexer/CToken.hpp" 5 | 6 | #include 7 | 8 | using namespace matcheroni; 9 | 10 | //------------------------------------------------------------------------------ 11 | 12 | CToken::CToken(LexemeType type, TextSpan text) { 13 | this->type = type; 14 | this->text.begin = text.begin; 15 | this->text.end = text.end; 16 | } 17 | 18 | //---------------------------------------------------------------------------- 19 | 20 | bool CToken::is_bof() const { 21 | return type == LEX_BOF; 22 | } 23 | 24 | bool CToken::is_eof() const { 25 | return type == LEX_EOF; 26 | } 27 | 28 | bool CToken::is_gap() const { 29 | switch(type) { 30 | case LEX_NEWLINE: 31 | case LEX_SPACE: 32 | case LEX_COMMENT: 33 | case LEX_SPLICE: 34 | case LEX_FORMFEED: 35 | return true; 36 | default: 37 | return false; 38 | } 39 | } 40 | 41 | //---------------------------------------------------------------------------- 42 | 43 | const char* CToken::type_to_str() const { 44 | switch(type) { 45 | case LEX_INVALID : return "LEX_INVALID"; 46 | case LEX_SPACE : return "LEX_SPACE"; 47 | case LEX_NEWLINE : return "LEX_NEWLINE"; 48 | case LEX_STRING : return "LEX_STRING"; 49 | case LEX_KEYWORD : return "LEX_KEYWORD"; 50 | case LEX_IDENTIFIER : return "LEX_IDENTIFIER"; 51 | case LEX_COMMENT : return "LEX_COMMENT"; 52 | case LEX_PREPROC : return "LEX_PREPROC"; 53 | case LEX_FLOAT : return "LEX_FLOAT"; 54 | case LEX_INT : return "LEX_INT"; 55 | case LEX_PUNCT : return "LEX_PUNCT"; 56 | case LEX_CHAR : return "LEX_CHAR"; 57 | case LEX_SPLICE : return "LEX_SPLICE"; 58 | case LEX_FORMFEED : return "LEX_FORMFEED"; 59 | case LEX_BOF : return "LEX_BOF"; 60 | case LEX_EOF : return "LEX_EOF"; 61 | case LEX_LAST : return ""; 62 | } 63 | return ""; 64 | } 65 | 66 | 
//---------------------------------------------------------------------------- 67 | 68 | uint32_t CToken::type_to_color() const { 69 | switch(type) { 70 | case LEX_INVALID : return 0x0000FF; 71 | case LEX_SPACE : return 0x804040; 72 | case LEX_NEWLINE : return 0x404080; 73 | case LEX_STRING : return 0x4488AA; 74 | case LEX_KEYWORD : return 0x0088FF; 75 | case LEX_IDENTIFIER : return 0xCCCC40; 76 | case LEX_COMMENT : return 0x66AA66; 77 | case LEX_PREPROC : return 0xCC88CC; 78 | case LEX_FLOAT : return 0xFF88AA; 79 | case LEX_INT : return 0xFF8888; 80 | case LEX_PUNCT : return 0x808080; 81 | case LEX_CHAR : return 0x44DDDD; 82 | case LEX_SPLICE : return 0x00CCFF; 83 | case LEX_FORMFEED : return 0xFF00FF; 84 | case LEX_BOF : return 0x80FF80; 85 | case LEX_EOF : return 0x8080FF; 86 | case LEX_LAST : return 0xFF00FF; 87 | } 88 | return 0xFF00FF; 89 | } 90 | 91 | //---------------------------------------------------------------------------- 92 | 93 | void CToken::dump() const { 94 | const int span_len = 20; 95 | std::string dump = ""; 96 | 97 | if (type == LEX_BOF) dump = ""; 98 | if (type == LEX_EOF) dump = ""; 99 | 100 | for (auto c = text.begin; c < text.end; c++) { 101 | if (*c == '\n') dump += "\\n"; 102 | else if (*c == '\t') dump += "\\t"; 103 | else if (*c == '\r') dump += "\\r"; 104 | else dump += *c; 105 | if (dump.size() >= span_len) break; 106 | } 107 | 108 | dump = '`' + dump + '`'; 109 | if (dump.size() > span_len) { 110 | dump.resize(span_len - 4); 111 | dump = dump + "...`"; 112 | } 113 | while (dump.size() < span_len) dump += " "; 114 | 115 | utils::set_color(type_to_color()); 116 | printf("%-14.14s ", type_to_str()); 117 | utils::set_color(0); 118 | printf("%s", dump.c_str()); 119 | utils::set_color(0); 120 | } 121 | 122 | //---------------------------------------------------------------------------- 123 | -------------------------------------------------------------------------------- /examples/c_lexer/CToken.hpp: 
//------------------------------------------------------------------------------
// Lexeme categories assigned to tokens. Values are consecutive, starting at
// LEX_INVALID = 0. LEX_LAST is the final enumerator; CToken::type_to_str()
// gives it no name (presumably a sentinel/count rather than a real category).

enum LexemeType {
  LEX_INVALID = 0,
  LEX_SPACE,
  LEX_NEWLINE,
  LEX_STRING,
  LEX_KEYWORD,
  LEX_IDENTIFIER,
  LEX_COMMENT,
  LEX_PREPROC,
  LEX_FLOAT,
  LEX_INT,
  LEX_PUNCT,
  LEX_CHAR,
  LEX_SPLICE,
  LEX_FORMFEED,
  LEX_BOF,
  LEX_EOF,
  LEX_LAST
};

//------------------------------------------------------------------------------
// A lexeme: a category plus the span of source characters it refers to.
// The token stores only the span endpoints, not a copy of the characters.

struct CToken {
  // Stores `type` and the begin/end pointers of `text`.
  CToken(LexemeType type, matcheroni::TextSpan text);

  // Returns the stored text span.
  matcheroni::TextSpan as_text_span() const { return text; }

  bool is_bof() const;  // type == LEX_BOF
  bool is_eof() const;  // type == LEX_EOF
  bool is_gap() const;  // newline, space, comment, splice or form feed

  const char* type_to_str() const;    // enumerator name, "" for LEX_LAST
  uint32_t    type_to_color() const;  // color value used by dump()
  void        dump() const;           // prints type name and a text preview

  //----------------------------------------

  LexemeType type;
  matcheroni::TextSpan text;
};
-------------------------------------------------------------------------------- 1 | // SPDX-FileCopyrightText: 2023 Austin Appleby 2 | // SPDX-License-Identifier: MIT License 3 | 4 | #include "matcheroni/Utilities.hpp" 5 | 6 | #include "examples/c_lexer/CLexer.hpp" 7 | 8 | using namespace matcheroni; 9 | 10 | //------------------------------------------------------------------------------ 11 | // FIXME - Is there anything we need to cover here that isn't already covered 12 | // by matcheroni_test? 13 | 14 | // FIXME - yeah we need a raw string backreference test :P 15 | 16 | /* 17 | void test_match_string() { 18 | const char* text1 = "asdfasdf \"Hello World\" 123456789"; 19 | matcheroni_assert("\"Hello World\"" == get_first_match(text1, match_string)); 20 | 21 | const char* text2 = "asdfasdf \"Hello\nWorld\" 123456789"; 22 | matcheroni_assert("\"Hello\nWorld\"" == get_first_match(text2, match_string)); 23 | 24 | const char* text3 = "asdfasdf \"Hello\\\"World\" 123456789"; 25 | matcheroni_assert("\"Hello\\\"World\"" == get_first_match(text3, match_string)); 26 | 27 | const char* text4 = "asdfasdf \"Hello\\\\World\" 123456789"; 28 | matcheroni_assert("\"Hello\\\\World\"" == get_first_match(text4, match_string)); 29 | 30 | printf("test_match_string() pass\n"); 31 | } 32 | */ 33 | 34 | const char* some_text = 35 | R"( 36 | #include 37 | 38 | int main(int argc, char** argv) { 39 | printf("Hello World\n"); 40 | return 0; 41 | } 42 | )"; 43 | 44 | int main(int argc, char** argv) { 45 | 46 | std::string raw_text = some_text; 47 | raw_text.push_back(0); 48 | 49 | CLexer lexer; 50 | lexer.lex(utils::to_span(raw_text)); 51 | 52 | for (auto& l : lexer.tokens) { 53 | l.dump(); 54 | printf("\n"); 55 | } 56 | 57 | return 0; 58 | } 59 | -------------------------------------------------------------------------------- /examples/c_lexer/test.hancho: -------------------------------------------------------------------------------- 1 | c_lexer = hancho.load("build.hancho") 2 | 3 | 
hancho( 4 | hancho.base_rules.cpp_test, 5 | in_srcs = "c_lexer_test.cpp", 6 | in_libs = c_lexer.lib, 7 | out_bin = "c_lexer_test", 8 | ) 9 | 10 | hancho( 11 | hancho.base_rules.cpp_bin, 12 | in_srcs = "c_lexer_benchmark.cpp", 13 | in_libs = c_lexer.lib, 14 | out_bin = "c_lexer_benchmark", 15 | ) 16 | -------------------------------------------------------------------------------- /examples/c_parser/CContext.cpp: -------------------------------------------------------------------------------- 1 | // SPDX-FileCopyrightText: 2023 Austin Appleby 2 | // SPDX-License-Identifier: MIT License 3 | 4 | #include "examples/c_parser/CContext.hpp" 5 | 6 | #include "examples/c_parser/c_parse_nodes.hpp" 7 | 8 | using namespace matcheroni; 9 | 10 | //------------------------------------------------------------------------------ 11 | 12 | CContext::CContext() { 13 | type_scope = new CScope(); 14 | tokens.reserve(65536); 15 | } 16 | 17 | //------------------------------------------------------------------------------ 18 | 19 | void CContext::reset() { 20 | NodeContext::reset(); 21 | 22 | tokens.clear(); 23 | while (type_scope->parent) pop_scope(); 24 | type_scope->clear(); 25 | } 26 | 27 | //------------------------------------------------------------------------------ 28 | 29 | bool CContext::parse(matcheroni::TextSpan text, TokenSpan lexemes) { 30 | this->text_span = text; 31 | this->lexemes = lexemes; 32 | 33 | for (auto t = lexemes.begin; t < lexemes.end; t++) { 34 | if (!t->is_gap()) { 35 | tokens.push_back(*t); 36 | } 37 | } 38 | 39 | // Skip over BOF, stop before EOF 40 | auto tok_a = tokens.data() + 1; 41 | auto tok_b = tokens.data() + tokens.size() - 1; 42 | TokenSpan body(tok_a, tok_b); 43 | 44 | auto tail = NodeTranslationUnit::match(*this, body); 45 | return tail.is_valid(); 46 | } 47 | 48 | /* 49 | bool CContext::parse(std::vector& lexemes) { 50 | 51 | for (auto& t : lexemes) { 52 | if (!t.is_gap()) { 53 | tokens.push_back(t); 54 | } 55 | } 56 | 57 | // Skip over BOF, 
stop before EOF 58 | auto tok_a = tokens.data() + 1; 59 | auto tok_b = tokens.data() + tokens.size() - 1; 60 | TokenSpan body(tok_a, tok_b); 61 | 62 | auto tail = NodeTranslationUnit::match(*this, body); 63 | return tail.is_valid(); 64 | } 65 | */ 66 | 67 | //------------------------------------------------------------------------------ 68 | 69 | TokenSpan CContext::match_class_type(TokenSpan body) { 70 | return type_scope->has_class_type(*this, body) ? body.advance(1) : body.fail(); 71 | } 72 | 73 | TokenSpan CContext::match_struct_type(TokenSpan body) { 74 | return type_scope->has_struct_type(*this, body) ? body.advance(1) : body.fail(); 75 | } 76 | 77 | TokenSpan CContext::match_union_type(TokenSpan body) { 78 | return type_scope->has_union_type(*this, body) ? body.advance(1) : body.fail(); 79 | } 80 | 81 | TokenSpan CContext::match_enum_type(TokenSpan body) { 82 | return type_scope->has_enum_type(*this, body) ? body.advance(1) : body.fail(); 83 | } 84 | 85 | TokenSpan CContext::match_typedef_type(TokenSpan body) { 86 | return type_scope->has_typedef_type(*this, body) ? 
body.advance(1) : body.fail(); 87 | } 88 | 89 | void CContext::add_class_type (const CToken* a) { type_scope->add_class_type(*this, a); } 90 | void CContext::add_struct_type (const CToken* a) { type_scope->add_struct_type(*this, a); } 91 | void CContext::add_union_type (const CToken* a) { type_scope->add_union_type(*this, a); } 92 | void CContext::add_enum_type (const CToken* a) { type_scope->add_enum_type(*this, a); } 93 | void CContext::add_typedef_type(const CToken* a) { type_scope->add_typedef_type(*this, a); } 94 | 95 | //---------------------------------------------------------------------------- 96 | 97 | void CContext::push_scope() { 98 | CScope* new_scope = new CScope(); 99 | new_scope->parent = type_scope; 100 | type_scope = new_scope; 101 | } 102 | 103 | void CContext::pop_scope() { 104 | CScope* old_scope = type_scope->parent; 105 | if (old_scope) { 106 | delete type_scope; 107 | type_scope = old_scope; 108 | } 109 | } 110 | 111 | //---------------------------------------------------------------------------- 112 | 113 | TokenSpan CContext::match_builtin_type_base(TokenSpan body) { 114 | if (!body.is_valid() || body.is_empty()) return body.fail(); 115 | if (SST::match(body.begin->text.begin, body.begin->text.end)) { 116 | return body.advance(1); 117 | } 118 | else { 119 | return body.fail(); 120 | } 121 | } 122 | 123 | TokenSpan CContext::match_builtin_type_prefix(TokenSpan body) { 124 | if (!body.is_valid() || body.is_empty()) return body.fail(); 125 | if (SST::match(body.begin->text.begin, body.begin->text.end)) { 126 | return body.advance(1); 127 | } 128 | else { 129 | return body.fail(); 130 | } 131 | } 132 | 133 | TokenSpan CContext::match_builtin_type_suffix(TokenSpan body) { 134 | if (!body.is_valid() || body.is_empty()) return body.fail(); 135 | if (SST::match(body.begin->text.begin, body.begin->text.end)) { 136 | return body.advance(1); 137 | } 138 | else { 139 | return body.fail(); 140 | } 141 | } 142 | 143 | 
//------------------------------------------------------------------------------
// Parse context for the C parser example: owns the gap-free token list, the
// chain of type-name scopes, and the atom comparison functions used when
// matching tokens.
// NOTE(review): angle-bracket text is missing from this listing, so the
// template arguments of NodeContext, Span and std::vector below are absent -
// restore them from the repository before compiling.

class CContext : public parseroni::NodeContext {
 public:

  using AtomType = CToken;
  using SpanType = matcheroni::Span;
  using NodeType = CNode;

  CContext();

  // Character vs. integer code: difference of the unsigned character value
  // and b.
  static int atom_cmp(char a, int b) {
    return (unsigned char)a - b;
  }

  // Token vs. lexeme category: zero when the token has that type.
  static int atom_cmp(const CToken& a, const LexemeType& b) {
    return a.type - b;
  }

  // Token vs. single character: non-zero unless the token text is exactly
  // one character long and that character equals b.
  static int atom_cmp(const CToken& a, const char& b) {
    if (auto d = a.text.len() - 1) return d;
    return a.text.begin[0] - b;
  }

  // Token vs. text span: result of strcmp_span on the token text.
  static int atom_cmp(const CToken& a, const matcheroni::TextSpan& b) {
    return strcmp_span(a.text, b);
  }

  // Clears tokens and scopes; see CContext.cpp.
  void reset();
  //bool parse(std::vector& lexemes);
  // Filters gap lexemes into `tokens` and parses a translation unit.
  bool parse(matcheroni::TextSpan text, TokenSpan lexemes);

  // Single-token matchers for builtin type words.
  TokenSpan match_builtin_type_base  (TokenSpan body);
  TokenSpan match_builtin_type_prefix(TokenSpan body);
  TokenSpan match_builtin_type_suffix(TokenSpan body);

  // Single-token matchers for type names registered in the scope chain.
  TokenSpan match_class_type  (TokenSpan body);
  TokenSpan match_struct_type (TokenSpan body);
  TokenSpan match_union_type  (TokenSpan body);
  TokenSpan match_enum_type   (TokenSpan body);
  TokenSpan match_typedef_type(TokenSpan body);

  // Register a type name in the innermost scope.
  void add_class_type  (const CToken* a);
  void add_struct_type (const CToken* a);
  void add_union_type  (const CToken* a);
  void add_enum_type   (const CToken* a);
  void add_typedef_type(const CToken* a);

  // Open / close a nested type-name scope.
  void push_scope();
  void pop_scope();

  // Declared here; no definition is visible in this part of the listing.
  void append_node(CNode* node);
  void enclose_nodes(CNode* start, CNode* node);

  // Appends the debug form of every top-level node to `out`.
  void debug_dump(std::string& out) {
    for (auto node = top_head; node; node = node->node_next) {
      node->debug_dump(out);
    }
  }

  //----------------------------------------

  matcheroni::TextSpan text_span;  // source text given to parse()
  TokenSpan lexemes;               // lexeme span given to parse()

  std::vector tokens;              // lexemes with gap tokens removed
  CScope* type_scope;              // innermost type-name scope
};
//------------------------------------------------------------------------------ 16 | 17 | struct CNode : public parseroni::NodeBase { 18 | using AtomType = CToken; 19 | using SpanType = matcheroni::Span; 20 | 21 | matcheroni::TextSpan as_text_span() const { 22 | return matcheroni::TextSpan(span.begin->text.begin, (span.end - 1)->text.end); 23 | } 24 | 25 | void debug_dump(std::string& out) { 26 | out += "["; 27 | out += match_tag; 28 | out += ":"; 29 | if (child_head) { 30 | for (auto c = child_head; c; c = c->node_next) { 31 | c->debug_dump(out); 32 | } 33 | } 34 | else { 35 | out += '`'; 36 | out += std::string(span.begin->text.begin, (span.end - 1)->text.end); 37 | out += '`'; 38 | } 39 | out += "]"; 40 | } 41 | 42 | 43 | //---------------------------------------- 44 | 45 | /* 46 | template 47 | bool is_a() const { 48 | return typeid(*this) == typeid(P); 49 | } 50 | 51 | template 52 | P* child() { 53 | for (auto cursor = child_head; cursor; cursor = cursor->node_next) { 54 | if (cursor->is_a

    ()) { 55 | return dynamic_cast(cursor); 56 | } 57 | } 58 | return nullptr; 59 | } 60 | 61 | template 62 | const P* child() const { 63 | for (auto cursor = child_head; cursor; cursor = cursor->node_next) { 64 | if (cursor->is_a

    ()) { 65 | return dynamic_cast(cursor); 66 | } 67 | } 68 | return nullptr; 69 | } 70 | 71 | template 72 | P* as_a() { 73 | return dynamic_cast(this); 74 | } 75 | 76 | template 77 | const P* as_a() const { 78 | return dynamic_cast(this); 79 | } 80 | */ 81 | 82 | //---------------------------------------- 83 | 84 | int precedence = 0; 85 | 86 | // -2 = prefix, -1 = right-to-left, 0 = none, 1 = left-to-right, 2 = suffix 87 | int assoc = 0; 88 | }; 89 | 90 | //------------------------------------------------------------------------------ 91 | -------------------------------------------------------------------------------- /examples/c_parser/CScope.cpp: -------------------------------------------------------------------------------- 1 | // SPDX-FileCopyrightText: 2023 Austin Appleby 2 | // SPDX-License-Identifier: MIT License 3 | 4 | #include "examples/c_parser/CScope.hpp" 5 | 6 | #include "examples/c_parser/c_constants.hpp" 7 | #include "examples/c_parser/CContext.hpp" 8 | #include "examples/c_lexer/CToken.hpp" 9 | 10 | #include 11 | 12 | using matcheroni::TextSpan; 13 | 14 | using std::string_view; 15 | 16 | void CScope::clear() { 17 | class_types.clear(); 18 | struct_types.clear(); 19 | union_types.clear(); 20 | enum_types.clear(); 21 | typedef_types.clear(); 22 | } 23 | 24 | bool CScope::has_type(CContext& ctx, TokenSpan body, token_list& types) { 25 | if(ctx.atom_cmp(*body.begin, LEX_IDENTIFIER)) { 26 | return false; 27 | } 28 | 29 | TextSpan span(body.begin->text.begin, body.begin->text.end); 30 | 31 | for (const auto& c : types) { 32 | if (strcmp_span(span, c) == 0) return true; 33 | } 34 | 35 | return false; 36 | } 37 | 38 | void CScope::add_type(CContext& ctx, const CToken* a, token_list& types) { 39 | matcheroni_assert(ctx.atom_cmp(*a, LEX_IDENTIFIER) == 0); 40 | 41 | TextSpan span(a->text.begin, a->text.end); 42 | 43 | for (const auto& c : types) { 44 | if (strcmp_span(span, c) == 0) return; 45 | } 46 | 47 | types.push_back(span); 48 | } 49 | 50 | 
//---------------------------------------- 51 | 52 | void CScope::add_typedef_type(const char* t) { 53 | TextSpan span = matcheroni::utils::to_span(t); 54 | 55 | for (const auto& c : typedef_types) { 56 | if (strcmp_span(span, c) == 0) return; 57 | } 58 | 59 | typedef_types.push_back(span); 60 | } 61 | 62 | //---------------------------------------- 63 | 64 | bool CScope::has_class_type (CContext& ctx, TokenSpan body) { if (has_type(ctx, body, class_types )) return true; if (parent) return parent->has_class_type (ctx, body); else return false; } 65 | bool CScope::has_struct_type (CContext& ctx, TokenSpan body) { if (has_type(ctx, body, struct_types )) return true; if (parent) return parent->has_struct_type (ctx, body); else return false; } 66 | bool CScope::has_union_type (CContext& ctx, TokenSpan body) { if (has_type(ctx, body, union_types )) return true; if (parent) return parent->has_union_type (ctx, body); else return false; } 67 | bool CScope::has_enum_type (CContext& ctx, TokenSpan body) { if (has_type(ctx, body, enum_types )) return true; if (parent) return parent->has_enum_type (ctx, body); else return false; } 68 | bool CScope::has_typedef_type(CContext& ctx, TokenSpan body) { if (has_type(ctx, body, typedef_types)) return true; if (parent) return parent->has_typedef_type(ctx, body); else return false; } 69 | 70 | void CScope::add_class_type (CContext& ctx, const CToken* a) { return add_type(ctx, a, class_types ); } 71 | void CScope::add_struct_type (CContext& ctx, const CToken* a) { return add_type(ctx, a, struct_types ); } 72 | void CScope::add_union_type (CContext& ctx, const CToken* a) { return add_type(ctx, a, union_types ); } 73 | void CScope::add_enum_type (CContext& ctx, const CToken* a) { return add_type(ctx, a, enum_types ); } 74 | void CScope::add_typedef_type(CContext& ctx, const CToken* a) { return add_type(ctx, a, typedef_types); } 75 | -------------------------------------------------------------------------------- 
/examples/c_parser/CScope.hpp: -------------------------------------------------------------------------------- 1 | // SPDX-FileCopyrightText: 2023 Austin Appleby 2 | // SPDX-License-Identifier: MIT License 3 | 4 | #pragma once 5 | #include 6 | #include 7 | #include "matcheroni/Matcheroni.hpp" 8 | 9 | struct CToken; 10 | struct CContext; 11 | typedef matcheroni::Span TokenSpan; 12 | 13 | //------------------------------------------------------------------------------ 14 | 15 | struct CScope { 16 | 17 | //using token_list = std::vector; 18 | using token_list = std::vector; 19 | 20 | void clear(); 21 | bool has_type(CContext& ctx, TokenSpan body, token_list& types); 22 | void add_type(CContext& ctx, const CToken* a, token_list& types); 23 | 24 | bool has_class_type (CContext& ctx, TokenSpan body); 25 | bool has_struct_type (CContext& ctx, TokenSpan body); 26 | bool has_union_type (CContext& ctx, TokenSpan body); 27 | bool has_enum_type (CContext& ctx, TokenSpan body); 28 | bool has_typedef_type(CContext& ctx, TokenSpan body); 29 | 30 | void add_class_type (CContext& ctx, const CToken* a); 31 | void add_struct_type (CContext& ctx, const CToken* a); 32 | void add_union_type (CContext& ctx, const CToken* a); 33 | void add_enum_type (CContext& ctx, const CToken* a); 34 | void add_typedef_type(CContext& ctx, const CToken* a); 35 | 36 | void add_typedef_type(const char* t); 37 | 38 | CScope* parent; 39 | token_list class_types; 40 | token_list struct_types; 41 | token_list union_types; 42 | token_list enum_types; 43 | token_list typedef_types; 44 | }; 45 | 46 | //------------------------------------------------------------------------------ 47 | -------------------------------------------------------------------------------- /examples/c_parser/build.hancho: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------------- 2 | # C parser example (not finished) 3 | 4 | lib = 
hancho( 5 | hancho.base_rules.cpp_lib, 6 | in_srcs=["CContext.cpp", "CNode.cpp", "CScope.cpp"], 7 | out_lib="c_parser.a", 8 | ) 9 | -------------------------------------------------------------------------------- /examples/c_parser/test.hancho: -------------------------------------------------------------------------------- 1 | 2 | 3 | hancho( 4 | hancho.base_rules.cpp_bin, 5 | in_srcs = "c_parser_benchmark.cpp", 6 | in_libs = [hancho.c_lexer.lib, lib], 7 | out_bin = "c_parser_benchmark", 8 | ) 9 | 10 | # Broken? 11 | #rules.c_test( 12 | # "c_parser_test.cpp", 13 | # "c_parser_test", 14 | # libs = [c_lexer.c_lexer_lib, c_parser_lib], 15 | #) 16 | -------------------------------------------------------------------------------- /examples/ini/build.hancho: -------------------------------------------------------------------------------- 1 | lib = hancho( 2 | hancho.base_rules.cpp_lib, 3 | in_srcs = "ini_parser.cpp", 4 | out_lib = "ini_parser.a", 5 | ) 6 | 7 | -------------------------------------------------------------------------------- /examples/ini/ini_parser.cpp: -------------------------------------------------------------------------------- 1 | #include "matcheroni/Matcheroni.hpp" 2 | #include "matcheroni/Utilities.hpp" 3 | 4 | using namespace matcheroni; 5 | 6 | using ws = Any>; 7 | using digit = Range<'0', '9'>; 8 | using alpha = Ranges<'a', 'z', 'A', 'Z', '_', '_'>; 9 | 10 | using ident = Seq>; 11 | using token = Any>; 12 | using value = Seq>>; 13 | 14 | using section = Seq, ws, ident, ws, Atom<']'>, ws, EOL>; 15 | using keyval = Seq { 26 | static int atom_cmp(char a, int b) { return (unsigned char)a - b; } 27 | }; 28 | 29 | matcheroni::TextSpan parse_json(JsonParseContext& ctx, matcheroni::TextSpan body); 30 | -------------------------------------------------------------------------------- /examples/json/json_conformance.cpp: -------------------------------------------------------------------------------- 1 | 
//------------------------------------------------------------------------------ 2 | // This file uses JSON conformance tests from 3 | // https://github.com/nst/JSONTestSuite to verify that the JSON parser conforms 4 | // with the http://JSON.org spec (except for the two that blow up the stack of 5 | // a recursive parser :D ) 6 | 7 | // SPDX-FileCopyrightText: 2023 Austin Appleby 8 | // SPDX-License-Identifier: MIT License 9 | 10 | #include "json.hpp" 11 | #include "matcheroni/Utilities.hpp" 12 | 13 | #include 14 | #include 15 | #include 16 | #include 17 | #include 18 | 19 | using namespace matcheroni; 20 | 21 | //------------------------------------------------------------------------------ 22 | 23 | int main(int argc, char** argv) { 24 | auto base_path = "data/conformance"; 25 | 26 | std::vector tests_good; 27 | std::vector tests_bad; 28 | std::vector tests_other; 29 | 30 | printf("Scanning source files in %s\n", base_path); 31 | using rdit = std::filesystem::recursive_directory_iterator; 32 | for (const auto& f : rdit(base_path)) { 33 | if (!f.is_regular_file()) continue; 34 | auto path = f.path().native(); 35 | auto name = f.path().filename().native(); 36 | 37 | if (name.starts_with("i_") && name.ends_with(".json")) { 38 | tests_other.push_back(path); 39 | } 40 | 41 | if (name.starts_with("y_") && name.ends_with(".json")) { 42 | tests_good.push_back(path); 43 | } 44 | 45 | if (name.starts_with("n_") && name.ends_with(".json")) { 46 | tests_bad.push_back(path); 47 | } 48 | } 49 | 50 | double time; 51 | 52 | int y_pass = 0; 53 | int y_fail = 0; 54 | 55 | printf("tests_good... 
"); 56 | time = 0; 57 | for (auto path : tests_good) { 58 | std::string raw_text; 59 | utils::read(path.c_str(), raw_text); 60 | 61 | TextMatchContext ctx2; 62 | JsonParseContext ctx; 63 | TextSpan text = utils::to_span(raw_text); 64 | time -= utils::timestamp_ms(); 65 | TextSpan tail = parse_json(ctx, text); 66 | time += utils::timestamp_ms(); 67 | 68 | if (tail.is_valid() && tail.begin == text.end) { 69 | y_pass++; 70 | } 71 | else { 72 | y_fail++; 73 | printf("\n"); 74 | printf("FAIL %s\n", path.c_str()); 75 | } 76 | } 77 | printf("%f msec\n", time); 78 | 79 | int n_pass = 0; 80 | int n_fail = 0; 81 | int skipped = 0; 82 | 83 | printf("tests_bad... "); 84 | time = 0; 85 | for (auto path : tests_bad) { 86 | // We are a recursive descent parser, these blow our call stack 87 | if (path == "data/conformance/n_structure_open_array_object.json") { 88 | skipped++; 89 | continue; 90 | } 91 | if (path == "data/conformance/n_structure_100000_opening_arrays.json") { 92 | skipped++; 93 | continue; 94 | } 95 | 96 | std::string raw_text; 97 | utils::read(path.c_str(), raw_text); 98 | 99 | JsonParseContext ctx; 100 | TextSpan text = utils::to_span(raw_text); 101 | time -= utils::timestamp_ms(); 102 | TextSpan tail = parse_json(ctx, text); 103 | time += utils::timestamp_ms(); 104 | 105 | if (tail.is_valid() && tail.begin == text.end) { 106 | n_fail++; 107 | printf("FAIL %s\n", path.c_str()); 108 | } 109 | else { 110 | n_pass++; 111 | } 112 | } 113 | printf("%f msec\n", time); 114 | 115 | int i_pass = 0; 116 | int i_fail = 0; 117 | 118 | printf("tests_other... 
"); 119 | time = 0; 120 | for (auto path : tests_other) { 121 | std::string raw_text; 122 | utils::read(path.c_str(), raw_text); 123 | 124 | JsonParseContext ctx; 125 | TextSpan text = utils::to_span(raw_text); 126 | time -= utils::timestamp_ms(); 127 | TextSpan tail = parse_json(ctx, text); 128 | time += utils::timestamp_ms(); 129 | 130 | if (tail.is_valid() && tail.begin == text.end) { 131 | i_pass++; 132 | } 133 | else { 134 | i_fail++; 135 | } 136 | } 137 | printf("%f msec\n", time); 138 | 139 | printf("Known good pass %d\n", y_pass); 140 | printf("Known good fail %d\n", y_fail); 141 | printf("Known bad pass %d\n", n_pass); 142 | printf("Known bad fail %d\n", n_fail); 143 | printf("Other pass %d\n", i_pass); 144 | printf("Other fail %d\n", i_fail); 145 | printf("Skipped %d\n", skipped); 146 | 147 | return (y_fail || n_fail) ? -1 : 0; 148 | } 149 | 150 | //------------------------------------------------------------------------------ 151 | -------------------------------------------------------------------------------- /examples/json/json_demo.cpp: -------------------------------------------------------------------------------- 1 | //------------------------------------------------------------------------------ 2 | // This file is a full working example of using Matcheroni to build a JSON 3 | // parser. 
4 | 5 | // Example usage: 6 | // bin/json_parser test_file.json 7 | 8 | // SPDX-FileCopyrightText: 2023 Austin Appleby 9 | // SPDX-License-Identifier: MIT License 10 | 11 | #include "json.hpp" 12 | #include "matcheroni/Utilities.hpp" 13 | 14 | #include 15 | 16 | using namespace matcheroni; 17 | using namespace parseroni; 18 | 19 | //------------------------------------------------------------------------------ 20 | 21 | int main(int argc, char** argv) { 22 | 23 | if (argc < 2) { 24 | printf("Usage: json_demo \n"); 25 | return 1; 26 | } 27 | 28 | printf("Parsing %s\n", argv[1]); 29 | printf("\n"); 30 | 31 | char* buf = nullptr; 32 | size_t size = 0; 33 | utils::read(argv[1], buf, size); 34 | if (size == 0) { 35 | printf("Could not load %s\n", argv[1]); 36 | exit(-1); 37 | } 38 | 39 | 40 | //---------------------------------------- 41 | 42 | JsonParseContext ctx; 43 | double time = -utils::timestamp_ms(); 44 | TextSpan text = {buf, buf + size}; 45 | TextSpan parse_end = parse_json(ctx, text); 46 | time += utils::timestamp_ms(); 47 | 48 | if (parse_end.begin < text.end) { 49 | printf("Parse failed!\n"); 50 | printf("Failure near `"); 51 | printf("`\n"); 52 | exit(-1); 53 | } 54 | 55 | //---------------------------------------- 56 | 57 | utils::print_trees(ctx, text, 40, 0); 58 | printf("\n"); 59 | 60 | printf("Size %ld bytes\n", size); 61 | printf("Time %f msec\n", time); 62 | printf("Rate %f mb/sec\n", (size / 1000000.0) / (time / 1000.0)); 63 | printf("\n"); 64 | 65 | delete [] buf; 66 | return 0; 67 | } 68 | 69 | //------------------------------------------------------------------------------ 70 | -------------------------------------------------------------------------------- /examples/json/json_matcher.cpp: -------------------------------------------------------------------------------- 1 | // SPDX-FileCopyrightText: 2023 Austin Appleby 2 | // SPDX-License-Identifier: MIT License 3 | 4 | #include "json.hpp" 5 | 6 | using namespace matcheroni; 7 | using namespace 
parseroni; 8 | 9 | using sign = Atoms<'+', '-'>; 10 | using digit = Range<'0', '9'>; 11 | using onenine = Range<'1', '9'>; 12 | using digits = Some; 13 | using integer = Seq>, Oneof, digit>>; 14 | using fraction = Seq, digits>; 15 | using exponent = Seq, Opt, digits>; 16 | using number = Seq, Opt>; 17 | 18 | using ws = Any>; 19 | using hex = Ranges<'0','9','a','f','A','F'>; 20 | using escape = Oneof, Seq, Rep<4, hex>>>; 21 | using character = Oneof< 22 | Seq>, Not>, Range<0x0020, 0x10FFFF>>, 23 | Seq, escape> 24 | >; 25 | using string = Seq, Any, Atom<'"'>>; 26 | 27 | template 28 | using list = Seq, ws, P>>>; 29 | 30 | static TextSpan match_value(JsonMatchContext& ctx, TextSpan body); 31 | using value = Ref; 32 | using array = Seq, ws, Opt>, ws, Atom<']'>>; 33 | using key = string; 34 | using member = Seq, ws, value>; 35 | using object = Seq, ws, Opt>, ws, Atom<'}'>>; 36 | 37 | static TextSpan match_value(JsonMatchContext& ctx, TextSpan body) { 38 | using value = Oneof, Lit<"false">, Lit<"null">>; 39 | return value::match(ctx, body); 40 | } 41 | 42 | using json = Seq; 43 | 44 | TextSpan match_json(JsonMatchContext& ctx, TextSpan body) { 45 | return json::match(ctx, body); 46 | } 47 | -------------------------------------------------------------------------------- /examples/json/json_parser.cpp: -------------------------------------------------------------------------------- 1 | // SPDX-FileCopyrightText: 2023 Austin Appleby 2 | // SPDX-License-Identifier: MIT License 3 | 4 | #include "json.hpp" 5 | 6 | using namespace matcheroni; 7 | using namespace parseroni; 8 | 9 | using sign = Atoms<'+', '-'>; 10 | using digit = Range<'0', '9'>; 11 | using onenine = Range<'1', '9'>; 12 | using digits = Some; 13 | using integer = Seq>, Oneof, digit>>; 14 | using fraction = Seq, digits>; 15 | using exponent = Seq, Opt, digits>; 16 | using number = Seq, Opt>; 17 | 18 | using ws = Any>; 19 | using hex = Ranges<'0','9','a','f','A','F'>; 20 | using escape = Oneof, Seq, Rep<4, hex>>>; 
21 | using character = Oneof< 22 | Seq>, Not>, Range<0x0020, 0x10FFFF>>, 23 | Seq, escape> 24 | >; 25 | using string = Seq, Any, Atom<'"'>>; 26 | 27 | template 28 | using list = Seq, ws, P>>>; 29 | 30 | TextSpan match_value(JsonParseContext& ctx, TextSpan body); 31 | using value = Ref; 32 | using array = Seq, ws, Opt>, ws, Atom<']'>>; 33 | using key = Capture<"key", string, JsonString>; 34 | using member = Capture<"member", Seq, ws, value>, JsonKeyVal>; 35 | using object = Seq, ws, Opt>, ws, Atom<'}'>>; 36 | 37 | TextSpan match_value(JsonParseContext& ctx, TextSpan body) { 38 | using value = Oneof< 39 | Capture<"val", string, JsonString>, 40 | Capture<"val", number, JsonNumber>, 41 | Capture<"val", array, JsonArray>, 42 | Capture<"val", object, JsonObject>, 43 | Capture<"val", Lit<"true">, JsonKeyword>, 44 | Capture<"val", Lit<"false">, JsonKeyword>, 45 | Capture<"val", Lit<"null">, JsonKeyword> 46 | >; 47 | return value::match(ctx, body); 48 | } 49 | 50 | using json = Seq; 51 | 52 | TextSpan parse_json(JsonParseContext& ctx, TextSpan body) { 53 | return json::match(ctx, body); 54 | } 55 | -------------------------------------------------------------------------------- /examples/json/json_test.cpp: -------------------------------------------------------------------------------- 1 | //------------------------------------------------------------------------------ 2 | // SPDX-FileCopyrightText: 2023 Austin Appleby 3 | // SPDX-License-Identifier: MIT License 4 | 5 | #include "json.hpp" 6 | #include "matcheroni/Utilities.hpp" 7 | 8 | #include 9 | 10 | using namespace matcheroni; 11 | 12 | //------------------------------------------------------------------------------ 13 | 14 | const char* json = R"( 15 | { 16 | "asdf" : "slkjdfsldkj" 17 | } 18 | )"; 19 | 20 | int main(int argc, char** argv) { 21 | /* 22 | if (argc < 2) { 23 | printf("Usage: toml_test \n"); 24 | return 1; 25 | } 26 | 27 | printf("argv[0] = %s\n", argv[0]); 28 | printf("argv[1] = %s\n", argv[1]); 29 | 30 
| std::string buf; 31 | read(argv[1], buf); 32 | 33 | TextSpan text = to_span(buf); 34 | */ 35 | 36 | TextSpan text = utils::to_span(json); 37 | JsonParseContext ctx; 38 | 39 | double time_a, time_b; 40 | TextSpan tail; 41 | 42 | for (int rep = 0; rep < 100; rep++) { 43 | ctx.reset(); 44 | time_a = utils::timestamp_ms(); 45 | tail = parse_json(ctx, text); 46 | time_b = utils::timestamp_ms(); 47 | } 48 | 49 | printf("Parsing json took %f msec\n", time_b - time_a); 50 | utils::print_summary(ctx, text, tail, 50); 51 | 52 | return 0; 53 | } 54 | 55 | //------------------------------------------------------------------------------ 56 | -------------------------------------------------------------------------------- /examples/json/test.hancho: -------------------------------------------------------------------------------- 1 | json = hancho.load("build.hancho") 2 | 3 | hancho( 4 | hancho.base_rules.cpp_bin, 5 | in_srcs = "json_conformance.cpp", 6 | in_libs = json.lib, 7 | out_bin = "json_conformance", 8 | ) 9 | 10 | hancho( 11 | hancho.base_rules.cpp_bin, 12 | in_srcs = "json_benchmark.cpp", 13 | in_libs = json.lib, 14 | out_bin = "json_benchmark", 15 | ) 16 | 17 | hancho( 18 | hancho.base_rules.cpp_bin, 19 | in_srcs = "json_demo.cpp", 20 | in_libs = json.lib, 21 | out_bin = "json_demo", 22 | ) 23 | 24 | hancho( 25 | hancho.base_rules.cpp_test, 26 | in_srcs = "json_test.cpp", 27 | in_libs = json.lib, 28 | out_bin = "json_test", 29 | ) 30 | -------------------------------------------------------------------------------- /examples/live_demo/bench1.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | mkdir -p bin/opt 3 | set -v 4 | g++ -std=c++20 -O3 -I.. -c live_bench.cpp -o bin/opt/live_bench.o 5 | g++ -std=c++20 -O3 -I.. 
-c live_parser1.cpp -o bin/opt/live_parser1.o 6 | g++ bin/opt/live_bench.o bin/opt/live_parser1.o -o bin/opt/live_bench1 7 | 8 | bin/opt/live_bench1 9 | 10 | size bin/opt/live_parser1.o 11 | -------------------------------------------------------------------------------- /examples/live_demo/bench2.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | mkdir -p bin/opt 3 | set -v 4 | g++ -std=c++20 -O3 -I.. -c live_bench.cpp -o bin/opt/live_bench.o 5 | g++ -std=c++20 -O3 -I.. -c live_parser2.cpp -o bin/opt/live_parser2.o 6 | g++ bin/opt/live_bench.o bin/opt/live_parser2.o -o bin/opt/live_bench2 7 | 8 | bin/opt/live_bench2 9 | 10 | size bin/opt/live_parser2.o 11 | -------------------------------------------------------------------------------- /examples/live_demo/bench3.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | mkdir -p bin/opt 3 | set -v 4 | g++ -std=c++20 -O3 -I.. -c live_bench.cpp -o bin/opt/live_bench.o 5 | g++ -std=c++20 -O3 -I.. 
-c live_parser3.cpp -o bin/opt/live_parser3.o 6 | g++ bin/opt/live_bench.o bin/opt/live_parser3.o -o bin/opt/live_bench3 7 | 8 | bin/opt/live_bench3 9 | 10 | size bin/opt/live_parser3.o 11 | -------------------------------------------------------------------------------- /examples/live_demo/data.json: -------------------------------------------------------------------------------- 1 | { "nested" : { "some_key" : "some_value" }, "foo" : "bar", "baz" : [1, 2, 3] } 2 | -------------------------------------------------------------------------------- /examples/live_demo/live_bench.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | 6 | #include "matcheroni/Utilities.hpp" 7 | 8 | using namespace matcheroni; 9 | 10 | bool parse_json(const char* text, int size); 11 | 12 | //------------------------------------------------------------------------------ 13 | 14 | void benchmark(const char* path) { 15 | int len = strlen(path); 16 | 17 | printf("Benchmarking %s", path); 18 | fflush(stdout); 19 | 20 | std::string buf; 21 | utils::read(path, buf); 22 | if (buf.size() == 0) { 23 | printf("Could not load %s\n", path); 24 | exit(-1); 25 | } 26 | 27 | //---------------------------------------- 28 | 29 | const int warmup = 20; 30 | const int reps = 100; 31 | std::vector parse_times; 32 | parse_times.reserve(reps); 33 | for (int rep = 0; rep < warmup + reps; rep++) { 34 | if (rep % 10 == 0) { printf("."); fflush(stdout); } 35 | auto time_a = utils::timestamp_ms(); 36 | auto result = parse_json(buf.data(), buf.size()); 37 | auto time_b = utils::timestamp_ms(); 38 | if (!result) { 39 | printf("Parse FAIL!\n"); 40 | exit(-1); 41 | } 42 | 43 | if (rep >= warmup) parse_times.push_back(time_b - time_a); 44 | } 45 | 46 | //---------------------------------------- 47 | 48 | std::sort(parse_times.begin(), parse_times.end()); 49 | auto parse_time = parse_times[parse_times.size()/2]; 50 | 51 | for (int i = 
0; i < 22 - len; i++) printf(" "); 52 | 53 | printf("Rate %f MB/sec\n", (buf.size() / 1e6) / (parse_time / 1e3)); 54 | } 55 | 56 | //------------------------------------------------------------------------------ 57 | 58 | int main(int argc, char** argv) { 59 | 60 | benchmark("canada.json"); 61 | benchmark("rapidjson_sample.json"); 62 | benchmark("twitter.json"); 63 | benchmark("citm_catalog.json"); 64 | 65 | return 0; 66 | } 67 | 68 | //------------------------------------------------------------------------------ 69 | -------------------------------------------------------------------------------- /examples/live_demo/live_demo.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | 6 | #include "matcheroni/Utilities.hpp" 7 | 8 | using namespace matcheroni; 9 | 10 | bool parse_json(const char* text, int size); 11 | 12 | //------------------------------------------------------------------------------ 13 | 14 | int main(int argc, char** argv) { 15 | 16 | const char* path = "data.json"; 17 | 18 | printf("Parsing %s: ", path); 19 | 20 | std::string buf; 21 | utils::read(path, buf); 22 | if (buf.size() == 0) { 23 | printf("Could not load %s\n", path); 24 | exit(-1); 25 | } 26 | 27 | printf("%s\n", buf.data()); 28 | 29 | auto result = parse_json(buf.data(), buf.size()); 30 | printf(result ? 
"Parse OK!\n" : "Parse fail!\n"); 31 | 32 | return 0; 33 | } 34 | 35 | //------------------------------------------------------------------------------ 36 | -------------------------------------------------------------------------------- /examples/live_demo/live_parser0.cpp: -------------------------------------------------------------------------------- 1 | #include "matcheroni/Matcheroni.hpp" 2 | #include "matcheroni/Parseroni.hpp" 3 | #include 4 | 5 | using namespace matcheroni; 6 | using namespace parseroni; 7 | 8 | using JsonContext = matcheroni::TextMatchContext; 9 | static JsonContext ctx; 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | //------------------------------------------------------------------------------ 34 | // Numbers 35 | 36 | using sign = Atoms<'+', '-'>; 37 | using digit = Range<'0', '9'>; 38 | using onenine = Range<'1', '9'>; 39 | using digits = Some; 40 | using integer = Seq>, Oneof, digit>>; 41 | using fraction = Seq, digits>; 42 | using exponent = Seq, Opt, digits>; 43 | using number = Seq, Opt>; 44 | 45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 | 53 | 54 | 55 | 56 | 57 | 58 | 59 | 60 | 61 | 62 | 63 | 64 | 65 | 66 | 67 | 68 | //------------------------------------------------------------------------------ 69 | // Strings 70 | 71 | using ws = Any>; 72 | using hex = Ranges<'0','9','a','f','A','F'>; 73 | 74 | using escape = Seq< 75 | Atom<'\\'>, 76 | Oneof< 77 | Charset<"\"\\/bfnrt">, 78 | Seq, Rep<4, hex>> 79 | >>; 80 | 81 | using character = Oneof< 82 | Seq< 83 | Not>, 84 | Not>, 85 | Range<0x0020, 0x10FFFF> 86 | >, 87 | escape>; 88 | 89 | using string = Seq, Any, Atom<'"'>>; 90 | 91 | 92 | 93 | 94 | 95 | 96 | 97 | 98 | 99 | 100 | 101 | 102 | 103 | 104 | 105 | 106 | 107 | 108 | 109 | 110 | 111 | 112 | 113 | 114 | 115 | //------------------------------------------------------------------------------ 116 | // Arrays 117 | 118 | TextSpan match_value(JsonContext& ctx, TextSpan 
body); 119 | using value = Ref; 120 | 121 | template 122 | using list = 123 | Seq< 124 | pattern, 125 | Any< 126 | Seq, ws, pattern> 127 | > 128 | >; 129 | 130 | using array = 131 | Seq< 132 | Atom<'['>, 133 | ws, 134 | Opt>, 135 | ws, 136 | Atom<']'> 137 | >; 138 | 139 | 140 | 141 | 142 | 143 | 144 | 145 | 146 | 147 | 148 | 149 | 150 | 151 | 152 | 153 | 154 | 155 | 156 | 157 | 158 | 159 | 160 | 161 | 162 | 163 | //------------------------------------------------------------------------------ 164 | // Objects 165 | 166 | using key = string; 167 | using field = Seq, ws, value>; 168 | using object = Seq, ws, Opt>, ws, Atom<'}'>>; 169 | using json = Seq; 170 | 171 | 172 | 173 | 174 | 175 | 176 | 177 | 178 | 179 | 180 | 181 | 182 | 183 | 184 | 185 | 186 | 187 | 188 | 189 | 190 | 191 | 192 | 193 | 194 | 195 | 196 | TextSpan match_value(JsonContext& ctx, TextSpan body) { 197 | using value = 198 | Oneof< 199 | number, 200 | string, 201 | array, 202 | object, 203 | Lit<"true">, 204 | Lit<"false">, 205 | Lit<"null"> 206 | >; 207 | return value::match(ctx, body); 208 | } 209 | 210 | 211 | 212 | 213 | 214 | 215 | 216 | 217 | 218 | 219 | 220 | 221 | 222 | 223 | 224 | 225 | 226 | 227 | 228 | 229 | 230 | 231 | 232 | 233 | 234 | //------------------------------------------------------------------------------ 235 | // Parser 236 | 237 | bool parse_json(const std::string& text, bool verbose) { 238 | TextSpan body(text.data(), text.data() + text.size()); 239 | 240 | // The actual parsing code 241 | auto result = json::match(ctx, body); 242 | 243 | // That's it, we're done. 
244 | return result.is_valid(); 245 | } 246 | 247 | //------------------------------------------------------------------------------ 248 | -------------------------------------------------------------------------------- /examples/live_demo/live_parser1.cpp: -------------------------------------------------------------------------------- 1 | #include "matcheroni/Matcheroni.hpp" 2 | #include "matcheroni/Parseroni.hpp" 3 | 4 | using namespace matcheroni; 5 | using namespace parseroni; 6 | 7 | using JsonContext = matcheroni::TextMatchContext; 8 | 9 | //------------------------------------------------------------------------------ 10 | // Numbers 11 | 12 | using sign = Atoms<'+', '-'>; 13 | using digit = Range<'0', '9'>; 14 | using onenine = Range<'1', '9'>; 15 | using digits = Some; 16 | using integer = Seq>, Oneof, digit>>; 17 | using fraction = Seq, digits>; 18 | using exponent = Seq, Opt, digits>; 19 | using number = Seq, Opt>; 20 | 21 | //------------------------------------------------------------------------------ 22 | // Strings 23 | 24 | using ws = Any>; 25 | using hex = Ranges<'0','9','a','f','A','F'>; 26 | using escape = Seq, Oneof, Seq, Rep<4, hex>>>>; 27 | using character = Oneof>, Not>, Range<0x0020, 0x10FFFF>>, escape>; 28 | using string = Seq, Any, Atom<'"'>>; 29 | 30 | //------------------------------------------------------------------------------ 31 | // Arrays 32 | 33 | TextSpan match_value(JsonContext& ctx, TextSpan body); 34 | 35 | template 36 | using list = Seq, ws, pattern>>>; 37 | using value = Ref; 38 | using array = Seq, ws, Opt>, ws, Atom<']'>>; 39 | 40 | //------------------------------------------------------------------------------ 41 | // Objects 42 | 43 | using key = string; 44 | using field = Seq, ws, value>; 45 | using object = Seq, ws, Opt>, ws, Atom<'}'>>; 46 | using json = Seq; 47 | 48 | TextSpan match_value(JsonContext& ctx, TextSpan body) { 49 | using value = Oneof, Lit<"false">, Lit<"null">>; 50 | return value::match(ctx, body); 
51 | } 52 | 53 | //------------------------------------------------------------------------------ 54 | // Parser 55 | 56 | bool parse_json(const char* text, int size) { 57 | static JsonContext ctx; 58 | TextSpan body(text, text + size); 59 | 60 | auto result = json::match(ctx, body); 61 | 62 | return result.is_valid(); 63 | } 64 | 65 | //------------------------------------------------------------------------------ 66 | -------------------------------------------------------------------------------- /examples/live_demo/live_parser2.cpp: -------------------------------------------------------------------------------- 1 | #include "matcheroni/Matcheroni.hpp" 2 | #include "matcheroni/Parseroni.hpp" 3 | 4 | #ifdef DEBUG 5 | #include "matcheroni/Utilities.hpp" 6 | #endif 7 | 8 | using namespace matcheroni; 9 | using namespace parseroni; 10 | 11 | using JsonContext = parseroni::TextParseContext; 12 | using JsonNode = parseroni::TextParseNode; 13 | 14 | //------------------------------------------------------------------------------ 15 | // Numbers 16 | 17 | using sign = Atoms<'+', '-'>; 18 | using digit = Range<'0', '9'>; 19 | using onenine = Range<'1', '9'>; 20 | using digits = Some; 21 | using integer = Seq>, Oneof, digit>>; 22 | using fraction = Seq, digits>; 23 | using exponent = Seq, Opt, digits>; 24 | using number = Seq, Opt>; 25 | 26 | //------------------------------------------------------------------------------ 27 | // Strings 28 | 29 | using ws = Any>; 30 | using hex = Ranges<'0','9','a','f','A','F'>; 31 | using escape = Seq, Oneof, Seq, Rep<4, hex>>>>; 32 | using character = Oneof>, Not>, Range<0x0020, 0x10FFFF>>, escape>; 33 | using string = Seq, Any, Atom<'"'>>; 34 | 35 | //------------------------------------------------------------------------------ 36 | // Arrays 37 | 38 | TextSpan match_value(JsonContext& ctx, TextSpan body); 39 | 40 | template 41 | using list = Seq, ws, pattern>>>; 42 | using value = Ref; 43 | using array = Seq, ws, Opt>, ws, 
Atom<']'>>; 44 | 45 | //------------------------------------------------------------------------------ 46 | // Objects 47 | 48 | using key = Capture<"key", string, JsonNode>; 49 | using field = Capture<"field", Seq, ws, value>, JsonNode>; 50 | using object = Seq, ws, Opt>, ws, Atom<'}'>>; 51 | using json = Seq; 52 | 53 | TextSpan match_value(JsonContext& ctx, TextSpan body) { 54 | using value = 55 | Capture< 56 | "val", 57 | Oneof, Lit<"false">, Lit<"null">>, 58 | JsonNode 59 | >; 60 | return value::match(ctx, body); 61 | } 62 | 63 | //------------------------------------------------------------------------------ 64 | // Parser 65 | 66 | bool parse_json(const char* text, int size) { 67 | static JsonContext ctx; 68 | TextSpan body(text, text + size); 69 | 70 | auto result = json::match(ctx, body); 71 | 72 | #ifdef DEBUG 73 | utils::print_trees(ctx, body, 40, 0); 74 | #endif 75 | 76 | return result.is_valid(); 77 | } 78 | 79 | //------------------------------------------------------------------------------ 80 | -------------------------------------------------------------------------------- /examples/live_demo/live_parser3.cpp: -------------------------------------------------------------------------------- 1 | #include "matcheroni/Matcheroni.hpp" 2 | #include "matcheroni/Parseroni.hpp" 3 | 4 | #ifdef DEBUG 5 | #include "matcheroni/Utilities.hpp" 6 | #endif 7 | 8 | using namespace matcheroni; 9 | using namespace parseroni; 10 | 11 | //------------------------------------------------------------------------------ 12 | // Custom node types 13 | 14 | struct JsonNode : public parseroni::NodeBase { 15 | virtual ~JsonNode() {} 16 | matcheroni::TextSpan as_text_span() const { return span; } 17 | }; 18 | 19 | struct JsonNumber : public JsonNode {}; 20 | struct JsonString : public JsonNode {}; 21 | struct JsonArray : public JsonNode {}; 22 | struct JsonKeyVal : public JsonNode {}; 23 | struct JsonObject : public JsonNode {}; 24 | struct JsonKeyword : public JsonNode {}; 25 | 
26 | struct JsonContext : public parseroni::NodeContext { 27 | static int atom_cmp(char a, int b) { return (unsigned char)a - b; } 28 | }; 29 | 30 | //------------------------------------------------------------------------------ 31 | // Numbers 32 | 33 | using sign = Atoms<'+', '-'>; 34 | using digit = Range<'0', '9'>; 35 | using onenine = Range<'1', '9'>; 36 | using digits = Some; 37 | using integer = Seq>, Oneof, digit>>; 38 | using fraction = Seq, digits>; 39 | using exponent = Seq, Opt, digits>; 40 | using number = Seq, Opt>; 41 | 42 | //------------------------------------------------------------------------------ 43 | // Strings 44 | 45 | using ws = Any>; 46 | using hex = Ranges<'0','9','a','f','A','F'>; 47 | using escape = Seq, Oneof, Seq, Rep<4, hex>>>>; 48 | using character = Oneof>, Not>, Range<0x0020, 0x10FFFF>>, escape>; 49 | using string = Seq, Any, Atom<'"'>>; 50 | 51 | //------------------------------------------------------------------------------ 52 | // Arrays 53 | 54 | TextSpan match_value(JsonContext& ctx, TextSpan body); 55 | 56 | template 57 | using list = Seq, ws, pattern>>>; 58 | using value = Ref; 59 | using array = Seq, ws, Opt>, ws, Atom<']'>>; 60 | 61 | //------------------------------------------------------------------------------ 62 | // Objects 63 | 64 | using key = Capture<"key", string, JsonString>; 65 | using field = Capture<"field", Seq, ws, value>, JsonKeyVal>; 66 | using object = Seq, ws, Opt>, ws, Atom<'}'>>; 67 | using json = Seq; 68 | 69 | TextSpan match_value(JsonContext& ctx, TextSpan body) { 70 | using value = 71 | Oneof< 72 | Capture<"val", number, JsonNumber>, 73 | Capture<"val", string, JsonString>, 74 | Capture<"val", array, JsonArray>, 75 | Capture<"val", object, JsonObject>, 76 | Capture<"val", Lit<"true">, JsonKeyword>, 77 | Capture<"val", Lit<"false">, JsonKeyword>, 78 | Capture<"val", Lit<"null">, JsonKeyword> 79 | >; 80 | return value::match(ctx, body); 81 | } 82 | 83 | 
//------------------------------------------------------------------------------ 84 | // Parser 85 | 86 | bool parse_json(const char* text, int size) { 87 | static JsonContext ctx; 88 | TextSpan body(text, text + size); 89 | 90 | auto result = json::match(ctx, body); 91 | 92 | #ifdef DEBUG 93 | utils::print_trees(ctx, body, 40, 0); 94 | #endif 95 | 96 | return result.is_valid(); 97 | } 98 | 99 | //------------------------------------------------------------------------------ 100 | -------------------------------------------------------------------------------- /examples/live_demo/notes/benchmark: -------------------------------------------------------------------------------- 1 | (With -flto) 2 | Parsing data/rapidjson_sample.json 3 | 4 | Tree nodes 5672 5 | Byte total 687491.000000 6 | Line total 3314.000000 7 | Match time 0.513930 8 | Parse time 0.684599 9 | Match byte rate 1337.713307 megabytes per second 10 | Match line rate 6.448349 megalines per second 11 | Parse byte rate 1004.224371 megabytes per second 12 | Parse line rate 4.840790 megalines per second 13 | 14 | 15 | (No -flto) 16 | Parsing data/rapidjson_sample.json 17 | 18 | Tree nodes 5672 19 | Byte total 687491.000000 20 | Line total 3314.000000 21 | Match time 0.590587 22 | Parse time 0.693285 23 | Match byte rate 1164.080821 megabytes per second 24 | Match line rate 5.611366 megalines per second 25 | Parse byte rate 991.642687 megabytes per second 26 | Parse line rate 4.780141 megalines per second 27 | -------------------------------------------------------------------------------- /examples/live_demo/notes/grammar.txt: -------------------------------------------------------------------------------- 1 | onenine := '1'...'9' 2 | digit := '0' | onenine 3 | digits := digit | digit digits 4 | fraction := "" | '.' 
digits 5 | sign := "" | '+' | '-' 6 | exponent := "" | 'E' sign digits | 'e' sign digits 7 | integer := digit | onenine digits | '-' digit | '-' onenine digits 8 | number := integer fraction exponent 9 | 10 | hex := digit | 'A'...'F' | 'a'...'f' 11 | escape := '"' | '\' | '/' | 'b' | 'f' | 'n' | 'r' | 't' | 'u' hex hex hex hex 12 | character := '0020' . '10FFFF' - '"' - '\' | '\' escape 13 | characters := "" | character characters 14 | string := '"' characters '"' 15 | 16 | ws := "" | '0020' ws | '000A' ws | '000D' ws | '0009' ws 17 | 18 | value := object | array | string | number | "true" | "false" | "null" 19 | object := '{' ws '}' | '{' members '}' 20 | members := member | member ',' members 21 | member := ws string ws ':' element 22 | array := '[' ws ']' | '[' elements ']' 23 | 24 | element := ws value ws 25 | elements := element | element ',' elements 26 | 27 | json := element 28 | -------------------------------------------------------------------------------- /examples/live_demo/notes/valgrind: -------------------------------------------------------------------------------- 1 | ==764411== Memcheck, a memory error detector 2 | ==764411== Copyright (C) 2002-2022, and GNU GPL'd, by Julian Seward et al. 
3 | ==764411== Using Valgrind-3.19.0 and LibVEX; rerun with -h for copyright info 4 | ==764411== Command: bin/examples/json/json_benchmark 5 | ==764411== 6 | Matcheroni JSON matching/parsing benchmark 7 | ---------------------------------------- 8 | Parsing data/canada.json 9 | 10 | Tree nodes 167203 11 | Byte total 2251051.000000 12 | Line total 9.000000 13 | Match time 63.056414 14 | Parse time 80.973685 15 | Match byte rate 35.699001 megabytes per second 16 | Match line rate 0.000143 megalines per second 17 | Parse byte rate 27.799785 megabytes per second 18 | Parse line rate 0.000111 megalines per second 19 | ---------------------------------------- 20 | Parsing data/citm_catalog.json 21 | 22 | Tree nodes 115385 23 | Byte total 1727204.000000 24 | Line total 50468.000000 25 | Match time 31.315390 26 | Parse time 49.419611 27 | Match byte rate 55.155117 megabytes per second 28 | Match line rate 1.611604 megalines per second 29 | Parse byte rate 34.949769 megabytes per second 30 | Parse line rate 1.021214 megalines per second 31 | ---------------------------------------- 32 | Parsing data/twitter.json 33 | 34 | Tree nodes 53949 35 | Byte total 631515.000000 36 | Line total 15482.000000 37 | Match time 11.596800 38 | Parse time 18.492193 39 | Match byte rate 54.455971 megabytes per second 40 | Match line rate 1.335023 megalines per second 41 | Parse byte rate 34.150357 megabytes per second 42 | Parse line rate 0.837218 megalines per second 43 | ---------------------------------------- 44 | Parsing data/rapidjson_sample.json 45 | 46 | Tree nodes 7005 47 | Byte total 687491.000000 48 | Line total 3314.000000 49 | Match time 7.729750 50 | Parse time 9.113121 51 | Match byte rate 88.940910 megabytes per second 52 | Match line rate 0.428733 megalines per second 53 | Parse byte rate 75.439687 megabytes per second 54 | Parse line rate 0.363651 megalines per second 55 | ---------------------------------------- 56 | Results averaged over all test files: 57 | 58 | Byte 
total 5297261.000000 59 | Line total 69273.000000 60 | Match time 113.698354 61 | Parse time 157.998610 62 | Match byte rate 46.590481 megabytes per second 63 | Match line rate 0.609270 megalines per second 64 | Parse byte rate 33.527263 megabytes per second 65 | Parse line rate 0.438441 megalines per second 66 | 67 | ==764411== 68 | ==764411== HEAP SUMMARY: 69 | ==764411== in use at exit: 0 bytes in 0 blocks 70 | ==764411== total heap usage: 29 allocs, 29 frees, 20,075,729 bytes allocated 71 | ==764411== 72 | ==764411== All heap blocks were freed -- no leaks are possible 73 | ==764411== 74 | ==764411== For lists of detected and suppressed errors, rerun with: -s 75 | ==764411== ERROR SUMMARY: 0 errors from 0 contexts (suppressed: 0 from 0) 76 | -------------------------------------------------------------------------------- /examples/live_demo/run1.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | mkdir -p bin/dbg 3 | set -v 4 | g++ -DDEBUG -std=c++20 -O0 -I.. -c live_demo.cpp -o bin/dbg/live_demo.o 5 | g++ -DDEBUG -std=c++20 -O0 -I.. -c live_parser1.cpp -o bin/dbg/live_parser1.o 6 | g++ bin/dbg/live_demo.o bin/dbg/live_parser1.o -o bin/dbg/live_demo1 7 | 8 | bin/dbg/live_demo1 9 | -------------------------------------------------------------------------------- /examples/live_demo/run2.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | mkdir -p bin/dbg 3 | set -v 4 | g++ -DDEBUG -std=c++20 -O0 -I.. -c live_demo.cpp -o bin/dbg/live_demo.o 5 | g++ -DDEBUG -std=c++20 -O0 -I.. 
-c live_parser2.cpp -o bin/dbg/live_parser2.o 6 | g++ bin/dbg/live_demo.o bin/dbg/live_parser2.o -o bin/dbg/live_demo2 7 | 8 | bin/dbg/live_demo2 9 | -------------------------------------------------------------------------------- /examples/live_demo/run3.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | mkdir -p bin/dbg 3 | set -v 4 | g++ -DDEBUG -std=c++20 -O0 -I.. -c live_demo.cpp -o bin/dbg/live_demo.o 5 | g++ -DDEBUG -std=c++20 -O0 -I.. -c live_parser3.cpp -o bin/dbg/live_parser3.o 6 | g++ bin/dbg/live_demo.o bin/dbg/live_parser3.o -o bin/dbg/live_demo3 7 | 8 | bin/dbg/live_demo3 9 | -------------------------------------------------------------------------------- /examples/regex/build.hancho: -------------------------------------------------------------------------------- 1 | #------------------------------------------------------------------------------- 2 | # Matcheroni regex parsing example 3 | 4 | lib = hancho( 5 | hancho.base_rules.cpp_lib, 6 | in_srcs = "regex_parser.cpp", 7 | out_lib = "regex_parser.a", 8 | ) 9 | 10 | -------------------------------------------------------------------------------- /examples/regex/regex_demo.cpp: -------------------------------------------------------------------------------- 1 | //------------------------------------------------------------------------------ 2 | // This file is a full working example of using Matcheroni to build a parser 3 | // that can parse a subset of regular expressions. Supported operators are 4 | // ^, $, ., *, ?, +, |, (), [], [^], and escaped characters. 
5 | 6 | // Example usage: 7 | // bin/regex_demo "(^\d+\s+(very)?\s+(good|bad)\s+[a-z]*$)" 8 | 9 | // SPDX-FileCopyrightText: 2023 Austin Appleby 10 | // SPDX-License-Identifier: MIT License 11 | 12 | #include "matcheroni/Matcheroni.hpp" 13 | #include "matcheroni/Parseroni.hpp" 14 | #include "matcheroni/Utilities.hpp" 15 | 16 | #include 17 | #include 18 | 19 | using namespace matcheroni; 20 | using namespace parseroni; 21 | 22 | TextSpan parse_regex(TextParseContext& ctx, TextSpan body); 23 | 24 | //------------------------------------------------------------------------------ 25 | // The demo app accepts a quoted regex as its first command line argument, 26 | // attempts to parse it, and then prints out the resulting parse tree. 27 | 28 | // Bash will un-quote the regex on the command line for us, so we don't need 29 | // to do any processing here. 30 | 31 | int main(int argc, char** argv) { 32 | printf("Regex Demo\n"); 33 | 34 | if (argc < 2) { 35 | printf("Usage: regex_demo \"\"\n"); 36 | return 0; 37 | } 38 | 39 | // Invoke our regex matcher against the input text. If it matches, we will 40 | // get a non-null endpoint for the match. 41 | 42 | TextParseContext ctx; 43 | auto text = utils::to_span(argv[1]); 44 | auto tail = parse_regex(ctx, text); 45 | utils::print_summary(text, tail, 50); 46 | 47 | return 0; 48 | } 49 | 50 | //------------------------------------------------------------------------------ 51 | -------------------------------------------------------------------------------- /examples/regex/regex_parser.cpp: -------------------------------------------------------------------------------- 1 | //------------------------------------------------------------------------------ 2 | // This file is a full working example of using Matcheroni to build a parser 3 | // that can parse a subset of regular expressions. Supported operators are 4 | // ^, $, ., *, ?, +, |, (), [], [^], and escaped characters. 
5 | 6 | // Example usage: 7 | // bin/regex_demo "(^\d+\s+(very)?\s+(good|bad)\s+[a-z]*$)" 8 | 9 | // SPDX-FileCopyrightText: 2023 Austin Appleby 10 | // SPDX-License-Identifier: MIT License 11 | 12 | #include "matcheroni/Matcheroni.hpp" 13 | #include "matcheroni/Parseroni.hpp" 14 | #include "matcheroni/Utilities.hpp" 15 | 16 | using namespace matcheroni; 17 | using namespace parseroni; 18 | 19 | template 20 | struct Capture3 { 21 | static TextSpan match(TextParseContext& ctx, TextSpan body) { 22 | return Capture::match(ctx, body); 23 | } 24 | }; 25 | 26 | //------------------------------------------------------------------------------ 27 | // To match anything at all, we first need to tell Matcheroni how to compare 28 | // one atom of our input sequence against a constant. 29 | 30 | static TextSpan match_regex(TextParseContext& ctx, TextSpan body); 31 | 32 | // Our 'control' characters consist of all atoms with special regex meanings. 33 | 34 | struct cchar { 35 | using pattern = Atoms<'\\', '(', ')', '|', '$', '.', '+', '*', '?', '[', ']', '^'>; 36 | static TextSpan match(TextParseContext& ctx, TextSpan body) { return pattern::match(ctx, body); } 37 | }; 38 | 39 | // Our 'plain' characters are every character that's not a control character. 40 | 41 | struct pchar { 42 | using pattern = Seq, AnyAtom>; 43 | static TextSpan match(TextParseContext& ctx, TextSpan body) { return pattern::match(ctx, body); } 44 | }; 45 | 46 | // Plain text is any span of plain characters not followed by an operator. 47 | 48 | struct text { 49 | using pattern = Some>>>; 50 | static TextSpan match(TextParseContext& ctx, TextSpan body) { return pattern::match(ctx, body); } 51 | }; 52 | 53 | // Our 'meta' characters are anything after a backslash. 
54 | 55 | struct mchar { 56 | using pattern = Seq, AnyAtom>; 57 | static TextSpan match(TextParseContext& ctx, TextSpan body) { return pattern::match(ctx, body); } 58 | }; 59 | 60 | // A character range is a beginning character and an end character separated 61 | // by a hyphen. 62 | 63 | 64 | struct range { 65 | using pattern = Seq< 66 | Capture3<"begin", pchar, TextParseNode>, 67 | Atom<'-'>, 68 | Capture3<"end", pchar, TextParseNode> 69 | >; 70 | static TextSpan match(TextParseContext& ctx, TextSpan body) { return pattern::match(ctx, body); } 71 | }; 72 | 73 | // The contents of a matcher set must be ranges, individual characters, or escaped (meta) characters. 74 | struct set_body { 75 | static TextSpan match(TextParseContext& ctx, TextSpan body) { 76 | return 77 | Some< 78 | Capture3<"range", range, TextParseNode>, 79 | Capture3<"char", pchar, TextParseNode>, 80 | Capture3<"meta", mchar, TextParseNode> 81 | >::match(ctx, body); 82 | } 83 | }; 84 | 85 | 86 | // The regex units that we can apply a */+/? operator to are sets, groups, 87 | // dots, and single characters. 88 | // Note that "group" recurses through match_regex(). 89 | 90 | struct unit { 91 | using pattern = 92 | Oneof< 93 | Capture3<"neg_set", Seq, Atom<'^'>, set_body, Atom<']'>>, TextParseNode>, 94 | Capture3<"pos_set", Seq, set_body, Atom<']'>>, TextParseNode>, 95 | Capture3<"group", Seq, Ref, Atom<')'>>, TextParseNode>, 96 | Capture3<"dot", Atom<'.'>, TextParseNode>, 97 | Capture3<"char", pchar, TextParseNode>, 98 | Capture3<"meta", mchar, TextParseNode> 99 | >; 100 | static TextSpan match(TextParseContext& ctx, TextSpan body) { return pattern::match(ctx, body); } 101 | }; 102 | 103 | // A 'simple' regex is text, line end markers, a unit w/ operator, or a bare 104 | // unit. 
105 | 106 | struct simple { 107 | using pattern = Some< 108 | Capture3<"text", text, TextParseNode>, 109 | Capture3<"BOL", Atom<'^'>, TextParseNode>, 110 | Capture3<"EOL", Atom<'$'>, TextParseNode>, 111 | Capture3<"any", Seq>, TextParseNode>, 112 | Capture3<"some", Seq>, TextParseNode>, 113 | Capture3<"opt", Seq>, TextParseNode>, 114 | unit 115 | >; 116 | static TextSpan match(TextParseContext& ctx, TextSpan body) { 117 | return pattern::match(ctx, body); 118 | } 119 | }; 120 | 121 | // A 'one-of' regex is a list of simple regexes separated by '|'. 122 | 123 | struct oneof { 124 | using pattern = 125 | Seq< 126 | Capture3<"option", simple, TextParseNode>, 127 | Some, 129 | Capture3<"option", simple, TextParseNode> 130 | >> 131 | >; 132 | static TextSpan match(TextParseContext& ctx, TextSpan body) { 133 | return pattern::match(ctx, body); 134 | } 135 | }; 136 | 137 | // This is the top level of our regex parser. 138 | static TextSpan match_regex(TextParseContext& ctx, TextSpan body) { 139 | // A 'top-level' regex is either a simple regex or a one-of regex. 
140 | using regex_top = 141 | Oneof< 142 | Capture3<"oneof", oneof, TextParseNode>, 143 | simple 144 | >; 145 | return regex_top::match(ctx, body); 146 | } 147 | 148 | TextSpan parse_regex(TextParseContext& ctx, TextSpan body) { 149 | return match_regex(ctx, body); 150 | //return body.fail(); 151 | } 152 | 153 | //------------------------------------------------------------------------------ 154 | -------------------------------------------------------------------------------- /examples/regex/regex_test.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | int main(int argc, char** argv) { 4 | printf("All tests pass\n"); 5 | return 0; 6 | } 7 | -------------------------------------------------------------------------------- /examples/regex/test.hancho: -------------------------------------------------------------------------------- 1 | 2 | # These are the various regex libraries that Matcheroni can be benchmarked 3 | # against. CTRE and SRELL require that you copy their header into matcheroni/. 4 | 5 | # These defines are required to reduce the compiled size of the SRELL library used in the benchmark. 
6 | #benchmark_defs = ${benchmark_defs} -DSRELL_NO_UNICODE_ICASE 7 | #benchmark_defs = ${benchmark_defs} -DSRELL_NO_UNICODE_PROPERTY 8 | #benchmark_defs = ${benchmark_defs} -DSRELL_NO_UNICODE_DATA 9 | #benchmark_defs = ${benchmark_defs} -DSRELL_NO_NAMEDCAPTURE 10 | #benchmark_defs = ${benchmark_defs} -DSRELL_NO_VMODE 11 | 12 | regex = hancho.load("{repo_dir}/examples/regex/build.hancho") 13 | 14 | hancho( 15 | hancho.base_rules.cpp_bin, 16 | in_srcs = "regex_demo.cpp", 17 | in_libs = regex.lib, 18 | out_bin = "regex_demo", 19 | ) 20 | 21 | hancho( 22 | hancho.base_rules.cpp_bin, 23 | in_srcs = "regex_benchmark.cpp", 24 | in_libs = regex.lib, 25 | out_bin = "regex_benchmark", 26 | sys_libs = ["-lboost_system", "-lboost_regex"], 27 | ) 28 | 29 | hancho( 30 | hancho.base_rules.cpp_test, 31 | in_srcs = "regex_test.cpp", 32 | in_libs = regex.lib, 33 | out_bin = "regex_test", 34 | ) 35 | -------------------------------------------------------------------------------- /examples/toml/build.hancho: -------------------------------------------------------------------------------- 1 | #------------------------------------------------------------------------------- 2 | # TOML parser example 3 | 4 | lib = hancho( 5 | hancho.base_rules.cpp_lib, 6 | in_srcs = "toml_parser.cpp", 7 | out_lib = "toml_parser.a", 8 | ) 9 | -------------------------------------------------------------------------------- /examples/toml/test.hancho: -------------------------------------------------------------------------------- 1 | toml = hancho.load("{repo_dir}/examples/toml/build.hancho") 2 | 3 | hancho( 4 | hancho.base_rules.cpp_test, 5 | in_srcs = "toml_test.cpp", 6 | in_libs = toml.lib, 7 | out_bin = "toml_test", 8 | ) 9 | -------------------------------------------------------------------------------- /examples/toml/toml_parser.cpp: -------------------------------------------------------------------------------- 1 | #include "matcheroni/Matcheroni.hpp" 2 | #include "matcheroni/Parseroni.hpp" 3 | 
#include "matcheroni/Utilities.hpp" 4 | 5 | using namespace matcheroni; 6 | using namespace parseroni; 7 | 8 | template 9 | using Cap = Capture; 10 | 11 | // https://epage.github.io/blog/2023/07/winnow-0-5-the-fastest-rust-parser-combinator-library/ 12 | 13 | using space = Any>; 14 | using comment = Seq, Until>; 15 | 16 | using cooked_string = 17 | Seq< 18 | Atom<'"'>, 19 | Until>, 20 | Atom<'"'> 21 | >; 22 | 23 | using single_string = 24 | Seq< 25 | Atom<'\''>, 26 | Until>, 27 | Atom<'\''> 28 | >; 29 | 30 | using raw_string = 31 | Seq< 32 | Lit<"\"\"\"">, 33 | Until>, 34 | Lit<"\"\"\""> 35 | >; 36 | 37 | using string = Oneof; 38 | 39 | using number = Some>; 40 | using boolean = Oneof, Lit<"false">>; 41 | using key = Some>; 42 | // using date = ??? 43 | 44 | template 45 | using delimited_list = Opt>, space, Opt>>; 46 | 47 | static TextSpan match_value(TextParseContext& ctx, TextSpan body); 48 | using value = Ref; 49 | 50 | using key_or_str = Oneof; 51 | using path = 52 | delimited_list< 53 | Cap<"path_el", key_or_str>, 54 | Atom<'.'> 55 | >; 56 | 57 | using table_header = 58 | Seq< 59 | Atom<'['>, 60 | space, 61 | Cap<"path", path>, 62 | space, 63 | Atom<']'> 64 | >; 65 | 66 | using table_array_header = 67 | Seq< 68 | Atom<'['>, Atom<'['>, 69 | space, 70 | Cap<"path", path>, 71 | space, 72 | Atom<']'>, Atom<']'> 73 | >; 74 | 75 | using key_value = 76 | Seq< 77 | Cap<"key", key_or_str>, 78 | space, 79 | Atom<'='>, 80 | space, 81 | Cap<"value", value> 82 | >; 83 | 84 | using table_entries = Any; 85 | 86 | using table = 87 | Seq< 88 | Cap<"header", table_header>, 89 | Cap<"entries", table_entries> 90 | >; 91 | 92 | using table_array = 93 | Seq< 94 | table_array_header, 95 | table_entries 96 | >; 97 | 98 | using inline_table = 99 | Seq< 100 | Atom<'{'>, 101 | space, 102 | delimited_list< 103 | Cap<"item", key_value>, 104 | Atom<','> 105 | >, 106 | space, 107 | Atom<'}'> 108 | >; 109 | 110 | using array = 111 | Seq< 112 | Atom<'['>, 113 | space, 114 | delimited_list< 
115 | value, 116 | Atom<','> 117 | >, 118 | space, 119 | Atom<']'> 120 | >; 121 | 122 | using expression = 123 | Oneof< 124 | Cap<"pair", key_value>, 125 | Cap<"table", table>, 126 | Cap<"table_array", table_array> 127 | >; 128 | 129 | using tomlFile = 130 | Any>; 135 | 136 | static TextSpan match_value(TextParseContext& ctx, TextSpan body) { 137 | using value = 138 | Oneof< 139 | Cap<"string", string>, 140 | Cap<"number", number>, 141 | Cap<"boolean", boolean>, 142 | /*date,*/ 143 | Cap<"array", array>, 144 | Cap<"inline_table", inline_table> 145 | >; 146 | return value::match(ctx, body); 147 | } 148 | 149 | TextSpan match_toml(TextParseContext& ctx, TextSpan body) { 150 | return tomlFile::match(ctx, body); 151 | } 152 | -------------------------------------------------------------------------------- /examples/toml/toml_test.cpp: -------------------------------------------------------------------------------- 1 | //------------------------------------------------------------------------------ 2 | // SPDX-FileCopyrightText: 2023 Austin Appleby 3 | // SPDX-License-Identifier: MIT License 4 | 5 | #include "matcheroni/Matcheroni.hpp" 6 | #include "matcheroni/Parseroni.hpp" 7 | #include "matcheroni/Utilities.hpp" 8 | 9 | #include 10 | 11 | using namespace matcheroni; 12 | using namespace parseroni; 13 | 14 | TextSpan match_toml(TextParseContext& ctx, TextSpan text); 15 | 16 | 17 | //------------------------------------------------------------------------------ 18 | 19 | const char* toml = R"( 20 | [workspace] 21 | resolver = "2" 22 | [target.'cfg(not(windows))'.dependencies] 23 | description = """ 24 | Cargo, a package manager for Rust. 
25 | """ 26 | members = [ 27 | "crates/*", 28 | "credential/*", 29 | "benches/benchsuite", 30 | "benches/capture", 31 | ] 32 | exclude = [ 33 | "target/", 34 | ] 35 | )"; 36 | 37 | int main(int argc, char** argv) { 38 | /* 39 | if (argc < 2) { 40 | printf("Usage: toml_test \n"); 41 | return 1; 42 | } 43 | 44 | printf("argv[0] = %s\n", argv[0]); 45 | printf("argv[1] = %s\n", argv[1]); 46 | 47 | std::string buf; 48 | read(argv[1], buf); 49 | 50 | TextSpan text = to_span(buf); 51 | */ 52 | 53 | TextSpan text = utils::to_span(toml); 54 | TextParseContext ctx; 55 | 56 | double time_a, time_b; 57 | TextSpan tail; 58 | 59 | for (int rep = 0; rep < 100; rep++) { 60 | ctx.reset(); 61 | time_a = utils::timestamp_ms(); 62 | tail = match_toml(ctx, text); 63 | time_b = utils::timestamp_ms(); 64 | } 65 | 66 | printf("Parsing toml took %f msec\n", time_b - time_a); 67 | utils::print_summary(ctx, text, tail, 50); 68 | 69 | return 0; 70 | } 71 | 72 | //------------------------------------------------------------------------------ 73 | -------------------------------------------------------------------------------- /examples/tutorial/json_tut0a.cpp: -------------------------------------------------------------------------------- 1 | #include "matcheroni/Matcheroni.hpp" 2 | #include "matcheroni/Utilities.hpp" 3 | 4 | using namespace matcheroni; 5 | 6 | int main(int argc, char** argv) { 7 | const char* filename = argc < 2 ? "examples/tutorial/json_tut0a.input" : argv[1]; 8 | 9 | std::string input = utils::read(filename); 10 | TextSpan text = utils::to_span(input); 11 | 12 | using pattern = Seq< Lit<"Hello">, Atom<' '>, Lit<"World"> >; 13 | 14 | TextMatchContext ctx; 15 | TextSpan tail = pattern::match(ctx, text); 16 | utils::print_summary(text, tail, 50); 17 | 18 | return tail.is_valid() ? 
0 : -1; 19 | } 20 | -------------------------------------------------------------------------------- /examples/tutorial/json_tut0a.input: -------------------------------------------------------------------------------- 1 | Hello World! Words words words words 2 | -------------------------------------------------------------------------------- /examples/tutorial/json_tut1a.cpp: -------------------------------------------------------------------------------- 1 | #include "matcheroni/Matcheroni.hpp" 2 | #include "matcheroni/Utilities.hpp" 3 | 4 | using namespace matcheroni; 5 | 6 | struct JsonMatcher { 7 | // Matches any JSON number 8 | using sign = Atoms<'+', '-'>; 9 | using digit = Range<'0', '9'>; 10 | using onenine = Range<'1', '9'>; 11 | using digits = Some; 12 | using integer = Seq>, Oneof, digit>>; 13 | using fraction = Seq, digits>; 14 | using exponent = Seq, Opt, digits>; 15 | using number = Seq, Opt>; 16 | 17 | static TextSpan match(TextMatchContext& ctx, TextSpan body) { 18 | return number::match(ctx, body); 19 | } 20 | }; 21 | 22 | int main(int argc, char** argv) { 23 | const char* filename = argc < 2 ? "examples/tutorial/json_tut1a.input" : argv[1]; 24 | 25 | std::string input = utils::read(filename); 26 | TextSpan text = utils::to_span(input); 27 | 28 | TextMatchContext ctx; 29 | auto tail = JsonMatcher::match(ctx, text); 30 | utils::print_summary(text, tail, 50); 31 | 32 | return tail.is_valid() ? 
0 : -1; 33 | } 34 | -------------------------------------------------------------------------------- /examples/tutorial/json_tut1a.input: -------------------------------------------------------------------------------- 1 | -12345.6789e10 Hello Json World 2 | -------------------------------------------------------------------------------- /examples/tutorial/json_tut1b.cpp: -------------------------------------------------------------------------------- 1 | #include "matcheroni/Matcheroni.hpp" 2 | #include "matcheroni/Utilities.hpp" 3 | 4 | using namespace matcheroni; 5 | 6 | struct JsonMatcher { 7 | // Matches any JSON number 8 | using sign = Atoms<'+', '-'>; 9 | using digit = Range<'0', '9'>; 10 | using onenine = Range<'1', '9'>; 11 | using digits = Some; 12 | using integer = Seq>, Oneof, digit>>; 13 | using fraction = Seq, digits>; 14 | using exponent = Seq, Opt, digits>; 15 | using number = Seq, Opt>; 16 | 17 | // Matches a JSON string that can contain valid escape characters 18 | using space = Some>; 19 | using hex = Ranges<'0','9','a','f','A','F'>; 20 | using escape = Oneof, Seq, Rep<4, hex>>>; 21 | using character = Oneof< 22 | Seq>, Not>, Range<0x0020, 0x10FFFF>>, 23 | Seq, escape> 24 | >; 25 | using string = Seq, Any, Atom<'"'>>; 26 | 27 | // Matches the three reserved JSON keywords 28 | using keyword = Oneof, Lit<"false">, Lit<"null">>; 29 | 30 | // Matches the above items separated by whitespace 31 | static TextSpan match(TextMatchContext& ctx, TextSpan body) { 32 | using item = Oneof; 33 | using pattern = Seq>>; 34 | return pattern::match(ctx, body); 35 | } 36 | }; 37 | 38 | int main(int argc, char** argv) { 39 | const char* filename = argc < 2 ? "examples/tutorial/json_tut1b.input" : argv[1]; 40 | 41 | std::string input = utils::read(filename); 42 | TextSpan text = utils::to_span(input); 43 | 44 | TextMatchContext ctx; 45 | auto tail = JsonMatcher::match(ctx, text); 46 | utils::print_summary(text, tail, 50); 47 | 48 | return tail.is_valid() ? 
0 : -1; 49 | } 50 | -------------------------------------------------------------------------------- /examples/tutorial/json_tut1b.input: -------------------------------------------------------------------------------- 1 | "Hello World" 2 | 12345.58e10 3 | true 4 | false 5 | null 6 | random_word 7 | -------------------------------------------------------------------------------- /examples/tutorial/json_tut1c.cpp: -------------------------------------------------------------------------------- 1 | #include "matcheroni/Matcheroni.hpp" 2 | #include "matcheroni/Utilities.hpp" 3 | 4 | using namespace matcheroni; 5 | 6 | struct JsonMatcher { 7 | // Matches any JSON number 8 | using sign = Atoms<'+', '-'>; 9 | using digit = Range<'0', '9'>; 10 | using onenine = Range<'1', '9'>; 11 | using digits = Some; 12 | using integer = Seq>, Oneof, digit>>; 13 | using fraction = Seq, digits>; 14 | using exponent = Seq, Opt, digits>; 15 | using number = Seq, Opt>; 16 | 17 | // Matches a JSON string that can contain valid escape characters 18 | using space = Some>; 19 | using hex = Ranges<'0','9','a','f','A','F'>; 20 | using escape = Oneof, Seq, Rep<4, hex>>>; 21 | using character = Oneof< 22 | Seq>, Not>, Range<0x0020, 0x10FFFF>>, 23 | Seq, escape> 24 | >; 25 | using string = Seq, Any, Atom<'"'>>; 26 | 27 | // Matches the three reserved JSON keywords 28 | using keyword = Oneof, Lit<"false">, Lit<"null">>; 29 | 30 | // Matches a comma-delimited list with embedded whitespace 31 | template 32 | using list = Seq, Atom<','>, Opt, P>>>; 33 | 34 | // Matches any valid JSON value 35 | static TextSpan match_value(TextMatchContext& ctx, TextSpan body) { 36 | return Oneof< 37 | utils::TraceText<"number", number>, 38 | utils::TraceText<"string", string>, 39 | utils::TraceText<"array", array>, 40 | utils::TraceText<"object", object>, 41 | utils::TraceText<"keyword", keyword> 42 | >::match(ctx, body); 43 | } 44 | using value = Ref; 45 | 46 | // Matches bracket-delimited lists of JSON values 47 | 
using array = 48 | Seq< 49 | Atom<'['>, 50 | Opt, 51 | Opt>, 52 | Opt, 53 | Atom<']'> 54 | >; 55 | 56 | // Matches a key:value pair where 'key' is a string and 'value' is any JSON 57 | // value. 58 | using pair = 59 | Seq< 60 | utils::TraceText<"key", string>, 61 | Opt, 62 | Atom<':'>, 63 | Opt, 64 | utils::TraceText<"value", value> 65 | >; 66 | 67 | // Matches a curly-brace-delimited list of key:value pairs. 68 | using object = 69 | Seq< 70 | Atom<'{'>, 71 | Opt, 72 | Opt>>, 73 | Opt, 74 | Atom<'}'> 75 | >; 76 | 77 | // Matches any valid JSON document 78 | static TextSpan match(TextMatchContext& ctx, TextSpan body) { 79 | return Seq, value, Opt>::match(ctx, body); 80 | } 81 | }; 82 | 83 | int main(int argc, char** argv) { 84 | const char* filename = argc < 2 ? "examples/tutorial/json_tut1c.input" : argv[1]; 85 | 86 | std::string input = utils::read(filename); 87 | TextSpan text = utils::to_span(input); 88 | 89 | TextMatchContext ctx; 90 | TextSpan tail = JsonMatcher::match(ctx, text); 91 | 92 | printf("\n"); 93 | utils::print_summary(text, tail, 50); 94 | 95 | return tail.is_valid() ? 
0 : -1; 96 | } 97 | -------------------------------------------------------------------------------- /examples/tutorial/json_tut1c.input: -------------------------------------------------------------------------------- 1 | { 2 | "zarg" : "whop", 3 | "foo" : [1,2,3] 4 | } 5 | -------------------------------------------------------------------------------- /examples/tutorial/json_tut2a.cpp: -------------------------------------------------------------------------------- 1 | #include "matcheroni/Matcheroni.hpp" 2 | #include "matcheroni/Parseroni.hpp" 3 | #include "matcheroni/Utilities.hpp" 4 | 5 | using namespace matcheroni; 6 | using namespace parseroni; 7 | 8 | struct JsonParser { 9 | // Matches any JSON number 10 | using sign = Atoms<'+', '-'>; 11 | using digit = Range<'0', '9'>; 12 | using onenine = Range<'1', '9'>; 13 | using digits = Some; 14 | using integer = Seq>, Oneof, digit>>; 15 | using fraction = Seq, digits>; 16 | using exponent = Seq, Opt, digits>; 17 | using number = Seq, Opt>; 18 | 19 | // Matches a JSON string that can contain valid escape characters 20 | using ws = Some>; 21 | using hex = Ranges<'0','9','a','f','A','F'>; 22 | using escape = Oneof, Seq, Rep<4, hex>>>; 23 | using character = Oneof< 24 | Seq>, Not>, Range<0x0020, 0x10FFFF>>, 25 | Seq, escape> 26 | >; 27 | using string = Seq, Any, Atom<'"'>>; 28 | 29 | // Matches the three reserved JSON keywords 30 | using keyword = Oneof, Lit<"false">, Lit<"null">>; 31 | 32 | // Matches a comma-delimited list with embedded whitespace 33 | template 34 | using list = Seq, Atom<','>, Opt, P>>>; 35 | 36 | // Matches any valid JSON value 37 | static TextSpan match_value(TextParseContext& ctx, TextSpan body) { 38 | return Oneof< 39 | Capture<"number", number, TextParseNode>, 40 | Capture<"string", string, TextParseNode>, 41 | Capture<"array", array, TextParseNode>, 42 | Capture<"object", object, TextParseNode>, 43 | Capture<"keyword", keyword, TextParseNode> 44 | >::match(ctx, body); 45 | } 46 | using value 
= Ref; 47 | 48 | // Matches bracket-delimited lists of JSON values 49 | using array = 50 | Seq< 51 | Atom<'['>, 52 | Opt, 53 | Opt>, 54 | Opt, 55 | Atom<']'> 56 | >; 57 | 58 | // Matches a key:value pair where 'key' is a string and 'value' is a JSON value. 59 | using pair = 60 | Seq< 61 | Capture<"key", string, TextParseNode>, 62 | Opt, 63 | Atom<':'>, 64 | Opt, 65 | Capture<"value", value, TextParseNode> 66 | >; 67 | 68 | // Matches a curly-brace-delimited list of key:value pairs. 69 | using object = 70 | Seq< 71 | Atom<'{'>, 72 | Opt, 73 | Opt>>, 74 | Opt, 75 | Atom<'}'> 76 | >; 77 | 78 | // Matches any valid JSON document 79 | static TextSpan match(TextParseContext& ctx, TextSpan body) { 80 | return Seq, value, Opt>::match(ctx, body); 81 | } 82 | }; 83 | 84 | int main(int argc, char** argv) { 85 | const char* filename = argc < 2 ? "examples/tutorial/json_tut2a.input" : argv[1]; 86 | 87 | std::string input = utils::read(filename); 88 | TextSpan text = utils::to_span(input); 89 | 90 | TextParseContext ctx; 91 | TextSpan tail = JsonParser::match(ctx, text); 92 | utils::print_summary(ctx, text, tail, 50); 93 | 94 | return tail.is_valid() ? 
0 : -1; 95 | } 96 | -------------------------------------------------------------------------------- /examples/tutorial/json_tut2a.input: -------------------------------------------------------------------------------- 1 | { 2 | "foo" : "bar", 3 | "baz" : [1, 2, 3, -5.238492834e-123], 4 | "blep" : true, 5 | "blap" : false, 6 | "blop" : null 7 | } 8 | -------------------------------------------------------------------------------- /examples/tutorial/json_tut2b.cpp: -------------------------------------------------------------------------------- 1 | #include "matcheroni/Matcheroni.hpp" 2 | #include "matcheroni/Parseroni.hpp" 3 | #include "matcheroni/Utilities.hpp" 4 | 5 | #include 6 | 7 | using namespace matcheroni; 8 | using namespace parseroni; 9 | 10 | // Our base node type is the same as TextParseNode, with the addition of a 11 | // sum() method. 12 | struct JsonNode : public NodeBase { 13 | TextSpan as_text_span() const { return span; } 14 | 15 | virtual double sum() { 16 | double result = 0; 17 | for (auto n = child_head; n; n = n->node_next) { 18 | result += n->sum(); 19 | } 20 | return result; 21 | } 22 | }; 23 | 24 | // We'll specialize JsonNode for numerical values by overriding init() to also 25 | // convert the matched text to a double. 26 | struct NumberNode : public JsonNode { 27 | void init() { 28 | value = atof(span.begin); 29 | } 30 | 31 | virtual double sum() { 32 | return value; 33 | } 34 | 35 | double value = 0; 36 | }; 37 | 38 | // And our context provides atom_cmp() and sum(). NodeContext<> handles the 39 | // required checkpoint()/rewind() methods. 
40 | struct JsonParseContext : public NodeContext { 41 | static int atom_cmp(char a, int b) { 42 | return (unsigned char)a - b; 43 | } 44 | 45 | double sum() { 46 | double result = 0; 47 | for (auto n = top_head; n; n = n->node_next) { 48 | result += n->sum(); 49 | } 50 | return result; 51 | } 52 | }; 53 | 54 | //------------------------------------------------------------------------------ 55 | 56 | struct JsonParser { 57 | // Matches any JSON number 58 | using sign = Atoms<'+', '-'>; 59 | using digit = Range<'0', '9'>; 60 | using onenine = Range<'1', '9'>; 61 | using digits = Some; 62 | using integer = Seq>, Oneof, digit>>; 63 | using fraction = Seq, digits>; 64 | using exponent = Seq, Opt, digits>; 65 | using number = Seq, Opt>; 66 | 67 | // Matches a JSON string that can contain valid escape characters 68 | using ws = Some>; 69 | using hex = Ranges<'0','9','a','f','A','F'>; 70 | using escape = Oneof, Seq, Rep<4, hex>>>; 71 | using character = Oneof< 72 | Seq>, Not>, Range<0x0020, 0x10FFFF>>, 73 | Seq, escape> 74 | >; 75 | using string = Seq, Any, Atom<'"'>>; 76 | 77 | // Matches the three reserved JSON keywords 78 | using keyword = Oneof, Lit<"false">, Lit<"null">>; 79 | 80 | // Matches a comma-delimited list with embedded whitespace 81 | template 82 | using list = Seq, Atom<','>, Opt, P>>>; 83 | 84 | // Matches any valid JSON value 85 | static TextSpan match_value(JsonParseContext& ctx, TextSpan body) { 86 | return Oneof< 87 | 88 | // ********** 89 | // This Capture<> will now create NumberNodes 90 | Capture<"number", number, NumberNode>, 91 | // ********** 92 | 93 | Capture<"string", string, JsonNode>, 94 | Capture<"array", array, JsonNode>, 95 | Capture<"object", object, JsonNode>, 96 | Capture<"keyword", keyword, JsonNode> 97 | >::match(ctx, body); 98 | } 99 | using value = Ref; 100 | 101 | // Matches bracket-delimited lists of JSON values 102 | using array = 103 | Seq< 104 | Atom<'['>, 105 | Opt, 106 | Opt>, 107 | Opt, 108 | Atom<']'> 109 | >; 110 | 111 | 
// Matches a key:value pair where 'key' is a string and 'value' is a JSON value. 112 | using pair = 113 | Seq< 114 | Capture<"key", string, JsonNode>, 115 | Opt, 116 | Atom<':'>, 117 | Opt, 118 | Capture<"value", value, JsonNode> 119 | >; 120 | 121 | // Matches a curly-brace-delimited list of key:value pairs. 122 | using object = 123 | Seq< 124 | Atom<'{'>, 125 | Opt, 126 | Opt>>, 127 | Opt, 128 | Atom<'}'> 129 | >; 130 | 131 | // Matches any valid JSON document 132 | static TextSpan match(JsonParseContext& ctx, TextSpan body) { 133 | return Seq, value, Opt>::match(ctx, body); 134 | } 135 | }; 136 | 137 | int main(int argc, char** argv) { 138 | const char* filename = argc < 2 ? "examples/tutorial/json_tut2b.input" : argv[1]; 139 | 140 | std::string input = utils::read(filename); 141 | TextSpan text = utils::to_span(input); 142 | 143 | JsonParseContext ctx; 144 | TextSpan tail = JsonParser::match(ctx, text); 145 | 146 | printf("Sum of number nodes: %f\n", ctx.sum()); 147 | printf("\n"); 148 | utils::print_summary(ctx, text, tail, 50); 149 | 150 | return tail.is_valid() ? 
0 : -1; 151 | } 152 | -------------------------------------------------------------------------------- /examples/tutorial/json_tut2b.input: -------------------------------------------------------------------------------- 1 | { 2 | "numbers" : [1.2, 3.4, 5.6789, 10.111213] 3 | } 4 | -------------------------------------------------------------------------------- /examples/tutorial/json_tutorial.md: -------------------------------------------------------------------------------- 1 | # Json Tutorial json_tutorial.md 2 | -------------------------------------------------------------------------------- /examples/tutorial/main1.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aappleby/matcheroni/4c22d81e0ddc8e1a58db16a883636911318f4162/examples/tutorial/main1.cpp -------------------------------------------------------------------------------- /examples/tutorial/main2.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aappleby/matcheroni/4c22d81e0ddc8e1a58db16a883636911318f4162/examples/tutorial/main2.cpp -------------------------------------------------------------------------------- /examples/tutorial/main3.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aappleby/matcheroni/4c22d81e0ddc8e1a58db16a883636911318f4162/examples/tutorial/main3.cpp -------------------------------------------------------------------------------- /examples/tutorial/test.hancho: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------------- 2 | # Tutorial examples 3 | 4 | 5 | hancho( 6 | hancho.base_rules.cpp_test, 7 | in_srcs="json_tut0a.cpp", 8 | out_bin="json_tut0a", 9 | ) 10 | 11 | hancho( 12 | hancho.base_rules.cpp_test, 13 | in_srcs="json_tut1a.cpp", 14 | out_bin="json_tut1a", 15 | 
) 16 | 17 | hancho( 18 | hancho.base_rules.cpp_test, 19 | in_srcs="json_tut1b.cpp", 20 | out_bin="json_tut1b", 21 | ) 22 | 23 | hancho( 24 | hancho.base_rules.cpp_test, 25 | in_srcs="json_tut1c.cpp", 26 | out_bin="json_tut1c", 27 | ) 28 | 29 | hancho( 30 | hancho.base_rules.cpp_test, 31 | in_srcs="json_tut2a.cpp", 32 | out_bin="json_tut2a", 33 | ) 34 | 35 | hancho( 36 | hancho.base_rules.cpp_test, 37 | in_srcs="json_tut2b.cpp", 38 | out_bin="json_tut2b", 39 | ) 40 | 41 | hancho( 42 | hancho.base_rules.cpp_bin, 43 | in_srcs="tiny_c_parser.cpp", 44 | in_libs=[hancho.c_lexer.lib, hancho.c_parser.lib], 45 | out_bin="tiny_c_parser", 46 | ) 47 | -------------------------------------------------------------------------------- /examples/tutorial/tiny_c_parser.cpp: -------------------------------------------------------------------------------- 1 | // SPDX-FileCopyrightText: 2023 Austin Appleby 2 | // SPDX-License-Identifier: MIT License 3 | 4 | #include "examples/c_lexer/CLexer.hpp" 5 | #include "examples/c_parser/CContext.hpp" 6 | #include "examples/c_parser/CNode.hpp" 7 | #include "matcheroni/Utilities.hpp" 8 | 9 | using namespace matcheroni; 10 | 11 | int main(int argc, char** argv) { 12 | const char* filename = argc < 2 ? 
"examples/tutorial/tiny_c_parser.input" : argv[1]; 13 | 14 | std::string input = utils::read(filename); 15 | auto text = utils::to_span(input); 16 | 17 | CLexer lexer; 18 | lexer.lex(text); 19 | 20 | CContext context; 21 | TokenSpan tok_span(lexer.tokens.data(), lexer.tokens.data() + lexer.tokens.size()); 22 | bool parse_ok = context.parse(text, tok_span); 23 | 24 | for (auto n = context.top_head; n; n = n->node_next) { 25 | utils::print_tree(text, n, 40, 0); 26 | } 27 | } 28 | -------------------------------------------------------------------------------- /examples/tutorial/tiny_c_parser.input: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | int main(int argc, char** argv) { 4 | printf("Hello World! %d %p\n", argc, argv); 5 | return 0; 6 | } 7 | -------------------------------------------------------------------------------- /matcheroni/Cookbook.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include "matcheroni/Matcheroni.hpp" 3 | 4 | namespace matcheroni { 5 | namespace cookbook { 6 | 7 | //---------------------------------------- 8 | // Splits a file into lines and sends the lines to 'sink' 9 | 10 | template 11 | using to_lines = Any>, sink>, Opt>>>; 12 | 13 | //---------------------------------------- 14 | // Character types from ctype.h 15 | 16 | using isalnum = Ranges<'0','9','a','z','A','Z'>; 17 | using isalpha = Ranges<'a', 'z', 'A', 'Z'>; 18 | using isblank = Atoms<' ', '\t'>; 19 | using iscntrl = Ranges<0x00, 0x1F, 0x7F, 0x7F>; 20 | using isdigit = Range<'0', '9'>; 21 | using isgraph = Range<0x21, 0x7E>; 22 | using islower = Range<'a', 'z'>; 23 | using isprint = Range<0x20, 0x7E>; 24 | using ispunct = Ranges<0x21, 0x2F, 0x3A, 0x40, 0x5B, 0x60, 0x7B, 0x7E>; 25 | using isspace = Atoms<' ','\f','\v','\n','\r','\t'>; 26 | using isupper = Range<'A', 'Z'>; 27 | using isxdigit = Ranges<'0','9','a','f','A','F'>; 28 | 29 | 
//---------------------------------------- 30 | // Numbers 31 | 32 | using sign = Atoms<'+','-'>; 33 | 34 | using decimal_digit = Range<'0','9'>; 35 | using decimal_nonzero = Range<'1','9'>; 36 | using decimal_digits = Some; 37 | using decimal_integer = Seq< Opt, Oneof< Seq, decimal_digit> >; 38 | 39 | using decimal_fraction = Seq, decimal_digits>; 40 | using decimal_exponent = Seq, Opt, decimal_digits>; 41 | using decimal_float = Seq, Opt>; 42 | 43 | using hexadecimal_prefix = Oneof, Lit<"0X">>; 44 | using hexadecimal_digit = Ranges<'0','9','a','f','A','F'>; 45 | using hexadecimal_digits = Some; 46 | using hexadecimal_integer = Seq< Opt, hexadecimal_prefix, hexadecimal_digits >; 47 | 48 | //---------------------------------------- 49 | // Delimited spans 50 | 51 | template 52 | using delimited_span = Seq, rdelim>; 53 | 54 | using dquote_span = delimited_span, Atom<'"'>>; // note - no \" support 55 | using squote_span = delimited_span, Atom<'\''>>; // note - no \' support 56 | using bracket_span = delimited_span, Atom<']'>>; 57 | using brace_span = delimited_span, Atom<'}'>>; 58 | using paren_span = delimited_span, Atom<')'>>; 59 | using angle_span = delimited_span, Atom<'>'>>; 60 | 61 | // Python's triple-double-quoted string literal 62 | using triple_quote = Lit; 63 | using multiline_string = delimited_span; 64 | 65 | //---------------------------------------- 66 | // C #includes 67 | 68 | template 69 | using c_include_line = Seq< 70 | Lit<"#include">, 71 | Some, 72 | Dispatch, sink> 73 | >; 74 | 75 | //---------------------------------------- 76 | // Separated = a, b , c , d 77 | 78 | template 79 | using separated = 80 | Seq< 81 | pattern, 82 | Any, delim, Any, pattern>>, 83 | // trailing delimiter OK 84 | Opt, delim>> 85 | >; 86 | 87 | template 88 | using comma_separated = separated>; 89 | 90 | //---------------------------------------- 91 | // Joined = a.b.c.d 92 | 93 | template 94 | using joined = 95 | Seq< 96 | pattern, 97 | Any> 98 | // trailing delimiter 
_not_ OK 99 | >; 100 | 101 | template 102 | using dot_joined = joined>; 103 | 104 | //---------------------------------------- 105 | // Delimited lists 106 | 107 | template 108 | using delimited_list = Seq, comma_separated, Any, rdelim>; 109 | 110 | template using paren_list = delimited_list, pattern, Atom<')'>>; 111 | template using bracket_list = delimited_list, pattern, Atom<']'>>; 112 | template using brace_list = delimited_list, pattern, Atom<'}'>>; 113 | 114 | //---------------------------------------- 115 | // Basic UTF8 116 | 117 | using utf8_ext = Range<0x80, 0xBF>; 118 | using utf8_onebyte = Range<0x00, 0x7F>; 119 | using utf8_twobyte = Seq, utf8_ext>; 120 | using utf8_threebyte = Seq, utf8_ext, utf8_ext>; 121 | using utf8_fourbyte = Seq, utf8_ext, utf8_ext, utf8_ext>; 122 | using utf8_bom = Seq, Atom<0xBB>, Atom<0xBF>>; 123 | 124 | }; // namespace cookbook 125 | }; // namespace matcheroni 126 | -------------------------------------------------------------------------------- /matcheroni/Printeroni.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | // This will eventually be support code to make regenerating documents from 4 | // parse trees easier. 
5 | -------------------------------------------------------------------------------- /matcheroni/__init__.py: -------------------------------------------------------------------------------- 1 | from .matcheroni import * 2 | -------------------------------------------------------------------------------- /matcheroni/dump.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | #include // for exit 6 | #include 7 | #include 8 | #include 9 | 10 | #include "matcheroni/Matcheroni.hpp" // for Span 11 | 12 | namespace matcheroni { 13 | namespace utils { 14 | 15 | //------------------------------------------------------------------------------ 16 | 17 | inline uint32_t dim_color(uint32_t color) { 18 | int r = (color >> 0) & 0xFF; 19 | int g = (color >> 8) & 0xFF; 20 | int b = (color >> 16) & 0xFF; 21 | 22 | r = (r * 2) / 5; 23 | g = (g * 2) / 5; 24 | b = (b * 2) / 5; 25 | 26 | return (r << 0) | (g << 8) | (b << 16); 27 | } 28 | 29 | inline void set_color(uint32_t c) { 30 | static uint32_t current_color = 0; 31 | if (current_color == c) return; 32 | current_color = c; 33 | if (c) { 34 | printf("\u001b[38;2;%d;%d;%dm", (c >> 0) & 0xFF, (c >> 8) & 0xFF, 35 | (c >> 16) & 0xFF); 36 | } else { 37 | printf("\u001b[0m"); 38 | } 39 | } 40 | 41 | //------------------------------------------------------------------------------ 42 | 43 | struct SpanDumper { 44 | size_t len = 0; 45 | 46 | void put(char c, uint32_t color) { 47 | if (color == 0) color = 0xCCCCCC; 48 | 49 | if (c == ' ' ) { set_color(dim_color(color)); putc('_', stdout); } 50 | else if (c == '@' ) { set_color(dim_color(color)); putc('@', stdout); } 51 | else if (isprint(c)) { set_color(color); putc(c, stdout); } 52 | else if (c == '\n') { set_color(dim_color(color)); putc('n', stdout); } 53 | else if (c == '\r') { set_color(dim_color(color)); putc('r', stdout); } 54 | else if (c == '\t') { set_color(dim_color(color)); putc('t', stdout); } 55 
| else { set_color(dim_color(color)); putc('?', stdout); } 56 | 57 | len++; 58 | fflush(stdout); 59 | } 60 | }; 61 | 62 | //------------------------------------------------------------------------------ 63 | 64 | inline void print_trellis(int depth, const char* name, const char* suffix, 65 | uint32_t color) { 66 | set_color(0x808080); 67 | printf(depth == 0 ? " *" : " "); 68 | for (int i = 0; i < depth; i++) { 69 | printf(i == depth - 1 ? "|--" : "| "); 70 | } 71 | set_color(color); 72 | printf("%s %s", name, suffix); 73 | set_color(0); 74 | } 75 | 76 | //------------------------------------------------------------------------------ 77 | 78 | inline void print_match(const char* a, const char* b, const char* c, uint32_t col_ab, uint32_t col_bc, size_t width) { 79 | SpanDumper d; 80 | 81 | if (b-a > (int)width) { b = a + width; c = a + width; } 82 | if (c-a > (int)width) { c = a + width; } 83 | 84 | for (auto p = a; p < b; p++) d.put(*p, col_ab); 85 | for (auto p = b; p < c; p++) d.put(*p, col_bc); 86 | 87 | while(d.len < width) d.put('@', 0); 88 | set_color(0); 89 | } 90 | 91 | inline void print_summary(TextSpan text, TextSpan tail, int width) { 92 | printf("Match text:\n"); 93 | print_match(text.begin, text.end, text.end, 0xCCCCCC, 0xCCCCCC, width); 94 | printf("\n\n"); 95 | 96 | printf("Match result:\n"); 97 | if (tail.is_valid()) { 98 | print_match(text.begin, tail.begin, tail.end, 0x80FF80, 0xCCCCCC, width); 99 | } 100 | else { 101 | print_match(text.begin, tail.end, text.end, 0xCCCCCC, 0x8080FF, width); 102 | } 103 | printf("\n\n"); 104 | } 105 | 106 | //------------------------------------------------------------------------------ 107 | 108 | inline void print_typeid_name(const char* name, int max_len = 0) { 109 | int name_len = 0; 110 | 111 | while((*name >= '0') && (*name <= '9')) { 112 | name_len *= 10; 113 | name_len += *name - '0'; 114 | name++; 115 | } 116 | 117 | if (max_len && name_len > max_len) name_len = max_len; 118 | 119 | for (int i = 0; i < 
name_len; i++) { 120 | putc(name[i], stdout); 121 | } 122 | for (int i = name_len; i < max_len; i++) { 123 | putc(' ', stdout); 124 | } 125 | } 126 | 127 | //------------------------------------------------------------------------------ 128 | 129 | template 130 | inline void print_class_name(T* node, int max_len = 0) { 131 | print_typeid_name(typeid(*node).name(), max_len); 132 | } 133 | 134 | //------------------------------------------------------------------------------ 135 | // Prints a text representation of the parse tree. 136 | 137 | template 138 | inline void print_tree(TextSpan text, const node_type* node, int width, int depth, int max_depth = 0) { 139 | 140 | auto span = node->as_text_span(); 141 | 142 | print_match(span.begin, span.end, text.end, 0x80FF80, 0xCCCCCC, width); 143 | print_trellis(depth, node->match_tag, "", 0xFFAAAA); 144 | printf(": "); 145 | print_class_name(node); 146 | if (node->child_head == nullptr) { 147 | auto text_span = node->as_text_span(); 148 | printf(" = "); 149 | set_color(0x80FF80); 150 | printf("%.*s", text_span.len(), text_span.begin); 151 | set_color(0); 152 | } 153 | printf("\n"); 154 | 155 | 156 | if (max_depth && depth == max_depth) return; 157 | 158 | for (auto c = node->child_head; c; c = c->node_next) { 159 | print_tree(text, c, width, depth + 1, max_depth); 160 | } 161 | } 162 | 163 | template 164 | inline void print_trees(const context& ctx, TextSpan text, int width, int max_depth = 0) { 165 | printf("Parse tree:\n"); 166 | for (auto node = ctx.top_head; node; node = node->node_next) { 167 | print_tree(text, node, width, 0, max_depth); 168 | } 169 | } 170 | 171 | template 172 | inline void print_summary(const context& ctx, TextSpan text, TextSpan tail, int width) { 173 | print_summary(text, tail, width); 174 | print_trees(ctx, text, width, 0); 175 | } 176 | 177 | }; // namespace utils 178 | }; // namespace matcheroni 179 | -------------------------------------------------------------------------------- 
/test.hancho: -------------------------------------------------------------------------------- 1 | # matcheroni/build.hancho 2 | 3 | hancho.context.includes = "{repo_dir}" 4 | hancho.context.test_dir = "{repo_dir}" 5 | 6 | hancho.base_rules = hancho.repo("{hancho_dir}//base_rules.hancho") 7 | 8 | hancho.c_lexer = hancho.load("examples/c_lexer/c_lexer.hancho") 9 | hancho.c_parser = hancho.load("examples/c_parser/c_parser.hancho") 10 | 11 | json = hancho.load("examples/json/json.hancho") 12 | regex = hancho.load("examples/regex/regex.hancho") 13 | toml = hancho.load("examples/toml/toml.hancho") 14 | tutorial = hancho.load("examples/tutorial/tutorial.hancho") 15 | 16 | #build obj/matcheroni/Matcheroni.hpp.iwyu : iwyu matcheroni/Matcheroni.hpp 17 | #build obj/matcheroni/Parseroni.hpp.iwyu : iwyu matcheroni/Parseroni.hpp 18 | #build obj/matcheroni/Utilities.hpp.iwyu : iwyu matcheroni/Utilities.hpp 19 | 20 | # Apparently I broke this... 21 | #hancho( 22 | # rules.cpp_test, 23 | # in_srcs = "matcheroni_test.cpp", 24 | # out_bin = "matcheroni_test", 25 | #) 26 | 27 | hancho( 28 | hancho.base_rules.cpp_test, 29 | in_srcs = "tests/parseroni_test.cpp", 30 | out_bin = "tests/parseroni_test", 31 | ) 32 | -------------------------------------------------------------------------------- /tests/dummy.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | // blah bleelaksdjfs 4 | -------------------------------------------------------------------------------- /tests/scratch.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | 5 | using text_span = std::span; 6 | 7 | struct Context { 8 | 9 | Context(text_span _span) : span(_span) {} 10 | Context(const char* text) : span(text, text + strlen(text)) {} 11 | 12 | text_span span; 13 | 14 | template 15 | text_span take() { 16 | return text_span(); 17 | } 18 | }; 19 | 20 | struct identifier {}; 21 | struct 
whitespace {}; 22 | 23 | void blah() { 24 | 25 | Context ctx("Hello World"); 26 | 27 | auto foo1 = ctx.take(); 28 | auto foo2 = ctx.take(); 29 | auto foo3 = ctx.take(); 30 | 31 | } 32 | 33 | // pattern span context 34 | 35 | 36 | // pattern + span + context -> head / tail 37 | 38 | // tail = context.match(); 39 | // tail = pattern::match(context); 40 | 41 | // tail = pattern::match(context, span); 42 | 43 | // head = take(context, span); 44 | --------------------------------------------------------------------------------