├── .github └── workflows │ └── testing.yml ├── .gitignore ├── CMakeLists.txt ├── LICENSE ├── README.md ├── benchmark ├── benchmark.sh ├── grammars │ ├── calc.peg │ ├── json.peg │ └── kotlin.peg └── inputs │ ├── calc.txt │ ├── json.json │ └── kotlin.kt ├── examples ├── README.md ├── ast-calc.peg ├── ast-calc.v3.peg ├── ast-tinyc │ ├── .gitignore │ ├── CMakeLists.txt │ ├── README.md │ ├── inputs │ │ ├── erroneous1.c │ │ ├── example1.c │ │ ├── example2.c │ │ ├── example3.c │ │ ├── example4.c │ │ └── example5.c │ ├── main.c │ ├── parser.peg │ ├── system.c │ ├── system.h │ ├── utility.c │ └── utility.h └── calc.peg ├── import ├── README.md ├── char │ ├── README.md │ ├── ascii_character_group.peg │ ├── unicode_derived_core.peg │ └── unicode_general_category.peg └── code │ ├── README.md │ ├── pcc_ast.md │ ├── pcc_ast.peg │ ├── pcc_ast.v3.md │ └── pcc_ast.v3.peg ├── misc ├── README.md ├── unicode_derived_core.py └── unicode_general_category.py ├── src └── packcc.c └── tests ├── .gitignore ├── README.md ├── ascii.d ├── ascii.bats ├── expected.txt ├── input.peg └── input.txt ├── ast-calc.d ├── ast-calc.bats ├── expected.txt └── input.txt ├── ast-calc.v3.d ├── ast-calc.bats ├── expected.txt └── input.txt ├── basic.d ├── expected.txt ├── input.peg └── input.txt ├── blank_lines.d ├── input.peg └── lines.bats ├── calc.d ├── calc.bats ├── expected.txt └── input.txt ├── captures.d ├── expected.txt ├── input.peg └── input.txt ├── character_classes_0.d ├── expected.txt ├── input.peg └── input.txt ├── character_classes_1.d ├── expected-utf8.txt ├── expected.txt ├── input-utf8.txt ├── input.peg └── input.txt ├── character_classes_2.d ├── expected.txt ├── input.peg └── input.txt ├── code_generation.d ├── generation.bats ├── input.peg └── main.c ├── code_indentation.d ├── code.bats ├── expected.c.txt └── input.peg ├── code_line_continuation.d ├── dump.bats ├── expected.txt └── input.peg ├── debug_macro.d ├── expected.txt ├── input.peg └── input.txt ├── dump.d ├── dump.bats └── 
expected.txt ├── error_action.d ├── expected.txt ├── input.peg └── input.txt ├── escape_sequences.d ├── expected-hex-0.txt ├── expected-unicode-0.txt ├── expected-unicode-1.txt ├── input-hex-0.txt ├── input-unicode-0.txt ├── input-unicode-1.txt └── input.peg ├── import.d ├── .gitignore ├── check_line_number.py ├── import.bats ├── make_input.py ├── reference.peg └── template.peg ├── import_char.d ├── import_char.bats └── input.peg ├── issue_28.d ├── expected.txt ├── input.peg └── input.txt ├── issue_78.d ├── input.peg └── issue_78.bats ├── lines.d ├── input.peg └── lines.bats ├── main.c ├── negative_predicate.d ├── expected.txt ├── input.peg └── input.txt ├── position.d ├── expected.txt ├── input.peg └── input.txt ├── positive_predicate.d ├── expected.txt ├── input.peg └── input.txt ├── programmable_predicate.d ├── expected.txt ├── input.peg └── input.txt ├── quantifiers.d ├── expected-optional-repeatable.txt ├── expected-optional.txt ├── expected-repeatable.txt ├── input-optional-repeatable.txt ├── input-optional.txt ├── input-repeatable.txt └── input.peg ├── strings.d ├── expected-escapes.txt ├── expected-utf8.txt ├── expected.txt ├── input-escapes.txt ├── input-utf8.txt ├── input.peg └── input.txt ├── style.d └── style.bats ├── substitution.d ├── expected.c.txt ├── expected.h.txt ├── input.peg └── subst.bats ├── test.sh ├── uncrustify.cfg ├── unused_functions.d ├── check.bats ├── test0.peg ├── test1.peg └── test2.peg ├── unused_rule_elimination.d ├── dump.bats ├── expected.txt └── input.peg └── utils.sh /.github/workflows/testing.yml: -------------------------------------------------------------------------------- 1 | name: run test cases 2 | 3 | on: 4 | push: 5 | branches: [main, develop] 6 | pull_request: 7 | branches: [main, develop] 8 | 9 | jobs: 10 | testing: 11 | strategy: 12 | matrix: 13 | os: [ubuntu-22.04, ubuntu-24.04, ubuntu-24.04-arm] 14 | compiler: [gcc, clang] 15 | 16 | runs-on: ${{ matrix.os }} 17 | 18 | env: 19 | CC: ${{ matrix.compiler }} 20 | 21 
| steps: 22 | - uses: actions/checkout@v4 23 | - name: install bats 24 | run: | 25 | curl -L -o bats-core-1.2.1.tar.gz https://github.com/bats-core/bats-core/archive/v1.2.1.tar.gz 26 | tar zxvf bats-core-1.2.1.tar.gz 27 | cd bats-core-1.2.1 && 28 | sudo ./install.sh /usr/local 29 | - name: install uncrustify (X64) 30 | if: runner.arch == 'X64' # expression string literals must be single-quoted; skips the amd64 package on ARM runners 31 | run: | 32 | curl -LO http://launchpadlibrarian.net/516341795/uncrustify_0.72.0+dfsg1-2_amd64.deb 33 | sudo dpkg -i uncrustify_0.72.0+dfsg1-2_amd64.deb || true 34 | - name: install uncrustify (ARM64) 35 | if: runner.arch == 'ARM64' # skips the arm64 package on x86-64 runners 36 | run: | 37 | curl -LO http://launchpadlibrarian.net/516341795/uncrustify_0.72.0+dfsg1-2_arm64.deb 38 | sudo dpkg -i uncrustify_0.72.0+dfsg1-2_arm64.deb || true 39 | - name: build packcc 40 | run: | 41 | ( 42 | mkdir -p build 43 | cd build 44 | cmake -DCMAKE_C_COMPILER=$CC .. 45 | cmake --build . --config Debug --target check --clean-first 46 | cmake --build . --config Release --target check --clean-first 47 | ) 48 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | /.vscode/ 2 | /.cache/ 3 | __pycache__/ 4 | /build/ 5 | /benchmark/tmp/ 6 | *.o 7 | *.obj 8 | -------------------------------------------------------------------------------- /CMakeLists.txt: -------------------------------------------------------------------------------- 1 | cmake_minimum_required(VERSION 3.14) 2 | 3 | project( 4 | packcc 5 | VERSION 3.0.0 6 | DESCRIPTION "A parser generator for C" 7 | HOMEPAGE_URL https://github.com/arithy/packcc 8 | LANGUAGES C 9 | ) 10 | 11 | if(MSVC) 12 | string(REGEX REPLACE "/W[0-3]" "" CMAKE_C_FLAGS "${CMAKE_C_FLAGS}") # To avoid the warning D9025. 
13 | endif() 14 | 15 | function(add_common_compile_options target) 16 | target_compile_features(${target} PRIVATE c_std_90) 17 | target_compile_options( 18 | ${target} PRIVATE 19 | $<$: 20 | $<$: 21 | -fsigned-char -Wall -Wextra -Wno-unused-parameter -Wno-overlength-strings -pedantic -Werror 22 | > 23 | $<$: 24 | -fsigned-char -Wall -Wextra -Wno-unused-parameter -Wno-overlength-strings -pedantic -Werror 25 | > 26 | $<$,$>>: 27 | -Wall -Wextra -Wno-unused-parameter -pedantic -Werror 28 | > 29 | $<$: 30 | /W4 /WX /wd4100 /wd4456 31 | > 32 | $<$,$>: 33 | /W4 /WX 34 | > 35 | > 36 | ) 37 | endfunction() 38 | 39 | add_executable( 40 | packcc 41 | src/packcc.c 42 | ) 43 | add_common_compile_options(packcc) 44 | 45 | add_custom_target( 46 | check 47 | COMMAND "${PROJECT_SOURCE_DIR}/tests/test.sh" 48 | DEPENDS packcc 49 | VERBATIM 50 | ) 51 | set_property( 52 | TARGET check 53 | PROPERTY 54 | ENVIRONMENT "PACKCC=${PROJECT_BINARY_DIR}/packcc;PCC_IMPORT_PATH=${PROJECT_SOURCE_DIR}/import;CC=\"${CMAKE_C_COMPILER} ${CMAKE_C_FLAGS}\"" 55 | WORKING_DIRECTORY "${PROJECT_SOURCE_DIR}/tests" 56 | ) 57 | 58 | install(TARGETS packcc) 59 | install( 60 | DIRECTORY "${PROJECT_SOURCE_DIR}/import" 61 | DESTINATION "." 62 | PATTERN ".*" EXCLUDE 63 | PATTERN "*.txt" EXCLUDE 64 | PATTERN "*.md" EXCLUDE 65 | ) 66 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | PackCC: a packrat parser generator for C. 2 | 3 | Copyright (c) 2014, 2019-2025 Arihiro Yoshida. All rights reserved. 
4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /benchmark/benchmark.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | # 3 | # Generates, builds and runs parsers from grammars directory for each git reference supplied as argument. 4 | # Each action is performed multiple times and the times are averaged. Peak memory consumption is also measured. 5 | # First reference is always taken as a "baseline" and others are compared to it. This makes it possible to compare 6 | # how any given commit affects PackCC's performance. 7 | # 8 | # Usage: 9 | # ./benchmark.sh <git-ref> ... 
10 | # 11 | # Environment: 12 | # CC Compiler to use, default: "cc -O2" 13 | # GEN_REPEATS How many times to generate the parser, default: 1 14 | # BUILD_REPEATS How many times to build the parser, default: 1 15 | # RUN_REPEATS How many times to run the given parser, default: 1 16 | # 17 | # Example: 18 | # CC="clang -O3" ./benchmark.sh origin/master 6015afc HEAD 19 | 20 | build() { 21 | echo "Building packcc..." 22 | $CC -o "$PACKCC" "$ROOTDIR/src/packcc.c" 23 | } 24 | 25 | clean() { 26 | rm -rf "$BENCHDIR/tmp" 27 | } 28 | 29 | format() { 30 | TIME="$1" 31 | if [ $((TIME / 1000000000)) -gt 10 ]; then 32 | echo "$((TIME / 1000000000)) s" 33 | elif [ $((TIME / 1000000)) -gt 10 ]; then 34 | echo "$((TIME / 1000000)) ms" 35 | elif [ $((TIME / 1000)) -gt 10 ]; then 36 | echo "$((TIME / 1000)) us" 37 | else 38 | echo "$TIME ns" 39 | fi 40 | } 41 | 42 | format_mem() { 43 | MEM="$1" 44 | if [ -z "$TIME_CMD" ]; then 45 | echo "??? kB" 46 | elif [ $((MEM / 1048576)) -gt 10 ]; then 47 | echo "$((MEM / 1048576)) GB" 48 | elif [ $((MEM / 1024)) -gt 10 ]; then 49 | echo "$((MEM / 1024)) MB" 50 | else 51 | echo "$MEM kB" 52 | fi 53 | } 54 | 55 | measure() { 56 | COUNT="$1" 57 | shift 58 | MEM=0 59 | if [ "$TIME_CMD" ]; then 60 | MEM="$("${TIME_CMD[@]}" -f %M "$@" 2>&1 >/dev/null)" 61 | fi 62 | START="$(date '+%s%N')" 63 | for ((i=0; i<COUNT; i++)); do 64 | "$@" > /dev/null 65 | done 66 | END="$(date '+%s%N')" 67 | TIME=$(( END - START )) 68 | } 69 | 70 | benchmark() { 71 | KEY="${GRAMMAR}_${REF//\//_}" 72 | NAME="tmp/parser_$KEY" 73 | 74 | echo "Generating $GRAMMAR parser in $REF ($GEN_REPEATS times)..." 75 | measure "$GEN_REPEATS" "$PACKCC" -o "$NAME" "$GRAMMAR_FILE" 76 | GEN_TIME["$KEY"]=$TIME 77 | GEN_MEM["$KEY"]=$MEM 78 | echo " Repeated $GEN_REPEATS times in $(format $TIME), peak memory $(format_mem $MEM)" 79 | 80 | echo "Building $GRAMMAR parser in $REF ($BUILD_REPEATS times)..." 81 | measure "$BUILD_REPEATS" $CC -I. 
"$NAME".c -o "$NAME" 82 | BUILD_TIME["$KEY"]=$TIME 83 | BUILD_MEM["$KEY"]=$MEM 84 | echo " Built $BUILD_REPEATS times in $(format $TIME), peak memory $(format_mem $MEM)" 85 | 86 | echo "Running $GRAMMAR parser in $REF ($RUN_REPEATS times)..." 87 | measure "$RUN_REPEATS" "./$NAME" "$INPUT" 88 | RUN_TIME["$KEY"]=$TIME 89 | RUN_MEM["$KEY"]=$MEM 90 | echo " Repeated $RUN_REPEATS times in $(format $TIME), peak memory $(format_mem $MEM)" 91 | } 92 | 93 | print_table() { 94 | declare -n RESULTS_TIME="${1}_TIME" 95 | declare -n RESULTS_MEM="${1}_MEM" 96 | printf "%-12s" "" 97 | for REF in "${REFS[@]}"; do 98 | printf "%-32s" "$REF" 99 | done 100 | printf "\n" 101 | MEMORY=0 102 | RELATIVE_MEM="???" 103 | COLOR_MEM=0 104 | for GRAMMAR in "${GRAMMARS[@]}"; do 105 | printf "%-12s" "$GRAMMAR" 106 | for REF in "${REFS[@]}"; do 107 | KEY="${GRAMMAR}_${REF//\//_}" 108 | BASE="${GRAMMAR}_${REFS[0]//\//_}" 109 | TIME="$((${RESULTS_TIME["$KEY"]} / ${1}_REPEATS))" # $1 is GEN, BUILD or RUN: average over that phase's own repeat count 110 | RELATIVE_TIME="$((100 * RESULTS_TIME["$KEY"] / RESULTS_TIME["$BASE"]))" 111 | COLOR=$((RELATIVE_TIME == 100 ? 0 : ( RELATIVE_TIME > 100 ? 
31 : 32))) 116 | fi 117 | printf "\033[0;${COLOR}m%-16s\033[0;${COLOR_MEM}m%-16s\033[0m" "$(format $TIME) ($RELATIVE_TIME%)" "$(format_mem $MEMORY) ($RELATIVE_MEM%)" 118 | done 119 | printf "\n" 120 | done 121 | } 122 | 123 | print_results() { 124 | echo 125 | echo "Generation performance:" 126 | echo "=======================" 127 | print_table GEN 128 | echo 129 | echo "Build performance:" 130 | echo "==================" 131 | print_table BUILD 132 | echo 133 | echo "Run performance:" 134 | echo "================" 135 | print_table RUN 136 | echo 137 | } 138 | 139 | main() { 140 | set -e 141 | 142 | BENCHDIR="$(cd "$(dirname "$0")" && pwd)" 143 | ROOTDIR="$BENCHDIR/.." 144 | declare -a GRAMMARS=() 145 | declare -A BUILD_TIME GEN_TIME RUN_TIME BUILD_MEM GEN_MEM RUN_MEM 146 | 147 | declare -i GEN_REPEATS="${GEN_REPEATS:-1}" 148 | declare -i BUILD_REPEATS="${BUILD_REPEATS:-1}" 149 | declare -i RUN_REPEATS="${RUN_REPEATS:-1}" 150 | CC="${CC:-cc -O2}" 151 | REFS=("$@") 152 | 153 | if [[ $# -eq 0 || "$1" =~ ^(-h|--help|--usage)$ ]]; then # anchored so a ref that merely contains "-h" is not treated as a help request 154 | sed -n '3,/^$/s/^#//p' "$0" 155 | exit 0 156 | fi 157 | 158 | if which busybox &> /dev/null; then 159 | TIME_CMD=(busybox time) 160 | elif which time &> /dev/null; then 161 | TIME_CMD=("$(which time)") 162 | else 163 | echo "NOTE: No time command found, please install GNU time or busybox to measure memory consumption." 164 | TIME_CMD="" 165 | fi 166 | 167 | START_REF="$(git name-rev --name-only HEAD)" 168 | trap "echo 'Returning to $START_REF...' 
&& git checkout $START_REF" EXIT ERR INT 169 | 170 | cd "$BENCHDIR" 171 | clean 172 | mkdir "tmp" 173 | cp -aL inputs grammars tmp/ 174 | 175 | for REF in "${REFS[@]}"; do 176 | PACKCC="tmp/packcc_${REF//\//_}" 177 | git checkout "$REF" 178 | build 179 | for GRAMMAR_FILE in "tmp/grammars"/*.peg ; do 180 | GRAMMAR="$(basename "$GRAMMAR_FILE" .peg)" 181 | [ "$REF" == "${REFS[0]}" ] && GRAMMARS+=("$GRAMMAR") 182 | INPUT="$(ls "tmp/inputs/$GRAMMAR"*)" 183 | benchmark 184 | done 185 | done 186 | 187 | print_results 188 | } 189 | 190 | main "$@" 191 | -------------------------------------------------------------------------------- /benchmark/grammars/calc.peg: -------------------------------------------------------------------------------- 1 | %prefix "calc" 2 | 3 | %source { 4 | #include <stdio.h> 5 | #include <stdlib.h> 6 | } 7 | 8 | statement <- _ e:expression _ EOL { printf("answer=%d\n", e); } 9 | / ( !EOL . )* EOL { printf("error\n"); } 10 | 11 | expression <- e:term { $$ = e; } 12 | 13 | term <- l:term _ '+' _ r:factor { $$ = l + r; } 14 | / l:term _ '-' _ r:factor { $$ = l - r; } 15 | / e:factor { $$ = e; } 16 | 17 | factor <- l:factor _ '*' _ r:unary { $$ = l * r; } 18 | / l:factor _ '/' _ r:unary { $$ = l / r; } 19 | / e:unary { $$ = e; } 20 | 21 | unary <- '+' _ e:unary { $$ = +e; } 22 | / '-' _ e:unary { $$ = -e; } 23 | / e:primary { $$ = e; } 24 | 25 | primary <- < [0-9]+ > { $$ = atoi($1); } 26 | / '(' _ e:expression _ ')' { $$ = e; } 27 | 28 | _ <- [ \t]* 29 | EOL <- '\n' / '\r\n' / '\r' / ';' 30 | 31 | %% 32 | int main(int argc, char **argv) { 33 | if (argc > 1) { 34 | freopen(argv[1], "r", stdin); 35 | } 36 | calc_context_t *ctx = calc_create(NULL); 37 | while (calc_parse(ctx, NULL)); 38 | calc_destroy(ctx); 39 | return 0; 40 | } 41 | -------------------------------------------------------------------------------- /benchmark/grammars/json.peg: -------------------------------------------------------------------------------- 1 | %prefix "json" 2 | 3 | file <- _ (object / 
array) _ 4 | object <- '{' ( pair (',' pair)* / _ ) '}' 5 | pair <- _ string _ ':' value 6 | array <- '[' ( value (',' value)* / _ ) ']' 7 | value <- _ (object / array / boolean / number / string / null) _ 8 | boolean <- 'false' / 'true' 9 | number <- '-'? ('0' / [1-9] [0-9]*) ('.' [0-9]+)? ([eE] [-+]? [0-9]+)? 10 | string <- '"' ('\\"' / [^"])* '"' 11 | null <- 'null' 12 | _ <- [ \n\r\t]* 13 | 14 | %% 15 | int main(int argc, char **argv) { 16 | if (argc > 1) { 17 | freopen(argv[1], "r", stdin); 18 | } 19 | json_context_t *ctx = json_create(NULL); 20 | while (json_parse(ctx, NULL)); 21 | json_destroy(ctx); 22 | return 0; 23 | } 24 | -------------------------------------------------------------------------------- /examples/README.md: -------------------------------------------------------------------------------- 1 | # Examples 2 | 3 | ## Overview 4 | 5 | In this directory, examples are stored. 6 | 7 | ## Example List 8 | 9 | - [Desktop Calculator](calc.peg) 10 | - [Simple AST builder](ast-calc.peg) : using `ast-calc.peg` (deprecated) 11 | - [Simple AST builder](ast-calc.v3.peg) : using `ast-calc.v3.peg` 12 | - [AST Builder for Tiny-C](ast-tinyc) 13 | 14 | For details, see PackCC [README.md](../README.md). 15 | -------------------------------------------------------------------------------- /examples/ast-calc.peg: -------------------------------------------------------------------------------- 1 | # This code is hereby placed in the public domain. 2 | # 3 | # THIS SOFTWARE IS PROVIDED BY THE AUTHORS ''AS IS'' AND ANY EXPRESS 4 | # OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 5 | # WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 6 | # ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE 7 | # LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 8 | # CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 9 | # SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR 10 | # BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, 11 | # WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE 12 | # OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, 13 | # EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 14 | 15 | %prefix "calc" 16 | 17 | %value "calc_ast_node_t *" # <-- must be set 18 | 19 | %auxil "calc_ast_manager_t *" # <-- must be set 20 | 21 | %header { 22 | #define CALC_AST_NODE_CUSTOM_DATA_DEFINED /* <-- enables node custom data */ 23 | 24 | typedef struct text_data_tag { /* <-- node custom data type */ 25 | char *text; 26 | } calc_ast_node_custom_data_t; 27 | } 28 | 29 | %source { 30 | #include 31 | #include 32 | } 33 | 34 | statement <- _ e:expression _ EOL { $$ = e; } 35 | / ( !EOL . 
)* EOL { $$ = NULL; } 36 | 37 | expression <- e:term { $$ = e; } 38 | 39 | term <- l:term _ '+' _ r:factor { $$ = calc_ast_node__create_2(l, r); $$->custom.text = strdup("+"); } 40 | / l:term _ '-' _ r:factor { $$ = calc_ast_node__create_2(l, r); $$->custom.text = strdup("-"); } 41 | / e:factor { $$ = e; } 42 | 43 | factor <- l:factor _ '*' _ r:unary { $$ = calc_ast_node__create_2(l, r); $$->custom.text = strdup("*"); } 44 | / l:factor _ '/' _ r:unary { $$ = calc_ast_node__create_2(l, r); $$->custom.text = strdup("/"); } 45 | / e:unary { $$ = e; } 46 | 47 | unary <- '+' _ e:unary { $$ = calc_ast_node__create_1(e); $$->custom.text = strdup("+"); } 48 | / '-' _ e:unary { $$ = calc_ast_node__create_1(e); $$->custom.text = strdup("-"); } 49 | / e:primary { $$ = e; } 50 | 51 | primary <- < [0-9]+ > { $$ = calc_ast_node__create_0(); $$->custom.text = strdup($1); } 52 | / '(' _ e:expression _ ')' { $$ = e; } 53 | 54 | _ <- [ \t]* 55 | EOL <- '\n' / '\r\n' / '\r' / ';' 56 | 57 | %import "code/pcc_ast.peg" # <-- provides AST build functions 58 | 59 | %% 60 | void calc_ast_node_custom_data__initialize(calc_ast_manager_t *mgr, calc_ast_node_custom_data_t *obj) { 61 | obj->text = NULL; 62 | } /* <-- must be implemented when enabling node custom data */ 63 | 64 | void calc_ast_node_custom_data__finalize(calc_ast_manager_t *mgr, calc_ast_node_custom_data_t *obj) { 65 | free(obj->text); 66 | } /* <-- must be implemented when enabling node custom data */ 67 | 68 | static void dump_ast(const calc_ast_node_t *obj, int depth) { 69 | if (obj) { 70 | switch (obj->type) { 71 | case CALC_AST_NODE_TYPE_NULLARY: 72 | printf("%*s%s: \"%s\"\n", 2 * depth, "", "nullary", obj->custom.text); 73 | break; 74 | case CALC_AST_NODE_TYPE_UNARY: 75 | printf("%*s%s: \"%s\"\n", 2 * depth, "", "unary", obj->custom.text); 76 | dump_ast(obj->data.unary.node, depth + 1); 77 | break; 78 | case CALC_AST_NODE_TYPE_BINARY: 79 | printf("%*s%s: \"%s\"\n", 2 * depth, "", "binary", obj->custom.text); 80 | 
dump_ast(obj->data.binary.node[0], depth + 1); 81 | dump_ast(obj->data.binary.node[1], depth + 1); 82 | break; 83 | case CALC_AST_NODE_TYPE_TERNARY: 84 | printf("%*s%s: \"%s\"\n", 2 * depth, "", "ternary", obj->custom.text); 85 | dump_ast(obj->data.ternary.node[0], depth + 1); 86 | dump_ast(obj->data.ternary.node[1], depth + 1); 87 | dump_ast(obj->data.ternary.node[2], depth + 1); 88 | break; 89 | case CALC_AST_NODE_TYPE_VARIADIC: 90 | printf("%*s%s: \"%s\"\n", 2 * depth, "", "variadic", obj->custom.text); 91 | { 92 | size_t i; 93 | for (i = 0; i < obj->data.variadic.len; i++) { 94 | dump_ast(obj->data.variadic.node[i], depth + 1); 95 | } 96 | } 97 | break; 98 | default: 99 | printf("%*s%s: \"%s\"\n", 2 * depth, "", "(unknown)", obj->custom.text); 100 | break; 101 | } 102 | } 103 | else { 104 | printf("%*s(null)\n", 2 * depth, ""); 105 | } 106 | } 107 | 108 | int main(int argc, char **argv) { 109 | calc_ast_manager_t mgr; 110 | calc_ast_manager__initialize(&mgr); 111 | { 112 | calc_context_t *ctx = calc_create(&mgr); 113 | calc_ast_node_t *ast = NULL; 114 | while (calc_parse(ctx, &ast)) { 115 | dump_ast(ast, 0); 116 | calc_ast_node__destroy(&mgr, ast); 117 | } 118 | calc_destroy(ctx); 119 | } 120 | calc_ast_manager__finalize(&mgr); 121 | return 0; 122 | } 123 | -------------------------------------------------------------------------------- /examples/ast-calc.v3.peg: -------------------------------------------------------------------------------- 1 | # This code is hereby placed in the public domain. 2 | # 3 | # THIS SOFTWARE IS PROVIDED BY THE AUTHORS ''AS IS'' AND ANY EXPRESS 4 | # OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 5 | # WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 6 | # ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE 7 | # LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 8 | # CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 9 | # SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR 10 | # BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, 11 | # WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE 12 | # OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, 13 | # EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 14 | 15 | %prefix "calc" 16 | 17 | %value "calc_ast_node_t *" # <-- must be set 18 | 19 | %auxil "calc_ast_manager_t *" # <-- must be set 20 | 21 | %header { 22 | #define CALC_AST_NODE_CUSTOM_DATA_DEFINED /* <-- enables node custom data */ 23 | 24 | typedef struct text_data_tag { /* <-- node custom data type */ 25 | char *text; 26 | } calc_ast_node_custom_data_t; 27 | } 28 | 29 | %source { 30 | #include 31 | #include 32 | } 33 | 34 | statement <- _ e:expression _ EOL { $$ = e; } 35 | / ( !EOL . 
)* EOL { $$ = NULL; } 36 | 37 | expression <- e:term { $$ = e; } 38 | 39 | term <- l:term _ '+' _ r:factor { $$ = calc_ast_node__create_2(l, r); $$->custom.text = strdup("+"); } 40 | / l:term _ '-' _ r:factor { $$ = calc_ast_node__create_2(l, r); $$->custom.text = strdup("-"); } 41 | / e:factor { $$ = e; } 42 | 43 | factor <- l:factor _ '*' _ r:unary { $$ = calc_ast_node__create_2(l, r); $$->custom.text = strdup("*"); } 44 | / l:factor _ '/' _ r:unary { $$ = calc_ast_node__create_2(l, r); $$->custom.text = strdup("/"); } 45 | / e:unary { $$ = e; } 46 | 47 | unary <- '+' _ e:unary { $$ = calc_ast_node__create_1(e); $$->custom.text = strdup("+"); } 48 | / '-' _ e:unary { $$ = calc_ast_node__create_1(e); $$->custom.text = strdup("-"); } 49 | / e:primary { $$ = e; } 50 | 51 | primary <- < [0-9]+ > { $$ = calc_ast_node__create_0(); $$->custom.text = strdup($1); } 52 | / '(' _ e:expression _ ')' { $$ = e; } 53 | 54 | _ <- [ \t]* 55 | EOL <- '\n' / '\r\n' / '\r' / ';' 56 | 57 | %import "code/pcc_ast.v3.peg" # <-- provides AST build functions 58 | 59 | %% 60 | void calc_ast_node_custom_data__initialize(calc_ast_manager_t *mgr, calc_ast_node_custom_data_t *obj) { 61 | obj->text = NULL; 62 | } /* <-- must be implemented when enabling node custom data */ 63 | 64 | void calc_ast_node_custom_data__finalize(calc_ast_manager_t *mgr, calc_ast_node_custom_data_t *obj) { 65 | free(obj->text); 66 | } /* <-- must be implemented when enabling node custom data */ 67 | 68 | static void dump_ast(const calc_ast_node_t *obj, int depth) { 69 | if (obj) { 70 | const size_t n = calc_ast_node__get_child_count(obj); 71 | const calc_ast_node_t *const *const p = calc_ast_node__get_child_array(obj); 72 | const calc_ast_node_custom_data_t *const d = &(obj->custom); 73 | const int b = calc_ast_node__is_variadic(obj); 74 | if (b || n <= 3) { 75 | static const char *const arity_name[] = { "nullary", "unary", "binary", "ternary" }; 76 | printf("%*s%s: \"%s\"\n", 2 * depth, "", b ? 
"variadic" : arity_name[n], d->text); 77 | { 78 | size_t i; 79 | for (i = 0; i < n; i++) { 80 | dump_ast(p[i], depth + 1); 81 | } 82 | } 83 | } 84 | else { 85 | printf("%*s%s: \"%s\"\n", 2 * depth, "", "(unknown)", d->text); 86 | } 87 | } 88 | else { 89 | printf("%*s(null)\n", 2 * depth, ""); 90 | } 91 | } 92 | 93 | int main(int argc, char **argv) { 94 | calc_ast_manager_t mgr; 95 | calc_ast_manager__initialize(&mgr); 96 | { 97 | calc_context_t *ctx = calc_create(&mgr); 98 | calc_ast_node_t *ast = NULL; 99 | while (calc_parse(ctx, &ast)) { 100 | dump_ast(ast, 0); 101 | calc_ast_node__destroy(&mgr, ast); 102 | } 103 | calc_destroy(ctx); 104 | } 105 | calc_ast_manager__finalize(&mgr); 106 | return 0; 107 | } 108 | -------------------------------------------------------------------------------- /examples/ast-tinyc/.gitignore: -------------------------------------------------------------------------------- 1 | /build/ 2 | -------------------------------------------------------------------------------- /examples/ast-tinyc/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | cmake_minimum_required(VERSION 3.8...3.14) # c_std_99 needs CMake 3.8; the policy upper bound keeps CMake 4.x from rejecting the file 2 | 3 | project(ast LANGUAGES C) 4 | 5 | set(PACKCC packcc CACHE FILEPATH "Specify file path of packcc command.") 6 | 7 | add_custom_command( 8 | OUTPUT parser.c parser.h 9 | COMMAND "${PACKCC}" -o parser "${CMAKE_CURRENT_SOURCE_DIR}/parser.peg" 10 | DEPENDS "${CMAKE_CURRENT_SOURCE_DIR}/parser.peg" 11 | VERBATIM 12 | ) 13 | 14 | add_executable(ast main.c parser.c system.c utility.c) 15 | 16 | target_compile_features(ast PRIVATE c_std_99) 17 | target_include_directories(ast BEFORE PRIVATE "${CMAKE_CURRENT_SOURCE_DIR}" "${CMAKE_CURRENT_BINARY_DIR}") 18 | -------------------------------------------------------------------------------- /examples/ast-tinyc/README.md: 
-------------------------------------------------------------------------------- 1 | # AST Builder for Tiny-C # 2 | 3 | ## Overview ## 4 | 5 | This example builds an AST 
(abstract syntax tree) from an input source file 6 | written in [Tiny-C](http://www.iro.umontreal.ca/~felipe/IFT2030-Automne2002/Complements/tinyc.c) with the extensions listed below, 7 | and prints the AST to the standard output. 8 | If there are syntax errors, it shows them with line and column numbers. 9 | 10 | __Extensions:__ 11 | - Supports all arithmetic, logical and bitwise operators, 12 | - Accepts octal and hexadecimal integers, 13 | - Permits uppercase letters, digits, and underscores in an identifier, 14 | - Accepts `'\v'` and `'\f'` as whitespace characters, 15 | - Supports comment blocks. 16 | 17 | This example is placed in the public domain! 18 | So, you can use it freely without noticing the copyright of the original author. 19 | 20 | ## How to compile this example ## 21 | 22 | ### For Unix-like OS ### 23 | 24 | You can get the executable by executing the following commands: 25 | 26 | ``` 27 | cd /path/to/this_directory 28 | mkdir build 29 | cd build 30 | cmake -DPACKCC=/path/to/packcc .. 31 | make 32 | ``` 33 | 34 | Here, `/path/to/this_directory` represents the path name of this directory, 35 | and `/path/to/packcc` represents the path name of `packcc` command. 36 | If `packcc` command is installed in one of the directories specified in the environment variable `PATH`, 37 | the option `-DPACKCC=/path/to/packcc` is not necessary. 38 | 39 | The executable `ast` will be created in the directory `build`. 40 | 41 | ### For Windows ### 42 | 43 | #### Using Visual Studio #### 44 | 45 | You must have [Build Tools for Visual Studio](https://visualstudio.microsoft.com/downloads/#build-tools-for-visual-studio-2019) installed in your system. 46 | You can get the executable by executing the following commands using 'Developer Command Prompt for VS 2019' or 'Developer PowerShell for VS 2019': 47 | 48 | ``` 49 | cd \path\to\this_directory 50 | mkdir build 51 | cd build 52 | cmake -DPACKCC=\path\to\packcc .. 
53 | MSBuild ALL_BUILD.vcxproj 54 | ``` 55 | 56 | Here, `\path\to\this_directory` represents the path name of this directory, 57 | and `\path\to\packcc` represents the path name of `packcc` command. 58 | If `packcc` command is installed in one of the directories specified in the environment variable `PATH`, 59 | the option `-DPACKCC=\path\to\packcc` is not necessary. 60 | 61 | The executable `ast.exe` will be created in the directory `build\Debug`. 62 | 63 | #### Using MinGW-w64 #### 64 | 65 | You can get the executable by executing the following commands: 66 | 67 | ``` 68 | cd /path/to/this_directory 69 | mkdir build 70 | cd build 71 | cmake -G "MSYS Makefiles" -DPACKCC=/path/to/packcc .. 72 | make 73 | ``` 74 | 75 | Here, `/path/to/this_directory` represents the path name of this directory, 76 | and `/path/to/packcc` represents the path name of `packcc` command. 77 | If `packcc` command is installed in one of the directories specified in the environment variable `PATH`, 78 | the option `-DPACKCC=/path/to/packcc` is not necessary. 79 | 80 | The executable `ast.exe` will be created in the directory `build`. 81 | 82 | ## How to run this example ## 83 | 84 | Example input source files are prepared in [`inputs`](inputs) directory. 85 | Most of these are taken from [grammars-v4](https://github.com/antlr/grammars-v4/) repository of [ANTLR project](https://github.com/antlr/) with thanks. 86 | 87 | Here, it is assumed that you are in the directory `build`. 
88 | If you want to print out the AST of the input source file [`inputs/example2.c`](inputs/example2.c), execute the following command: 89 | 90 | ``` 91 | ./ast ../inputs/example2.c 92 | ``` 93 | 94 | You will see the output below: 95 | 96 | ``` 97 | STATEMENT_LIST: arity = 1 98 | STATEMENT_LIST: arity = 3 99 | OPERATOR_ASSIGN: arity = 2 100 | IDENTIFIER: line = 2, column = 3, value = 'i' 101 | INTEGER_DEC: line = 2, column = 5, value = '125' 102 | OPERATOR_ASSIGN: arity = 2 103 | IDENTIFIER: line = 2, column = 10, value = 'j' 104 | INTEGER_DEC: line = 2, column = 12, value = '100' 105 | STATEMENT_WHILE: arity = 2 106 | OPERATOR_SUB: arity = 2 107 | IDENTIFIER: line = 2, column = 24, value = 'i' 108 | IDENTIFIER: line = 2, column = 26, value = 'j' 109 | STATEMENT_IF_ELSE: arity = 3 110 | OPERATOR_LT: arity = 2 111 | IDENTIFIER: line = 2, column = 33, value = 'i' 112 | IDENTIFIER: line = 2, column = 35, value = 'j' 113 | OPERATOR_ASSIGN: arity = 2 114 | IDENTIFIER: line = 2, column = 38, value = 'j' 115 | OPERATOR_SUB: arity = 2 116 | IDENTIFIER: line = 2, column = 40, value = 'j' 117 | IDENTIFIER: line = 2, column = 42, value = 'i' 118 | OPERATOR_ASSIGN: arity = 2 119 | IDENTIFIER: line = 2, column = 50, value = 'i' 120 | OPERATOR_SUB: arity = 2 121 | IDENTIFIER: line = 2, column = 52, value = 'i' 122 | IDENTIFIER: line = 2, column = 54, value = 'j' 123 | ``` 124 | 125 | If there are errors in the input source file, this example reports up to 4 errors at a time. 
126 | You can see this behavior by the following command using [`inputs/erroneous1.c`](inputs/erroneous1.c): 127 | 128 | ``` 129 | ./ast ../inputs/erroneous1.c 130 | ``` 131 | 132 | You will see the error messages below: 133 | 134 | ``` 135 | ERROR: line 6, column 5: Unexpected token 'do' 136 | ERROR: line 7, column 5: Statement missing after 'do' 137 | ERROR: line 4, column 5: 'else' without corresponding 'if' 138 | ERROR: line 3, column 5: 'else' without corresponding 'if' 139 | ``` 140 | 141 | Note that the multiple error recognition of this example is not sophisticated. 142 | To improve it, you have to design elaborate grammar rules for error skipping to detect next multiple errors naturally. 143 | -------------------------------------------------------------------------------- /examples/ast-tinyc/inputs/erroneous1.c: -------------------------------------------------------------------------------- 1 | /* This code has multiple errors for testing. */ 2 | 3 | else 4 | else 5 | if 6 | do 7 | do 8 | -------------------------------------------------------------------------------- /examples/ast-tinyc/inputs/example1.c: -------------------------------------------------------------------------------- 1 | /* https://github.com/antlr/grammars-v4/blob/master/tinyc/examples/example1.c */ 2 | { i=1; while (i<100) i=i+i; } 3 | -------------------------------------------------------------------------------- /examples/ast-tinyc/inputs/example2.c: -------------------------------------------------------------------------------- 1 | /* https://github.com/antlr/grammars-v4/blob/master/tinyc/examples/example2.c */ 2 | { i=125; j=100; while (i-j) if (i 21 | 22 | int main(int argc, char **argv) { 23 | if (argc > 2) { 24 | fprintf(stderr, "ERROR: Too many arguments\n"); 25 | return 1; /* usage error */ 26 | } 27 | const char *path = (argc > 1) ? 
argv[1] : NULL; 28 | int ret = 0; 29 | system_t system; 30 | parser_context_t *parser = NULL; 31 | if (setjmp(system.jmp) == 0) { 32 | system__initialize(&system); 33 | system__open_source_file(&system, path); 34 | parser = parser_create(&system); 35 | ast_node_t *ast; 36 | const int b = parser_parse(parser, &ast); 37 | if (system.source.ecount > 0) longjmp(system.jmp, 1); /* never returns */ 38 | if (b) { 39 | ret = 10; /* internal error */ 40 | fprintf(stderr, "FATAL: Internal error\n"); 41 | /* <-- input text remaining due to incompleteness of the grammar */ 42 | } 43 | else { 44 | system__dump_ast(&system, ast); 45 | } 46 | } 47 | else { 48 | ret = 2; /* error during parsing */ 49 | } 50 | parser_destroy(parser); 51 | system__finalize(&system); /* all system resources are freed */ 52 | return ret; 53 | } 54 | -------------------------------------------------------------------------------- /examples/ast-tinyc/system.h: -------------------------------------------------------------------------------- 1 | /* 2 | * This code is hereby placed in the public domain. 3 | * 4 | * THIS SOFTWARE IS PROVIDED BY THE AUTHORS ''AS IS'' AND ANY EXPRESS 5 | * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 6 | * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 7 | * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE 8 | * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 9 | * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 10 | * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR 11 | * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, 12 | * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE 13 | * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, 14 | * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
15 | */ 16 | 17 | #ifndef INCLUDED_SYSTEM_H 18 | #define INCLUDED_SYSTEM_H 19 | 20 | #include "utility.h" 21 | 22 | #include 23 | #include 24 | 25 | #ifdef __cplusplus 26 | extern "C" { 27 | #endif 28 | 29 | typedef struct system_tag system_t; 30 | typedef struct ast_node_tag ast_node_t; 31 | 32 | typedef enum ast_node_type_tag { 33 | AST_NODE_TYPE_IDENTIFIER, 34 | AST_NODE_TYPE_INTEGER_DEC, 35 | AST_NODE_TYPE_INTEGER_OCT, 36 | AST_NODE_TYPE_INTEGER_HEX, 37 | AST_NODE_TYPE_OPERATOR_PLUS, 38 | AST_NODE_TYPE_OPERATOR_MINUS, 39 | AST_NODE_TYPE_OPERATOR_INV, 40 | AST_NODE_TYPE_OPERATOR_NOT, 41 | AST_NODE_TYPE_OPERATOR_INC, 42 | AST_NODE_TYPE_OPERATOR_DEC, 43 | AST_NODE_TYPE_OPERATOR_POST_INC, 44 | AST_NODE_TYPE_OPERATOR_POST_DEC, 45 | AST_NODE_TYPE_OPERATOR_ADD, 46 | AST_NODE_TYPE_OPERATOR_SUB, 47 | AST_NODE_TYPE_OPERATOR_MUL, 48 | AST_NODE_TYPE_OPERATOR_DIV, 49 | AST_NODE_TYPE_OPERATOR_MOD, 50 | AST_NODE_TYPE_OPERATOR_AND, 51 | AST_NODE_TYPE_OPERATOR_AND2, 52 | AST_NODE_TYPE_OPERATOR_OR, 53 | AST_NODE_TYPE_OPERATOR_OR2, 54 | AST_NODE_TYPE_OPERATOR_XOR, 55 | AST_NODE_TYPE_OPERATOR_SHL, 56 | AST_NODE_TYPE_OPERATOR_SHR, 57 | AST_NODE_TYPE_OPERATOR_EQ, 58 | AST_NODE_TYPE_OPERATOR_NE, 59 | AST_NODE_TYPE_OPERATOR_LT, 60 | AST_NODE_TYPE_OPERATOR_LE, 61 | AST_NODE_TYPE_OPERATOR_GT, 62 | AST_NODE_TYPE_OPERATOR_GE, 63 | AST_NODE_TYPE_OPERATOR_COND, 64 | AST_NODE_TYPE_OPERATOR_COMMA, 65 | AST_NODE_TYPE_OPERATOR_ASSIGN, 66 | AST_NODE_TYPE_OPERATOR_ASSIGN_ADD, 67 | AST_NODE_TYPE_OPERATOR_ASSIGN_SUB, 68 | AST_NODE_TYPE_OPERATOR_ASSIGN_MUL, 69 | AST_NODE_TYPE_OPERATOR_ASSIGN_DIV, 70 | AST_NODE_TYPE_OPERATOR_ASSIGN_MOD, 71 | AST_NODE_TYPE_OPERATOR_ASSIGN_AND, 72 | AST_NODE_TYPE_OPERATOR_ASSIGN_OR, 73 | AST_NODE_TYPE_OPERATOR_ASSIGN_XOR, 74 | AST_NODE_TYPE_OPERATOR_ASSIGN_SHL, 75 | AST_NODE_TYPE_OPERATOR_ASSIGN_SHR, 76 | AST_NODE_TYPE_STATEMENT_VOID, 77 | AST_NODE_TYPE_STATEMENT_IF, 78 | AST_NODE_TYPE_STATEMENT_IF_ELSE, 79 | AST_NODE_TYPE_STATEMENT_WHILE, 80 | 
AST_NODE_TYPE_STATEMENT_DO_WHILE, 81 | AST_NODE_TYPE_STATEMENT_LIST, 82 | AST_NODE_TYPE_ERROR_SKIP, 83 | AST_NODE_TYPE_ERROR_SKIP_IF_0, 84 | AST_NODE_TYPE_ERROR_SKIP_IF_1, 85 | AST_NODE_TYPE_ERROR_SKIP_IF_2, 86 | AST_NODE_TYPE_ERROR_SKIP_ELSE_0, 87 | AST_NODE_TYPE_ERROR_SKIP_ELSE_1, 88 | AST_NODE_TYPE_ERROR_SKIP_DO_0, 89 | AST_NODE_TYPE_ERROR_SKIP_DO_1, 90 | AST_NODE_TYPE_ERROR_SKIP_DO_2, 91 | AST_NODE_TYPE_ERROR_SKIP_WHILE_0, 92 | AST_NODE_TYPE_ERROR_SKIP_WHILE_1, 93 | AST_NODE_TYPE_UNEXPECTED_TOKEN 94 | } ast_node_type_t; 95 | 96 | struct ast_node_tag { 97 | ast_node_type_t type; /* the AST node type */ 98 | range_t range; /* the byte range in the source text */ 99 | size_t arity; /* the number of the child AST nodes */ 100 | ast_node_t *parent; /* the parent AST node */ 101 | struct ast_node_sibling_tag { 102 | ast_node_t *prev; /* the previous sibling AST node */ 103 | ast_node_t *next; /* the next sibling AST node */ 104 | } sibling; 105 | struct ast_node_child_tag { 106 | ast_node_t *first; /* the first child AST node */ 107 | ast_node_t *last; /* the last child AST node */ 108 | } child; 109 | system_t *system; /* the system that manages this AST node */ 110 | struct ast_node_managed_tag { 111 | ast_node_t *prev; /* the previous AST node managed by the same system */ 112 | ast_node_t *next; /* the next AST node managed by the same system */ 113 | } managed; 114 | }; 115 | 116 | typedef enum syntax_error_tag { 117 | SYNTAX_ERROR_IF_WITHOUT_CONDITION, 118 | SYNTAX_ERROR_IF_WITHOUT_STATEMENT, 119 | SYNTAX_ERROR_ELSE_WITHOUT_STATEMENT, 120 | SYNTAX_ERROR_LONE_ELSE, 121 | SYNTAX_ERROR_WHILE_WITHOUT_CONDITION, 122 | SYNTAX_ERROR_WHILE_WITHOUT_STATEMENT, 123 | SYNTAX_ERROR_DO_WITHOUT_STATEMENT, 124 | SYNTAX_ERROR_DO_WITHOUT_WHILE, 125 | SYNTAX_ERROR_NO_ENDING_SEMICOLON, 126 | SYNTAX_ERROR_UNCLOSED_COMMENT_BLOCK, 127 | SYNTAX_ERROR_UNEXPECTED_TOKEN, 128 | SYNTAX_ERROR_UNKNOWN 129 | } syntax_error_t; 130 | 131 | struct system_tag { 132 | struct system_input_tag { 133 
| const char *path; /* the source file path */ 134 | FILE *file; /* the source file pointer */ 135 | char_array_t text; /* the source text */ 136 | size_t_array_t line; /* the byte positions of the line head in the source text */ 137 | size_t ecount; /* the error count */ 138 | } source; 139 | struct system_managed_tag { 140 | ast_node_t *first; /* the first managed AST node */ 141 | ast_node_t *last; /* the last managed AST node */ 142 | } managed; 143 | jmp_buf jmp; 144 | }; 145 | 146 | void system__initialize(system_t *obj); 147 | void system__finalize(system_t *obj); 148 | 149 | void *system__allocate_memory(system_t *obj, size_t size); 150 | void *system__reallocate_memory(system_t *obj, void *ptr, size_t size); 151 | void system__deallocate_memory(system_t *obj, void *ptr); 152 | 153 | void system__open_source_file(system_t *obj, const char *path); /* the standard input if path == NULL */ 154 | void system__close_source_file(system_t *obj); 155 | int system__read_source_file(system_t *obj); 156 | 157 | void system__handle_syntax_error(system_t *obj, syntax_error_t error, range_t range); 158 | 159 | ast_node_t *system__create_ast_node_terminal(system_t *obj, ast_node_type_t type, range_t range); 160 | ast_node_t *system__create_ast_node_unary(system_t *obj, ast_node_type_t type, range_t range, ast_node_t *node1); 161 | ast_node_t *system__create_ast_node_binary(system_t *obj, ast_node_type_t type, range_t range, ast_node_t *node1, ast_node_t *node2); 162 | ast_node_t *system__create_ast_node_ternary(system_t *obj, ast_node_type_t type, range_t range, ast_node_t *node1, ast_node_t *node2, ast_node_t *node3); 163 | ast_node_t *system__create_ast_node_variadic(system_t *obj, ast_node_type_t type, range_t range); 164 | 165 | void system__destroy_all_ast_nodes(system_t *obj); 166 | 167 | void system__dump_ast(system_t *obj, ast_node_t *root); 168 | 169 | void ast_node__prepend_child(ast_node_t *obj, ast_node_t *node); 170 | void ast_node__append_child(ast_node_t 
*obj, ast_node_t *node); 171 | void ast_node__destroy(ast_node_t *obj); 172 | 173 | #ifdef __cplusplus 174 | } 175 | #endif 176 | 177 | #endif /* !INCLUDED_SYSTEM_H */ 178 | -------------------------------------------------------------------------------- /examples/ast-tinyc/utility.c: -------------------------------------------------------------------------------- 1 | /* 2 | * This code is hereby placed in the public domain. 3 | * 4 | * THIS SOFTWARE IS PROVIDED BY THE AUTHORS ''AS IS'' AND ANY EXPRESS 5 | * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 6 | * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 7 | * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE 8 | * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 9 | * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 10 | * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR 11 | * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, 12 | * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE 13 | * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, 14 | * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
15 | */ 16 | 17 | #include "utility.h" 18 | 19 | #include 20 | 21 | #ifndef CHAR_ARRAY_MIN 22 | #define CHAR_ARRAY_MIN 256 /* the minimum number of char_array_t elements to be allocated */ 23 | #endif 24 | 25 | #ifndef SIZE_T_ARRAY_MIN 26 | #define SIZE_T_ARRAY_MIN 16 /* the minimum number of size_t_array_t elements to be allocated */ 27 | #endif 28 | 29 | /* Puts the char array into the empty state: allocated length (m) and actual length (n) are 0 and the buffer pointer is NULL. */ void char_array__initialize(char_array_t *obj) { 30 | obj->m = 0; 31 | obj->n = 0; 32 | obj->p = NULL; 33 | } 34 | 35 | /* Releases the buffer with free(); the members of obj are not reset. */ void char_array__finalize(char_array_t *obj) { 36 | free(obj->p); 37 | } 38 | 39 | /* Sets the actual length to size. When the allocated length is smaller than size, the buffer is regrown with realloc(): the new allocated length starts from the current one (CHAR_ARRAY_MIN if it is 0) and is doubled until it is at least size. Returns BOOL_FALSE and leaves obj unchanged if realloc() fails, BOOL_TRUE otherwise. The buffer is never shrunk here. */ bool_t char_array__resize(char_array_t *obj, size_t size) { 40 | if (obj->m < size) { 41 | size_t m = obj->m; 42 | if (m == 0) m = CHAR_ARRAY_MIN; 43 | while (m < size && m != 0) m <<= 1; 44 | if (m == 0) m = size; /* in case of shift overflow */ 45 | char *const p = (char *)realloc(obj->p, m); 46 | if (p == NULL) return BOOL_FALSE; 47 | obj->p = p; 48 | obj->m = m; 49 | } 50 | obj->n = size; 51 | return BOOL_TRUE; 52 | } 53 | 54 | /* Puts the size_t array into the empty state: allocated length (m) and actual length (n) are 0 and the buffer pointer is NULL. */ void size_t_array__initialize(size_t_array_t *obj) { 55 | obj->m = 0; 56 | obj->n = 0; 57 | obj->p = NULL; 58 | } 59 | 60 | /* Releases the buffer with free(); the members of obj are not reset. */ void size_t_array__finalize(size_t_array_t *obj) { 61 | free(obj->p); 62 | } 63 | 64 | /* Sets the actual length to size, counted in size_t elements. When the allocated length is smaller than size, the buffer is regrown with realloc(): the new allocated length starts from the current one (SIZE_T_ARRAY_MIN if it is 0) and is doubled until it is at least size. Returns BOOL_FALSE and leaves obj unchanged if realloc() fails, BOOL_TRUE otherwise. NOTE(review): the byte count sizeof(size_t) * m is not checked for multiplication overflow. */ bool_t size_t_array__resize(size_t_array_t *obj, size_t size) { 65 | if (obj->m < size) { 66 | size_t m = obj->m; 67 | if (m == 0) m = SIZE_T_ARRAY_MIN; 68 | while (m < size && m != 0) m <<= 1; 69 | if (m == 0) m = size; /* in case of shift overflow */ 70 | size_t *const p = (size_t *)realloc(obj->p, sizeof(size_t) * m); 71 | if (p == NULL) return BOOL_FALSE; 72 | obj->p = p; 73 | obj->m = m; 74 | } 75 | obj->n = size; 76 | return BOOL_TRUE; 77 | } 78 | -------------------------------------------------------------------------------- /examples/ast-tinyc/utility.h: -------------------------------------------------------------------------------- 1 | /* 2 | * This code is hereby placed in the public domain. 
3 | * 4 | * THIS SOFTWARE IS PROVIDED BY THE AUTHORS ''AS IS'' AND ANY EXPRESS 5 | * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 6 | * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 7 | * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE 8 | * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 9 | * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 10 | * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR 11 | * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, 12 | * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE 13 | * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, 14 | * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 15 | */ 16 | 17 | #ifndef INCLUDED_UTILITY_H 18 | #define INCLUDED_UTILITY_H 19 | 20 | #include 21 | 22 | #ifdef __cplusplus 23 | extern "C" { 24 | #endif 25 | 26 | typedef enum bool_tag { 27 | BOOL_FALSE = 0, 28 | BOOL_TRUE 29 | } bool_t; 30 | 31 | typedef struct range_tag { 32 | size_t min; /* the start position (inclusive) */ 33 | size_t max; /* the end position (exclusive) */ 34 | } range_t; 35 | 36 | typedef struct char_array_tag { 37 | size_t m; /* the allocated length */ 38 | size_t n; /* the actual length */ 39 | char *p; /* the buffer */ 40 | } char_array_t; 41 | 42 | typedef struct size_t_array_tag { 43 | size_t m; /* the allocated length */ 44 | size_t n; /* the actual length */ 45 | size_t *p; /* the buffer */ 46 | } size_t_array_t; 47 | 48 | void char_array__initialize(char_array_t *obj); 49 | void char_array__finalize(char_array_t *obj); 50 | bool_t char_array__resize(char_array_t *obj, size_t size); 51 | 52 | void size_t_array__initialize(size_t_array_t *obj); 53 | void size_t_array__finalize(size_t_array_t *obj); 54 | bool_t size_t_array__resize(size_t_array_t *obj, size_t size); 55 | 56 | #ifdef __cplusplus 57 | } 58 | #endif 59 | 60 | inline static range_t 
range__void(void) { 61 | const range_t obj = { 0, 0 }; 62 | return obj; 63 | } 64 | 65 | inline static range_t range__new(size_t min, size_t max) { 66 | const range_t obj = { min, max }; 67 | return obj; 68 | } 69 | 70 | #endif /* !INCLUDED_UTILITY_H */ 71 | -------------------------------------------------------------------------------- /examples/calc.peg: -------------------------------------------------------------------------------- 1 | # This code is hereby placed in the public domain. 2 | # 3 | # THIS SOFTWARE IS PROVIDED BY THE AUTHORS ''AS IS'' AND ANY EXPRESS 4 | # OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 5 | # WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 6 | # ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE 7 | # LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 8 | # CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 9 | # SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR 10 | # BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, 11 | # WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE 12 | # OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, 13 | # EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 14 | 15 | %prefix "calc" 16 | 17 | %source { 18 | #include 19 | #include 20 | } 21 | 22 | statement <- _ e:expression _ EOL { printf("answer=%d\n", e); } 23 | / ( !EOL . 
)* EOL { printf("error\n"); } 24 | 25 | expression <- e:term { $$ = e; } 26 | 27 | term <- l:term _ '+' _ r:factor { $$ = l + r; } 28 | / l:term _ '-' _ r:factor { $$ = l - r; } 29 | / e:factor { $$ = e; } 30 | 31 | factor <- l:factor _ '*' _ r:unary { $$ = l * r; } 32 | / l:factor _ '/' _ r:unary { $$ = l / r; } 33 | / e:unary { $$ = e; } 34 | 35 | unary <- '+' _ e:unary { $$ = +e; } 36 | / '-' _ e:unary { $$ = -e; } 37 | / e:primary { $$ = e; } 38 | 39 | primary <- < [0-9]+ > { $$ = atoi($1); } 40 | / '(' _ e:expression _ ')' { $$ = e; } 41 | 42 | _ <- [ \t]* 43 | EOL <- '\n' / '\r\n' / '\r' / ';' 44 | 45 | %% 46 | int main(int argc, char **argv) { 47 | calc_context_t *ctx = calc_create(NULL); 48 | while (calc_parse(ctx, NULL)); 49 | calc_destroy(ctx); 50 | return 0; 51 | } 52 | -------------------------------------------------------------------------------- /import/README.md: -------------------------------------------------------------------------------- 1 | # Import Files 2 | 3 | ## Overview 4 | 5 | In this directory, import files are stored. 6 | 7 | ## Import File Categories 8 | 9 | | Category Name | Description | 10 | | --- | --- | 11 | | [`char`](char) | Import files that define character matching rules. | 12 | | [`code`](code) | Import files that provide utility codes. | 13 | | [`lang`](lang) | Import files that build ASTs of specific languages. | 14 | -------------------------------------------------------------------------------- /import/char/README.md: -------------------------------------------------------------------------------- 1 | # Import Files of Character Matching Rules 2 | 3 | ## Overview 4 | 5 | In this directory, import files that define character matching rules are stored. 6 | These import files are recommended to **be imported after the last rule** in the PEG file that imports them. 
7 | 8 | ## Import Files 9 | 10 | ### `char/ascii_character_group.peg` 11 | 12 | #### Synopsis 13 | 14 | An import file that defines rules to match an ASCII character belonging to a specific character group. 15 | 16 | #### PEG Rules 17 | 18 | The following PEG rules are available. 19 | 20 | | Rule Name | Description | 21 | | --- | --- | 22 | | `ASCII_Printable_Character` | Matches a printable character, i.e. a character other than control characters. | 23 | | `ASCII_Letter` | Matches an alphabet character (`[A-Za-z]`). | 24 | | `ASCII_Control_Character` | Matches a control character (`[\x00-\x1f\x7f]`). | 25 | | `ASCII_Special_Character` | Matches a character other than control characters, number characters, and alphabet characters. | 26 | | `ASCII_Number` | Matches a number character (`[0-9]`). | 27 | | `ASCII_Uppercase_Letter` | Matches an uppercase alphabet character (`[A-Z]`). | 28 | | `ASCII_Lowercase_Letter` | Matches a lowercase alphabet character (`[a-z]`). | 29 | | `ASCII_C_alnum` | Matches a character for which the standard C function `isalnum()` returns a non-zero value (`[0-9A-Za-z]`). | 30 | | `ASCII_C_alpha` | Matches a character for which the standard C function `isalpha()` returns a non-zero value (= `ASCII_Letter`). | 31 | | `ASCII_C_blank` | Matches a character for which the standard C function `isblank()` returns a non-zero value (`[ \t]`). | 32 | | `ASCII_C_cntrl` | Matches a character for which the standard C function `iscntrl()` returns a non-zero value (= `ASCII_Control_Character`). | 33 | | `ASCII_C_digit` | Matches a character for which the standard C function `isdigit()` returns a non-zero value (= `ASCII_Number`). | 34 | | `ASCII_C_graph` | Matches a character for which the standard C function `isgraph()` returns a non-zero value (= `ASCII_Printable_Character` excluding the space character `' '`). 
| 35 | | `ASCII_C_lower` | Matches a character for which the standard C function `islower()` returns a non-zero value (= `ASCII_Lowercase_Letter`). | 36 | | `ASCII_C_print` | Matches a character for which the standard C function `isprint()` returns a non-zero value (= `ASCII_Printable_Character`). | 37 | | `ASCII_C_punct` | Matches a character for which the standard C function `ispunct()` returns a non-zero value (= `ASCII_Special_Character` excluding the space character `' '`). | 38 | | `ASCII_C_space` | Matches a character for which the standard C function `isspace()` returns a non-zero value (`[ \t\n\v\f\r]`). | 39 | | `ASCII_C_upper` | Matches a character for which the standard C function `isupper()` returns a non-zero value (= `ASCII_Uppercase_Letter`). | 40 | | `ASCII_C_xdigit` | Matches a character for which the standard C function `isxdigit()` returns a non-zero value (`[0-9A-Fa-f]`). | 41 | 42 | ### `char/unicode_general_category.peg` 43 | 44 | #### Synopsis 45 | 46 | An import file that defines rules to match a Unicode character belonging to a specific [general category](https://unicode.org/reports/tr44/#General_Category_Values). 47 | 48 | #### PEG Rules 49 | 50 | The following PEG rules are available. 51 | 52 | | Rule Name | Description | 53 | | --- | --- | 54 | | `Unicode_Uppercase_Letter` | Matches an uppercase letter. | 55 | | `Unicode_Lowercase_Letter` | Matches a lowercase letter. | 56 | | `Unicode_Titlecase_Letter` | Matches a digraph encoded as a single character, with the first part uppercase. | 57 | | `Unicode_Cased_Letter` | Matches a cased letter (= `Unicode_Uppercase_Letter / Unicode_Lowercase_Letter / Unicode_Titlecase_Letter`). | 58 | | `Unicode_Modifier_Letter` | Matches a modifier letter. | 59 | | `Unicode_Other_Letter` | Matches a letter of other type, including syllables and ideographs. | 60 | | `Unicode_Letter` | Matches a letter (= `Unicode_Cased_Letter / Unicode_Modifier_Letter / Unicode_Other_Letter`). 
| 61 | | `Unicode_Nonspacing_Mark` | Matches a nonspacing combining mark (zero advance width). | 62 | | `Unicode_Spacing_Mark` | Matches a spacing combining mark (positive advance width). | 63 | | `Unicode_Enclosing_Mark` | Matches an enclosing combining mark. | 64 | | `Unicode_Mark` | Matches a mark (= `Unicode_Nonspacing_Mark / Unicode_Spacing_Mark / Unicode_Enclosing_Mark`). | 65 | | `Unicode_Decimal_Number` | Matches a decimal digit. | 66 | | `Unicode_Letter_Number` | Matches a letterlike numeric character. | 67 | | `Unicode_Other_Number` | Matches a numeric character of other type. | 68 | | `Unicode_Number` | Matches a numeric character (= `Unicode_Decimal_Number / Unicode_Letter_Number / Unicode_Other_Number`). | 69 | | `Unicode_Connector_Punctuation` | Matches a connecting punctuation mark, like a tie. | 70 | | `Unicode_Dash_Punctuation` | Matches a dash or hyphen punctuation mark. | 71 | | `Unicode_Open_Punctuation` | Matches an opening punctuation mark (of a pair). | 72 | | `Unicode_Close_Punctuation` | Matches a closing punctuation mark (of a pair). | 73 | | `Unicode_Initial_Punctuation` | Matches an initial quotation mark. | 74 | | `Unicode_Final_Punctuation` | Matches a final quotation mark. | 75 | | `Unicode_Other_Punctuation` | Matches a punctuation mark of other type. | 76 | | `Unicode_Punctuation` | Matches a punctuation mark (= `Unicode_Connector_Punctuation / Unicode_Dash_Punctuation / Unicode_Open_Punctuation / Unicode_Close_Punctuation / Unicode_Initial_Punctuation / Unicode_Final_Punctuation / Unicode_Other_Punctuation`). | 77 | | `Unicode_Math_Symbol` | Matches a symbol of mathematical use. | 78 | | `Unicode_Currency_Symbol` | Matches a currency sign. | 79 | | `Unicode_Modifier_Symbol` | Matches a non-letterlike modifier symbol. | 80 | | `Unicode_Other_Symbol` | Matches a symbol of other type. | 81 | | `Unicode_Symbol` | Matches a symbol (= `Unicode_Math_Symbol / Unicode_Currency_Symbol / Unicode_Modifier_Symbol / Unicode_Other_Symbol`). 
| 82 | | `Unicode_Space_Separator` | Matches a space character (of various non-zero widths). | 83 | | `Unicode_Line_Separator` | Matches U+2028 "LINE SEPARATOR" only. | 84 | | `Unicode_Paragraph_Separator` | Matches U+2029 "PARAGRAPH SEPARATOR" only. | 85 | | `Unicode_Separator` | Matches a space character (= `Unicode_Space_Separator / Unicode_Line_Separator / Unicode_Paragraph_Separator`). | 86 | | `Unicode_Control` | Matches a [C0](https://www.unicode.org/charts/nameslist/n_0000.html) or [C1](https://www.unicode.org/charts/nameslist/n_0080.html) control code. | 87 | | `Unicode_Format` | Matches a format control character. | 88 | | `Unicode_Surrogate` | Matches a surrogate code point. | 89 | | `Unicode_Private_Use` | Matches a private-use character. | 90 | | `Unicode_Other` | Matches a character of other type (= `Unicode_Control / Unicode_Format / Unicode_Surrogate / Unicode_Private_Use`). | 91 | 92 | ### `char/unicode_derived_core.peg` 93 | 94 | #### Synopsis 95 | 96 | An import file that defines rules to match a Unicode character belonging to a specific [derived core property](https://www.unicode.org/reports/tr44/#DerivedCoreProperties.txt). 97 | 98 | #### PEG Rules 99 | 100 | The following PEG rules are available. 101 | 102 | | Rule Name | Description | 103 | | --- | --- | 104 | | `Unicode_Lowercase` | Matches a character with the Lowercase property. | 105 | | `Unicode_Uppercase` | Matches a character with the Uppercase property. | 106 | | `Unicode_Cased` | Matches a character which is considered to be either uppercase, lowercase or titlecase characters. | 107 | | `Unicode_Case_Ignorable` | Matches a character which is ignored for casing purposes. | 108 | | `Unicode_Changes_When_Lowercased` | Matches a character whose normalized form is not stable under a toLowercase mapping. | 109 | | `Unicode_Changes_When_Uppercased` | Matches a character whose normalized form is not stable under a toUppercase mapping. 
| 110 | | `Unicode_Changes_When_Titlecased` | Matches a character whose normalized form is not stable under a toTitlecase mapping. | 111 | | `Unicode_Changes_When_Casefolded` | Matches a character whose normalized form is not stable under case folding. | 112 | | `Unicode_Changes_When_Casemapped` | Matches a character which may change when it undergoes case mapping. | 113 | | `Unicode_Alphabetic` | Matches a character with the Alphabetic property. | 114 | | `Unicode_Default_Ignorable_Code_Point` | Matches a character which should be ignored in rendering unless explicitly supported by programs. | 115 | | `Unicode_Grapheme_Base` | Matches a character with the property used to define "Grapheme base". | 116 | | `Unicode_Grapheme_Extend` | Matches a character with the property used to define "Grapheme extender". | 117 | | `Unicode_Math` | Matches a character with the Math property. | 118 | | `Unicode_ID_Start` | Matches a character which may be used as the first letter of an identifier in a programming language. | 119 | | `Unicode_ID_Continue` | Matches a character which may be used as the second and subsequent letters of an identifier in a programming language. | 120 | | `Unicode_XID_Start` | Matches a character which can be mapped to a `Unicode_ID_Start` character under NFKC-normalization. | 121 | | `Unicode_XID_Continue` | Matches a character which can be mapped to a `Unicode_ID_Continue` character under NFKC-normalization. | 122 | -------------------------------------------------------------------------------- /import/char/ascii_character_group.peg: -------------------------------------------------------------------------------- 1 | # This file is hereby placed in the public domain. 2 | # 3 | # THIS SOFTWARE IS PROVIDED BY THE AUTHORS AS IS AND ANY EXPRESS 4 | # OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 5 | # WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 6 | # ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE 7 | # LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 8 | # CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 9 | # SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR 10 | # BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, 11 | # WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE 12 | # OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, 13 | # EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 14 | 15 | ASCII_Printable_Character <- ASCII_Special_Character / ASCII_Number / ASCII_Letter 16 | ASCII_Letter <- ASCII_Uppercase_Letter / ASCII_Lowercase_Letter 17 | 18 | ASCII_Control_Character <- [\x00-\x1f\x7f] 19 | ASCII_Special_Character <- [\x20-\x2f\x3a-\x40\x5b-\x60\x7b-\x7e] 20 | ASCII_Number <- [0-9] 21 | ASCII_Uppercase_Letter <- [A-Z] 22 | ASCII_Lowercase_Letter <- [a-z] 23 | 24 | ASCII_C_alnum <- [0-9A-Za-z] 25 | ASCII_C_alpha <- [A-Za-z] 26 | ASCII_C_blank <- [ \t] 27 | ASCII_C_cntrl <- [\x00-\x1f\x7f] 28 | ASCII_C_digit <- [0-9] 29 | ASCII_C_graph <- [\x21-\x7e] 30 | ASCII_C_lower <- [a-z] 31 | ASCII_C_print <- [\x20-\x7e] 32 | ASCII_C_punct <- [\x21-\x2f\x3a-\x40\x5b-\x60\x7b-\x7e] 33 | ASCII_C_space <- [ \t\n\v\f\r] 34 | ASCII_C_upper <- [A-Z] 35 | ASCII_C_xdigit <- [0-9A-Fa-f] 36 | -------------------------------------------------------------------------------- /import/code/README.md: -------------------------------------------------------------------------------- 1 | # Import Files of Utility Codes 2 | 3 | ## Overview 4 | 5 | In this directory, import files that provide utility codes are stored. 6 | 7 | ## Import Files 8 | 9 | | File Name | Description | Version | Details | 10 | | --- | --- | --- | --- | 11 | | [`code/pcc_ast.peg`](pcc_ast.peg) | Provides codes to make it easier to build an AST. 
(deprecated) | 2.3.1 | [link](pcc_ast.md) | 12 | | [`code/pcc_ast.v3.peg`](pcc_ast.v3.peg) | Provides codes to make it easier to build an AST. | 3.0.0 | [link](pcc_ast.v3.md) | 13 | -------------------------------------------------------------------------------- /import/code/pcc_ast.md: -------------------------------------------------------------------------------- 1 | ### `code/pcc_ast.peg` (version 2.3.0) 2 | 3 | #### Synopsis 4 | 5 | An import file that provides codes to make it easier to build an AST (abstract syntax tree). 6 | 7 | #### Usage 8 | 9 | The usage procedure is shown below. 10 | 11 | 1. Import `code/pcc_ast.peg` **after the last `%header` section** in the PEG file if any. 12 | ```c 13 | %import "code/pcc_ast.peg" 14 | ``` 15 | 2. Set the designated data types as follows: 16 | ```c 17 | %value "pcc_ast_node_t *" 18 | 19 | %auxil "pcc_ast_manager_t *" 20 | ``` 21 | 22 | If the prefix is set with `%prefix`, all symbols starting with pcc\_ are changed to those with the specified prefix as below. 23 | ```c 24 | %prefix "my" 25 | 26 | %value "my_ast_node_t *" 27 | 28 | %auxil "my_ast_manager_t *" 29 | ``` 30 | 3. Create an AST node using either of the following functions in every rule action. 31 | - pcc\_ast_node_t *pcc\_ast_node__create_0(void); 32 | + Returns a newly created nullary node. 33 | - pcc\_ast_node_t *pcc\_ast_node__create_0_int(ptrdiff_t val); 34 | + Returns a newly created nullary node retaining an integer value. 35 | + The integer value can be accessed using ptrdiff_t pcc\_ast_node__get_integer(pcc\_ast_node_t *node). 36 | - pcc\_ast_node_t *pcc\_ast_node__create_0_str(const char *str); 37 | + Returns a newly created nullary node retaining a copy of the specified string. 38 | + The string can be accessed using const char *pcc\_ast_node__get_string(pcc\_ast_node_t *node). 39 | - pcc\_ast_node_t *pcc\_ast_node__create_1(pcc\_ast_node_t *node); 40 | + Returns a newly created unary node with one child node specified by the argument `node`. 
41 | - pcc\_ast_node_t *pcc\_ast_node__create_2(pcc\_ast_node_t *node0, pcc\_ast_node_t *node1); 42 | + Returns a newly created binary node with two child nodes specified by the arguments `node0` and `node1`. 43 | - pcc\_ast_node_t *pcc\_ast_node__create_3(pcc\_ast_node_t *node0, pcc\_ast_node_t *node1, pcc\_ast_node_t *node2); 44 | + Returns a newly created ternary node with three child nodes specified by the arguments `node0`, `node1`, and `node2`. 45 | - pcc\_ast_node_t *pcc\_ast_node__create_v(void); 46 | + Returns a newly created variadic node initially with no child node. 47 | - pcc\_ast_node_t *pcc\_ast_node__add_child(pcc\_ast_node_t *obj, pcc\_ast_node_t *node); 48 | + Adds a child node specified by the argument `node` to the variadic node `obj`. 49 | + Can be used for `obj` as a variadic node only. 50 | 51 | As written above, if the prefix is set with `%prefix`, all symbols starting with pcc\_ are changed to those with the specified prefix. 52 | 53 | There are variants of the node creation functions that enable setting a label as an `int` value. 54 | The label can be used for specifying node kinds in order to make it easier to analyze the AST in the later parsing steps. 55 | - pcc\_ast_node_t *pcc\_ast_node__create_0_ext(int label); 56 | - pcc\_ast_node_t *pcc\_ast_node__create_0_ext_str(int label, const char *str); 57 | - pcc\_ast_node_t *pcc\_ast_node__create_1_ext(int label, pcc\_ast_node_t *node); 58 | - pcc\_ast_node_t *pcc\_ast_node__create_2_ext(int label, pcc\_ast_node_t *node0, pcc\_ast_node_t *node1); 59 | - pcc\_ast_node_t *pcc\_ast_node__create_3_ext(int label, pcc\_ast_node_t *node0, pcc\_ast_node_t *node1, pcc\_ast_node_t *node2); 60 | - pcc\_ast_node_t *pcc\_ast_node__create_v_ext(int label); 61 | 62 | Every AST node retains the rule pattern matching range in the member variable `range`. 63 | Namely, `obj->range.start` and `obj->range.end` memorize `$0s` and `$0e` respectively at the time when the node `obj` was created in a rule action. 
64 | 65 | A usage example is shown below. 66 | ```c 67 | rule0 <- l:rule1 '+' r:rule1 { $$ = my_ast_node__create_2(l, r); } 68 | rule1 <- [0-9]+ { $$ = my_ast_node__create_0(); } 69 | ``` 70 | 4. Call the generated parser API functions as follows: 71 | ```c 72 | my_ast_manager_t mgr; 73 | my_ast_manager__initialize(&mgr); 74 | { 75 | my_context_t *ctx = my_create(&mgr); 76 | my_ast_node_t *ast = NULL; /* ast: the root node of the AST */ 77 | while (my_parse(ctx, &ast)) { 78 | /* ... do something needed here */ 79 | my_ast_node__destroy(&mgr, ast); 80 | } 81 | my_destroy(ctx); 82 | } 83 | my_ast_manager__finalize(&mgr); 84 | ``` 85 | This code can be executed safely with no memory leak (if "_do something needed here_" does not bring memory leaks). 86 | 87 | #### Customization 88 | 89 | To build a meaningful AST, customization of the node is needed. 90 | By defining the macro PCC\_AST_NODE_CUSTOM_DATA_DEFINED in a `%header` section before `%import "code/pcc_ast.peg"`, 91 | the node member variable `custom` whose data type is pcc\_ast_node_custom_data_t is enabled for storing node custom data. 92 | If the prefix is set with `%prefix`, the macro name PCC\_AST_NODE_CUSTOM_DATA_DEFINED is changed to those with the uppercased prefix as below. 93 | ```c 94 | %prefix "my" 95 | 96 | %header { 97 | #define MY_AST_NODE_CUSTOM_DATA_DEFINED 98 | ... 99 | } 100 | ``` 101 | 102 | The concrete usage procedure is shown below. 103 | 104 | 1. Define the data type of the node custom data in a PEG file. 105 | ```c 106 | %header { 107 | #define MY_AST_NODE_CUSTOM_DATA_DEFINED /* <-- enables node custom data */ 108 | 109 | typedef struct node_custom_data_tag { /* <-- node custom data type */ 110 | /* ... define member variables as needed */ 111 | } my_ast_node_custom_data_t; 112 | } 113 | ``` 114 | An example is as follows. 
115 | ```c 116 | %header { 117 | #define MY_AST_NODE_CUSTOM_DATA_DEFINED 118 | 119 | typedef struct text_data_tag { 120 | char *text; 121 | } my_ast_node_custom_data_t; 122 | } 123 | ``` 124 | Make sure that this `%header` section is located before `%import "code/pcc_ast.peg"`. 125 | 2. Set a node custom data value in every rule action as needed. 126 | An example is as follows. 127 | ```c 128 | rule0 <- l:rule1 '+' r:rule1 { $$ = my_ast_node__create_2(l, r); $$->custom.text = strdup("+"); } 129 | rule1 <- < [0-9]+ > { $$ = my_ast_node__create_0(); $$->custom.text = strdup($1); } 130 | ``` 131 | 3. Implement the initialization and finalization functions for the node custom data. 132 | - void pcc\_ast_node_custom_data__initialize(pcc\_ast_manager_t *mgr, pcc\_ast_node_custom_data_t *obj); 133 | + Initializes the node custom data `obj`. 134 | - void pcc\_ast_node_custom_data__finalize(pcc\_ast_manager_t *mgr, pcc\_ast_node_custom_data_t *obj); 135 | + Finalizes the node custom data `obj`. 136 | 137 | An example is as follows. 138 | ```c 139 | void my_ast_node_custom_data__initialize(my_ast_manager_t *mgr, my_ast_node_custom_data_t *obj) { 140 | obj->text = NULL; 141 | } 142 | 143 | void my_ast_node_custom_data__finalize(my_ast_manager_t *mgr, my_ast_node_custom_data_t *obj) { 144 | free(obj->text); 145 | } 146 | ``` 147 | 148 | #### Macros 149 | 150 | Some macros are prepared to customize the behavior of memory allocation for AST nodes. 151 | The macro definition should be **in `%source` section** in the PEG source. 152 | 153 | The following macros are available. 154 | Note that, unlike other symbols, the prefix of these macro names is never changed even when a different prefix is set with `%prefix`. 155 | 156 | **`PCC_AST_MALLOC(`**_mgr_**`,`**_size_**`)`** 157 | 158 | The function macro to allocate a memory block. 159 | The pointer to the instance of pcc\_ast_manager_t that was passed to the API function pcc\_create() can be retrieved from the argument _mgr_.
160 | It can be ignored if the instance does not concern memory allocation. 161 | The argument _size_ is the number of bytes to allocate. 162 | This macro must return a pointer to the allocated memory block, or `NULL` if no sufficient memory is available. 163 | 164 | The default is defined as `PCC_MALLOC(mgr, size)`, which is used in the generated parser. 165 | 166 | **`PCC_AST_REALLOC(`**_mgr_**`,`**_ptr_**`,`**_size_**`)`** 167 | 168 | The function macro to reallocate the existing memory block. 169 | The pointer to the instance of pcc\_ast_manager_t that was passed to the API function pcc\_create() can be retrieved from the argument _mgr_. 170 | It can be ignored if the instance does not concern memory allocation. 171 | The argument _ptr_ is the pointer to the previously allocated memory block. 172 | The argument _size_ is the new number of bytes to reallocate. 173 | This macro must return a pointer to the reallocated memory block, or `NULL` if no sufficient memory is available. 174 | The contents of the memory block should be left unchanged in any case even if the reallocation fails. 175 | 176 | The default is defined as `PCC_REALLOC(mgr, ptr, size)`, which is used in the generated parser. 177 | 178 | **`PCC_AST_FREE(`**_mgr_**`,`**_ptr_**`)`** 179 | 180 | The function macro to free the existing memory block. 181 | The pointer to the instance of pcc\_ast_manager_t that was passed to the API function pcc\_create() can be retrieved from the argument _mgr_. 182 | It can be ignored if the instance does not concern memory allocation. 183 | The argument _ptr_ is the pointer to the previously allocated memory block. 184 | This macro need not return a value. 185 | 186 | The default is defined as `PCC_FREE(mgr, ptr)`, which is used in the generated parser. 187 | 188 | **`PCC_AST_NODE_ARRAY_MIN_SIZE`** 189 | 190 | The initial size (the number of nodes) of the node arrays used in AST nodes. 191 | The arrays are expanded as needed. 192 | The default is `4`.
193 | 194 | #### Example 195 | 196 | An example which builds an AST and dumps it is shown here. 197 | This example accepts the same inputs as [*Desktop Calculator*](../../examples/calc.peg). 198 | 199 | ```c 200 | %prefix "calc" 201 | 202 | %value "calc_ast_node_t *" # <-- must be set 203 | 204 | %auxil "calc_ast_manager_t *" # <-- must be set 205 | 206 | %header { 207 | #define CALC_AST_NODE_CUSTOM_DATA_DEFINED /* <-- enables node custom data */ 208 | 209 | typedef struct text_data_tag { /* <-- node custom data type */ 210 | char *text; 211 | } calc_ast_node_custom_data_t; 212 | } 213 | 214 | %source { 215 | #include <stdio.h> 216 | #include <string.h> 217 | } 218 | 219 | statement <- _ e:expression _ EOL { $$ = e; } 220 | / ( !EOL . )* EOL { $$ = NULL; } 221 | 222 | expression <- e:term { $$ = e; } 223 | 224 | term <- l:term _ '+' _ r:factor { $$ = calc_ast_node__create_2(l, r); $$->custom.text = strdup("+"); } 225 | / l:term _ '-' _ r:factor { $$ = calc_ast_node__create_2(l, r); $$->custom.text = strdup("-"); } 226 | / e:factor { $$ = e; } 227 | 228 | factor <- l:factor _ '*' _ r:unary { $$ = calc_ast_node__create_2(l, r); $$->custom.text = strdup("*"); } 229 | / l:factor _ '/' _ r:unary { $$ = calc_ast_node__create_2(l, r); $$->custom.text = strdup("/"); } 230 | / e:unary { $$ = e; } 231 | 232 | unary <- '+' _ e:unary { $$ = calc_ast_node__create_1(e); $$->custom.text = strdup("+"); } 233 | / '-' _ e:unary { $$ = calc_ast_node__create_1(e); $$->custom.text = strdup("-"); } 234 | / e:primary { $$ = e; } 235 | 236 | primary <- < [0-9]+ > { $$ = calc_ast_node__create_0(); $$->custom.text = strdup($1); } 237 | / '(' _ e:expression _ ')' { $$ = e; } 238 | 239 | _ <- [ \t]* 240 | EOL <- '\n' / '\r\n' / '\r' / ';' 241 | 242 | %import "code/pcc_ast.peg" # <-- provides AST build functions 243 | 244 | %% 245 | void calc_ast_node_custom_data__initialize(calc_ast_manager_t *mgr, calc_ast_node_custom_data_t *obj) { 246 | obj->text = NULL; 247 | } /* <-- must be implemented when enabling node
custom data */ 248 | 249 | void calc_ast_node_custom_data__finalize(calc_ast_manager_t *mgr, calc_ast_node_custom_data_t *obj) { 250 | free(obj->text); 251 | } /* <-- must be implemented when enabling node custom data */ 252 | 253 | static void dump_ast(const calc_ast_node_t *obj, int depth) { 254 | if (obj) { 255 | switch (obj->type) { 256 | case CALC_AST_NODE_TYPE_NULLARY: 257 | printf("%*s%s: \"%s\"\n", 2 * depth, "", "nullary", obj->custom.text); 258 | break; 259 | case CALC_AST_NODE_TYPE_UNARY: 260 | printf("%*s%s: \"%s\"\n", 2 * depth, "", "unary", obj->custom.text); 261 | dump_ast(obj->data.unary.node, depth + 1); 262 | break; 263 | case CALC_AST_NODE_TYPE_BINARY: 264 | printf("%*s%s: \"%s\"\n", 2 * depth, "", "binary", obj->custom.text); 265 | dump_ast(obj->data.binary.node[0], depth + 1); 266 | dump_ast(obj->data.binary.node[1], depth + 1); 267 | break; 268 | case CALC_AST_NODE_TYPE_TERNARY: 269 | printf("%*s%s: \"%s\"\n", 2 * depth, "", "ternary", obj->custom.text); 270 | dump_ast(obj->data.ternary.node[0], depth + 1); 271 | dump_ast(obj->data.ternary.node[1], depth + 1); 272 | dump_ast(obj->data.ternary.node[2], depth + 1); 273 | break; 274 | case CALC_AST_NODE_TYPE_VARIADIC: 275 | printf("%*s%s: \"%s\"\n", 2 * depth, "", "variadic", obj->custom.text); 276 | { 277 | size_t i; 278 | for (i = 0; i < obj->data.variadic.len; i++) { 279 | dump_ast(obj->data.variadic.node[i], depth + 1); 280 | } 281 | } 282 | break; 283 | default: 284 | printf("%*s%s: \"%s\"\n", 2 * depth, "", "(unknown)", obj->custom.text); 285 | break; 286 | } 287 | } 288 | else { 289 | printf("%*s(null)\n", 2 * depth, ""); 290 | } 291 | } 292 | 293 | int main(int argc, char **argv) { 294 | calc_ast_manager_t mgr; 295 | calc_ast_manager__initialize(&mgr); 296 | { 297 | calc_context_t *ctx = calc_create(&mgr); 298 | calc_ast_node_t *ast = NULL; 299 | while (calc_parse(ctx, &ast)) { 300 | dump_ast(ast, 0); 301 | calc_ast_node__destroy(&mgr, ast); 302 | } 303 | calc_destroy(ctx); 304 | } 305 | 
calc_ast_manager__finalize(&mgr); 306 | return 0; 307 | } 308 | ``` 309 | 310 | An execution example is as follows. 311 | 312 | ``` 313 | $ ./ast-calc↵ 314 | 1+2*(3+4*(5+6))↵ 315 | binary: "+" 316 | nullary: "1" 317 | binary: "*" 318 | nullary: "2" 319 | binary: "+" 320 | nullary: "3" 321 | binary: "*" 322 | nullary: "4" 323 | binary: "+" 324 | nullary: "5" 325 | nullary: "6" 326 | 5*6*7*8/(1*2*3*4)↵ 327 | binary: "/" 328 | binary: "*" 329 | binary: "*" 330 | binary: "*" 331 | nullary: "5" 332 | nullary: "6" 333 | nullary: "7" 334 | nullary: "8" 335 | binary: "*" 336 | binary: "*" 337 | binary: "*" 338 | nullary: "1" 339 | nullary: "2" 340 | nullary: "3" 341 | nullary: "4" 342 | ``` 343 | -------------------------------------------------------------------------------- /import/code/pcc_ast.v3.md: -------------------------------------------------------------------------------- 1 | ### `code/pcc_ast.v3.peg` (version 3.0.0) 2 | 3 | #### Synopsis 4 | 5 | An import file that provides codes to make it easier to build an AST (abstract syntax tree). 6 | 7 | #### Usage 8 | 9 | The usage procedure is shown below. 10 | 11 | 1. Import `code/pcc_ast.v3.peg` **after the last `%header` section** in the PEG file if any. 12 | ```c 13 | %import "code/pcc_ast.v3.peg" 14 | ``` 15 | 2. Set the designated data types as follows: 16 | ```c 17 | %value "pcc_ast_node_t *" 18 | 19 | %auxil "pcc_ast_manager_t *" 20 | ``` 21 | 22 | If the prefix is set with `%prefix`, all symbols starting with pcc\_ are changed to those with the specified prefix as below. 23 | ```c 24 | %prefix "my" 25 | 26 | %value "my_ast_node_t *" 27 | 28 | %auxil "my_ast_manager_t *" 29 | ``` 30 | 3. Create an AST node using either of the following functions in every rule action. 31 | - pcc\_ast_node_t *pcc\_ast_node__create_0(void); 32 | + Returns a newly created nullary node. 33 | - pcc\_ast_node_t *pcc\_ast_node__create_0_int(ptrdiff_t val); 34 | + Returns a newly created nullary node retaining an integer value. 
35 | + The integer value can be accessed later using ptrdiff_t pcc\_ast_node__get_int(const pcc\_ast_node_t *obj). 36 | + The integer value is immutable. 37 | - pcc\_ast_node_t *pcc\_ast_node__create_0_str(const char *str); 38 | + Returns a newly created nullary node retaining a copy of the specified string. 39 | + The string can be accessed later using const char *pcc\_ast_node__get_str(const pcc\_ast_node_t *obj). 40 | + The string is immutable. 41 | - pcc\_ast_node_t *pcc\_ast_node__create_1(pcc\_ast_node_t *node0); 42 | + Returns a newly created unary node with one child node specified by the argument `node0`. 43 | - pcc\_ast_node_t *pcc\_ast_node__create_2(pcc\_ast_node_t *node0, pcc\_ast_node_t *node1); 44 | + Returns a newly created binary node with two child nodes specified by the argument `node0` and `node1`. 45 | - pcc\_ast_node_t *pcc\_ast_node__create_3(pcc\_ast_node_t *node0, pcc\_ast_node_t *node1, pcc\_ast_node_t *node2); 46 | + Returns a newly created ternary node with three child nodes specified by the argument `node0`, `node1`, and `node2`. 47 | - pcc\_ast_node_t *pcc\_ast_node__create_v(void); 48 | + Returns a newly created variadic node initially with no child node. 49 | - pcc\_ast_node_t *pcc\_ast_node__add_child(pcc\_ast_node_t *obj, pcc\_ast_node_t *node); 50 | + Adds a child node specified by the argument `node` to the variadic node `obj`. 51 | + Can be used for `obj` as a variadic node only. 52 | 53 | As written above, if the prefix is set with `%prefix`, all symbols starting with pcc\_ are changed to those with the specified prefix. 54 | 55 | A usage example is shown below. 56 | ```c 57 | rule0 <- l:rule1 '+' r:rule1 { $$ = my_ast_node__create_2(l, r); } 58 | rule1 <- [0-9]+ { $$ = my_ast_node__create_0(); } 59 | ``` 60 | 61 | There are the variants of the node creation functions that enable setting a label as an `int` value.
62 | The label can be used for specifying node kinds in order to make it easier to analyze the AST in the later parsing steps. 63 | - pcc\_ast_node_t *pcc\_ast_node__create_0_ext(int label); 64 | - pcc\_ast_node_t *pcc\_ast_node__create_0_ext_int(int label, ptrdiff_t val); 65 | - pcc\_ast_node_t *pcc\_ast_node__create_0_ext_str(int label, const char *str); 66 | - pcc\_ast_node_t *pcc\_ast_node__create_1_ext(int label, pcc\_ast_node_t *node0); 67 | - pcc\_ast_node_t *pcc\_ast_node__create_2_ext(int label, pcc\_ast_node_t *node0, pcc\_ast_node_t *node1); 68 | - pcc\_ast_node_t *pcc\_ast_node__create_3_ext(int label, pcc\_ast_node_t *node0, pcc\_ast_node_t *node1, pcc\_ast_node_t *node2); 69 | - pcc\_ast_node_t *pcc\_ast_node__create_v_ext(int label); 70 | 71 | The maximum number of child nodes for a non-variadic node is limited by the macro PCC\_AST_NODE_MAX_CONSTANT_ARITY. The default value is 3, and can be changed by defining the macro with a preferred value in a `%header` section before `%import "code/pcc_ast.v3.peg"`. Up to 9 child nodes are supported. 72 | 73 | If the prefix is set with `%prefix`, the macro name PCC\_AST_NODE_MAX_CONSTANT_ARITY is changed to those with the uppercased prefix as below. 74 | ```c 75 | %prefix "my" 76 | 77 | %header { 78 | #define MY_AST_NODE_MAX_CONSTANT_ARITY 9 79 | ... 80 | } 81 | ``` 82 | 83 | The functions to access child nodes are provided. 84 | - int pcc\_ast_node__is_variadic(const pcc\_ast_node_t *obj); 85 | + Returns a nonzero value if the specified node is a variadic node, or returns 0 otherwise. 86 | - size_t pcc\_ast_node__get_child_count(const pcc\_ast_node_t *obj); 87 | + Returns the number of the child nodes of the specified node. 88 | - pcc\_ast_node_t *const *pcc\_ast_node__get_child_array(pcc\_ast_node_t *obj); 89 | + Returns the pointer to the child node array of the specified node.
90 | 91 | Every node retains the rule pattern matching range, which can be accessed using pcc\_ast_range_t pcc\_ast_node__get_range(const pcc\_ast_node_t *obj). 92 | Namely, the member variables `start` and `end` of the range value memorize `$0s` and `$0e` respectively at the time when the node `obj` was created in a rule action. The range value is immutable. 93 | 4. Call the generated parser API functions as follows: 94 | ```c 95 | my_ast_manager_t mgr; 96 | my_ast_manager__initialize(&mgr); 97 | { 98 | my_context_t *ctx = my_create(&mgr); 99 | my_ast_node_t *ast = NULL; /* ast: the root node of the AST */ 100 | while (my_parse(ctx, &ast)) { 101 | /* ... do something needed here */ 102 | my_ast_node__destroy(&mgr, ast); 103 | } 104 | my_destroy(ctx); 105 | } 106 | my_ast_manager__finalize(&mgr); 107 | ``` 108 | This code can be executed safely with no memory leak (if "_do something needed here_" does not bring memory leaks). 109 | 110 | #### Customization 111 | 112 | To build a meaningful AST, customization of the node is needed. 113 | By defining the macro PCC\_AST_NODE_CUSTOM_DATA_DEFINED in a `%header` section before `%import "code/pcc_ast.v3.peg"`, 114 | it is enabled to store custom data in every node. 115 | The data type of the node custom data is pcc\_ast_node_custom_data_t, and it must be defined in a `%header` section before `%import "code/pcc_ast.v3.peg"` as well. 116 | The node custom data is retained in the member variable `custom` of every node, and can be accessed freely. 117 | 118 | If the prefix is set with `%prefix`, the macro name PCC\_AST_NODE_CUSTOM_DATA_DEFINED is changed to those with the uppercased prefix as below. 119 | ```c 120 | %prefix "my" 121 | 122 | %header { 123 | #define MY_AST_NODE_CUSTOM_DATA_DEFINED 124 | ... 125 | } 126 | ``` 127 | 128 | The concrete usage procedure is shown below. 129 | 130 | 1. Define the data type of the node custom data in a PEG file. 
131 | ```c 132 | %header { 133 | #define MY_AST_NODE_CUSTOM_DATA_DEFINED /* <-- enables node custom data */ 134 | 135 | typedef struct node_custom_data_tag { /* <-- node custom data type */ 136 | /* ... define member variables as needed */ 137 | } my_ast_node_custom_data_t; 138 | } 139 | ``` 140 | An example is as follows. 141 | ```c 142 | %header { 143 | #define MY_AST_NODE_CUSTOM_DATA_DEFINED 144 | 145 | typedef struct text_data_tag { 146 | char *text; 147 | } my_ast_node_custom_data_t; 148 | } 149 | ``` 150 | Make sure that this `%header` section is located before `%import "code/pcc_ast.v3.peg"`. 151 | 2. Set a node custom data value in every rule action as needed. 152 | An example is as follows. 153 | ```c 154 | rule0 <- l:rule1 '+' r:rule1 { $$ = my_ast_node__create_2(l, r); $$->custom.text = strdup("+"); } 155 | rule1 <- < [0-9]+ > { $$ = my_ast_node__create_0(); $$->custom.text = strdup($1); } 156 | ``` 157 | 3. Implement the initialization and finalization functions for the node custom data. 158 | - void pcc\_ast_node_custom_data__initialize(pcc\_ast_manager_t *mgr, pcc\_ast_node_custom_data_t *obj); 159 | + Initializes the node custom data `obj`. 160 | - void pcc\_ast_node_custom_data__finalize(pcc\_ast_manager_t *mgr, pcc\_ast_node_custom_data_t *obj); 161 | + Finalizes the node custom data `obj`. 162 | 163 | An example is as follows. 164 | ```c 165 | void my_ast_node_custom_data__initialize(my_ast_manager_t *mgr, my_ast_node_custom_data_t *obj) { 166 | obj->text = NULL; 167 | } 168 | 169 | void my_ast_node_custom_data__finalize(my_ast_manager_t *mgr, my_ast_node_custom_data_t *obj) { 170 | free(obj->text); 171 | } 172 | ``` 173 | 174 | #### Macros 175 | 176 | Some macros are prepared to customize the behavior of memory allocation for AST nodes. 177 | The macro definition should be **in `%source` section** in the PEG source. 178 | 179 | The following macros are available. 
180 | Note that, unlike other symbols, the prefix of these macro names is never changed even when a different prefix is set with `%prefix`. 181 | 182 | **`PCC_AST_MALLOC(`**_mgr_**`,`**_size_**`)`** 183 | 184 | The function macro to allocate a memory block. 185 | The pointer to the instance of pcc\_ast_manager_t that was passed to the API function pcc\_create() can be retrieved from the argument _mgr_. 186 | It can be ignored if the instance does not concern memory allocation. 187 | The argument _size_ is the number of bytes to allocate. 188 | This macro must return a pointer to the allocated memory block, or `NULL` if no sufficient memory is available. 189 | 190 | The default is defined as `PCC_MALLOC(mgr, size)`, which is used in the generated parser. 191 | 192 | **`PCC_AST_REALLOC(`**_mgr_**`,`**_ptr_**`,`**_size_**`)`** 193 | 194 | The function macro to reallocate the existing memory block. 195 | The pointer to the instance of pcc\_ast_manager_t that was passed to the API function pcc\_create() can be retrieved from the argument _mgr_. 196 | It can be ignored if the instance does not concern memory allocation. 197 | The argument _ptr_ is the pointer to the previously allocated memory block. 198 | The argument _size_ is the new number of bytes to reallocate. 199 | This macro must return a pointer to the reallocated memory block, or `NULL` if no sufficient memory is available. 200 | The contents of the memory block should be left unchanged in any case even if the reallocation fails. 201 | 202 | The default is defined as `PCC_REALLOC(mgr, ptr, size)`, which is used in the generated parser. 203 | 204 | **`PCC_AST_FREE(`**_mgr_**`,`**_ptr_**`)`** 205 | 206 | The function macro to free the existing memory block. 207 | The pointer to the instance of pcc\_ast_manager_t that was passed to the API function pcc\_create() can be retrieved from the argument _mgr_. 208 | It can be ignored if the instance does not concern memory allocation.
209 | The argument _ptr_ is the pointer to the previously allocated memory block. 210 | This macro need not return a value. 211 | 212 | The default is defined as `PCC_FREE(mgr, ptr)`, which is used in the generated parser. 213 | 214 | **`PCC_AST_NODE_ARRAY_MIN_SIZE`** 215 | 216 | The initial size (the number of nodes) of the node arrays used in AST nodes. 217 | The arrays are expanded as needed. 218 | The default is `4`. 219 | 220 | #### Example 221 | 222 | An example which builds an AST and dumps it is shown here. 223 | This example accepts the same inputs as [*Desktop Calculator*](../../examples/calc.peg). 224 | 225 | ```c 226 | %prefix "calc" 227 | 228 | %value "calc_ast_node_t *" # <-- must be set 229 | 230 | %auxil "calc_ast_manager_t *" # <-- must be set 231 | 232 | %header { 233 | #define CALC_AST_NODE_CUSTOM_DATA_DEFINED /* <-- enables node custom data */ 234 | 235 | typedef struct text_data_tag { /* <-- node custom data type */ 236 | char *text; 237 | } calc_ast_node_custom_data_t; 238 | } 239 | 240 | %source { 241 | #include <stdio.h> 242 | #include <string.h> 243 | } 244 | 245 | statement <- _ e:expression _ EOL { $$ = e; } 246 | / ( !EOL .
)* EOL { $$ = NULL; } 247 | 248 | expression <- e:term { $$ = e; } 249 | 250 | term <- l:term _ '+' _ r:factor { $$ = calc_ast_node__create_2(l, r); $$->custom.text = strdup("+"); } 251 | / l:term _ '-' _ r:factor { $$ = calc_ast_node__create_2(l, r); $$->custom.text = strdup("-"); } 252 | / e:factor { $$ = e; } 253 | 254 | factor <- l:factor _ '*' _ r:unary { $$ = calc_ast_node__create_2(l, r); $$->custom.text = strdup("*"); } 255 | / l:factor _ '/' _ r:unary { $$ = calc_ast_node__create_2(l, r); $$->custom.text = strdup("/"); } 256 | / e:unary { $$ = e; } 257 | 258 | unary <- '+' _ e:unary { $$ = calc_ast_node__create_1(e); $$->custom.text = strdup("+"); } 259 | / '-' _ e:unary { $$ = calc_ast_node__create_1(e); $$->custom.text = strdup("-"); } 260 | / e:primary { $$ = e; } 261 | 262 | primary <- < [0-9]+ > { $$ = calc_ast_node__create_0(); $$->custom.text = strdup($1); } 263 | / '(' _ e:expression _ ')' { $$ = e; } 264 | 265 | _ <- [ \t]* 266 | EOL <- '\n' / '\r\n' / '\r' / ';' 267 | 268 | %import "code/pcc_ast.v3.peg" # <-- provides AST build functions 269 | 270 | %% 271 | void calc_ast_node_custom_data__initialize(calc_ast_manager_t *mgr, calc_ast_node_custom_data_t *obj) { 272 | obj->text = NULL; 273 | } /* <-- must be implemented when enabling node custom data */ 274 | 275 | void calc_ast_node_custom_data__finalize(calc_ast_manager_t *mgr, calc_ast_node_custom_data_t *obj) { 276 | free(obj->text); 277 | } /* <-- must be implemented when enabling node custom data */ 278 | 279 | static void dump_ast(const calc_ast_node_t *obj, int depth) { 280 | if (obj) { 281 | const size_t n = calc_ast_node__get_child_count(obj); 282 | const calc_ast_node_t *const *const p = calc_ast_node__get_child_array(obj); 283 | const calc_ast_node_custom_data_t *const d = &(obj->custom); 284 | const int b = calc_ast_node__is_variadic(obj); 285 | if (b || n <= 3) { 286 | static const char *const arity_name[] = { "nullary", "unary", "binary", "ternary" }; 287 | printf("%*s%s: \"%s\"\n", 
2 * depth, "", b ? "variadic" : arity_name[n], d->text); 288 | { 289 | size_t i; 290 | for (i = 0; i < n; i++) { 291 | dump_ast(p[i], depth + 1); 292 | } 293 | } 294 | } 295 | else { 296 | printf("%*s%s: \"%s\"\n", 2 * depth, "", "(unknown)", d->text); 297 | } 298 | } 299 | else { 300 | printf("%*s(null)\n", 2 * depth, ""); 301 | } 302 | } 303 | 304 | int main(int argc, char **argv) { 305 | calc_ast_manager_t mgr; 306 | calc_ast_manager__initialize(&mgr); 307 | { 308 | calc_context_t *ctx = calc_create(&mgr); 309 | calc_ast_node_t *ast = NULL; 310 | while (calc_parse(ctx, &ast)) { 311 | dump_ast(ast, 0); 312 | calc_ast_node__destroy(&mgr, ast); 313 | } 314 | calc_destroy(ctx); 315 | } 316 | calc_ast_manager__finalize(&mgr); 317 | return 0; 318 | } 319 | ``` 320 | 321 | An execution example is as follows. 322 | 323 | ``` 324 | $ ./ast-calc↵ 325 | 1+2*(3+4*(5+6))↵ 326 | binary: "+" 327 | nullary: "1" 328 | binary: "*" 329 | nullary: "2" 330 | binary: "+" 331 | nullary: "3" 332 | binary: "*" 333 | nullary: "4" 334 | binary: "+" 335 | nullary: "5" 336 | nullary: "6" 337 | 5*6*7*8/(1*2*3*4)↵ 338 | binary: "/" 339 | binary: "*" 340 | binary: "*" 341 | binary: "*" 342 | nullary: "5" 343 | nullary: "6" 344 | nullary: "7" 345 | nullary: "8" 346 | binary: "*" 347 | binary: "*" 348 | binary: "*" 349 | nullary: "1" 350 | nullary: "2" 351 | nullary: "3" 352 | nullary: "4" 353 | ``` 354 | -------------------------------------------------------------------------------- /misc/README.md: -------------------------------------------------------------------------------- 1 | # Miscellaneous Tools 2 | 3 | ## Overview 4 | 5 | In this directory, miscellaneous tools shown below are stored. 6 | 7 | ## Tools 8 | 9 | ### `unicode_general_category.py` 10 | 11 | #### Synopsis 12 | 13 | A Python script to generate a PEG file defining rules to match Unicode characters with the respective _general category properties_. 
14 | It needs internet access for fetching Unicode data from https://www.unicode.org/ . 15 | 16 | The Python module `requests` is required. 17 | 18 | #### Usage 19 | 20 | ~~~sh 21 | $ python unicode_general_category.py > ../import/char/unicode_general_category.peg 22 | ~~~ 23 | 24 | ### `unicode_derived_core.py` 25 | 26 | #### Synopsis 27 | 28 | A Python script to generate a PEG file defining rules to match Unicode characters with the respective _derived core properties_. 29 | It needs internet access for fetching Unicode data from https://www.unicode.org/ . 30 | 31 | The Python module `requests` is required. 32 | 33 | #### Usage 34 | 35 | ~~~sh 36 | $ python unicode_derived_core.py > ../import/char/unicode_derived_core.peg 37 | ~~~ 38 | -------------------------------------------------------------------------------- /misc/unicode_derived_core.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python3 2 | 3 | # Copyright (c) 2024 Arihiro Yoshida. All rights reserved. 4 | # 5 | # Permission is hereby granted, free of charge, to any person obtaining a copy 6 | # of this software and associated documentation files (the "Software"), to deal 7 | # in the Software without restriction, including without limitation the rights 8 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | # copies of the Software, and to permit persons to whom the Software is 10 | # furnished to do so, subject to the following conditions: 11 | # 12 | # The above copyright notice and this permission notice shall be included in 13 | # all copies or substantial portions of the Software. 14 | # 15 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
# IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
# THE SOFTWARE.

import os
import re

ucd_url = 'https://www.unicode.org/Public/UCD/latest/ucd/DerivedCoreProperties.txt'
ucd_dp_list = [  # Grapheme_Link is deprecated; Indic_Conjunct_Break is unsupported.
    'Math',
    'Alphabetic',
    'Lowercase',
    'Uppercase',
    'Cased',
    'Case_Ignorable',
    'Changes_When_Lowercased',
    'Changes_When_Uppercased',
    'Changes_When_Titlecased',
    'Changes_When_Casefolded',
    'Changes_When_Casemapped',
    'ID_Start',
    'ID_Continue',
    'XID_Start',
    'XID_Continue',
    'Default_Ignorable_Code_Point',
    'Grapheme_Extend',
    'Grapheme_Base'
]


def get_unicode_data():
    """Download the file at ``ucd_url`` and return its contents as text.

    Raises whatever ``requests`` raises on a failed transfer, including the
    HTTP error raised by ``raise_for_status()`` for a non-success status.
    """
    # Imported here so that the pure helpers below can be imported and used
    # without the third-party 'requests' package being installed.
    import requests

    res = requests.get(ucd_url, stream=True)
    res.raise_for_status()
    # Join the raw chunks first and decode once: decoding chunk by chunk can
    # fail when a multi-byte UTF-8 sequence straddles a chunk boundary.
    return b''.join(res.iter_content(chunk_size=1024*1024)).decode('utf-8')


def escape_as_utf16_hex(hex):
    """Return the ``\\uXXXX`` escape for the code point given as a hex string.

    Code points above U+FFFF are written as a UTF-16 surrogate pair
    (two consecutive ``\\uXXXX`` escapes).

    Raises:
        ValueError: if ``hex`` is not a hexadecimal number or lies outside
            the range 0..0x10FFFF.
    """
    code = int(hex, 16)
    if not 0 <= code <= 0x10ffff:
        raise ValueError
    if code <= 0xffff:
        return f'\\u{code:04x}'
    high = 0xd800 | ((code - 0x10000) >> 10)
    low = 0xdc00 | (code & 0x3ff)
    return f'\\u{high:04x}\\u{low:04x}'


def _format_range(first, last):
    """Render one inclusive code point range as a character-class fragment."""
    text = escape_as_utf16_hex(first)
    if first != last:
        text += '-' + escape_as_utf16_hex(last)
    return text


def generate_rules(dat):
    """Build the text of the PEG file from the parsed property table.

    ``dat`` maps a property name to a list of ``(first, last)`` pairs of hex
    strings, in ascending order.  One ``Unicode_<property>`` rule is emitted
    for each entry of ``ucd_dp_list`` that is present in ``dat``; ranges that
    directly follow one another are merged into a single range.
    """
    out = (
        '# This file was generated using the script \'' + os.path.basename(__file__) + '\'.\n'
        '\n'
        '# This file is hereby placed in the public domain.\n'
        '#\n'
        # The quotes must be escaped: a bare '' inside a single-quoted literal
        # ends it, and the adjacent pieces are concatenated without the quotes.
        '# THIS SOFTWARE IS PROVIDED BY THE AUTHORS \'\'AS IS\'\' AND ANY EXPRESS\n'
        '# OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED\n'
        '# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE\n'
        '# ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE\n'
        '# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR\n'
        '# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF\n'
        '# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR\n'
        '# BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,\n'
        '# WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE\n'
        '# OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,\n'
        '# EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.\n'
        '\n'
    )
    for dp in ucd_dp_list:
        if dp not in dat:
            continue
        cc = ''
        cs = ''  # first code point of the run being accumulated
        cp = ''  # last code point seen so far
        for c in dat[dp]:
            if cs == '':
                cs = c[0]
            elif int(c[0], 16) - int(cp, 16) != 1:
                # Gap between the previous range and this one: close the run.
                cc += _format_range(cs, cp)
                cs = c[0]
            cp = c[1]
        if cs != '':
            cc += _format_range(cs, cp)
        out += 'Unicode_' + dp + ' <- [' + cc + ']\n'
    return out


def main():
    """Fetch the property data, parse it, and print the generated PEG rules."""
    # Matches '<first>[..<last>] ; <property>' at the start of a line;
    # lines that do not match (comments, blank lines) are skipped.
    pat = re.compile(r'^([0-9a-fA-F]+)(?:\.\.([0-9a-fA-F]+))?\s*;\s*([0-9a-zA-Z_]+)')
    dat = {}
    for ent in get_unicode_data().splitlines():
        res = pat.search(ent)
        if res is None:
            continue
        first, last, pro = res.groups()
        dat.setdefault(pro, []).append((first, first if last is None else last))
    print(generate_rules(dat), end='')


if __name__ == '__main__':
    main()

# --------------------------------------------------------------------------------
# /misc/unicode_general_category.py:
# --------------------------------------------------------------------------------
# 1 | #!/usr/bin/python3
# 2 |
# 3 | # Copyright (c) 2024 Arihiro Yoshida. All rights reserved.
4 | # 5 | # Permission is hereby granted, free of charge, to any person obtaining a copy 6 | # of this software and associated documentation files (the "Software"), to deal 7 | # in the Software without restriction, including without limitation the rights 8 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | # copies of the Software, and to permit persons to whom the Software is 10 | # furnished to do so, subject to the following conditions: 11 | # 12 | # The above copyright notice and this permission notice shall be included in 13 | # all copies or substantial portions of the Software. 14 | # 15 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 21 | # THE SOFTWARE. 
22 | 23 | import os 24 | import requests 25 | import re 26 | 27 | ucd_url = 'https://www.unicode.org/Public/UCD/latest/ucd/UnicodeData.txt' 28 | ucd_gc_list = [ 29 | 'Lu', 'Ll', 'Lt', 'Lm', 'Lo', 'Mn', 'Mc', 'Me', 'Nd', 'Nl', 'No', 30 | 'Pc', 'Pd', 'Ps', 'Pe', 'Pi', 'Pf', 'Po', 'Sm', 'Sc', 'Sk', 'So', 31 | 'Zs', 'Zl', 'Zp', 'Cc', 'Cf', 'Co', 'Cn' 32 | ] 33 | ucd_gc_dict = { 34 | 'Lu': 'Uppercase_Letter', 35 | 'Ll': 'Lowercase_Letter', 36 | 'Lt': 'Titlecase_Letter', 37 | 'Lm': 'Modifier_Letter', 38 | 'Lo': 'Other_Letter', 39 | 'Mn': 'Nonspacing_Mark', 40 | 'Mc': 'Spacing_Mark', 41 | 'Me': 'Enclosing_Mark', 42 | 'Nd': 'Decimal_Number', 43 | 'Nl': 'Letter_Number', 44 | 'No': 'Other_Number', 45 | 'Pc': 'Connector_Punctuation', 46 | 'Pd': 'Dash_Punctuation', 47 | 'Ps': 'Open_Punctuation', 48 | 'Pe': 'Close_Punctuation', 49 | 'Pi': 'Initial_Punctuation', 50 | 'Pf': 'Final_Punctuation', 51 | 'Po': 'Other_Punctuation', 52 | 'Sm': 'Math_Symbol', 53 | 'Sc': 'Currency_Symbol', 54 | 'Sk': 'Modifier_Symbol', 55 | 'So': 'Other_Symbol', 56 | 'Zs': 'Space_Separator', 57 | 'Zl': 'Line_Separator', 58 | 'Zp': 'Paragraph_Separator', 59 | 'Cc': 'Control', 60 | 'Cf': 'Format', 61 | 'Co': 'Private_Use', 62 | 'Cn': 'Unassigned' 63 | } 64 | 65 | def get_unicode_data(): 66 | res = requests.get(ucd_url, stream=True) 67 | res.raise_for_status() 68 | txt = '' 69 | for chunk in res.iter_content(chunk_size=1024*1024): 70 | txt += chunk.decode() 71 | return txt 72 | 73 | def escape_as_utf16_hex(hex): 74 | code = int(hex, 16) 75 | if code > 0x10ffff: 76 | raise ValueError 77 | return f'\\u{code:04x}' if code <= 0xffff else f'\\u{0xd800 | ((code - 0x10000) >> 10):04x}\\u{0xdc00 | (code & 0x3ff):04x}' 78 | 79 | def generate_rules(dat): 80 | str = ( 81 | '# This file was generated using the script \'' + os.path.basename(__file__) + '\'.\n' 82 | '\n' 83 | '# This file is hereby placed in the public domain.\n' 84 | '#\n' 85 | '# THIS SOFTWARE IS PROVIDED BY THE AUTHORS ''AS IS'' AND ANY EXPRESS\n' 86 | 
'# OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED\n' 87 | '# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE\n' 88 | '# ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE\n' 89 | '# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR\n' 90 | '# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF\n' 91 | '# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR\n' 92 | '# BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,\n' 93 | '# WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE\n' 94 | '# OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,\n' 95 | '# EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.\n' 96 | '\n' 97 | 'Unicode_Letter <- Unicode_Cased_Letter / Unicode_Modifier_Letter / Unicode_Other_Letter\n' 98 | 'Unicode_Cased_Letter <- Unicode_Uppercase_Letter / Unicode_Lowercase_Letter / Unicode_Titlecase_Letter\n' 99 | 'Unicode_Mark <- Unicode_Nonspacing_Mark / Unicode_Spacing_Mark / Unicode_Enclosing_Mark\n' 100 | 'Unicode_Number <- Unicode_Decimal_Number / Unicode_Letter_Number / Unicode_Other_Number\n' 101 | 'Unicode_Punctuation <- Unicode_Connector_Punctuation / Unicode_Dash_Punctuation / Unicode_Open_Punctuation / Unicode_Close_Punctuation / Unicode_Initial_Punctuation / Unicode_Final_Punctuation / Unicode_Other_Punctuation\n' 102 | 'Unicode_Symbol <- Unicode_Math_Symbol / Unicode_Currency_Symbol / Unicode_Modifier_Symbol / Unicode_Other_Symbol\n' 103 | 'Unicode_Separator <- Unicode_Space_Separator / Unicode_Line_Separator / Unicode_Paragraph_Separator\n' 104 | 'Unicode_Other <- Unicode_Control / Unicode_Format / Unicode_Private_Use\n' # The category 'Unassigned' is excluded because currently it has no character. 
105 | '\n' 106 | ) 107 | for gc in ucd_gc_list: 108 | if gc not in dat: 109 | continue 110 | cc = '' 111 | cs = '' 112 | cp = '' 113 | for c in dat[gc]: 114 | if cs == '': 115 | cs = c 116 | elif int(c, 16) - int(cp, 16) != 1: 117 | cc += escape_as_utf16_hex(cs) 118 | if cs != cp: 119 | cc += '-' + escape_as_utf16_hex(cp) 120 | cs = c 121 | cp = c 122 | if cs != '': 123 | cc += escape_as_utf16_hex(cs) 124 | if cs != cp: 125 | cc += '-' + escape_as_utf16_hex(cp) 126 | str += 'Unicode_' + ucd_gc_dict[gc] + ' <- [' + cc + ']\n' 127 | return str 128 | 129 | def main(): 130 | pat = re.compile(r'^([0-9a-fA-F]+);[^;]*;([0-9a-zA-Z_]+);') 131 | dat = {} 132 | for ent in get_unicode_data().splitlines(): 133 | res = pat.search(ent) 134 | cat = res.group(2) 135 | if cat not in dat: 136 | dat[cat] = [] 137 | dat[cat] += [res.group(1)] 138 | print(generate_rules(dat), end='') 139 | 140 | if __name__ == '__main__': 141 | main() 142 | -------------------------------------------------------------------------------- /tests/.gitignore: -------------------------------------------------------------------------------- 1 | packcc 2 | packcc.exe 3 | *.gcda 4 | *.gcno 5 | *.gcov 6 | */parser.c 7 | */parser.h 8 | */parser 9 | */parser.exe 10 | */test.bats 11 | -------------------------------------------------------------------------------- /tests/README.md: -------------------------------------------------------------------------------- 1 | # Testing PackCC 2 | 3 | ## How to run the tests 4 | 5 | For running the tests, we assume you have `bats-core` and `uncrustify`. 
6 | 7 | If you do not have `bats-core` installed, you can install it using your package manager or from its tarball: 8 | ``` 9 | $ curl -L -o bats-core-1.2.1.tar.gz https://github.com/bats-core/bats-core/archive/v1.2.1.tar.gz && 10 | tar zxvf bats-core-1.2.1.tar.gz && 11 | cd bats-core-1.2.1 && 12 | sudo ./install.sh /usr/local 13 | ``` 14 | 15 | If you do not have `uncrustify` installed, you can install it using your package manager or from its repository: 16 | ``` 17 | $ git clone https://github.com/uncrustify/uncrustify && 18 | cd uncrustify && 19 | git checkout uncrustify-0.72.0 && 20 | mkdir -p build && 21 | cd build && 22 | cmake .. && 23 | make && 24 | sudo make install 25 | ``` 26 | 27 | When you use MinGW-w64, `cmake` requires the options `-G "MSYS Makefiles" -DCMAKE_INSTALL_PREFIX=/usr/local`. 28 | 29 | After installing `bats-core-1.2.1` and `uncrustify`, you can run the tests using the `tests/test.sh` script: 30 | ``` 31 | $ ./test.sh 32 | ✓ Testing basic.d - generation 33 | ✓ Testing basic.d - compilation 34 | ✓ Testing basic.d - run 35 | ... 36 | ✓ Testing strings.d - generation 37 | ✓ Testing strings.d - compilation 38 | ✓ Testing strings.d - run 39 | ✓ Testing strings.d - run [utf8] 40 | 41 | 19 tests, 0 failures, 1 skipped 42 | ``` 43 | 44 | The script passes all its arguments to `bats`; for example, to run only some tests, 45 | you can call it with `--filter <regex>`. To see all the available arguments, execute `tests/test.sh --help`. 46 | 47 | The behavior of `test.sh` can also be influenced by environment variables: 48 | - `PACKCC` - Path to a compiled `packcc` binary. If unset, the script will compile it before running the tests. 49 | - `CC` - Compiler to use to compile `packcc` (if necessary) and the programs for testing. Defaults to `cc`. 50 | 51 | ## How to write a generic test 52 | 53 | To create a new test, just follow these simple steps: 54 | 55 | 1. Create a directory with a suitable name, e.g.: `tests/sequence.d`. 56 | 2. 
Create a grammar file called `input.peg` in this directory. 57 | 3. Create one or more input files. The files must match the glob pattern `input*.txt`. 58 | 4. Create a file with expected output for each of the inputs. The names must match the input, 59 | just replace "input" with "expected". E.g.: for `input-1.txt`, there must be `expected-1.txt`. 60 | 61 | Each test automatically performs three or more test cases: 62 | 63 | 1. Generates a parser from the `input.peg` grammar. 64 | 2. Compiles the generated parser. 65 | 3. Runs the parser with specified inputs, comparing the outputs with the contents of the respective expected files. 66 | 67 | ## How to write a customized test 68 | 69 | Sometimes the auto-generated test is not exactly what you need. In this case, you can simply create a customized test on your own: 70 | 71 | 1. Create a directory with a suitable name, e.g.: `tests/sequence.d`. 72 | 2. Create one or more `*.bats` files in this directory. 73 | 3. Specify a custom test in the bats file. 74 | 75 | The test script will notice the customized files and will not generate a generic one. 76 | However, you can still reuse as much of the common code as you want simply by loading `tests/utils.sh` 77 | and calling the prepared functions. See [calc.d/calc.bats](calc.d/calc.bats) as an example. 78 | 79 | ## How to skip a test input 80 | 81 | *Note: This paragraph applies only to automatically generated tests. For customized tests, 82 | just add `skip` directive to your* `*.bats` *file as needed.* 83 | 84 | Sometimes it is useful to skip a test input, for example to avoid an input that triggers a known bug 85 | that has not yet been fixed. To do so, simply rename the input file to `input*.skip.txt`. 86 | 87 | If you want to skip all test inputs in the directory, rename the grammar file to `input.skip.peg`. 
88 | -------------------------------------------------------------------------------- /tests/ascii.d/ascii.bats: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bats 2 | 3 | load "$TESTDIR/utils.sh" 4 | 5 | @test "Testing ascii.d - generation" { 6 | PACKCC_OPTS=("--ascii") 7 | test_generate 8 | } 9 | 10 | @test "Testing ascii.d - check code" { 11 | ! in_source "pcc_get_char_as_utf32" 12 | } 13 | 14 | @test "Testing ascii.d - compilation" { 15 | test_compile 16 | } 17 | @test "Testing ascii.d - run" { 18 | run_for_input "ascii.d/input.txt" 19 | } 20 | -------------------------------------------------------------------------------- /tests/ascii.d/expected.txt: -------------------------------------------------------------------------------- 1 | This 2 | is 3 | a 4 | test 5 | -------------------------------------------------------------------------------- /tests/ascii.d/input.peg: -------------------------------------------------------------------------------- 1 | FILE <- WORD / SPACE 2 | WORD <- [^ \r\n\t]+ { PRINT($0); } 3 | SPACE <- [ \r\n\t]+ 4 | -------------------------------------------------------------------------------- /tests/ascii.d/input.txt: -------------------------------------------------------------------------------- 1 | This is a test 2 | -------------------------------------------------------------------------------- /tests/ast-calc.d/ast-calc.bats: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bats 2 | 3 | load "$TESTDIR/utils.sh" 4 | 5 | @test "Testing ast-calc.d - generation" { 6 | test_generate "$ROOTDIR/examples/ast-calc.peg" 7 | } 8 | 9 | @test "Testing ast-calc.d - compilation" { 10 | ${CC:-cc} ast-calc.d/parser.c -o ast-calc.d/parser 11 | } 12 | 13 | @test "Testing ast-calc.d - run" { 14 | run_for_input ast-calc.d/input.txt 15 | } 16 | -------------------------------------------------------------------------------- 
/tests/ast-calc.d/expected.txt: -------------------------------------------------------------------------------- 1 | binary: "+" 2 | nullary: "1" 3 | binary: "*" 4 | nullary: "2" 5 | binary: "+" 6 | nullary: "3" 7 | binary: "*" 8 | nullary: "4" 9 | binary: "+" 10 | nullary: "5" 11 | nullary: "6" 12 | binary: "/" 13 | binary: "*" 14 | binary: "*" 15 | binary: "*" 16 | nullary: "5" 17 | nullary: "6" 18 | nullary: "7" 19 | nullary: "8" 20 | binary: "*" 21 | binary: "*" 22 | binary: "*" 23 | nullary: "1" 24 | nullary: "2" 25 | nullary: "3" 26 | nullary: "4" 27 | -------------------------------------------------------------------------------- /tests/ast-calc.d/input.txt: -------------------------------------------------------------------------------- 1 | 1+2*(3+4*(5+6)) 2 | 5*6*7*8/(1*2*3*4) 3 | -------------------------------------------------------------------------------- /tests/ast-calc.v3.d/ast-calc.bats: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bats 2 | 3 | load "$TESTDIR/utils.sh" 4 | 5 | @test "Testing ast-calc.v3.d - generation" { 6 | test_generate "$ROOTDIR/examples/ast-calc.v3.peg" 7 | } 8 | 9 | @test "Testing ast-calc.v3.d - compilation" { 10 | ${CC:-cc} ast-calc.v3.d/parser.c -o ast-calc.v3.d/parser 11 | } 12 | 13 | @test "Testing ast-calc.v3.d - run" { 14 | run_for_input ast-calc.v3.d/input.txt 15 | } 16 | -------------------------------------------------------------------------------- /tests/ast-calc.v3.d/expected.txt: -------------------------------------------------------------------------------- 1 | binary: "+" 2 | nullary: "1" 3 | binary: "*" 4 | nullary: "2" 5 | binary: "+" 6 | nullary: "3" 7 | binary: "*" 8 | nullary: "4" 9 | binary: "+" 10 | nullary: "5" 11 | nullary: "6" 12 | binary: "/" 13 | binary: "*" 14 | binary: "*" 15 | binary: "*" 16 | nullary: "5" 17 | nullary: "6" 18 | nullary: "7" 19 | nullary: "8" 20 | binary: "*" 21 | binary: "*" 22 | binary: "*" 23 | nullary: "1" 24 | nullary: 
"2" 25 | nullary: "3" 26 | nullary: "4" 27 | -------------------------------------------------------------------------------- /tests/ast-calc.v3.d/input.txt: -------------------------------------------------------------------------------- 1 | 1+2*(3+4*(5+6)) 2 | 5*6*7*8/(1*2*3*4) 3 | -------------------------------------------------------------------------------- /tests/basic.d/expected.txt: -------------------------------------------------------------------------------- 1 | This 2 | is 3 | a 4 | test 5 | -------------------------------------------------------------------------------- /tests/basic.d/input.peg: -------------------------------------------------------------------------------- 1 | FILE <- WORD / SPACE 2 | WORD <- [^ \r\n\t]+ { PRINT($0); } 3 | SPACE <- [ \r\n\t]+ 4 | -------------------------------------------------------------------------------- /tests/basic.d/input.txt: -------------------------------------------------------------------------------- 1 | This is a test 2 | -------------------------------------------------------------------------------- /tests/blank_lines.d/input.peg: -------------------------------------------------------------------------------- 1 | %header { 2 | 3 | 4 | /* HEADER 0 */ 5 | 6 | 7 | } 8 | 9 | %source { 10 | 11 | 12 | /* SOURCE 0 */ 13 | 14 | 15 | } 16 | 17 | %common { 18 | 19 | 20 | /* COMMON 0 */ 21 | 22 | 23 | } 24 | 25 | %earlyheader { 26 | 27 | 28 | /* EARLY HEADER 0 */ 29 | 30 | 31 | } 32 | 33 | %earlysource { 34 | 35 | 36 | /* EARLY SOURCE 0 */ 37 | 38 | 39 | } 40 | 41 | %earlycommon { 42 | 43 | 44 | /* EARLY COMMON 0 */ 45 | 46 | 47 | } 48 | 49 | rule0 <- { 50 | 51 | 52 | /* ACTION 0 */ 53 | 54 | 55 | } { 56 | 57 | 58 | /* ACTION 1 */ 59 | 60 | 61 | } 62 | 63 | %header { 64 | 65 | 66 | /* HEADER 1 */ 67 | 68 | 69 | } 70 | 71 | %source { 72 | 73 | 74 | /* SOURCE 1 */ 75 | 76 | 77 | } 78 | 79 | %common { 80 | 81 | 82 | /* COMMON 1 */ 83 | 84 | 85 | } 86 | 87 | %earlyheader { 88 | 89 | 90 | /* EARLY HEADER 1 */ 91 
| 92 | 93 | } 94 | 95 | %earlysource { 96 | 97 | 98 | /* EARLY SOURCE 1 */ 99 | 100 | 101 | } 102 | 103 | %earlycommon { 104 | 105 | 106 | /* EARLY COMMON 1 */ 107 | 108 | 109 | } 110 | 111 | rule1 <- { 112 | 113 | 114 | /* ACTION 2 */ 115 | 116 | 117 | } 118 | 119 | %% 120 | 121 | 122 | /* FOOTER */ 123 | 124 | 125 | -------------------------------------------------------------------------------- /tests/blank_lines.d/lines.bats: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bats 2 | 3 | load "$TESTDIR/utils.sh" 4 | 5 | is_newline_terminated() { 6 | cat "$BATS_TEST_DIRNAME/$1" | tail -n1 | wc -l | grep 1 7 | } 8 | 9 | get_last_line() { 10 | cat "$BATS_TEST_DIRNAME/$1" | wc -l 11 | # NOTE: The input file must be terminated with a newline character. 12 | } 13 | 14 | extract_line() { 15 | sed -n "$1"p "$BATS_TEST_DIRNAME/$2" 16 | } 17 | 18 | count_blank_lines() { 19 | grep -c $'^[ \t\v\f]*$' 20 | } 21 | 22 | check_if_enclosed_by_blank_lines() { 23 | LINE=$(get_line "$1" "$2") 24 | if [ "$LINE" -gt 1 ]; then 25 | [ "$LINE" -ne 2 ] 26 | [ $(extract_line $((LINE - 1)) "$2" | count_blank_lines) -eq 1 ] 27 | [ $(extract_line $((LINE - 2)) "$2" | count_blank_lines) -eq 0 ] 28 | fi 29 | TAIL=$(get_last_line "$2") 30 | if [ "$LINE" -lt "$TAIL" ]; then 31 | [ "$LINE" -ne $((TAIL - 1)) ] 32 | [ $(extract_line $((LINE + 1)) "$2" | count_blank_lines) -eq 1 ] 33 | [ $(extract_line $((LINE + 2)) "$2" | count_blank_lines) -eq 0 ] 34 | fi 35 | } 36 | 37 | check_if_not_enclosed_by_blank_lines() { 38 | LINE=$(get_line "$1" "$2") 39 | if [ "$LINE" -gt 1 ]; then 40 | [ $(extract_line $((LINE - 1)) "$2" | count_blank_lines) -eq 0 ] 41 | fi 42 | TAIL=$(get_last_line "$2") 43 | if [ "$LINE" -lt "$TAIL" ]; then 44 | [ $(extract_line $((LINE + 1)) "$2" | count_blank_lines) -eq 0 ] 45 | fi 46 | } 47 | 48 | @test "Testing blank_lines.d - generation" { 49 | test_generate 50 | } 51 | 52 | @test "Testing blank_lines.d - header" { 53 | 
is_newline_terminated parser.h 54 | check_if_enclosed_by_blank_lines "HEADER 0" parser.h 55 | check_if_enclosed_by_blank_lines "HEADER 1" parser.h 56 | check_if_enclosed_by_blank_lines "COMMON 0" parser.h 57 | check_if_enclosed_by_blank_lines "COMMON 1" parser.h 58 | check_if_enclosed_by_blank_lines "EARLY HEADER 0" parser.h 59 | check_if_enclosed_by_blank_lines "EARLY HEADER 1" parser.h 60 | check_if_enclosed_by_blank_lines "EARLY COMMON 0" parser.h 61 | check_if_enclosed_by_blank_lines "EARLY COMMON 1" parser.h 62 | } 63 | 64 | @test "Testing blank_lines.d - source" { 65 | is_newline_terminated parser.c 66 | check_if_enclosed_by_blank_lines "SOURCE 0" parser.c 67 | check_if_enclosed_by_blank_lines "SOURCE 1" parser.c 68 | check_if_enclosed_by_blank_lines "COMMON 0" parser.c 69 | check_if_enclosed_by_blank_lines "COMMON 1" parser.c 70 | check_if_enclosed_by_blank_lines "EARLY SOURCE 0" parser.c 71 | check_if_enclosed_by_blank_lines "EARLY SOURCE 1" parser.c 72 | check_if_enclosed_by_blank_lines "EARLY COMMON 0" parser.c 73 | check_if_enclosed_by_blank_lines "EARLY COMMON 1" parser.c 74 | check_if_enclosed_by_blank_lines "FOOTER" parser.c 75 | check_if_not_enclosed_by_blank_lines "ACTION 0" parser.c 76 | check_if_not_enclosed_by_blank_lines "ACTION 1" parser.c 77 | check_if_not_enclosed_by_blank_lines "ACTION 2" parser.c 78 | } 79 | -------------------------------------------------------------------------------- /tests/calc.d/calc.bats: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bats 2 | 3 | load "$TESTDIR/utils.sh" 4 | 5 | @test "Testing calc.d - generation" { 6 | test_generate "$ROOTDIR/examples/calc.peg" 7 | } 8 | 9 | @test "Testing calc.d - compilation" { 10 | ${CC:-cc} calc.d/parser.c -o calc.d/parser 11 | } 12 | 13 | @test "Testing calc.d - run" { 14 | run_for_input calc.d/input.txt 15 | } 16 | -------------------------------------------------------------------------------- /tests/calc.d/expected.txt: 
-------------------------------------------------------------------------------- 1 | answer=2 2 | answer=6 3 | answer=4 4 | answer=-2 5 | -------------------------------------------------------------------------------- /tests/calc.d/input.txt: -------------------------------------------------------------------------------- 1 | 1+1 2 | 2*3 3 | 12 / (1+2) 4 | 24 / -4 / 3 5 | -------------------------------------------------------------------------------- /tests/captures.d/expected.txt: -------------------------------------------------------------------------------- 1 | predicate $6 = 132,137:capt6 2 | predicate $8 = 138,143:capt8 3 | 0,11:capture all 4 | 12,19:capture 5 | 34,40:second 6 | 59,64:words 7 | 83,92:something 8 | 93,97:more 98,106:captures 9 | 116,121:again 10 | action $6 = 132,137:capt6 11 | action $8 = 138,143:capt8 12 | -------------------------------------------------------------------------------- /tests/captures.d/input.peg: -------------------------------------------------------------------------------- 1 | FILE <- (LINE '\n')* 2 | LINE <- ALL / FIRST / SECOND / MULTI1 / MULTI2 / MORE / CAPTURED / ALTERNATE / UNKNOWN 3 | 4 | ALL <- <'capture' _ 'all'> { printf("%d,%d:%s\n", (int)$1s, (int)$1e, $1); } 5 | FIRST <- <'capture'> _ 'first' { printf("%d,%d:%s\n", (int)$1s, (int)$1e, $1); } 6 | SECOND <- 'capture' _ <'second'> { printf("%d,%d:%s\n", (int)$1s, (int)$1e, $1); } 7 | MULTI1 <- 'capture' _ 'multiple1' (_ <[a-z]+>)+ { printf("%d,%d:%s\n", (int)$1s, (int)$1e, $1); } 8 | MULTI2 <- 'capture' _ 'multiple2' (_ <[a-z]+> { printf("%d,%d:%s\n", (int)$1s, (int)$1e, $1); })+ 9 | MORE <- <'more'> _ <'captures'> { printf("%d,%d:%s %d,%d:%s\n", (int)$1s, (int)$1e, $1, (int)$2s, (int)$2e, $2); } 10 | CAPTURED <- "captured" _ <[a-z]+> _ 'and' _ $1 { printf("%d,%d:%s\n", (int)$1s, (int)$1e, $1); } 11 | ALTERNATE <- < 'capt1' > / ( < 'capt2' > _ ( < 'capt3' > / < 'capt4' > ) _ < 'capt5' > / < 'capt6' > _ ( < 'capt7' > / < 'capt8' > 12 | &{ 13 | printf("predicate 
$6 = %d,%d:%s\n", (int)$6s, (int)$6e, $6 ? $6 : "(null)"); 14 | printf("predicate $8 = %d,%d:%s\n", (int)$8s, (int)$8e, $8 ? $8 : "(null)"); 15 | } 16 | { 17 | printf("action $6 = %d,%d:%s\n", (int)$6s, (int)$6e, $6 ? $6 : "(null)"); 18 | printf("action $8 = %d,%d:%s\n", (int)$8s, (int)$8e, $8 ? $8 : "(null)"); 19 | } 20 | ) _ < 'capt9' > ) 21 | 22 | UNKNOWN <- [^\n]+ { printf("ERROR: %s\n", $0); } 23 | _ <- " " 24 | -------------------------------------------------------------------------------- /tests/captures.d/input.txt: -------------------------------------------------------------------------------- 1 | capture all 2 | capture first 3 | capture second 4 | capture multiple1 words 5 | capture multiple2 something 6 | more captures 7 | captured again and again 8 | capt6 capt8 capt9 9 | -------------------------------------------------------------------------------- /tests/character_classes_0.d/expected.txt: -------------------------------------------------------------------------------- 1 | CLASS0: '^' 2 | CLASS0: '-' 3 | CLASS1: '\' 4 | CLASS1: ']' 5 | CLASS2: 'ぬ' 6 | CLASS3: '𝓴' 7 | CLASS4: 'J' 8 | CLASS5: '7' 9 | -------------------------------------------------------------------------------- /tests/character_classes_0.d/input.peg: -------------------------------------------------------------------------------- 1 | FILE <- TOKEN (_ TOKEN)* _* 2 | TOKEN <- CLASS0 / CLASS1 / CLASS2 / CLASS3 / CLASS4 / CLASS5 3 | CLASS0 <- [\^a-z-] { printf("CLASS0: '%s'\n", $0); } 4 | CLASS1 <- [\\\]] { printf("CLASS1: '%s'\n", $0); } 5 | CLASS2 <- [-あ-ん] { printf("CLASS2: '%s'\n", $0); } 6 | CLASS3 <- [𝓪-𝔃-] { printf("CLASS3: '%s'\n", $0); } 7 | CLASS4 <- [^0-9] { printf("CLASS4: '%s'\n", $0); } 8 | CLASS5 <- [^] { printf("CLASS5: '%s'\n", $0); } 9 | _ <- ' ' / '\n' 10 | -------------------------------------------------------------------------------- /tests/character_classes_0.d/input.txt: -------------------------------------------------------------------------------- 1 | ^ - \ 
] ぬ 𝓴 J 7 2 | -------------------------------------------------------------------------------- /tests/character_classes_1.d/expected-utf8.txt: -------------------------------------------------------------------------------- 1 | UNICODE: '€' 2 | UNICODE: '£' 3 | ESCAPED: chr(10) 4 | -------------------------------------------------------------------------------- /tests/character_classes_1.d/expected.txt: -------------------------------------------------------------------------------- 1 | LOWER: 'a' 2 | LOWER: 'b' 3 | LOWER: 'c' 4 | LOWER: 'z' 5 | ESCAPED: chr(10) 6 | UPPER: 'Z' 7 | UPPER: 'Y' 8 | UPPER: 'X' 9 | UPPER: 'A' 10 | ESCAPED: chr(10) 11 | DIGIT: '4' 12 | DIGIT: '5' 13 | DIGIT: '6' 14 | ESCAPED: chr(10) 15 | SPACE: ' ' 16 | ESCAPED: chr(9) 17 | CARET: '^' 18 | OTHER: '_' 19 | OTHER: '-' 20 | OTHER: '@' 21 | SPECIAL: '/' 22 | ESCAPED: '\' 23 | ESCAPED: chr(10) 24 | -------------------------------------------------------------------------------- /tests/character_classes_1.d/input-utf8.txt: -------------------------------------------------------------------------------- 1 | €£ 2 | -------------------------------------------------------------------------------- /tests/character_classes_1.d/input.peg: -------------------------------------------------------------------------------- 1 | %source { 2 | void print_esc(const char* type, const char* ch) { 3 | if (strncmp(type, "UNICODE", 7) == 0) 4 | printf("%s: '%s'\n", type, ch); 5 | else if (*ch >= '\x20' && *ch < '\x7f') 6 | printf("%s: '%c'\n", type, *ch); 7 | else 8 | printf("%s: chr(%u)\n", type, *ch); 9 | } 10 | } 11 | 12 | FILE <- (LOWER_LETTER / UPPER_LETTER / DIGIT / UNICODE / ESCAPED / CARET / SPACE / SPECIAL / OTHER)* 13 | 14 | # simple character class 15 | LOWER_LETTER <- [a-z] { print_esc("LOWER", $0); } 16 | # octal escapes (FIXME: octals do not work) 17 | # UPPER_LETTER <- [\101-\132] { print_esc("UPPER", $0); } 18 | UPPER_LETTER <- [A-Z] { print_esc("UPPER", $0); } 19 | # hexadecimal escapes 20 | 
DIGIT <- [\x30-\x39] { print_esc("DIGIT", $0); } 21 | # some unicode chars 22 | UNICODE <- [\u20AC\u00A3] { print_esc("UNICODE", $0); } 23 | # other ANSI escapes 24 | ESCAPED <- [\t\r\n\\\'\"] { print_esc("ESCAPED", $0); } 25 | # caret 26 | CARET <- [\^] { print_esc("CARET", $0); } 27 | # negated set 28 | SPECIAL <- [^-_@a-zA-Z0-9] { print_esc("SPECIAL", $0); } 29 | # single character class 30 | SPACE <- [ ] { print_esc("SPACE", $0); } 31 | # catch non-matched 32 | OTHER <- . { print_esc("OTHER", $0); } 33 | -------------------------------------------------------------------------------- /tests/character_classes_1.d/input.txt: -------------------------------------------------------------------------------- 1 | abcz 2 | ZYXA 3 | 456 4 | ^_-@/\ 5 | -------------------------------------------------------------------------------- /tests/character_classes_2.d/expected.txt: -------------------------------------------------------------------------------- 1 | CLASS0: 'a-' 2 | CLASS1: 'bb' 3 | CLASS2: 'c+' 4 | CLASS2: 'c-' 5 | CLASS3: 'd+' 6 | CLASS3: 'd-' 7 | CLASS4: 'ee' 8 | CLASS5: 'ff' 9 | -------------------------------------------------------------------------------- /tests/character_classes_2.d/input.peg: -------------------------------------------------------------------------------- 1 | FILE <- TOKEN (_ TOKEN)* _* 2 | TOKEN <- CLASS0 / CLASS1 / CLASS2 / CLASS3 / CLASS4 / CLASS5 3 | CLASS0 <- 'a' [-] { printf("CLASS0: '%s'\n", $0); } 4 | CLASS1 <- 'b' [^-] { printf("CLASS1: '%s'\n", $0); } 5 | CLASS2 <- 'c' [-+] { printf("CLASS2: '%s'\n", $0); } 6 | CLASS3 <- 'd' [+-] { printf("CLASS3: '%s'\n", $0); } 7 | CLASS4 <- 'e' [^-+] { printf("CLASS4: '%s'\n", $0); } 8 | CLASS5 <- 'f' [^+-] { printf("CLASS5: '%s'\n", $0); } 9 | _ <- ' ' / '\n' 10 | -------------------------------------------------------------------------------- /tests/character_classes_2.d/input.txt: -------------------------------------------------------------------------------- 1 | a- bb c+ c- d+ d- ee ff 
2 | -------------------------------------------------------------------------------- /tests/code_generation.d/generation.bats: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bats 2 | 3 | load "$TESTDIR/utils.sh" 4 | 5 | @test "Testing code_generation.d - generation" { 6 | test_generate 7 | } 8 | 9 | @test "Testing code_generation.d - compilation" { 10 | ${CC:-cc} -I "$BATS_TEST_DIRNAME" "$BATS_TEST_DIRNAME/parser.c" "$BATS_TEST_DIRNAME/main.c" -o "$BATS_TEST_DIRNAME/parser" "$@" 11 | } 12 | 13 | @test "Testing code_generation.d - earlyheader" { 14 | in_header "EARLY HEADER ONLY" 15 | ! in_source "EARLY HEADER ONLY" 16 | } 17 | 18 | @test "Testing code_generation.d - earlycommon" { 19 | in_header "EARLY HEADER AND SOURCE" 20 | in_source "EARLY HEADER AND SOURCE" 21 | } 22 | 23 | @test "Testing code_generation.d - earlysource" { 24 | ! in_header "EARLY SOURCE ONLY" 25 | in_source "EARLY SOURCE ONLY" 26 | } 27 | 28 | @test "Testing code_generation.d - header" { 29 | in_header "HEADER ONLY" 30 | ! in_source "HEADER ONLY" 31 | } 32 | 33 | @test "Testing code_generation.d - common" { 34 | in_header "HEADER AND SOURCE" 35 | in_source "HEADER AND SOURCE" 36 | } 37 | 38 | @test "Testing code_generation.d - source" { 39 | ! in_header "custom_function" 40 | in_source "custom_function" 41 | } 42 | 43 | @test "Testing code_generation.d - post-source" { 44 | ! 
in_header "SOURCE AFTER GENERATED CODE" 45 | in_source "SOURCE AFTER GENERATED CODE" 46 | } 47 | 48 | @test "Testing code_generation.d - value" { 49 | in_header "int my_parse(my_context_t *ctx, double *ret)" 50 | in_source "int my_parse(my_context_t *ctx, double *ret)" 51 | } 52 | 53 | @test "Testing code_generation.d - auxil" { 54 | in_header "my_context_t *my_create(long auxil)" 55 | in_source "my_context_t *my_create(long auxil)" 56 | in_source "typedef long pcc_auxil_t" 57 | } 58 | 59 | @test "Testing code_generation.d - prefix" { 60 | in_header "typedef struct my_context_tag my_context_t" 61 | in_header "my_context_t *my_create(long auxil)" 62 | in_header "int my_parse(my_context_t *ctx, double *ret)" 63 | in_header "void my_destroy(my_context_t *ctx)" 64 | 65 | in_source "my_context_t *my_create(long auxil)" 66 | in_source "int my_parse(my_context_t *ctx, double *ret)" 67 | in_source "void my_destroy(my_context_t *ctx)" 68 | 69 | ! in_header "pcc_" 70 | ! in_source "pcc_context_t" 71 | ! in_source "pcc_create" 72 | ! in_source "pcc_parse" 73 | ! 
in_source "pcc_destroy" 74 | } 75 | 76 | @test "Testing code_generation.d - header ordering" { 77 | EARLYHEADER=$(get_line "EARLY HEADER ONLY" parser.h) 78 | EARLYCOMMON=$(get_line "EARLY HEADER AND SOURCE" parser.h) 79 | IFNDEF=$(get_line "#ifndef PCC_INCLUDED_PARSER_H" parser.h) 80 | HEADER=$(get_line "HEADER ONLY" parser.h) 81 | COMMON=$(get_line "HEADER AND SOURCE" parser.h) 82 | API=$(get_line "my_create" parser.h) 83 | 84 | [ "$EARLYHEADER" -lt "$EARLYCOMMON" ] 85 | [ "$EARLYCOMMON" -lt "$IFNDEF" ] 86 | [ "$HEADER" -lt "$COMMON" ] 87 | [ "$COMMON" -lt "$API" ] 88 | } 89 | 90 | @test "Testing code_generation.d - source ordering" { 91 | EARLYSOURCE=$(get_line "EARLY SOURCE ONLY" parser.c) 92 | EARLYCOMMON=$(get_line "EARLY HEADER AND SOURCE" parser.c) 93 | STDINCLUDE=$(get_line '#include ' parser.c) 94 | INCLUDE=$(get_line '#include "parser.h"' parser.c) 95 | SOURCE=$(get_line "custom_function" parser.c) 96 | COMMON=$(get_line "HEADER AND SOURCE" parser.c) 97 | GENERATED_START=$(get_line "#define PCC_BUFFER_MIN_SIZE" parser.c) 98 | GENERATED_END=$(get_line "my_destroy" parser.c) 99 | POST_SOURCE=$(get_line "SOURCE AFTER GENERATED CODE" parser.c) 100 | 101 | [ "$EARLYSOURCE" -lt "$EARLYCOMMON" ] 102 | [ "$EARLYCOMMON" -lt "$STDINCLUDE" ] 103 | [ "$INCLUDE" -lt "$SOURCE" ] 104 | [ "$SOURCE" -lt "$COMMON" ] 105 | [ "$COMMON" -lt "$GENERATED_START" ] 106 | [ "$GENERATED_START" -lt "$GENERATED_END" ] 107 | [ "$GENERATED_END" -lt "$POST_SOURCE" ] 108 | } 109 | -------------------------------------------------------------------------------- /tests/code_generation.d/input.peg: -------------------------------------------------------------------------------- 1 | %header { /* HEADER ONLY */ } 2 | 3 | %source { 4 | int custom_function(void) { 5 | return 42; 6 | } 7 | } 8 | 9 | %common { 10 | /* HEADER AND SOURCE */ 11 | } 12 | 13 | %earlyheader { /* EARLY HEADER ONLY */ } 14 | 15 | %earlysource { /* EARLY SOURCE ONLY */ } 16 | 17 | %earlycommon { 18 | /* EARLY HEADER AND 
SOURCE */ 19 | } 20 | 21 | %value "double" 22 | 23 | %auxil "long" 24 | 25 | %prefix "my" 26 | 27 | FILE <- .* 28 | 29 | %% 30 | 31 | /* SOURCE AFTER GENERATED CODE */ 32 | -------------------------------------------------------------------------------- /tests/code_generation.d/main.c: -------------------------------------------------------------------------------- 1 | #include "parser.h" 2 | 3 | int main(int argc, char **argv) { 4 | double ret; 5 | my_context_t *ctx = my_create(0); 6 | while (my_parse(ctx, &ret)); 7 | my_destroy(ctx); 8 | return 0; 9 | } 10 | -------------------------------------------------------------------------------- /tests/code_indentation.d/code.bats: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bats 2 | 3 | load "$TESTDIR/utils.sh" 4 | 5 | check_output_parser() { 6 | diff --strip-trailing-cr -uN "${1/parser/expected}.txt" --label "${1/parser/expected}" <(grep 'TEST:' < "$1") --label "output" 7 | } 8 | 9 | @test "Testing code_indentation.d - generation" { 10 | test_generate 11 | } 12 | 13 | @test "Testing code_indentation.d - indentation" { 14 | check_output_parser "code_indentation.d/parser.c" 15 | } 16 | -------------------------------------------------------------------------------- /tests/code_indentation.d/expected.c.txt: -------------------------------------------------------------------------------- 1 | TEST:D 0S 2 | TEST:D 1S-0 3 | TEST:D 1S-1 4 | TEST:D 2S-0 5 | TEST:D 2S-1 6 | TEST:D 2S-2 7 | TEST:D 3S-0 8 | TEST:D 3S-1 9 | TEST:D 4S-0 10 | TEST:D 4S-1 11 | TEST:D 4S-2 12 | /* TEST:D 5C */ 13 | /* TEST:D 6C-0 14 | TEST:D 6C-1 */ 15 | /* TEST:D 7C-0 16 | TEST:D 7C-1 17 | TEST:D 7C-2 */ 18 | /* TEST:D 8C-0 19 | TEST:D 8C-1 */ 20 | /* TEST:D 9C-0 21 | TEST:D 9C-1 22 | TEST:D 9C-2 */ 23 | # TEST:D aD-0 24 | TEST:D aS-0 25 | TEST:D bS-0 26 | # TEST:D bD-1 27 | TEST:D bS-1 28 | # TEST:D cD-0 \ 29 | TEST:D cD-1 30 | # TEST:D dD-0 \ 31 | TEST:D dD-1 32 | TEST:D dS-0 33 | TEST:A 0S 34 
| TEST:A 1S-0 35 | TEST:A 1S-1 36 | TEST:A 2S-0 37 | TEST:A 2S-1 38 | TEST:A 2S-2 39 | TEST:A 3S-0 40 | TEST:A 3S-1 41 | TEST:A 4S-0 42 | TEST:A 4S-1 43 | TEST:A 4S-2 44 | /* TEST:A 5C */ 45 | /* TEST:A 6C-0 46 | TEST:A 6C-1 */ 47 | /* TEST:A 7C-0 48 | TEST:A 7C-1 49 | TEST:A 7C-2 */ 50 | /* TEST:A 8C-0 51 | TEST:A 8C-1 */ 52 | /* TEST:A 9C-0 53 | TEST:A 9C-1 54 | TEST:A 9C-2 */ 55 | # TEST:A aD-0 56 | TEST:A aS-0 57 | TEST:A bS-0 58 | # TEST:A bD-0 59 | TEST:A bS-1 60 | # TEST:A cD-0 \ 61 | TEST:A cD-1 62 | # TEST:A dD-0 \ 63 | TEST:A dD-1 64 | TEST:A dS-0 65 | TEST:F 0S-0 66 | # TEST:F 0D-0 \ 67 | TEST:F 0D-1 \ 68 | TEST:F 0D-2 69 | -------------------------------------------------------------------------------- /tests/code_indentation.d/input.peg: -------------------------------------------------------------------------------- 1 | %source { TEST:D 0S } 2 | %source { TEST:D 1S-0 3 | TEST:D 1S-1 } 4 | %source { TEST:D 2S-0 5 | TEST:D 2S-1 6 | TEST:D 2S-2 } 7 | %source { 8 | TEST:D 3S-0 9 | TEST:D 3S-1 10 | } 11 | %source { 12 | TEST:D 4S-0 13 | TEST:D 4S-1 14 | TEST:D 4S-2} 15 | 16 | %source { /* TEST:D 5C */ } 17 | %source { /* TEST:D 6C-0 18 | TEST:D 6C-1 */ } 19 | %source { /* TEST:D 7C-0 20 | TEST:D 7C-1 21 | TEST:D 7C-2 */ } 22 | %source { 23 | /* TEST:D 8C-0 24 | TEST:D 8C-1 */ 25 | } 26 | %source { 27 | /* TEST:D 9C-0 28 | TEST:D 9C-1 29 | TEST:D 9C-2 */} 30 | 31 | %source { # TEST:D aD-0 32 | TEST:D aS-0 } 33 | %source { TEST:D bS-0 34 | # TEST:D bD-1 35 | TEST:D bS-1 } 36 | %source { 37 | # TEST:D cD-0 \ 38 | TEST:D cD-1 39 | } 40 | %source { 41 | # TEST:D dD-0 \ 42 | TEST:D dD-1 43 | TEST:D dS-0} 44 | 45 | rule <- 46 | { TEST:A 0S } 47 | { TEST:A 1S-0 48 | TEST:A 1S-1 } 49 | { TEST:A 2S-0 50 | TEST:A 2S-1 51 | TEST:A 2S-2 } 52 | { 53 | TEST:A 3S-0 54 | TEST:A 3S-1 55 | } 56 | { 57 | TEST:A 4S-0 58 | TEST:A 4S-1 59 | TEST:A 4S-2} 60 | 61 | { /* TEST:A 5C */ } 62 | { /* TEST:A 6C-0 63 | TEST:A 6C-1 */ } 64 | { /* TEST:A 7C-0 65 | TEST:A 7C-1 66 | TEST:A 
7C-2 */ } 67 | { 68 | /* TEST:A 8C-0 69 | TEST:A 8C-1 */ 70 | } 71 | { 72 | /* TEST:A 9C-0 73 | TEST:A 9C-1 74 | TEST:A 9C-2 */} 75 | 76 | { # TEST:A aD-0 77 | TEST:A aS-0 } 78 | { TEST:A bS-0 79 | # TEST:A bD-0 80 | TEST:A bS-1 } 81 | { 82 | # TEST:A cD-0 \ 83 | TEST:A cD-1 84 | } 85 | { 86 | # TEST:A dD-0 \ 87 | TEST:A dD-1 88 | TEST:A dS-0} 89 | 90 | %%TEST:F 0S-0 91 | # TEST:F 0D-0 \ 92 | TEST:F 0D-1 \ 93 | TEST:F 0D-2 94 | -------------------------------------------------------------------------------- /tests/code_line_continuation.d/dump.bats: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bats 2 | 3 | load "$TESTDIR/utils.sh" 4 | 5 | @test "Testing code_line_continuation.d - generation" { 6 | run "$PACKCC" --debug -o "code_line_continuation.d/parser" "code_line_continuation.d/input.peg" 2>&1 7 | check_output "code_line_continuation.d/expected.txt" 8 | } 9 | -------------------------------------------------------------------------------- /tests/code_line_continuation.d/expected.txt: -------------------------------------------------------------------------------- 1 | Rule(name:'rule', ref:0, vars.len:0, capts.len:0, codes.len:1, preds.len:0) { 2 | Sequence(max:2, len:2) { 3 | Charclass(value:'null') 4 | Action(index:0, code:{\n #define AAA /* {\n */ { \\\n aaa \"//\" /*\n {\n {\n */{ \"/*\"\n // bbb \\\n {\\\n {\n }, vars:none) 5 | } 6 | } 7 | value_type: 'int' 8 | auxil_type: 'void *' 9 | prefix: 'pcc' 10 | -------------------------------------------------------------------------------- /tests/code_line_continuation.d/input.peg: -------------------------------------------------------------------------------- 1 | rule 2 | <- . 
3 | { 4 | #define AAA /* { 5 | */ { \ 6 | aaa "//" /* 7 | { 8 | { 9 | */{ "/*" 10 | // bbb \ 11 | {\ 12 | { 13 | } 14 | -------------------------------------------------------------------------------- /tests/debug_macro.d/expected.txt: -------------------------------------------------------------------------------- 1 | Evaluating rule TOP @0 [A] 2 | Evaluating rule RULE_A @0 [A] 3 | Matched rule RULE_A @0 [Aaa] 4 | Evaluating rule EOL @3 [ 5 | ] 6 | Matched rule EOL @3 [ 7 | ] 8 | Matched rule TOP @0 [Aaa 9 | ] 10 | A: Aaa 11 | Evaluating rule TOP @0 [B] 12 | Evaluating rule RULE_A @0 [B] 13 | Abandoning rule RULE_A @0 [] 14 | Evaluating rule RULE_B @0 [B] 15 | Evaluating rule RULE_B1 @0 [B] 16 | Evaluating rule RULE_A @2 [C] 17 | Abandoning rule RULE_A @2 [] 18 | Abandoning rule RULE_B1 @0 [BB] 19 | Evaluating rule RULE_B2 @0 [BBC] 20 | Evaluating rule RULE_C @2 [C] 21 | Evaluating rule RULE_A @5 [b] 22 | Abandoning rule RULE_A @5 [] 23 | Evaluating rule RULE_B @5 [b] 24 | Evaluating rule RULE_B1 @5 [b] 25 | Evaluating rule RULE_A @7 [C] 26 | Abandoning rule RULE_A @7 [] 27 | Abandoning rule RULE_B1 @5 [bb] 28 | Evaluating rule RULE_B2 @5 [bbC] 29 | Evaluating rule RULE_C @7 [C] 30 | Evaluating rule RULE_A @9 [B] 31 | Abandoning rule RULE_A @9 [] 32 | Evaluating rule RULE_B @9 [B] 33 | Evaluating rule RULE_B1 @9 [B] 34 | Evaluating rule RULE_A @13 [ 35 | ] 36 | Abandoning rule RULE_A @13 [] 37 | Abandoning rule RULE_B1 @9 [Bbbb] 38 | Evaluating rule RULE_B2 @9 [Bbbb 39 | ] 40 | Evaluating rule RULE_C @13 [ 41 | ] 42 | Abandoning rule RULE_C @13 [] 43 | Matched rule RULE_B2 @9 [Bbbb] 44 | Matched rule RULE_B @9 [Bbbb] 45 | Matched rule RULE_C @7 [CCBbbb] 46 | Matched rule RULE_B2 @5 [bbCCBbbb] 47 | Matched rule RULE_B @5 [bbCCBbbb] 48 | Matched rule RULE_C @2 [CccbbCCBbbb] 49 | Matched rule RULE_B2 @0 [BBCccbbCCBbbb] 50 | Matched rule RULE_B @0 [BBCccbbCCBbbb] 51 | Evaluating rule EOL @13 [ 52 | ] 53 | Matched rule EOL @13 [ 54 | ] 55 | Matched rule TOP @0 
[BBCccbbCCBbbb 56 | ] 57 | B2: Bbbb 58 | B: Bbbb 59 | C: CCBbbb 60 | B2: bbCCBbbb 61 | B: bbCCBbbb 62 | C: CccbbCCBbbb 63 | B2: BBCccbbCCBbbb 64 | B: BBCccbbCCBbbb 65 | -------------------------------------------------------------------------------- /tests/debug_macro.d/input.peg: -------------------------------------------------------------------------------- 1 | %source { 2 | static const char *dbg_str[] = { "Evaluating rule", "Matched rule", "Abandoning rule" }; 3 | #define PCC_DEBUG(auxil, event, rule, level, pos, buffer, length) \ 4 | fprintf(stdout, "%*s%s %s @%zu [%.*s]\n", (int)((level) * 2), "", dbg_str[event], rule, pos, (int)(length), buffer) 5 | /* NOTE: To guarantee the output order, stderr, which can lead a race condition with stdout, is not used. */ 6 | } 7 | 8 | TOP <- (RULE_A / RULE_B) EOL 9 | RULE_A <- [Aa]+ { PRINT_L("A", $0); } 10 | RULE_B <- RULE_B1 / RULE_B2 { PRINT_L("B", $0); } 11 | RULE_B1 <- [Bb]+ RULE_A { PRINT_L("B1", $0); } 12 | RULE_B2 <- [Bb]+ RULE_C? { PRINT_L("B2", $0); } 13 | RULE_C <- [Cc]+ (RULE_A / RULE_B) { PRINT_L("C", $0); } 14 | EOL <- "\n" 15 | -------------------------------------------------------------------------------- /tests/debug_macro.d/input.txt: -------------------------------------------------------------------------------- 1 | Aaa 2 | BBCccbbCCBbbb 3 | -------------------------------------------------------------------------------- /tests/dump.d/dump.bats: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bats 2 | 3 | load "$TESTDIR/utils.sh" 4 | 5 | @test "Testing dump.d - generation" { 6 | run "$PACKCC" --debug -o "dump.d/parser" "$ROOTDIR/examples/calc.peg" 2>&1 7 | check_output "dump.d/expected.txt" 8 | } 9 | -------------------------------------------------------------------------------- /tests/dump.d/expected.txt: -------------------------------------------------------------------------------- 1 | Rule(name:'statement', ref:0, vars.len:1, capts.len:0, 
codes.len:2, preds.len:0) { 2 | Alternate(max:2, len:2) { 3 | Sequence(max:8, len:5) { 4 | Reference(var:'(null)', index:void, name:'_', rule:'_') 5 | Reference(var:'e', index:0, name:'expression', rule:'expression') 6 | Reference(var:'(null)', index:void, name:'_', rule:'_') 7 | Reference(var:'(null)', index:void, name:'EOL', rule:'EOL') 8 | Action(index:0, code:{ printf(\"answer=%d\\n\", e); }, vars: 9 | 'e' 10 | ) 11 | } 12 | Sequence(max:4, len:3) { 13 | Quantity(min:0, max:-1) { 14 | Sequence(max:2, len:2) { 15 | Predicate(neg:1) { 16 | Reference(var:'(null)', index:void, name:'EOL', rule:'EOL') 17 | } 18 | Charclass(value:'null') 19 | } 20 | } 21 | Reference(var:'(null)', index:void, name:'EOL', rule:'EOL') 22 | Action(index:1, code:{ printf(\"error\\n\"); }, vars:none) 23 | } 24 | } 25 | } 26 | Rule(name:'expression', ref:2, vars.len:1, capts.len:0, codes.len:1, preds.len:0) { 27 | Sequence(max:2, len:2) { 28 | Reference(var:'e', index:0, name:'term', rule:'term') 29 | Action(index:0, code:{ $$ = e; }, vars: 30 | 'e' 31 | ) 32 | } 33 | } 34 | Rule(name:'term', ref:3, vars.len:3, capts.len:0, codes.len:3, preds.len:0) { 35 | Alternate(max:4, len:3) { 36 | Sequence(max:8, len:6) { 37 | Reference(var:'l', index:0, name:'term', rule:'term') 38 | Reference(var:'(null)', index:void, name:'_', rule:'_') 39 | String(value:'+') 40 | Reference(var:'(null)', index:void, name:'_', rule:'_') 41 | Reference(var:'r', index:1, name:'factor', rule:'factor') 42 | Action(index:0, code:{ $$ = l + r; }, vars: 43 | 'l' 44 | 'r' 45 | ) 46 | } 47 | Sequence(max:8, len:6) { 48 | Reference(var:'l', index:0, name:'term', rule:'term') 49 | Reference(var:'(null)', index:void, name:'_', rule:'_') 50 | String(value:'-') 51 | Reference(var:'(null)', index:void, name:'_', rule:'_') 52 | Reference(var:'r', index:1, name:'factor', rule:'factor') 53 | Action(index:1, code:{ $$ = l - r; }, vars: 54 | 'l' 55 | 'r' 56 | ) 57 | } 58 | Sequence(max:2, len:2) { 59 | Reference(var:'e', index:2, 
name:'factor', rule:'factor') 60 | Action(index:2, code:{ $$ = e; }, vars: 61 | 'e' 62 | ) 63 | } 64 | } 65 | } 66 | Rule(name:'factor', ref:5, vars.len:3, capts.len:0, codes.len:3, preds.len:0) { 67 | Alternate(max:4, len:3) { 68 | Sequence(max:8, len:6) { 69 | Reference(var:'l', index:0, name:'factor', rule:'factor') 70 | Reference(var:'(null)', index:void, name:'_', rule:'_') 71 | String(value:'*') 72 | Reference(var:'(null)', index:void, name:'_', rule:'_') 73 | Reference(var:'r', index:1, name:'unary', rule:'unary') 74 | Action(index:0, code:{ $$ = l * r; }, vars: 75 | 'l' 76 | 'r' 77 | ) 78 | } 79 | Sequence(max:8, len:6) { 80 | Reference(var:'l', index:0, name:'factor', rule:'factor') 81 | Reference(var:'(null)', index:void, name:'_', rule:'_') 82 | String(value:'/') 83 | Reference(var:'(null)', index:void, name:'_', rule:'_') 84 | Reference(var:'r', index:1, name:'unary', rule:'unary') 85 | Action(index:1, code:{ $$ = l / r; }, vars: 86 | 'l' 87 | 'r' 88 | ) 89 | } 90 | Sequence(max:2, len:2) { 91 | Reference(var:'e', index:2, name:'unary', rule:'unary') 92 | Action(index:2, code:{ $$ = e; }, vars: 93 | 'e' 94 | ) 95 | } 96 | } 97 | } 98 | Rule(name:'unary', ref:5, vars.len:1, capts.len:0, codes.len:3, preds.len:0) { 99 | Alternate(max:4, len:3) { 100 | Sequence(max:4, len:4) { 101 | String(value:'+') 102 | Reference(var:'(null)', index:void, name:'_', rule:'_') 103 | Reference(var:'e', index:0, name:'unary', rule:'unary') 104 | Action(index:0, code:{ $$ = +e; }, vars: 105 | 'e' 106 | ) 107 | } 108 | Sequence(max:4, len:4) { 109 | String(value:'-') 110 | Reference(var:'(null)', index:void, name:'_', rule:'_') 111 | Reference(var:'e', index:0, name:'unary', rule:'unary') 112 | Action(index:1, code:{ $$ = -e; }, vars: 113 | 'e' 114 | ) 115 | } 116 | Sequence(max:2, len:2) { 117 | Reference(var:'e', index:0, name:'primary', rule:'primary') 118 | Action(index:2, code:{ $$ = e; }, vars: 119 | 'e' 120 | ) 121 | } 122 | } 123 | } 124 | Rule(name:'primary', ref:1, 
vars.len:1, capts.len:1, codes.len:2, preds.len:0) { 125 | Alternate(max:2, len:2) { 126 | Sequence(max:2, len:2) { 127 | Capture(index:0) { 128 | Quantity(min:1, max:-1) { 129 | Charclass(value:'0-9') 130 | } 131 | } 132 | Action(index:0, code:{ $$ = atoi($1); }, vars: 133 | $1 134 | ) 135 | } 136 | Sequence(max:8, len:6) { 137 | String(value:'(') 138 | Reference(var:'(null)', index:void, name:'_', rule:'_') 139 | Reference(var:'e', index:0, name:'expression', rule:'expression') 140 | Reference(var:'(null)', index:void, name:'_', rule:'_') 141 | String(value:')') 142 | Action(index:1, code:{ $$ = e; }, vars: 143 | 'e' 144 | ) 145 | } 146 | } 147 | } 148 | Rule(name:'_', ref:14, vars.len:0, capts.len:0, codes.len:0, preds.len:0) { 149 | Quantity(min:0, max:-1) { 150 | Charclass(value:' \t') 151 | } 152 | } 153 | Rule(name:'EOL', ref:3, vars.len:0, capts.len:0, codes.len:0, preds.len:0) { 154 | Alternate(max:4, len:4) { 155 | String(value:'\n') 156 | String(value:'\r\n') 157 | String(value:'\r') 158 | String(value:';') 159 | } 160 | } 161 | value_type: 'int' 162 | auxil_type: 'void *' 163 | prefix: 'calc' 164 | -------------------------------------------------------------------------------- /tests/error_action.d/expected.txt: -------------------------------------------------------------------------------- 1 | E1 failed: 1B: 2 | E2 failed: 1C:1 3 | E3 failed: 1D:12 4 | One of E[123] has failed: 2B: 5 | One of E[123] has failed: 2C:1 6 | One of E[123] has failed: 2D:12 7 | 1A:123 8 | 2A:123 9 | -------------------------------------------------------------------------------- /tests/error_action.d/input.peg: -------------------------------------------------------------------------------- 1 | FILE <- ((RULE1 / RULE2 / OTHER) '\n')* 2 | 3 | RULE1 <- "1" [A-Z] ":" 4 | E1 ~{ printf("E1 failed: %s\n", $0); } 5 | E2 ~{ printf("E2 failed: %s\n", $0); } 6 | E3 ~{ printf("E3 failed: %s\n", $0); } 7 | { PRINT($0); } 8 | 9 | RULE2 <- "2" [A-Z] ":" 10 | (E1 E2 E3) ~{ printf("One of 
E[123] has failed: %s\n", $0); } 11 | { PRINT($0); } 12 | 13 | E1 <- "1" 14 | E2 <- "2" 15 | E3 <- "3" 16 | 17 | OTHER <- [^\n]+ 18 | -------------------------------------------------------------------------------- /tests/error_action.d/input.txt: -------------------------------------------------------------------------------- 1 | 1A:123 2 | 1B:023 3 | 1C:1?3 4 | 1D:124 5 | 2A:123 6 | 2B:023 7 | 2C:1?3 8 | 2D:124 9 | -------------------------------------------------------------------------------- /tests/escape_sequences.d/expected-hex-0.txt: -------------------------------------------------------------------------------- 1 | PackCC 2 | -------------------------------------------------------------------------------- /tests/escape_sequences.d/expected-unicode-0.txt: -------------------------------------------------------------------------------- 1 | 構文解析 2 | -------------------------------------------------------------------------------- /tests/escape_sequences.d/expected-unicode-1.txt: -------------------------------------------------------------------------------- 1 | 𝓟𝓪𝓬𝓴𝓒𝓒 2 | -------------------------------------------------------------------------------- /tests/escape_sequences.d/input-hex-0.txt: -------------------------------------------------------------------------------- 1 | PackCC 2 | -------------------------------------------------------------------------------- /tests/escape_sequences.d/input-unicode-0.txt: -------------------------------------------------------------------------------- 1 | 構文解析 2 | -------------------------------------------------------------------------------- /tests/escape_sequences.d/input-unicode-1.txt: -------------------------------------------------------------------------------- 1 | 𝓟𝓪𝓬𝓴𝓒𝓒 2 | -------------------------------------------------------------------------------- /tests/escape_sequences.d/input.peg: -------------------------------------------------------------------------------- 1 | FILE <- TOKEN (_ TOKEN)* _* 2 | 
TOKEN <- (HEX0 / UNI0 / UNI1) { PRINT($0); } 3 | HEX0 <- "\x50\x61\x63\x6b\x43\x43" 4 | UNI0 <- "\u69CB\u6587\u89e3\u6790" 5 | UNI1 <- "\uD835\udcdf\uD835\udcea\uD835\udcec\uD835\udcf4\uD835\udcd2\uD835\udcd2" 6 | _ <- ' ' / '\n' 7 | -------------------------------------------------------------------------------- /tests/import.d/.gitignore: -------------------------------------------------------------------------------- 1 | t_*/ 2 | t_*.* 3 | -------------------------------------------------------------------------------- /tests/import.d/check_line_number.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python3 2 | 3 | # Copyright (c) 2024 Arihiro Yoshida. All rights reserved. 4 | # 5 | # Permission is hereby granted, free of charge, to any person obtaining a copy 6 | # of this software and associated documentation files (the "Software"), to deal 7 | # in the Software without restriction, including without limitation the rights 8 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | # copies of the Software, and to permit persons to whom the Software is 10 | # furnished to do so, subject to the following conditions: 11 | # 12 | # The above copyright notice and this permission notice shall be included in 13 | # all copies or substantial portions of the Software. 14 | # 15 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 21 | # THE SOFTWARE. 
import sys
import os
import re


def main():
    """Validate the ``#line`` directive next to a keyword in a text file.

    Command line: ``check_line_number.py [--only-pre] WORD PATH``

    The first line of PATH that contains WORD is located, then:

    * Default mode: the line right after the keyword must be
      ``#line N "FILE"`` where N is the 1-based number of the line that
      follows the directive and FILE is the absolute path of PATH.
    * ``--only-pre`` mode: the line right before the keyword must be
      ``#line N "FILE"``, and line N (1-based) of FILE must contain WORD.

    Diagnostics are printed to stdout. The process exits with status 1 when
    too few arguments are given and status 2 when any check fails; on
    success the function simply returns.

    Note: the consumed arguments are popped from ``sys.argv`` itself.
    """
    args = sys.argv
    if len(args) < 3:
        print('Too few arguments')
        sys.exit(1)
    optp = args.pop(1) if args[1] == '--only-pre' else ''
    if len(args) < 3:
        # The option used up one slot; WORD and PATH are both still required.
        print('Too few arguments')
        sys.exit(1)
    word = args.pop(1)
    path = args.pop(1)
    with open(path, 'r') as file:
        text = file.read().split('\n')
    # for/else: the else branch runs only when no line matched.  Comparing the
    # loop index with len(text) afterwards can never detect that case, because
    # the index stops at len(text) - 1.
    for i, s in enumerate(text):
        if word in s:
            break
    else:
        print('Keyword not found')
        sys.exit(2)
    if optp == '':
        # The directive is expected on the line after the keyword, so the
        # keyword must be neither the first nor the last element.
        if i == 0 or i == len(text) - 1:
            print('Keyword found in invalid line')
            sys.exit(2)
        m = re.search(r'^#line ([0-9]+) "(.*)"$', text[i + 1])
        if m is None:
            print('#line directive not found one line after keyword')
            sys.exit(2)
        # text[i + 1] is 1-based line i + 2; the directive must name the
        # line that follows it, i.e. i + 3.
        if int(m.group(1)) - 1 != i + 2:
            print('#line directive with inconsistent line number')
            sys.exit(2)
        if m.group(2) != os.path.abspath(path):
            print('#line directive with inconsistent file name')
            sys.exit(2)
    else:
        if i == 0:
            print('Keyword found in invalid line')
            sys.exit(2)
        m = re.search(r'^#line ([0-9]+) "(.*)"$', text[i - 1])
        if m is None:
            print('#line directive not found one line before keyword')
            sys.exit(2)
        # Follow the directive into the file it names and make sure the
        # referenced line really carries the keyword.
        with open(m.group(2), 'r') as file:
            text = file.read().split('\n')
        j = int(m.group(1)) - 1
        if j < 0 or j >= len(text):
            print('#line directive with invalid line number')
            sys.exit(2)
        if word not in text[j]:
            print('#line directive with inconsistent line number')
            sys.exit(2)


if __name__ == '__main__':
    main()
# -------------------------------------------------------------------------------- /tests/import.d/make_input.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python3 2 | 3 | # Copyright (c) 2024 Arihiro Yoshida. All rights reserved.
4 | # 5 | # Permission is hereby granted, free of charge, to any person obtaining a copy 6 | # of this software and associated documentation files (the "Software"), to deal 7 | # in the Software without restriction, including without limitation the rights 8 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | # copies of the Software, and to permit persons to whom the Software is 10 | # furnished to do so, subject to the following conditions: 11 | # 12 | # The above copyright notice and this permission notice shall be included in 13 | # all copies or substantial portions of the Software. 14 | # 15 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 21 | # THE SOFTWARE. 
22 | 23 | import sys 24 | import os 25 | import re 26 | # Usage: make_input.py ROOT PATH MORE [IMPORT ...] -- renders ROOT/template.peg into ROOT/PATH, substituting ${ID} (basename of PATH without ".peg", non-identifier characters turned into "_", plus a trailing "_"), ${MORE} and ${IMPORT_0}..${IMPORT_3}; exits with status 1 when fewer than three arguments are given. 27 | def main(): 28 | args = sys.argv 29 | if len(args) < 4: 30 | print('Too few arguments') 31 | sys.exit(1) 32 | root = args.pop(1) 33 | path = args.pop(1) 34 | more = args.pop(1) 35 | os.makedirs(os.path.dirname(root + '/' + path), exist_ok=True) 36 | id = re.sub(r'[^_a-zA-Z0-9]', '_', re.sub(r'\.peg$', '', os.path.basename(path))) 37 | with open(root + '/template.peg', 'r') as file: 38 | text = file.read() 39 | text = text.replace('${ID}', id + '_') 40 | text = text.replace('${MORE}', ' / ' + more + '_FILE' if more != '' else '') 41 | for i in range(4): 42 | imp = args.pop(1) if len(args) > 1 else '' 43 | text = text.replace('${IMPORT_' + str(i) + '}', '%import "' + imp + '"' if imp != '' else '') 44 | with open(root + '/' + path, 'w', newline='\n') as file: 45 | file.write(text) 46 | 47 | if __name__ == '__main__': 48 | main() 49 | -------------------------------------------------------------------------------- /tests/import.d/reference.peg: -------------------------------------------------------------------------------- 1 | %header { /* :t_input_HEADER: */ } 2 | 3 | %source { 4 | /* :t_input_SOURCE: */ 5 | } 6 | 7 | %common { 8 | /* :t_input_COMMON: */ 9 | } 10 | 11 | %earlyheader { /* :t_input_EARLYHEADER: */ } 12 | 13 | %earlysource { /* :t_input_EARLYSOURCE: */ } 14 | 15 | %earlycommon { 16 | /* :t_input_EARLYCOMMON: */ 17 | } 18 | 19 | t_input_FILE 20 | <- t_input_RULE1 21 | { 22 | /* :t_input_RULE1: */ 23 | } 24 | / t_input_RULE2 25 | { /* :t_input_RULE2: */ } 26 | / t_input_RULE3 { /* :t_input_RULE3: */ } 27 | / t_imp_0_FILE 28 | 29 | t_input_RULE1 <- '1' 30 | 31 | %header { /* :t_imp_0_HEADER: */ } 32 | 33 | %source { 34 | /* :t_imp_0_SOURCE: */ 35 | } 36 | 37 | %common { 38 | /* :t_imp_0_COMMON: */ 39 | } 40 | 41 | %earlyheader { /* :t_imp_0_EARLYHEADER: */ } 42 | 43 | %earlysource { /* :t_imp_0_EARLYSOURCE: */ } 44 | 45 | %earlycommon { 46 | /* :t_imp_0_EARLYCOMMON: */ 47 | } 48 | 49 |
t_imp_0_FILE 50 | <- t_imp_0_RULE1 51 | { 52 | /* :t_imp_0_RULE1: */ 53 | } 54 | / t_imp_0_RULE2 55 | { /* :t_imp_0_RULE2: */ } 56 | / t_imp_0_RULE3 { /* :t_imp_0_RULE3: */ } 57 | / t_imp_1_FILE 58 | 59 | t_imp_0_RULE1 <- '1' 60 | 61 | %header { /* :t_imp_1_HEADER: */ } 62 | 63 | %source { 64 | /* :t_imp_1_SOURCE: */ 65 | } 66 | 67 | %common { 68 | /* :t_imp_1_COMMON: */ 69 | } 70 | 71 | %earlyheader { /* :t_imp_1_EARLYHEADER: */ } 72 | 73 | %earlysource { /* :t_imp_1_EARLYSOURCE: */ } 74 | 75 | %earlycommon { 76 | /* :t_imp_1_EARLYCOMMON: */ 77 | } 78 | 79 | t_imp_1_FILE 80 | <- t_imp_1_RULE1 81 | { 82 | /* :t_imp_1_RULE1: */ 83 | } 84 | / t_imp_1_RULE2 85 | { /* :t_imp_1_RULE2: */ } 86 | / t_imp_1_RULE3 { /* :t_imp_1_RULE3: */ } 87 | / t_imp_2_FILE 88 | 89 | t_imp_1_RULE1 <- '1' 90 | 91 | t_imp_1_RULE2 <- '2' 92 | 93 | t_imp_1_RULE3 <- '3' 94 | 95 | %header { /* :t_imp_2_HEADER: */ } 96 | 97 | %source { 98 | /* :t_imp_2_SOURCE: */ 99 | } 100 | 101 | %common { 102 | /* :t_imp_2_COMMON: */ 103 | } 104 | 105 | %earlyheader { /* :t_imp_2_EARLYHEADER: */ } 106 | 107 | %earlysource { /* :t_imp_2_EARLYSOURCE: */ } 108 | 109 | %earlycommon { 110 | /* :t_imp_2_EARLYCOMMON: */ 111 | } 112 | 113 | t_imp_2_FILE 114 | <- t_imp_2_RULE1 115 | { 116 | /* :t_imp_2_RULE1: */ 117 | } 118 | / t_imp_2_RULE2 119 | { /* :t_imp_2_RULE2: */ } 120 | / t_imp_2_RULE3 { /* :t_imp_2_RULE3: */ } 121 | / t_imp_3_FILE 122 | 123 | t_imp_2_RULE1 <- '1' 124 | 125 | %header { /* :t_imp_3_HEADER: */ } 126 | 127 | %source { 128 | /* :t_imp_3_SOURCE: */ 129 | } 130 | 131 | %common { 132 | /* :t_imp_3_COMMON: */ 133 | } 134 | 135 | %earlyheader { /* :t_imp_3_EARLYHEADER: */ } 136 | 137 | %earlysource { /* :t_imp_3_EARLYSOURCE: */ } 138 | 139 | %earlycommon { 140 | /* :t_imp_3_EARLYCOMMON: */ 141 | } 142 | 143 | t_imp_3_FILE 144 | <- t_imp_3_RULE1 145 | { 146 | /* :t_imp_3_RULE1: */ 147 | } 148 | / t_imp_3_RULE2 149 | { /* :t_imp_3_RULE2: */ } 150 | / t_imp_3_RULE3 { /* :t_imp_3_RULE3: */ } 151 | / 
t_imp_4_FILE 152 | 153 | t_imp_3_RULE1 <- '1' 154 | 155 | t_imp_3_RULE2 <- '2' 156 | 157 | t_imp_3_RULE3 <- '3' 158 | 159 | t_imp_2_RULE2 <- '2' 160 | 161 | t_imp_2_RULE3 <- '3' 162 | 163 | t_imp_0_RULE2 <- '2' 164 | 165 | t_imp_0_RULE3 <- '3' 166 | 167 | 168 | t_input_RULE2 <- '2' 169 | 170 | t_input_RULE3 <- '3' 171 | 172 | %header { /* :t_imp_4_HEADER: */ } 173 | 174 | %source { 175 | /* :t_imp_4_SOURCE: */ 176 | } 177 | 178 | %common { 179 | /* :t_imp_4_COMMON: */ 180 | } 181 | 182 | %earlyheader { /* :t_imp_4_EARLYHEADER: */ } 183 | 184 | %earlysource { /* :t_imp_4_EARLYSOURCE: */ } 185 | 186 | %earlycommon { 187 | /* :t_imp_4_EARLYCOMMON: */ 188 | } 189 | 190 | t_imp_4_FILE 191 | <- t_imp_4_RULE1 192 | { 193 | /* :t_imp_4_RULE1: */ 194 | } 195 | / t_imp_4_RULE2 196 | { /* :t_imp_4_RULE2: */ } 197 | / t_imp_4_RULE3 { /* :t_imp_4_RULE3: */ } 198 | / t_imp_5_FILE 199 | 200 | t_imp_4_RULE1 <- '1' 201 | 202 | %header { /* :t_imp_5_HEADER: */ } 203 | 204 | %source { 205 | /* :t_imp_5_SOURCE: */ 206 | } 207 | 208 | %common { 209 | /* :t_imp_5_COMMON: */ 210 | } 211 | 212 | %earlyheader { /* :t_imp_5_EARLYHEADER: */ } 213 | 214 | %earlysource { /* :t_imp_5_EARLYSOURCE: */ } 215 | 216 | %earlycommon { 217 | /* :t_imp_5_EARLYCOMMON: */ 218 | } 219 | 220 | t_imp_5_FILE 221 | <- t_imp_5_RULE1 222 | { 223 | /* :t_imp_5_RULE1: */ 224 | } 225 | / t_imp_5_RULE2 226 | { /* :t_imp_5_RULE2: */ } 227 | / t_imp_5_RULE3 { /* :t_imp_5_RULE3: */ } 228 | / t_imp_6_FILE 229 | 230 | %header { /* :t_imp_6_HEADER: */ } 231 | 232 | %source { 233 | /* :t_imp_6_SOURCE: */ 234 | } 235 | 236 | %common { 237 | /* :t_imp_6_COMMON: */ 238 | } 239 | 240 | %earlyheader { /* :t_imp_6_EARLYHEADER: */ } 241 | 242 | %earlysource { /* :t_imp_6_EARLYSOURCE: */ } 243 | 244 | %earlycommon { 245 | /* :t_imp_6_EARLYCOMMON: */ 246 | } 247 | 248 | t_imp_6_FILE 249 | <- t_imp_6_RULE1 250 | { 251 | /* :t_imp_6_RULE1: */ 252 | } 253 | / t_imp_6_RULE2 254 | { /* :t_imp_6_RULE2: */ } 255 | / t_imp_6_RULE3 { /* 
:t_imp_6_RULE3: */ } 256 | / t_imp_7_FILE 257 | 258 | t_imp_6_RULE1 <- '1' 259 | 260 | t_imp_6_RULE2 <- '2' 261 | 262 | t_imp_6_RULE3 <- '3' 263 | 264 | t_imp_5_RULE1 <- '1' 265 | 266 | %header { /* :t_imp_7_HEADER: */ } 267 | 268 | %source { 269 | /* :t_imp_7_SOURCE: */ 270 | } 271 | 272 | %common { 273 | /* :t_imp_7_COMMON: */ 274 | } 275 | 276 | %earlyheader { /* :t_imp_7_EARLYHEADER: */ } 277 | 278 | %earlysource { /* :t_imp_7_EARLYSOURCE: */ } 279 | 280 | %earlycommon { 281 | /* :t_imp_7_EARLYCOMMON: */ 282 | } 283 | 284 | t_imp_7_FILE 285 | <- t_imp_7_RULE1 286 | { 287 | /* :t_imp_7_RULE1: */ 288 | } 289 | / t_imp_7_RULE2 290 | { /* :t_imp_7_RULE2: */ } 291 | / t_imp_7_RULE3 { /* :t_imp_7_RULE3: */ } 292 | / t_imp_8_FILE 293 | 294 | %header { /* :t_imp_8_HEADER: */ } 295 | 296 | %source { 297 | /* :t_imp_8_SOURCE: */ 298 | } 299 | 300 | %common { 301 | /* :t_imp_8_COMMON: */ 302 | } 303 | 304 | %earlyheader { /* :t_imp_8_EARLYHEADER: */ } 305 | 306 | %earlysource { /* :t_imp_8_EARLYSOURCE: */ } 307 | 308 | %earlycommon { 309 | /* :t_imp_8_EARLYCOMMON: */ 310 | } 311 | 312 | t_imp_8_FILE 313 | <- t_imp_8_RULE1 314 | { 315 | /* :t_imp_8_RULE1: */ 316 | } 317 | / t_imp_8_RULE2 318 | { /* :t_imp_8_RULE2: */ } 319 | / t_imp_8_RULE3 { /* :t_imp_8_RULE3: */ } 320 | / t_imp_9_FILE 321 | 322 | t_imp_8_RULE1 <- '1' 323 | 324 | %header { /* :t_imp_9_HEADER: */ } 325 | 326 | %source { 327 | /* :t_imp_9_SOURCE: */ 328 | } 329 | 330 | %common { 331 | /* :t_imp_9_COMMON: */ 332 | } 333 | 334 | %earlyheader { /* :t_imp_9_EARLYHEADER: */ } 335 | 336 | %earlysource { /* :t_imp_9_EARLYSOURCE: */ } 337 | 338 | %earlycommon { 339 | /* :t_imp_9_EARLYCOMMON: */ 340 | } 341 | 342 | t_imp_9_FILE 343 | <- t_imp_9_RULE1 344 | { 345 | /* :t_imp_9_RULE1: */ 346 | } 347 | / t_imp_9_RULE2 348 | { /* :t_imp_9_RULE2: */ } 349 | / t_imp_9_RULE3 { /* :t_imp_9_RULE3: */ } 350 | / t_imp_a_FILE 351 | 352 | t_imp_9_RULE1 <- '1' 353 | 354 | t_imp_9_RULE2 <- '2' 355 | 356 | t_imp_9_RULE3 <- '3' 357 
| 358 | t_imp_8_RULE2 <- '2' 359 | 360 | t_imp_8_RULE3 <- '3' 361 | 362 | t_imp_7_RULE1 <- '1' 363 | 364 | %header { /* :t_imp_a_HEADER: */ } 365 | 366 | %source { 367 | /* :t_imp_a_SOURCE: */ 368 | } 369 | 370 | %common { 371 | /* :t_imp_a_COMMON: */ 372 | } 373 | 374 | %earlyheader { /* :t_imp_a_EARLYHEADER: */ } 375 | 376 | %earlysource { /* :t_imp_a_EARLYSOURCE: */ } 377 | 378 | %earlycommon { 379 | /* :t_imp_a_EARLYCOMMON: */ 380 | } 381 | 382 | t_imp_a_FILE 383 | <- t_imp_a_RULE1 384 | { 385 | /* :t_imp_a_RULE1: */ 386 | } 387 | / t_imp_a_RULE2 388 | { /* :t_imp_a_RULE2: */ } 389 | / t_imp_a_RULE3 { /* :t_imp_a_RULE3: */ } 390 | / t_imp_b_FILE 391 | 392 | t_imp_a_RULE1 <- '1' 393 | 394 | %header { /* :t_imp_b_HEADER: */ } 395 | 396 | %source { 397 | /* :t_imp_b_SOURCE: */ 398 | } 399 | 400 | %common { 401 | /* :t_imp_b_COMMON: */ 402 | } 403 | 404 | %earlyheader { /* :t_imp_b_EARLYHEADER: */ } 405 | 406 | %earlysource { /* :t_imp_b_EARLYSOURCE: */ } 407 | 408 | %earlycommon { 409 | /* :t_imp_b_EARLYCOMMON: */ 410 | } 411 | 412 | t_imp_b_FILE 413 | <- t_imp_b_RULE1 414 | { 415 | /* :t_imp_b_RULE1: */ 416 | } 417 | / t_imp_b_RULE2 418 | { /* :t_imp_b_RULE2: */ } 419 | / t_imp_b_RULE3 { /* :t_imp_b_RULE3: */ } 420 | / t_imp_c_FILE 421 | 422 | %header { /* :t_imp_c_HEADER: */ } 423 | 424 | %source { 425 | /* :t_imp_c_SOURCE: */ 426 | } 427 | 428 | %common { 429 | /* :t_imp_c_COMMON: */ 430 | } 431 | 432 | %earlyheader { /* :t_imp_c_EARLYHEADER: */ } 433 | 434 | %earlysource { /* :t_imp_c_EARLYSOURCE: */ } 435 | 436 | %earlycommon { 437 | /* :t_imp_c_EARLYCOMMON: */ 438 | } 439 | 440 | t_imp_c_FILE 441 | <- t_imp_c_RULE1 442 | { 443 | /* :t_imp_c_RULE1: */ 444 | } 445 | / t_imp_c_RULE2 446 | { /* :t_imp_c_RULE2: */ } 447 | / t_imp_c_RULE3 { /* :t_imp_c_RULE3: */ } 448 | / t_imp_d_FILE 449 | 450 | t_imp_c_RULE1 <- '1' 451 | 452 | t_imp_c_RULE2 <- '2' 453 | 454 | t_imp_c_RULE3 <- '3' 455 | 456 | t_imp_b_RULE1 <- '1' 457 | 458 | %header { /* :t_imp_d_HEADER: */ } 459 
| 460 | %source { 461 | /* :t_imp_d_SOURCE: */ 462 | } 463 | 464 | %common { 465 | /* :t_imp_d_COMMON: */ 466 | } 467 | 468 | %earlyheader { /* :t_imp_d_EARLYHEADER: */ } 469 | 470 | %earlysource { /* :t_imp_d_EARLYSOURCE: */ } 471 | 472 | %earlycommon { 473 | /* :t_imp_d_EARLYCOMMON: */ 474 | } 475 | 476 | t_imp_d_FILE 477 | <- t_imp_d_RULE1 478 | { 479 | /* :t_imp_d_RULE1: */ 480 | } 481 | / t_imp_d_RULE2 482 | { /* :t_imp_d_RULE2: */ } 483 | / t_imp_d_RULE3 { /* :t_imp_d_RULE3: */ } 484 | 485 | t_imp_d_RULE1 <- '1' 486 | 487 | t_imp_d_RULE2 <- '2' 488 | 489 | t_imp_d_RULE3 <- '3' 490 | 491 | t_imp_b_RULE2 <- '2' 492 | 493 | t_imp_b_RULE3 <- '3' 494 | 495 | t_imp_a_RULE2 <- '2' 496 | 497 | t_imp_a_RULE3 <- '3' 498 | 499 | t_imp_7_RULE2 <- '2' 500 | 501 | t_imp_7_RULE3 <- '3' 502 | 503 | t_imp_5_RULE2 <- '2' 504 | 505 | t_imp_5_RULE3 <- '3' 506 | 507 | t_imp_4_RULE2 <- '2' 508 | 509 | t_imp_4_RULE3 <- '3' 510 | 511 | %% 512 | /* :t_imp_3_CODE: */ 513 | /* :t_imp_2_CODE: */ 514 | /* :t_imp_1_CODE: */ 515 | /* :t_imp_0_CODE: */ 516 | /* :t_imp_6_CODE: */ 517 | /* :t_imp_9_CODE: */ 518 | /* :t_imp_8_CODE: */ 519 | /* :t_imp_c_CODE: */ 520 | /* :t_imp_d_CODE: */ 521 | /* :t_imp_b_CODE: */ 522 | /* :t_imp_a_CODE: */ 523 | /* :t_imp_7_CODE: */ 524 | /* :t_imp_5_CODE: */ 525 | /* :t_imp_4_CODE: */ 526 | /* :t_input_CODE: */ 527 | -------------------------------------------------------------------------------- /tests/import.d/template.peg: -------------------------------------------------------------------------------- 1 | %header { /* :${ID}HEADER: */ } 2 | 3 | %source { 4 | /* :${ID}SOURCE: */ 5 | } 6 | 7 | %common { 8 | /* :${ID}COMMON: */ 9 | } 10 | 11 | %earlyheader { /* :${ID}EARLYHEADER: */ } 12 | 13 | %earlysource { /* :${ID}EARLYSOURCE: */ } 14 | 15 | %earlycommon { 16 | /* :${ID}EARLYCOMMON: */ 17 | } 18 | 19 | ${ID}FILE 20 | <- ${ID}RULE1 21 | { 22 | /* :${ID}RULE1: */ 23 | } 24 | / ${ID}RULE2 25 | { /* :${ID}RULE2: */ } 26 | / ${ID}RULE3 { /* :${ID}RULE3: */ } 
27 | ${MORE} 28 | ${IMPORT_0} 29 | ${ID}RULE1 <- '1' 30 | ${IMPORT_1} 31 | ${ID}RULE2 <- '2' 32 | ${IMPORT_2} 33 | ${ID}RULE3 <- '3' 34 | ${IMPORT_3} 35 | 36 | %% 37 | /* :${ID}CODE: */ 38 | -------------------------------------------------------------------------------- /tests/import_char.d/import_char.bats: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bats 2 | 3 | load "$TESTDIR/utils.sh" 4 | 5 | @test "Testing import_char.d - generation" { 6 | run test_generate 7 | [ "$status" -eq 0 ] 8 | } 9 | -------------------------------------------------------------------------------- /tests/import_char.d/input.peg: -------------------------------------------------------------------------------- 1 | %import "char/ascii_character_group.peg" 2 | %import "char/unicode_general_category.peg" 3 | %import "char/unicode_derived_core.peg" 4 | -------------------------------------------------------------------------------- /tests/issue_28.d/expected.txt: -------------------------------------------------------------------------------- 1 | A; 2 | 3 | Syntax error 4 | -------------------------------------------------------------------------------- /tests/issue_28.d/input.peg: -------------------------------------------------------------------------------- 1 | %source { 2 | #define PCC_ERROR(auxil) \ 3 | do { printf("Syntax error\n"); exit(1); } while (0) 4 | /* NOTE: To guarantee the output order, stderr, which can lead to a race condition with stdout, is not used. 
*/ 5 | } 6 | 7 | top <- rule+ { PRINT($0); } 8 | rule <- "A;" / _ 9 | _ <- [ \n]* 10 | -------------------------------------------------------------------------------- /tests/issue_28.d/input.txt: -------------------------------------------------------------------------------- 1 | A; 2 | A 3 | -------------------------------------------------------------------------------- /tests/issue_78.d/input.peg: -------------------------------------------------------------------------------- 1 | main <- ( "A" 2 | -------------------------------------------------------------------------------- /tests/issue_78.d/issue_78.bats: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bats 2 | 3 | load "$TESTDIR/utils.sh" 4 | 5 | @test "Testing issue_78.d - generation" { 6 | run test_generate 7 | [ "$status" -eq 10 ] 8 | [[ "$output" =~ ': input.peg:1:1: Illegal rule syntax'$ ]] 9 | } 10 | -------------------------------------------------------------------------------- /tests/lines.d/input.peg: -------------------------------------------------------------------------------- 1 | %header { /* :HEADER: */ } 2 | 3 | %source { 4 | /* :SOURCE: */ 5 | } 6 | 7 | %common { 8 | /* :COMMON: */ 9 | } 10 | 11 | %earlyheader { /* :EARLYHEADER: */ } 12 | 13 | %earlysource { /* :EARLYSOURCE: */ } 14 | 15 | %earlycommon { 16 | /* :EARLYCOMMON: */ 17 | } 18 | 19 | FILE 20 | <- RULE1 21 | { 22 | /* :RULE1: */ 23 | } 24 | / RULE2 25 | { /* :RULE2: */ } 26 | / RULE3 { /* :RULE3: */ } 27 | 28 | RULE1 <- '1' 29 | RULE2 <- '2' 30 | RULE3 <- '3' 31 | 32 | %% 33 | /* :CODE: */ 34 | -------------------------------------------------------------------------------- /tests/lines.d/lines.bats: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bats 2 | 3 | load "$TESTDIR/utils.sh" 4 | 5 | @test "Testing lines.d - generation" { 6 | PACKCC_OPTS=("--lines") 7 | test_generate 8 | } 9 | 10 | @test "Testing lines.d - header" { 
11 | in_header ":EARLYHEADER:" 12 | in_header ":EARLYCOMMON:" 13 | in_header ":HEADER:" 14 | in_header ":COMMON:" 15 | 16 | ! in_header ":EARLYSOURCE:" || false # a bare '!' never fails a bats test (negated commands are exempt from errexit); '|| false' makes the check fatal 17 | ! in_header ":SOURCE:" || false 18 | ! in_header ":CODE:" || false 19 | ! in_header ":RULE1:" || false 20 | ! in_header ":RULE2:" || false 21 | ! in_header ":RULE3:" || false 22 | 23 | EARLYHEADER_I=$(get_line ":EARLYHEADER:" input.peg) 24 | EARLYHEADER_O=$(get_line ":EARLYHEADER:" parser.h) 25 | EARLYHEADER_S=$(get_line "^#line $EARLYHEADER_I \"input.peg\"" parser.h) 26 | EARLYHEADER_E=$(get_line "^#line $(($EARLYHEADER_O + 2)) \"parser.h\"" parser.h) 27 | [ "$EARLYHEADER_S" -eq $((EARLYHEADER_O - 1)) ] 28 | [ "$EARLYHEADER_E" -eq $((EARLYHEADER_O + 1)) ] 29 | 30 | EARLYCOMMON_I=$(get_line ":EARLYCOMMON:" input.peg) 31 | EARLYCOMMON_O=$(get_line ":EARLYCOMMON:" parser.h) 32 | EARLYCOMMON_S=$(get_line "^#line $EARLYCOMMON_I \"input.peg\"" parser.h) 33 | EARLYCOMMON_E=$(get_line "^#line $(($EARLYCOMMON_O + 2)) \"parser.h\"" parser.h) 34 | [ "$EARLYCOMMON_S" -eq $((EARLYCOMMON_O - 1)) ] 35 | [ "$EARLYCOMMON_E" -eq $((EARLYCOMMON_O + 1)) ] 36 | 37 | HEADER_I=$(get_line ":HEADER:" input.peg) 38 | HEADER_O=$(get_line ":HEADER:" parser.h) 39 | HEADER_S=$(get_line "^#line $HEADER_I \"input.peg\"" parser.h) 40 | HEADER_E=$(get_line "^#line $(($HEADER_O + 2)) \"parser.h\"" parser.h) 41 | [ "$HEADER_S" -eq $((HEADER_O - 1)) ] 42 | [ "$HEADER_E" -eq $((HEADER_O + 1)) ] 43 | 44 | COMMON_I=$(get_line ":COMMON:" input.peg) 45 | COMMON_O=$(get_line ":COMMON:" parser.h) 46 | COMMON_S=$(get_line "^#line $COMMON_I \"input.peg\"" parser.h) 47 | COMMON_E=$(get_line "^#line $(($COMMON_O + 2)) \"parser.h\"" parser.h) 48 | [ "$COMMON_S" -eq $((COMMON_O - 1)) ] 49 | [ "$COMMON_E" -eq $((COMMON_O + 1)) ] 50 | } 51 | 52 | @test "Testing lines.d - source" { 53 | in_source ":EARLYSOURCE:" 54 | in_source ":EARLYCOMMON:" 55 | in_source ":SOURCE:" 56 | in_source ":COMMON:" 57 | in_source ":CODE:" 58 | in_source ":RULE1:" 59 | in_source ":RULE2:" 60 | in_source ":RULE3:" 61 | 62 | ! 
in_source ":EARLYHEADER:" || false # '|| false' turns the otherwise-ignored negated result into a real test failure 63 | ! in_source ":HEADER:" || false 64 | 65 | EARLYSOURCE_I=$(get_line ":EARLYSOURCE:" input.peg) 66 | EARLYSOURCE_O=$(get_line ":EARLYSOURCE:" parser.c) 67 | EARLYSOURCE_S=$(get_line "^#line $EARLYSOURCE_I \"input.peg\"" parser.c) 68 | EARLYSOURCE_E=$(get_line "^#line $(($EARLYSOURCE_O + 2)) \"parser.c\"" parser.c) 69 | [ "$EARLYSOURCE_S" -eq $((EARLYSOURCE_O - 1)) ] 70 | [ "$EARLYSOURCE_E" -eq $((EARLYSOURCE_O + 1)) ] 71 | 72 | EARLYCOMMON_I=$(get_line ":EARLYCOMMON:" input.peg) 73 | EARLYCOMMON_O=$(get_line ":EARLYCOMMON:" parser.c) 74 | EARLYCOMMON_S=$(get_line "^#line $EARLYCOMMON_I \"input.peg\"" parser.c) 75 | EARLYCOMMON_E=$(get_line "^#line $(($EARLYCOMMON_O + 2)) \"parser.c\"" parser.c) 76 | [ "$EARLYCOMMON_S" -eq $((EARLYCOMMON_O - 1)) ] 77 | [ "$EARLYCOMMON_E" -eq $((EARLYCOMMON_O + 1)) ] 78 | 79 | SOURCE_I=$(get_line ":SOURCE:" input.peg) 80 | SOURCE_O=$(get_line ":SOURCE:" parser.c) 81 | SOURCE_S=$(get_line "^#line $SOURCE_I \"input.peg\"" parser.c) 82 | SOURCE_E=$(get_line "^#line $(($SOURCE_O + 2)) \"parser.c\"" parser.c) 83 | [ "$SOURCE_S" -eq $((SOURCE_O - 1)) ] 84 | [ "$SOURCE_E" -eq $((SOURCE_O + 1)) ] 85 | 86 | COMMON_I=$(get_line ":COMMON:" input.peg) 87 | COMMON_O=$(get_line ":COMMON:" parser.c) 88 | COMMON_S=$(get_line "^#line $COMMON_I \"input.peg\"" parser.c) 89 | COMMON_E=$(get_line "^#line $(($COMMON_O + 2)) \"parser.c\"" parser.c) 90 | [ "$COMMON_S" -eq $((COMMON_O - 1)) ] 91 | [ "$COMMON_E" -eq $((COMMON_O + 1)) ] 92 | 93 | RULE1_I=$(get_line ":RULE1:" input.peg) 94 | RULE1_O=$(get_line ":RULE1:" parser.c) 95 | RULE1_S=$(get_line "^#line $RULE1_I \"input.peg\"" parser.c) 96 | RULE1_E=$(get_line "^#line $(($RULE1_O + 2)) \"parser.c\"" parser.c) 97 | [ "$RULE1_S" -eq $((RULE1_O - 1)) ] 98 | [ "$RULE1_E" -eq $((RULE1_O + 1)) ] 99 | 100 | RULE2_I=$(get_line ":RULE2:" input.peg) 101 | RULE2_O=$(get_line ":RULE2:" parser.c) 102 | RULE2_S=$(get_line "^#line $RULE2_I \"input.peg\"" parser.c) 103 | RULE2_E=$(get_line "^#line 
$(($RULE2_O + 2)) \"parser.c\"" parser.c) 104 | [ "$RULE2_S" -eq $((RULE2_O - 1)) ] 105 | [ "$RULE2_E" -eq $((RULE2_O + 1)) ] 106 | 107 | RULE3_I=$(get_line ":RULE3:" input.peg) 108 | RULE3_O=$(get_line ":RULE3:" parser.c) 109 | RULE3_S=$(get_line "^#line $RULE3_I \"input.peg\"" parser.c) 110 | RULE3_E=$(get_line "^#line $(($RULE3_O + 2)) \"parser.c\"" parser.c) 111 | [ "$RULE3_S" -eq $((RULE3_O - 1)) ] 112 | [ "$RULE3_E" -eq $((RULE3_O + 1)) ] 113 | 114 | CODE_I=$(get_line ":CODE:" input.peg) 115 | CODE_O=$(get_line ":CODE:" parser.c) 116 | CODE_S=$(get_line "^#line $CODE_I \"input.peg\"" parser.c) 117 | [ "$CODE_S" -eq $((CODE_O - 1)) ] 118 | } 119 | -------------------------------------------------------------------------------- /tests/main.c: -------------------------------------------------------------------------------- 1 | #include <stdio.h> 2 | 3 | #include "parser.h" 4 | 5 | #define PRINT(X) printf("%s\n", X); 6 | #define PRINT_L(LBL, X) printf("%s: %s\n", LBL, X); 7 | 8 | #ifndef RET_TYPE 9 | #define RET_TYPE int 10 | #endif 11 | 12 | #include "parser.c" 13 | 14 | int main(int argc, char **argv) { 15 | RET_TYPE ret; 16 | pcc_context_t *ctx = pcc_create(NULL); 17 | while (pcc_parse(ctx, &ret)); 18 | pcc_destroy(ctx); 19 | return 0; 20 | } 21 | -------------------------------------------------------------------------------- /tests/negative_predicate.d/expected.txt: -------------------------------------------------------------------------------- 1 | FOLLOWED BY SPACE: second 2 | BEGINS WITH UPPER: LINE 3 | FOLLOWED BY SPACE: third 4 | BEGINS WITH UPPER: Fourth 5 | -------------------------------------------------------------------------------- /tests/negative_predicate.d/input.peg: -------------------------------------------------------------------------------- 1 | FILE <- (BEGINS_WITH_UPPER / FOLLOWED_BY_SPACE / .)* 2 | 3 | BEGINS_WITH_UPPER <- ![a-z] [a-zA-Z]+ { printf("BEGINS WITH UPPER: %s\n", $0); } 4 | FOLLOWED_BY_SPACE <- [a-zA-Z]+ ![a-zA-Z0-9\n] { 
printf("FOLLOWED BY SPACE: %s\n", $0); } 5 | -------------------------------------------------------------------------------- /tests/negative_predicate.d/input.txt: -------------------------------------------------------------------------------- 1 | first123 2 | second LINE 3 | third line 4 | Fourth line 5 | -------------------------------------------------------------------------------- /tests/position.d/expected.txt: -------------------------------------------------------------------------------- 1 | NUMBER AT START POSITION: 12345 2 | NUMBER NOT AT START POSITION: 67890 3 | -------------------------------------------------------------------------------- /tests/position.d/input.peg: -------------------------------------------------------------------------------- 1 | FILE <- (NUMBER_AT_START_POSITION / NUMBER_NOT_AT_START_POSITION / .)* 2 | 3 | NUMBER_AT_START_POSITION 4 | <- ^ ( [0] / [1-9][0-9]+ ) { printf("NUMBER AT START POSITION: %s\n", $0); } 5 | NUMBER_NOT_AT_START_POSITION 6 | <- !^ ( [0] / [1-9][0-9]+ ) { printf("NUMBER NOT AT START POSITION: %s\n", $0); } 7 | -------------------------------------------------------------------------------- /tests/position.d/input.txt: -------------------------------------------------------------------------------- 1 | 12345 67890 2 | -------------------------------------------------------------------------------- /tests/positive_predicate.d/expected.txt: -------------------------------------------------------------------------------- 1 | FOLLOWED BY SPACE: second 2 | BEGINS WITH UPPER: LINE 3 | FOLLOWED BY SPACE: third 4 | BEGINS WITH UPPER: Fourth 5 | -------------------------------------------------------------------------------- /tests/positive_predicate.d/input.peg: -------------------------------------------------------------------------------- 1 | FILE <- (BEGINS_WITH_UPPER / FOLLOWED_BY_SPACE / .)* 2 | 3 | BEGINS_WITH_UPPER <- &[A-Z] [a-zA-Z]+ { printf("BEGINS WITH UPPER: %s\n", $0); } 4 | FOLLOWED_BY_SPACE <- 
[a-zA-Z]+ &" " { printf("FOLLOWED BY SPACE: %s\n", $0); } 5 | -------------------------------------------------------------------------------- /tests/positive_predicate.d/input.txt: -------------------------------------------------------------------------------- 1 | first123 2 | second LINE 3 | third line 4 | Fourth line 5 | -------------------------------------------------------------------------------- /tests/programmable_predicate.d/expected.txt: -------------------------------------------------------------------------------- 1 | rule_count_n: @count = 1 2 | rule_count_n: @count = 2 3 | rule_count_n: @count = 3 4 | rule_count_n: @count = 4 5 | rule_count_p: @count = 5 6 | rule_count_p: @count = 6 7 | rule_count_p: @count = 7 8 | rule_chars_0_p: @chars_l = 1, @chars_r = 0 9 | rule_chars_0_p: @chars_l = 1, @chars_r = 1 10 | rule_chars_0_n: @chars_l = 1, @chars_r = 2 11 | rule_chars_0_n: @chars_l = 1, @chars_r = 3 12 | rule_chars_0_p: @chars_l = 2, @chars_r = 0 13 | rule_chars_0_p: @chars_l = 2, @chars_r = 1 14 | rule_chars_0_p: @chars_l = 2, @chars_r = 2 15 | rule_chars_0_n: @chars_l = 2, @chars_r = 3 16 | rule_chars_0_p: @chars_l = 3, @chars_r = 0 17 | rule_chars_0_p: @chars_l = 3, @chars_r = 1 18 | rule_chars_0_p: @chars_l = 3, @chars_r = 2 19 | rule_chars_0_p: @chars_l = 3, @chars_r = 3 20 | rule_chars_1_p: @chars_l = A, @chars_r = A 21 | rule_chars_1_n: @chars_l = A, @chars_r = AB 22 | rule_chars_1_n: @chars_l = A, @chars_r = ABC 23 | rule_chars_1_n: @chars_l = AB, @chars_r = A 24 | rule_chars_1_p: @chars_l = AB, @chars_r = AB 25 | rule_chars_1_n: @chars_l = AB, @chars_r = ABC 26 | rule_chars_1_n: @chars_l = ABC, @chars_r = A 27 | rule_chars_1_n: @chars_l = ABC, @chars_r = AB 28 | rule_chars_1_p: @chars_l = ABC, @chars_r = ABC 29 | -------------------------------------------------------------------------------- /tests/programmable_predicate.d/input.peg: -------------------------------------------------------------------------------- 1 | %source { 2 | #include <string.h>
3 | } 4 | 5 | %marker @count @chars_l @chars_r 6 | 7 | rule <- ( rule_count_p / rule_count_n / rule_chars_0_p / rule_chars_0_n / rule_chars_1_p / rule_chars_1_n / rule_other ) EOL 8 | 9 | rule_count_p 10 | <- &{ @count = 0; } ( '%' &{ @count++; } )+ &{ @@ = (@count >= 5); } 11 | { 12 | printf("rule_count_p: @count = %d\n", (int)@count); 13 | } 14 | rule_count_n 15 | <- &{ @count = 0; } ( '%' &{ @count++; } )+ !{ @@ = (@count >= 5); } 16 | { 17 | printf("rule_count_n: @count = %d\n", (int)@count); 18 | } 19 | 20 | rule_chars_0_p 21 | <- &{ @chars_l = 0; } ( '#' &{ @chars_l++; } )+ ( !EOL [^#] )+ &{ @chars_r = 0; } ( '#' &{ @chars_r++; } )* &{ @@ = (@chars_l >= @chars_r); } 22 | { 23 | printf("rule_chars_0_p: @chars_l = %d, @chars_r = %d\n", (int)@chars_l, (int)@chars_r); 24 | } 25 | rule_chars_0_n 26 | <- &{ @chars_l = 0; } ( '#' &{ @chars_l++; } )+ ( !EOL [^#] )+ &{ @chars_r = 0; } ( '#' &{ @chars_r++; } )* !{ @@ = (@chars_l >= @chars_r); } 27 | { 28 | printf("rule_chars_0_n: @chars_l = %d, @chars_r = %d\n", (int)@chars_l, (int)@chars_r); 29 | } 30 | 31 | rule_chars_1_p 32 | <- &{ @chars_l.set_string(""); } ( < [A-Z] > &{ @chars_l.append_string($1); } )+ ' '* < [A-Z]+ > &{ @chars_r.set_string($2); } &{ @@ = (strcmp(@chars_l.get_string(), @chars_r.get_string()) == 0); } 33 | { 34 | printf("rule_chars_1_p: @chars_l = %s, @chars_r = %s\n", @chars_l.get_string(), @chars_r.get_string()); 35 | } 36 | rule_chars_1_n 37 | <- &{ @chars_l.set_string(""); } ( < [A-Z] > &{ @chars_l.append_string($1); } )+ ' '* < [A-Z]+ > &{ @chars_r.set_string($2); } !{ @@ = (strcmp(@chars_l.get_string(), @chars_r.get_string()) == 0); } 38 | { 39 | printf("rule_chars_1_n: @chars_l = %s, @chars_r = %s\n", @chars_l.get_string(), @chars_r.get_string()); 40 | } 41 | 42 | rule_other 43 | <- .* 44 | { 45 | printf("rule_other: %s\n", $0); 46 | } 47 | 48 | EOL <- '\n' / '\r\n' / '\r' 49 | -------------------------------------------------------------------------------- 
/tests/programmable_predicate.d/input.txt: -------------------------------------------------------------------------------- 1 | % 2 | %% 3 | %%% 4 | %%%% 5 | %%%%% 6 | %%%%%% 7 | %%%%%%% 8 | # abc 9 | # abc # 10 | # abc ## 11 | # abc ### 12 | ## abc 13 | ## abc # 14 | ## abc ## 15 | ## abc ### 16 | ### abc 17 | ### abc # 18 | ### abc ## 19 | ### abc ### 20 | A A 21 | A AB 22 | A ABC 23 | AB A 24 | AB AB 25 | AB ABC 26 | ABC A 27 | ABC AB 28 | ABC ABC 29 | -------------------------------------------------------------------------------- /tests/quantifiers.d/expected-optional-repeatable.txt: -------------------------------------------------------------------------------- 1 | * 2 | *C 3 | *CC 4 | *CCCCCCCCCCCCCCCCCC 5 | -------------------------------------------------------------------------------- /tests/quantifiers.d/expected-optional.txt: -------------------------------------------------------------------------------- 1 | ? 2 | ?A 3 | -------------------------------------------------------------------------------- /tests/quantifiers.d/expected-repeatable.txt: -------------------------------------------------------------------------------- 1 | +B 2 | +BB 3 | +BBBBBBBBBBB 4 | -------------------------------------------------------------------------------- /tests/quantifiers.d/input-optional-repeatable.txt: -------------------------------------------------------------------------------- 1 | * 2 | *C 3 | *CC 4 | *CCCCCCCCCCCCCCCCCC 5 | -------------------------------------------------------------------------------- /tests/quantifiers.d/input-optional.txt: -------------------------------------------------------------------------------- 1 | ? 
2 | ?A 3 | -------------------------------------------------------------------------------- /tests/quantifiers.d/input-repeatable.txt: -------------------------------------------------------------------------------- 1 | +B 2 | +BB 3 | +BBBBBBBBBBB 4 | -------------------------------------------------------------------------------- /tests/quantifiers.d/input.peg: -------------------------------------------------------------------------------- 1 | FILE <- (LINE '\n')* 2 | LINE <- (REPEATABLE_OPTIONAL / OPTIONAL / REPEATABLE) { PRINT($0); } 3 | 4 | OPTIONAL <- "?" "A"? 5 | REPEATABLE <- "+" "B"+ 6 | REPEATABLE_OPTIONAL <- "*" "C"* 7 | -------------------------------------------------------------------------------- /tests/strings.d/expected-escapes.txt: -------------------------------------------------------------------------------- 1 | \'" =€ 2 | -------------------------------------------------------------------------------- /tests/strings.d/expected-utf8.txt: -------------------------------------------------------------------------------- 1 | Hello 2 | world 3 | 😊 4 | -------------------------------------------------------------------------------- /tests/strings.d/expected.txt: -------------------------------------------------------------------------------- 1 | Hello 2 | world 3 | -------------------------------------------------------------------------------- /tests/strings.d/input-escapes.txt: -------------------------------------------------------------------------------- 1 | \'" =€ 2 | -------------------------------------------------------------------------------- /tests/strings.d/input-utf8.txt: -------------------------------------------------------------------------------- 1 | Hello world 😊 2 | -------------------------------------------------------------------------------- /tests/strings.d/input.peg: -------------------------------------------------------------------------------- 1 | FILE <- TOKEN (_ TOKEN)* _* 2 | TOKEN <- (HELLO / WORLD / SMILEY / 
ESCAPES) { PRINT($0); } 3 | HELLO <- "Hello" 4 | WORLD <- 'world' 5 | SMILEY <- "😊" 6 | ESCAPES <- '\\\'\"\a\b\f\t\v\x3d\u20AC' 7 | _ <- ' ' / '\r\n' / '\n' 8 | -------------------------------------------------------------------------------- /tests/strings.d/input.txt: -------------------------------------------------------------------------------- 1 | Hello world 2 | -------------------------------------------------------------------------------- /tests/style.d/style.bats: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bats 2 | 3 | load "$TESTDIR/utils.sh" 4 | 5 | check_uncrustify_version() { 6 | version="$(uncrustify --version)" 7 | major="$(echo "$version" | cut -d. -f1 | grep -oE '[0-9]+$')" 8 | minor="$(echo "$version" | cut -d. -f2)" 9 | [ "$major" -gt 0 ] || { [ "$major" -eq 0 ] && [ "$minor" -ge 72 ]; } 10 | } 11 | 12 | test_style() { 13 | if ! command -v "uncrustify" &> /dev/null; then 14 | skip "uncrustify is not installed" 15 | elif ! 
check_uncrustify_version &> /dev/null; then 16 | skip "uncrustify is too old (minimal required version is 0.72.0)" 17 | else 18 | run uncrustify -q -c "$TESTDIR/uncrustify.cfg" -f "$1" 19 | [ "$status" -eq 0 ] 20 | diff --strip-trailing-cr -uN "$1" --label "$1" <(echo "$output") --label "formatted" 21 | fi 22 | } 23 | 24 | @test "Testing style.d - sources" { 25 | for file in "$ROOTDIR"/*/*.c; do 26 | test_style "$file" 27 | done 28 | } 29 | 30 | @test "Testing style.d - generated" { 31 | for file in "$ROOTDIR"/examples/*.peg; do 32 | test_generate "$file" 33 | test_style "style.d/parser.h" 34 | test_style "style.d/parser.c" 35 | done 36 | } 37 | -------------------------------------------------------------------------------- /tests/substitution.d/expected.h.txt: -------------------------------------------------------------------------------- 1 | TEST EARLYHEADER $$ " $$ " /* $$ */ // $$ 2 | TEST EARLYHEADER $0 " $0 " /* $0 */ // $0 3 | TEST EARLYHEADER $0s " $0s " /* $0s */ // $0s 4 | TEST EARLYHEADER $0e " $0e " /* $0e */ // $0e 5 | TEST EARLYHEADER $123 " $123 " /* $123 */ // $123 6 | TEST EARLYHEADER $123s " $123s " /* $123s */ // $123s 7 | TEST EARLYHEADER $123e " $123e " /* $123e */ // $123e 8 | TEST EARLYHEADER foo1 " foo1 " /* foo1 */ // foo1 9 | TEST EARLYHEADER FOO1 " FOO1 " /* FOO1 */ // FOO1 10 | TEST EARLYHEADER ${bar} " ${bar} " /* ${bar} */ // ${bar} 11 | TEST EARLYHEADER ${prefix} " ${prefix} " /* ${prefix} */ // ${prefix} 12 | TEST EARLYHEADER ${PREFIX} " ${PREFIX} " /* ${PREFIX} */ // ${PREFIX} 13 | TEST EARLYHEADER @@ " @@ " /* @@ */ // @@ 14 | TEST EARLYHEADER @var " @var " /* @var */ // @var 15 | TEST EARLYHEADER @var.get_string() " @var.get_string() " /* @var.get_string() */ // @var.get_string() 16 | TEST EARLYHEADER @var.set_string($0) " @var.set_string($0) " /* @var.set_string($0) */ // @var.set_string($0) 17 | TEST EARLYHEADER @var.append_string($0) " @var.append_string($0) " /* @var.append_string($0) */ // @var.append_string($0) 18 | TEST 
EARLYHEADER @var.save() " @var.save() " /* @var.save() */ // @var.save() 19 | TEST EARLYHEADER @var.restore() " @var.restore() " /* @var.restore() */ // @var.restore() 20 | TEST EARLYHEADER @baz " @baz " /* @baz */ // @baz 21 | TEST EARLYHEADER @baz.get_string() " @baz.get_string() " /* @baz.get_string() */ // @baz.get_string() 22 | TEST EARLYHEADER @baz.set_string($0) " @baz.set_string($0) " /* @baz.set_string($0) */ // @baz.set_string($0) 23 | TEST EARLYHEADER @baz.append_string($0) " @baz.append_string($0) " /* @baz.append_string($0) */ // @baz.append_string($0) 24 | TEST EARLYHEADER @baz.save() " @baz.save() " /* @baz.save() */ // @baz.save() 25 | TEST EARLYHEADER @baz.restore() " @baz.restore() " /* @baz.restore() */ // @baz.restore() 26 | TEST EARLYHEADER @var " @var " /* @var */ // @var 27 | TEST EARLYHEADER @var.get_string() " @var.get_string() " /* @var.get_string() */ // @var.get_string() 28 | TEST EARLYHEADER @var.set_string($0) " @var.set_string($0) " /* @var.set_string($0) */ // @var.set_string($0) 29 | TEST EARLYHEADER @var.append_string($0)" @var.append_string($0)" /* @var.append_string($0)*/ // @var.append_string($0) 30 | TEST EARLYHEADER @var.save() " @var.save() " /* @var.save() */ // @var.save() 31 | TEST EARLYHEADER @var.restore() " @var.restore() " /* @var.restore() */ // @var.restore() 32 | TEST EARLYCOMMON $$ " $$ " /* $$ */ // $$ 33 | TEST EARLYCOMMON $0 " $0 " /* $0 */ // $0 34 | TEST EARLYCOMMON $0s " $0s " /* $0s */ // $0s 35 | TEST EARLYCOMMON $0e " $0e " /* $0e */ // $0e 36 | TEST EARLYCOMMON $123 " $123 " /* $123 */ // $123 37 | TEST EARLYCOMMON $123s " $123s " /* $123s */ // $123s 38 | TEST EARLYCOMMON $123e " $123e " /* $123e */ // $123e 39 | TEST EARLYCOMMON foo1 " foo1 " /* foo1 */ // foo1 40 | TEST EARLYCOMMON FOO1 " FOO1 " /* FOO1 */ // FOO1 41 | TEST EARLYCOMMON ${bar} " ${bar} " /* ${bar} */ // ${bar} 42 | TEST EARLYCOMMON ${prefix} " ${prefix} " /* ${prefix} */ // ${prefix} 43 | TEST EARLYCOMMON ${PREFIX} " ${PREFIX} " /* 
${PREFIX} */ // ${PREFIX} 44 | TEST EARLYCOMMON @@ " @@ " /* @@ */ // @@ 45 | TEST EARLYCOMMON @var " @var " /* @var */ // @var 46 | TEST EARLYCOMMON @var.get_string() " @var.get_string() " /* @var.get_string() */ // @var.get_string() 47 | TEST EARLYCOMMON @var.set_string($0) " @var.set_string($0) " /* @var.set_string($0) */ // @var.set_string($0) 48 | TEST EARLYCOMMON @var.append_string($0) " @var.append_string($0) " /* @var.append_string($0) */ // @var.append_string($0) 49 | TEST EARLYCOMMON @var.save() " @var.save() " /* @var.save() */ // @var.save() 50 | TEST EARLYCOMMON @var.restore() " @var.restore() " /* @var.restore() */ // @var.restore() 51 | TEST EARLYCOMMON @baz " @baz " /* @baz */ // @baz 52 | TEST EARLYCOMMON @baz.get_string() " @baz.get_string() " /* @baz.get_string() */ // @baz.get_string() 53 | TEST EARLYCOMMON @baz.set_string($0) " @baz.set_string($0) " /* @baz.set_string($0) */ // @baz.set_string($0) 54 | TEST EARLYCOMMON @baz.append_string($0) " @baz.append_string($0) " /* @baz.append_string($0) */ // @baz.append_string($0) 55 | TEST EARLYCOMMON @baz.save() " @baz.save() " /* @baz.save() */ // @baz.save() 56 | TEST EARLYCOMMON @baz.restore() " @baz.restore() " /* @baz.restore() */ // @baz.restore() 57 | TEST EARLYCOMMON @var " @var " /* @var */ // @var 58 | TEST EARLYCOMMON @var.get_string() " @var.get_string() " /* @var.get_string() */ // @var.get_string() 59 | TEST EARLYCOMMON @var.set_string($0) " @var.set_string($0) " /* @var.set_string($0) */ // @var.set_string($0) 60 | TEST EARLYCOMMON @var.append_string($0)" @var.append_string($0)" /* @var.append_string($0)*/ // @var.append_string($0) 61 | TEST EARLYCOMMON @var.save() " @var.save() " /* @var.save() */ // @var.save() 62 | TEST EARLYCOMMON @var.restore() " @var.restore() " /* @var.restore() */ // @var.restore() 63 | TEST HEADER $$ " $$ " /* $$ */ // $$ 64 | TEST HEADER $0 " $0 " /* $0 */ // $0 65 | TEST HEADER $0s " $0s " /* $0s */ // $0s 66 | TEST HEADER $0e " $0e " /* $0e */ // $0e 67 | 
TEST HEADER $123 " $123 " /* $123 */ // $123 68 | TEST HEADER $123s " $123s " /* $123s */ // $123s 69 | TEST HEADER $123e " $123e " /* $123e */ // $123e 70 | TEST HEADER foo1 " foo1 " /* foo1 */ // foo1 71 | TEST HEADER FOO1 " FOO1 " /* FOO1 */ // FOO1 72 | TEST HEADER ${bar} " ${bar} " /* ${bar} */ // ${bar} 73 | TEST HEADER ${prefix} " ${prefix} " /* ${prefix} */ // ${prefix} 74 | TEST HEADER ${PREFIX} " ${PREFIX} " /* ${PREFIX} */ // ${PREFIX} 75 | TEST HEADER @@ " @@ " /* @@ */ // @@ 76 | TEST HEADER @var " @var " /* @var */ // @var 77 | TEST HEADER @var.get_string() " @var.get_string() " /* @var.get_string() */ // @var.get_string() 78 | TEST HEADER @var.set_string($0) " @var.set_string($0) " /* @var.set_string($0) */ // @var.set_string($0) 79 | TEST HEADER @var.append_string($0) " @var.append_string($0) " /* @var.append_string($0) */ // @var.append_string($0) 80 | TEST HEADER @var.save() " @var.save() " /* @var.save() */ // @var.save() 81 | TEST HEADER @var.restore() " @var.restore() " /* @var.restore() */ // @var.restore() 82 | TEST HEADER @baz " @baz " /* @baz */ // @baz 83 | TEST HEADER @baz.get_string() " @baz.get_string() " /* @baz.get_string() */ // @baz.get_string() 84 | TEST HEADER @baz.set_string($0) " @baz.set_string($0) " /* @baz.set_string($0) */ // @baz.set_string($0) 85 | TEST HEADER @baz.append_string($0) " @baz.append_string($0) " /* @baz.append_string($0) */ // @baz.append_string($0) 86 | TEST HEADER @baz.save() " @baz.save() " /* @baz.save() */ // @baz.save() 87 | TEST HEADER @baz.restore() " @baz.restore() " /* @baz.restore() */ // @baz.restore() 88 | TEST HEADER @var " @var " /* @var */ // @var 89 | TEST HEADER @var.get_string() " @var.get_string() " /* @var.get_string() */ // @var.get_string() 90 | TEST HEADER @var.set_string($0) " @var.set_string($0) " /* @var.set_string($0) */ // @var.set_string($0) 91 | TEST HEADER @var.append_string($0)" @var.append_string($0)" /* @var.append_string($0)*/ // @var.append_string($0) 92 | TEST HEADER 
@var.save() " @var.save() " /* @var.save() */ // @var.save() 93 | TEST HEADER @var.restore() " @var.restore() " /* @var.restore() */ // @var.restore() 94 | TEST COMMON $$ " $$ " /* $$ */ // $$ 95 | TEST COMMON $0 " $0 " /* $0 */ // $0 96 | TEST COMMON $0s " $0s " /* $0s */ // $0s 97 | TEST COMMON $0e " $0e " /* $0e */ // $0e 98 | TEST COMMON $123 " $123 " /* $123 */ // $123 99 | TEST COMMON $123s " $123s " /* $123s */ // $123s 100 | TEST COMMON $123e " $123e " /* $123e */ // $123e 101 | TEST COMMON foo1 " foo1 " /* foo1 */ // foo1 102 | TEST COMMON FOO1 " FOO1 " /* FOO1 */ // FOO1 103 | TEST COMMON ${bar} " ${bar} " /* ${bar} */ // ${bar} 104 | TEST COMMON ${prefix} " ${prefix} " /* ${prefix} */ // ${prefix} 105 | TEST COMMON ${PREFIX} " ${PREFIX} " /* ${PREFIX} */ // ${PREFIX} 106 | TEST COMMON @@ " @@ " /* @@ */ // @@ 107 | TEST COMMON @var " @var " /* @var */ // @var 108 | TEST COMMON @var.get_string() " @var.get_string() " /* @var.get_string() */ // @var.get_string() 109 | TEST COMMON @var.set_string($0) " @var.set_string($0) " /* @var.set_string($0) */ // @var.set_string($0) 110 | TEST COMMON @var.append_string($0) " @var.append_string($0) " /* @var.append_string($0) */ // @var.append_string($0) 111 | TEST COMMON @var.save() " @var.save() " /* @var.save() */ // @var.save() 112 | TEST COMMON @var.restore() " @var.restore() " /* @var.restore() */ // @var.restore() 113 | TEST COMMON @baz " @baz " /* @baz */ // @baz 114 | TEST COMMON @baz.get_string() " @baz.get_string() " /* @baz.get_string() */ // @baz.get_string() 115 | TEST COMMON @baz.set_string($0) " @baz.set_string($0) " /* @baz.set_string($0) */ // @baz.set_string($0) 116 | TEST COMMON @baz.append_string($0) " @baz.append_string($0) " /* @baz.append_string($0) */ // @baz.append_string($0) 117 | TEST COMMON @baz.save() " @baz.save() " /* @baz.save() */ // @baz.save() 118 | TEST COMMON @baz.restore() " @baz.restore() " /* @baz.restore() */ // @baz.restore() 119 | TEST COMMON @var " @var " /* @var */ // @var 
120 | TEST COMMON @var.get_string() " @var.get_string() " /* @var.get_string() */ // @var.get_string() 121 | TEST COMMON @var.set_string($0) " @var.set_string($0) " /* @var.set_string($0) */ // @var.set_string($0) 122 | TEST COMMON @var.append_string($0)" @var.append_string($0)" /* @var.append_string($0)*/ // @var.append_string($0) 123 | TEST COMMON @var.save() " @var.save() " /* @var.save() */ // @var.save() 124 | TEST COMMON @var.restore() " @var.restore() " /* @var.restore() */ // @var.restore() 125 | -------------------------------------------------------------------------------- /tests/substitution.d/subst.bats: --------------------------------------------------------------------------------
#!/usr/bin/env bats

load "$TESTDIR/utils.sh"

# Compare the "TEST ..." lines of a generated parser file with its fixture.
#
#   $1  path of the generated file, e.g. "substitution.d/parser.h"
#
# The fixture path is derived from $1 by replacing the first "parser" with
# "expected" and appending ".txt" (parser.h -> expected.h.txt). Only lines
# that consist of optional leading spaces followed by "TEST " are compared.
check_output_parser() {
    local generated="$1"
    local fixture="${generated/parser/expected}"

    diff --strip-trailing-cr -uN \
        "${fixture}.txt" --label "${fixture}" \
        <(grep '^ *TEST ' < "$generated") --label "output"
}

# Generate parser.h / parser.c from this directory's input.peg.
@test "Testing substitution.d - generation" {
    test_generate
}

# The generated header must contain exactly the expected TEST lines.
@test "Testing substitution.d - header" {
    check_output_parser "substitution.d/parser.h"
}

# The generated source must contain exactly the expected TEST lines.
@test "Testing substitution.d - source" {
    check_output_parser "substitution.d/parser.c"
}
-------------------------------------------------------------------------------- /tests/test.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | generate_bats() { 4 | skip_all="" 5 | if [ -f "$1/input.skip.peg" ]; then 6 | skip_all=$'skip\n' 7 | fi 8 | cat < /dev/null && continue 64 | generate_bats "$DIR" > "$DIR/test.bats" 65 | done 66 | 67 | bats "$@" ./*.d 68 | 69 | if [ -f "packcc.gcda" ]; then 70 | echo "$(gcov packcc | grep "Lines executed") (see $TESTDIR/packcc.c.gcov for details)" 71 | fi 72 | } 73 | 74 | main "$@" 75 | -------------------------------------------------------------------------------- /tests/unused_functions.d/check.bats:
-------------------------------------------------------------------------------- 1 | #!/usr/bin/env bats 2 | 3 | load "$TESTDIR/utils.sh" 4 | 5 | @test "Testing unused_functions.d - generated" { 6 | for file in "$TESTDIR"/unused_functions.d/*.peg; do 7 | test_generate "$file" 8 | gcc -fsigned-char -Wall -Wextra -Wno-unused-parameter -Wno-overlength-strings -pedantic -Werror \ 9 | -c unused_functions.d/parser.c -o unused_functions.d/parser.o 10 | done 11 | } 12 | -------------------------------------------------------------------------------- /tests/unused_functions.d/test0.peg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/arithy/packcc/37fc49cfd1455704349dfe00603051eff5450608/tests/unused_functions.d/test0.peg -------------------------------------------------------------------------------- /tests/unused_functions.d/test1.peg: -------------------------------------------------------------------------------- 1 | %marker @foo 2 | -------------------------------------------------------------------------------- /tests/unused_functions.d/test2.peg: -------------------------------------------------------------------------------- 1 | rule <- &{ @@ = 1; } 2 | -------------------------------------------------------------------------------- /tests/unused_rule_elimination.d/dump.bats: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bats 2 | 3 | load "$TESTDIR/utils.sh" 4 | 5 | @test "Testing unused_rule_elimination.d - generation" { 6 | run "$PACKCC" --debug -o "unused_rule_elimination.d/parser" "unused_rule_elimination.d/input.peg" 2>&1 7 | check_output "unused_rule_elimination.d/expected.txt" 8 | } 9 | -------------------------------------------------------------------------------- /tests/unused_rule_elimination.d/expected.txt: -------------------------------------------------------------------------------- 1 | Rule(name:'statement', ref:0, vars.len:1, 
capts.len:0, codes.len:2, preds.len:0) { 2 | Alternate(max:2, len:2) { 3 | Sequence(max:8, len:5) { 4 | Reference(var:'(null)', index:void, name:'_', rule:'_') 5 | Reference(var:'e', index:0, name:'expression', rule:'expression') 6 | Reference(var:'(null)', index:void, name:'_', rule:'_') 7 | Reference(var:'(null)', index:void, name:'EOL', rule:'EOL') 8 | Action(index:0, code:{ printf(\"answer=%d\\n\", e); }, vars: 9 | 'e' 10 | ) 11 | } 12 | Sequence(max:4, len:3) { 13 | Quantity(min:0, max:-1) { 14 | Sequence(max:2, len:2) { 15 | Predicate(neg:1) { 16 | Reference(var:'(null)', index:void, name:'EOL', rule:'EOL') 17 | } 18 | Charclass(value:'null') 19 | } 20 | } 21 | Reference(var:'(null)', index:void, name:'EOL', rule:'EOL') 22 | Action(index:1, code:{ printf(\"error\\n\"); }, vars:none) 23 | } 24 | } 25 | } 26 | Rule(name:'expression', ref:2, vars.len:1, capts.len:0, codes.len:1, preds.len:0) { 27 | Sequence(max:2, len:2) { 28 | Reference(var:'e', index:0, name:'term', rule:'term') 29 | Action(index:0, code:{ $$ = e; }, vars: 30 | 'e' 31 | ) 32 | } 33 | } 34 | Rule(name:'term', ref:3, vars.len:3, capts.len:0, codes.len:3, preds.len:0) { 35 | Alternate(max:4, len:3) { 36 | Sequence(max:8, len:6) { 37 | Reference(var:'l', index:0, name:'term', rule:'term') 38 | Reference(var:'(null)', index:void, name:'_', rule:'_') 39 | String(value:'+') 40 | Reference(var:'(null)', index:void, name:'_', rule:'_') 41 | Reference(var:'r', index:1, name:'factor', rule:'factor') 42 | Action(index:0, code:{ $$ = l + r; }, vars: 43 | 'l' 44 | 'r' 45 | ) 46 | } 47 | Sequence(max:8, len:6) { 48 | Reference(var:'l', index:0, name:'term', rule:'term') 49 | Reference(var:'(null)', index:void, name:'_', rule:'_') 50 | String(value:'-') 51 | Reference(var:'(null)', index:void, name:'_', rule:'_') 52 | Reference(var:'r', index:1, name:'factor', rule:'factor') 53 | Action(index:1, code:{ $$ = l - r; }, vars: 54 | 'l' 55 | 'r' 56 | ) 57 | } 58 | Sequence(max:2, len:2) { 59 | Reference(var:'e', 
index:2, name:'factor', rule:'factor') 60 | Action(index:2, code:{ $$ = e; }, vars: 61 | 'e' 62 | ) 63 | } 64 | } 65 | } 66 | Rule(name:'factor', ref:5, vars.len:3, capts.len:0, codes.len:3, preds.len:0) { 67 | Alternate(max:4, len:3) { 68 | Sequence(max:8, len:6) { 69 | Reference(var:'l', index:0, name:'factor', rule:'factor') 70 | Reference(var:'(null)', index:void, name:'_', rule:'_') 71 | String(value:'*') 72 | Reference(var:'(null)', index:void, name:'_', rule:'_') 73 | Reference(var:'r', index:1, name:'unary', rule:'unary') 74 | Action(index:0, code:{ $$ = l * r; }, vars: 75 | 'l' 76 | 'r' 77 | ) 78 | } 79 | Sequence(max:8, len:6) { 80 | Reference(var:'l', index:0, name:'factor', rule:'factor') 81 | Reference(var:'(null)', index:void, name:'_', rule:'_') 82 | String(value:'/') 83 | Reference(var:'(null)', index:void, name:'_', rule:'_') 84 | Reference(var:'r', index:1, name:'unary', rule:'unary') 85 | Action(index:1, code:{ $$ = l / r; }, vars: 86 | 'l' 87 | 'r' 88 | ) 89 | } 90 | Sequence(max:2, len:2) { 91 | Reference(var:'e', index:2, name:'unary', rule:'unary') 92 | Action(index:2, code:{ $$ = e; }, vars: 93 | 'e' 94 | ) 95 | } 96 | } 97 | } 98 | Rule(name:'unary', ref:5, vars.len:1, capts.len:0, codes.len:3, preds.len:0) { 99 | Alternate(max:4, len:3) { 100 | Sequence(max:4, len:4) { 101 | String(value:'+') 102 | Reference(var:'(null)', index:void, name:'_', rule:'_') 103 | Reference(var:'e', index:0, name:'unary', rule:'unary') 104 | Action(index:0, code:{ $$ = +e; }, vars: 105 | 'e' 106 | ) 107 | } 108 | Sequence(max:4, len:4) { 109 | String(value:'-') 110 | Reference(var:'(null)', index:void, name:'_', rule:'_') 111 | Reference(var:'e', index:0, name:'unary', rule:'unary') 112 | Action(index:1, code:{ $$ = -e; }, vars: 113 | 'e' 114 | ) 115 | } 116 | Sequence(max:2, len:2) { 117 | Reference(var:'e', index:0, name:'primary', rule:'primary') 118 | Action(index:2, code:{ $$ = e; }, vars: 119 | 'e' 120 | ) 121 | } 122 | } 123 | } 124 | 
Rule(name:'primary', ref:1, vars.len:1, capts.len:1, codes.len:2, preds.len:0) { 125 | Alternate(max:2, len:2) { 126 | Sequence(max:2, len:2) { 127 | Capture(index:0) { 128 | Quantity(min:1, max:-1) { 129 | Charclass(value:'0-9') 130 | } 131 | } 132 | Action(index:0, code:{ $$ = atoi($1); }, vars: 133 | $1 134 | ) 135 | } 136 | Sequence(max:8, len:6) { 137 | String(value:'(') 138 | Reference(var:'(null)', index:void, name:'_', rule:'_') 139 | Reference(var:'e', index:0, name:'expression', rule:'expression') 140 | Reference(var:'(null)', index:void, name:'_', rule:'_') 141 | String(value:')') 142 | Action(index:1, code:{ $$ = e; }, vars: 143 | 'e' 144 | ) 145 | } 146 | } 147 | } 148 | Rule(name:'_', ref:14, vars.len:0, capts.len:0, codes.len:0, preds.len:0) { 149 | Quantity(min:0, max:-1) { 150 | Charclass(value:' \t') 151 | } 152 | } 153 | Rule(name:'EOL', ref:3, vars.len:0, capts.len:0, codes.len:0, preds.len:0) { 154 | Alternate(max:4, len:4) { 155 | String(value:'\n') 156 | String(value:'\r\n') 157 | String(value:'\r') 158 | String(value:';') 159 | } 160 | } 161 | value_type: 'int' 162 | auxil_type: 'void *' 163 | prefix: 'calc' 164 | -------------------------------------------------------------------------------- /tests/unused_rule_elimination.d/input.peg: -------------------------------------------------------------------------------- 1 | %prefix "calc" 2 | 3 | statement <- _ e:expression _ EOL { printf("answer=%d\n", e); } 4 | / ( !EOL . )* EOL { printf("error\n"); } 5 | 6 | expression <- e:term { $$ = e; } 7 | 8 | unused_statement <- _ e:unused_expression _ unused_EOL { printf("answer=%d\n", e); } 9 | / ( !unused_EOL . 
)* unused_EOL { printf("error\n"); } 10 | 11 | unused_expression <- e:unused_term { $$ = e; } 12 | 13 | unused_term <- l:unused_term _ '+' _ r:factor { $$ = l + r; } 14 | / l:unused_term _ '-' _ r:factor { $$ = l - r; } 15 | / e:factor { $$ = e; } 16 | 17 | unused_EOL <- '\n' / '\r\n' / '\r' / ';' 18 | 19 | term <- l:term _ '+' _ r:factor { $$ = l + r; } 20 | / l:term _ '-' _ r:factor { $$ = l - r; } 21 | / e:factor { $$ = e; } 22 | 23 | factor <- l:factor _ '*' _ r:unary { $$ = l * r; } 24 | / l:factor _ '/' _ r:unary { $$ = l / r; } 25 | / e:unary { $$ = e; } 26 | 27 | unary <- '+' _ e:unary { $$ = +e; } 28 | / '-' _ e:unary { $$ = -e; } 29 | / e:primary { $$ = e; } 30 | 31 | primary <- < [0-9]+ > { $$ = atoi($1); } 32 | / '(' _ e:expression _ ')' { $$ = e; } 33 | 34 | _ <- [ \t]* 35 | EOL <- '\n' / '\r\n' / '\r' / ';' 36 | -------------------------------------------------------------------------------- /tests/utils.sh: -------------------------------------------------------------------------------- 1 | test_generate () { 2 | (cd "$BATS_TEST_DIRNAME" && "$PACKCC" -I "$ROOTDIR/import" "${PACKCC_OPTS[@]}" -o "parser" "${1:-input.peg}") 3 | } 4 | 5 | test_compile() { 6 | ${CC:-cc} -I "$BATS_TEST_DIRNAME" "main.c" -o "$BATS_TEST_DIRNAME/parser" "$@" 7 | } 8 | 9 | check_output() { 10 | diff --strip-trailing-cr -uN "${1/input/expected}" --label "${1/input/expected}" <(echo "$output") --label "output" 11 | } 12 | 13 | run_for_input() { 14 | run timeout 5s "$BATS_TEST_DIRNAME/parser" < "$1" 15 | check_output "$1" 16 | } 17 | 18 | in_header() { 19 | grep -Fq "$1" "$BATS_TEST_DIRNAME/parser.h" 20 | } 21 | 22 | in_source() { 23 | grep -Fq "$1" "$BATS_TEST_DIRNAME/parser.c" 24 | } 25 | 26 | get_line() { 27 | sed -n "/$1/=" "$BATS_TEST_DIRNAME/$2" | tail -n1 28 | } 29 | --------------------------------------------------------------------------------