├── .editorconfig ├── .gitignore ├── LICENSE ├── Makefile ├── README.md ├── action.yml ├── annotate.pl ├── fuzzer.c └── ignorelist.ini /.editorconfig: -------------------------------------------------------------------------------- 1 | root = true 2 | 3 | [*] 4 | charset = utf-8 5 | end_of_line = lf 6 | indent_size = 2 7 | indent_style = space 8 | max_line_length = 120 9 | insert_final_newline = true 10 | trim_trailing_whitespace = true 11 | 12 | [fuzzer.c] 13 | indent_size = 4 14 | 15 | [Makefile] 16 | indent_size = 8 17 | indent_style = tab 18 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | tree-sitter/ 2 | artifacts/ 3 | corpus/ 4 | out/ 5 | fuzzer 6 | dict 7 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright (c) 2024 tree-sitter contributors 2 | 3 | Permission is hereby granted, free of charge, to any person obtaining a copy 4 | of this software and associated documentation files (the "Software"), to deal 5 | in the Software without restriction, including without limitation the rights 6 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 7 | copies of the Software, and to permit persons to whom the Software is 8 | furnished to do so, subject to the following conditions: 9 | 10 | The above copyright notice and this permission notice shall be included in all 11 | copies or substantial portions of the Software. 12 | 13 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 14 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 15 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE 16 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 17 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 18 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 19 | SOFTWARE. 20 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | LANG_NAME ?= 2 | LANG_DIR ?= 3 | TIMEOUT ?= 10 4 | MAX_TIME ?= 60 5 | MAX_LEN ?= 4096 6 | FUZZER_DIR ?= . 7 | CORPUS_DIR ?= 8 | 9 | JQ_FILTER := .. | select((.type? == "STRING" or (.type? == "ALIAS" and .named? == false)) and .value? != "") | .value 10 | 11 | CFLAGS = -Og -g -Wall -Wextra -Wno-unused-but-set-variable 12 | 13 | fuzz: $(FUZZER_DIR)/fuzzer $(FUZZER_DIR)/dict 14 | @mkdir -p $(FUZZER_DIR)/artifacts $(FUZZER_DIR)/out 15 | $( $(FUZZER_DIR)/dict 21 | 22 | $(FUZZER_DIR)/fuzzer: CFLAGS += -fsanitize=fuzzer,address,undefined -fsanitize-ignorelist=ignorelist.ini 23 | $(FUZZER_DIR)/fuzzer: CFLAGS += $(shell pkg-config --cflags --libs tree-sitter) 24 | $(FUZZER_DIR)/fuzzer: fuzzer.c $(LANG_DIR)/src/parser.c $(wildcard $(LANG_DIR)/src/scanner.c) 25 | $(eval LANGUAGE = $(if $(LANG_NAME),tree_sitter_$(LANG_NAME),$(error LANG_NAME must be set))) 26 | clang -std=c11 -I$(LANG_DIR)/src -DTS_LANG=$(LANGUAGE) $(CFLAGS) $^ -o $@ 27 | 28 | .PHONY: 29 | clean: 30 | @rm -rf $(FUZZER_DIR)/artifacts $(FUZZER_DIR)/out $(FUZZER_DIR)/dict $(FUZZER_DIR)/fuzzer 31 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Tree-sitter parser fuzzing 2 | 3 | > [!IMPORTANT] 4 | > This only works on Linux. 
5 | 6 | ## Options 7 | 8 | ```yaml 9 | directory: 10 | description: The directory of the grammar 11 | corpus: 12 | description: The directory of the seed corpus 13 | timeout: 14 | description: The time to wait if the fuzzer hangs 15 | default: 10 16 | max-time: 17 | description: The maximum total fuzzing time 18 | default: 60 19 | max-length: 20 | description: The maximum fuzz input length 21 | default: 4096 22 | tree-sitter-version: 23 | description: The tree-sitter version to install 24 | default: latest 25 | ``` 26 | 27 | ## Example configuration 28 | 29 | ```yaml 30 | name: Fuzz parser 31 | 32 | on: 33 | push: 34 | branches: [master] 35 | paths: 36 | - src/scanner.c 37 | pull_request: 38 | paths: 39 | - src/scanner.c 40 | 41 | jobs: 42 | test: 43 | name: Parser fuzzing 44 | runs-on: ubuntu-latest 45 | steps: 46 | - uses: actions/checkout@v4 47 | - uses: tree-sitter/fuzz-action@v4 48 | ``` 49 | 50 | ## Using locally 51 | 52 | ### Requirements 53 | 54 | - `pkg-config` 55 | - `make` 56 | - `jq` 57 | - `llvm` 58 | - `tree-sitter` library 59 | 60 | ### Usage 61 | 62 | ```bash 63 | make LANG_NAME=parser LANG_DIR=/path/to/tree-sitter-parser 64 | ``` 65 | 66 | > [!TIP] 67 | > Check the [Makefile](./Makefile) for more options. 
68 | 69 | ## Credits 70 | 71 | Based on [vigoux/tree-sitter-fuzz-action](https://github.com/vigoux/tree-sitter-fuzz-action) 72 | -------------------------------------------------------------------------------- /action.yml: -------------------------------------------------------------------------------- 1 | name: Tree-sitter parser fuzzing 2 | description: Fuzzing of tree-sitter parsers using libFuzzer 3 | 4 | branding: 5 | color: green 6 | icon: shuffle 7 | 8 | inputs: 9 | directory: 10 | description: The directory of the grammar 11 | corpus: 12 | description: The directory of the seed corpus 13 | timeout: 14 | description: The time to wait if the fuzzer hangs 15 | default: "10" 16 | max-time: 17 | description: The maximum total fuzzing time 18 | default: "60" 19 | max-length: 20 | description: The maximum fuzz input length 21 | default: "4096" 22 | tree-sitter-version: 23 | description: The tree-sitter version to install 24 | default: latest 25 | 26 | runs: 27 | using: composite 28 | steps: 29 | - name: Install tree-sitter 30 | uses: tree-sitter/setup-action/lib@v1 31 | with: 32 | tree-sitter-ref: ${{inputs.tree-sitter-version}} 33 | - name: Run the fuzzer 34 | id: fuzz 35 | working-directory: ${{github.action_path}} 36 | shell: bash 37 | run: |- 38 | export LANG_DIR="$GITHUB_WORKSPACE${LANG_DIR:+/}$LANG_DIR" 39 | [[ -z $CORPUS_DIR ]] || export CORPUS_DIR="$GITHUB_WORKSPACE/$CORPUS_DIR" 40 | printf '::group::Running the fuzzer\n' 41 | make LANG_NAME=$(jq -r .name $LANG_DIR/src/grammar.json) |& tee >(perl annotate.pl) 42 | printf '::endgroup::\n' 43 | env: 44 | TIMEOUT: ${{inputs.timeout}} 45 | MAX_TIME: ${{inputs.max-time}} 46 | MAX_LEN: ${{inputs.max-length}} 47 | LANG_DIR: ${{inputs.directory}} 48 | CORPUS_DIR: ${{inputs.corpus}} 49 | - name: Upload fuzzer artifacts 50 | uses: actions/upload-artifact@v4 51 | if: failure() && steps.fuzz.outcome == 'failure' 52 | with: 53 | path: ${{github.action_path}}/artifacts/* 54 | name: fuzzer-artifacts-${{inputs.directory}} 
55 |         retention-days: 7
56 |         overwrite: true
57 |
--------------------------------------------------------------------------------
/annotate.pl:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env perl
 2 |
 3 | # Turns sanitizer and libFuzzer reports read from the input into GitHub Actions
 4 | # workflow commands (::notice / ::error) printed on standard output.
 5 |
 6 | use strict;
 7 | use warnings;
 8 |
 9 | use Cwd 'abs_path';
10 | use File::Spec::Functions 'abs2rel';
11 |
12 | # "<file>:<line>:<col>" as it appears in the reports matched below.
13 | my $LOCATION = qr/(?<file>[^:]+):(?<line>[0-9]+):(?<col>[0-9]+)/;
14 | # The "in <function>" part of a symbolised stack frame.
15 | my $FUNCTION = qr/(?<msg>in [A-Za-z0-9_]+)/;
16 |
17 | # Returns the next input line, or undef at end of input. All reads, including
18 | # look-ahead into a report, go through <> so they share one handle.
19 | sub next_line {
20 |   my $line = <>;
21 |   return $line;
22 | }
23 |
24 | # Prints one workflow command of the given level ('notice' or 'error').
25 | # $at holds the file/line/col captures of a successful match; the file is
26 | # rewritten relative to $GITHUB_WORKSPACE.
27 | sub annotate {
28 |   my ($level, $at, $message) = @_;
29 |   # Fall back to the reported path when abs_path() cannot resolve it.
30 |   my $file = abs2rel(abs_path($at->{file}) // $at->{file}, $ENV{GITHUB_WORKSPACE});
31 |   print "::$level file=$file,line=$at->{line},col=$at->{col},title=Sanitizer::$message\n";
32 |   return;
33 | }
34 |
35 | # Consumes the lines after a libFuzzer out-of-memory header and reports either
36 | # the "Live Heap Allocations" case or the first symbolised stack frame that is
37 | # not in a __* / fuzzer* function.
38 | sub annotate_out_of_memory {
39 |   while (defined(my $line = next_line())) {
40 |     if ($line =~ /^Live Heap Allocations/) {
41 |       print "::error file=src/scanner.c,title=Sanitizer::out of memory (potential infinite loop)\n";
42 |       return;
43 |     }
44 |     # Stop at a summary line instead of scanning the rest of the input.
45 |     last if $line =~ /SUMMARY:/;
46 |     # Frames inside __* or fuzzer* functions are skipped.
47 |     next if $line =~ /in (?:__|fuzzer)/;
48 |     if ($line =~ /#[0-9]+ 0x[a-f0-9]+ $FUNCTION $LOCATION/) {
49 |       my %at = %+;
50 |       annotate('error', \%at, "out of memory $at{msg}");
51 |       return;
52 |     }
53 |   }
54 |   return;
55 | }
56 |
57 | while (defined(my $line = next_line())) {
58 |   if ($line =~ /runtime error:/) {
59 |     next unless $line =~ /${LOCATION}: runtime error: (?<msg>.+)/;
60 |     my %at = %+;
61 |     annotate('notice', \%at, $at{msg});
62 |   } elsif ($line =~ /SUMMARY: AddressSanitizer: [^0-9]/) {
63 |     next unless $line =~ /AddressSanitizer: (?<id>[A-Za-z-]+) $LOCATION (?<msg>.+)/;
64 |     my %at = %+;
65 |     # The dash-separated error id is spelled out with spaces in the title.
66 |     annotate('error', \%at, join(' ', split('-', $at{id}), $at{msg}));
67 |   } elsif ($line =~ /ERROR: LeakSanitizer:/) {
68 |     # Two lines are discarded before the first stack frame is read.
69 |     next_line() for 1 .. 2;
70 |     my $frame = next_line();
71 |     # When that frame is an __interceptor function, the next line is used instead.
72 |     $frame = next_line() if defined $frame && $frame =~ /in __interceptor/;
73 |     next unless defined $frame && $frame =~ /#1 0x[a-f0-9]+ $FUNCTION $LOCATION/;
74 |     my %at = %+;
75 |     annotate('error', \%at, "detected memory leak $at{msg}");
76 |   } elsif ($line =~ /ERROR: libFuzzer: out-of-memory/) {
77 |     annotate_out_of_memory();
78 |   }
79 | }
80 |
--------------------------------------------------------------------------------
/fuzzer.c:
--------------------------------------------------------------------------------
 1 | #include <assert.h>
 2 | #include <tree_sitter/api.h>
 3 |
 4 | #ifndef TS_LANG
 5 | #error TS_LANG must be defined
 6 | #endif
 7 |
 8 | const TSLanguage *TS_LANG(void);
 9 |
10 | int LLVMFuzzerTestOneInput(const uint8_t *data, const size_t len) {
11 |     // Create a parser.
12 |     TSParser *parser = ts_parser_new();
13 |
14 |     // Set the parser's language. The call is kept out of assert() so that it
15 |     // still runs when assertions are compiled out (NDEBUG).
16 |     const bool language_set = ts_parser_set_language(parser, TS_LANG());
17 |     assert(language_set);
18 |     (void)language_set;
19 |
20 |     // Build a syntax tree based on source code stored in a string.
21 |     TSTree *tree = ts_parser_parse_string(parser, NULL, (const char *)data, len);
22 |
23 |     // Free all of the heap-allocated memory.
24 |     ts_tree_delete(tree);
25 |     ts_parser_delete(parser);
26 |     return 0;
27 | }
28 |
--------------------------------------------------------------------------------
/ignorelist.ini:
--------------------------------------------------------------------------------
 1 | [nonnull-attribute]
 2 | fun:tree_sitter_*_external_scanner_serialize
 3 | fun:tree_sitter_*_external_scanner_deserialize
 4 |
--------------------------------------------------------------------------------