├── .envrc ├── .gitignore ├── src ├── unicode │ ├── ICU_SHA │ ├── utf.h │ ├── ptypes.h │ ├── urename.h │ ├── README.md │ └── umachine.h ├── ts_assert.h ├── lib.c ├── error_costs.h ├── wasm │ ├── stdlib-symbols.txt │ └── stdlib.c ├── host.h ├── tree.h ├── reduce_action.h ├── get_changed_ranges.h ├── alloc.h ├── wasm_store.h ├── tree_cursor.h ├── length.h ├── lexer.h ├── point.h ├── atomic.h ├── alloc.c ├── unicode.h ├── reusable_node.h ├── clock.h ├── stack.h ├── tree.c ├── portable │ └── endian.h ├── parser.h ├── language.h ├── language.c ├── array.h ├── subtree.h ├── lexer.c └── get_changed_ranges.c ├── .gitmodules ├── allocator.h ├── logger.go ├── dup_unix.go ├── copy.sh ├── tree_sitter.go ├── dup_windows.go ├── allocator.c ├── .github ├── dependabot.yml ├── FUNDING.yml └── workflows │ ├── ci.yml │ └── copy.yml ├── flake.lock ├── point.go ├── edit.go ├── language_test.go ├── go.mod ├── LICENSE ├── ranges.go ├── tree_cursor_test.go ├── lookahead_iterator_test.go ├── flake.nix ├── lookahead_iterator.go ├── README.md ├── go.sum ├── allocator.go ├── edit_test.go ├── tree.go ├── language.go ├── tree_cursor.go └── node.go /.envrc: -------------------------------------------------------------------------------- 1 | use flake 2 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .direnv 2 | -------------------------------------------------------------------------------- /src/unicode/ICU_SHA: -------------------------------------------------------------------------------- 1 | 552b01f61127d30d6589aa4bf99468224979b661 2 | -------------------------------------------------------------------------------- /src/unicode/utf.h: -------------------------------------------------------------------------------- 1 | // This file must exist in order for `utf8.h` and `utf16.h` to be used. 
2 | -------------------------------------------------------------------------------- /src/unicode/ptypes.h: -------------------------------------------------------------------------------- 1 | // This file must exist in order for `utf8.h` and `utf16.h` to be used. 2 | -------------------------------------------------------------------------------- /src/unicode/urename.h: -------------------------------------------------------------------------------- 1 | // This file must exist in order for `utf8.h` and `utf16.h` to be used. 2 | -------------------------------------------------------------------------------- /.gitmodules: -------------------------------------------------------------------------------- 1 | [submodule "tree-sitter"] 2 | path = tree-sitter 3 | url = https://github.com/tree-sitter/tree-sitter 4 | -------------------------------------------------------------------------------- /allocator.h: -------------------------------------------------------------------------------- 1 | #include <stddef.h> 2 | 3 | void *c_malloc_fn(size_t size); 4 | 5 | void *c_calloc_fn(size_t num, size_t size); 6 | 7 | void *c_realloc_fn(void *ptr, size_t size); 8 | 9 | void c_free_fn(void *ptr); 10 | -------------------------------------------------------------------------------- /logger.go: -------------------------------------------------------------------------------- 1 | package tree_sitter 2 | 3 | type LogType int 4 | 5 | const ( 6 | LogTypeParse LogType = iota 7 | LogTypeLex 8 | ) 9 | 10 | // A callback that receives log messages during parsing. 
11 | type Logger = func(LogType, string) 12 | -------------------------------------------------------------------------------- /dup_unix.go: -------------------------------------------------------------------------------- 1 | //go:build linux || darwin 2 | 3 | package tree_sitter 4 | 5 | /* 6 | #include <unistd.h> 7 | */ 8 | import "C" 9 | 10 | // Wrapper for Unix systems 11 | func dupeFD(fd uintptr) int { 12 | return int(C.dup(C.int(fd))) 13 | } 14 | -------------------------------------------------------------------------------- /src/ts_assert.h: -------------------------------------------------------------------------------- 1 | #ifndef TREE_SITTER_ASSERT_H_ 2 | #define TREE_SITTER_ASSERT_H_ 3 | 4 | #ifdef NDEBUG 5 | #define ts_assert(e) ((void)(e)) 6 | #else 7 | #include <assert.h> 8 | #define ts_assert(e) assert(e) 9 | #endif 10 | 11 | #endif // TREE_SITTER_ASSERT_H_ 12 | -------------------------------------------------------------------------------- /copy.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | SRC_DIR="tree-sitter/lib" 4 | 5 | if [ ! -d "$SRC_DIR/src" ] || [ ! -d "$SRC_DIR/include" ]; then 6 | echo "Error: source directories do not exist." 7 | exit 1 8 | fi 9 | 10 | cp -r "$SRC_DIR/src/" "." 11 | cp -r "$SRC_DIR/include/" "." 12 | -------------------------------------------------------------------------------- /tree_sitter.go: -------------------------------------------------------------------------------- 1 | package tree_sitter 2 | 3 | /* 4 | #cgo CFLAGS: -Iinclude -Isrc -std=c11 -D_POSIX_C_SOURCE=200112L -D_DEFAULT_SOURCE 5 | #include <tree_sitter/api.h> 6 | #include "lib.c" // <- This is needed to build the C library from the C source code, but cannot be included in files that have other declarations. 
7 | */ 8 | import "C" 9 | -------------------------------------------------------------------------------- /src/lib.c: -------------------------------------------------------------------------------- 1 | #include "./alloc.c" 2 | #include "./get_changed_ranges.c" 3 | #include "./language.c" 4 | #include "./lexer.c" 5 | #include "./node.c" 6 | #include "./parser.c" 7 | #include "./query.c" 8 | #include "./stack.c" 9 | #include "./subtree.c" 10 | #include "./tree_cursor.c" 11 | #include "./tree.c" 12 | #include "./wasm_store.c" 13 | -------------------------------------------------------------------------------- /src/error_costs.h: -------------------------------------------------------------------------------- 1 | #ifndef TREE_SITTER_ERROR_COSTS_H_ 2 | #define TREE_SITTER_ERROR_COSTS_H_ 3 | 4 | #define ERROR_STATE 0 5 | #define ERROR_COST_PER_RECOVERY 500 6 | #define ERROR_COST_PER_MISSING_TREE 110 7 | #define ERROR_COST_PER_SKIPPED_TREE 100 8 | #define ERROR_COST_PER_SKIPPED_LINE 30 9 | #define ERROR_COST_PER_SKIPPED_CHAR 1 10 | 11 | #endif 12 | -------------------------------------------------------------------------------- /dup_windows.go: -------------------------------------------------------------------------------- 1 | //go:build windows 2 | 3 | package tree_sitter 4 | 5 | /* 6 | #include <windows.h> 7 | HANDLE _ts_dup(HANDLE handle); 8 | */ 9 | import "C" 10 | import "unsafe" 11 | 12 | // Wrapper for Windows systems 13 | func dupeFD(handle uintptr) uintptr { 14 | hHandle := C.HANDLE(unsafe.Pointer(handle)) 15 | return uintptr(unsafe.Pointer(C._ts_dup(hHandle))) 16 | } 17 | -------------------------------------------------------------------------------- /src/wasm/stdlib-symbols.txt: -------------------------------------------------------------------------------- 1 | "calloc", 2 | "free", 3 | "iswalnum", 4 | "iswalpha", 5 | "iswblank", 6 | "iswdigit", 7 | "iswlower", 8 | "iswspace", 9 | "iswupper", 10 | "iswxdigit", 11 | "malloc", 12 | "memchr", 13 | "memcmp", 14 | 
"memcpy", 15 | "memmove", 16 | "memset", 17 | "realloc", 18 | "strcmp", 19 | "strlen", 20 | "strncat", 21 | "strncmp", 22 | "strncpy", 23 | "towlower", 24 | "towupper", 25 | -------------------------------------------------------------------------------- /allocator.c: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | extern void *go_malloc(size_t size); 4 | extern void *go_calloc(size_t num, size_t size); 5 | extern void *go_realloc(void *ptr, size_t size); 6 | extern void go_free(void *ptr); 7 | 8 | void *c_malloc_fn(size_t size) { return go_malloc(size); } 9 | 10 | void *c_calloc_fn(size_t num, size_t size) { return go_calloc(num, size); } 11 | 12 | void *c_realloc_fn(void *ptr, size_t size) { return go_realloc(ptr, size); } 13 | 14 | void c_free_fn(void *ptr) { go_free(ptr); } 15 | -------------------------------------------------------------------------------- /.github/dependabot.yml: -------------------------------------------------------------------------------- 1 | version: 2 2 | updates: 3 | - package-ecosystem: github-actions 4 | directory: / 5 | schedule: 6 | interval: weekly 7 | day: saturday 8 | commit-message: 9 | prefix: ci 10 | groups: 11 | actions: 12 | patterns: ["*"] 13 | labels: [dependencies] 14 | open-pull-requests-limit: 1 15 | - package-ecosystem: gitsubmodule 16 | directory: / 17 | schedule: 18 | interval: weekly 19 | day: sunday 20 | commit-message: 21 | prefix: build 22 | labels: [dependencies] 23 | open-pull-requests-limit: 1 24 | -------------------------------------------------------------------------------- /src/host.h: -------------------------------------------------------------------------------- 1 | 2 | // Determine endian and pointer size based on known defines. 3 | // TS_BIG_ENDIAN and TS_PTR_SIZE can be set as -D compiler arguments 4 | // to override this. 
5 | 6 | #if !defined(TS_BIG_ENDIAN) 7 | #if (defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__) \ 8 | || (defined( __APPLE_CC__) && (defined(__ppc__) || defined(__ppc64__))) 9 | #define TS_BIG_ENDIAN 1 10 | #else 11 | #define TS_BIG_ENDIAN 0 12 | #endif 13 | #endif 14 | 15 | #if !defined(TS_PTR_SIZE) 16 | #if UINTPTR_MAX == 0xFFFFFFFF 17 | #define TS_PTR_SIZE 32 18 | #else 19 | #define TS_PTR_SIZE 64 20 | #endif 21 | #endif 22 | -------------------------------------------------------------------------------- /flake.lock: -------------------------------------------------------------------------------- 1 | { 2 | "nodes": { 3 | "nixpkgs": { 4 | "locked": { 5 | "lastModified": 1762596750, 6 | "narHash": "sha256-rXXuz51Bq7DHBlfIjN7jO8Bu3du5TV+3DSADBX7/9YQ=", 7 | "owner": "NixOS", 8 | "repo": "nixpkgs", 9 | "rev": "b6a8526db03f735b89dd5ff348f53f752e7ddc8e", 10 | "type": "github" 11 | }, 12 | "original": { 13 | "owner": "NixOS", 14 | "ref": "nixos-unstable", 15 | "repo": "nixpkgs", 16 | "type": "github" 17 | } 18 | }, 19 | "root": { 20 | "inputs": { 21 | "nixpkgs": "nixpkgs" 22 | } 23 | } 24 | }, 25 | "root": "root", 26 | "version": 7 27 | } 28 | -------------------------------------------------------------------------------- /point.go: -------------------------------------------------------------------------------- 1 | package tree_sitter 2 | 3 | /* 4 | #cgo CFLAGS: -Iinclude -Isrc -std=c11 -D_POSIX_C_SOURCE=200112L -D_DEFAULT_SOURCE 5 | #include 6 | */ 7 | import "C" 8 | 9 | // A position in a multi-line text document, in terms of rows and columns. 10 | // 11 | // Rows and columns are zero-based. 
12 | type Point struct { 13 | Row uint 14 | Column uint 15 | } 16 | 17 | func NewPoint(row, column uint) Point { 18 | return Point{Row: row, Column: column} 19 | } 20 | 21 | func (p *Point) toTSPoint() C.TSPoint { 22 | return C.TSPoint{ 23 | row: C.uint32_t(p.Row), 24 | column: C.uint32_t(p.Column), 25 | } 26 | } 27 | 28 | func (p *Point) fromTSPoint(tp C.TSPoint) { 29 | p.Row = uint(tp.row) 30 | p.Column = uint(tp.column) 31 | } 32 | -------------------------------------------------------------------------------- /edit.go: -------------------------------------------------------------------------------- 1 | package tree_sitter 2 | 3 | /* 4 | #cgo CFLAGS: -Iinclude -Isrc -std=c11 -D_POSIX_C_SOURCE=200112L -D_DEFAULT_SOURCE 5 | #include 6 | */ 7 | import "C" 8 | 9 | type InputEdit struct { 10 | StartByte uint 11 | OldEndByte uint 12 | NewEndByte uint 13 | StartPosition Point 14 | OldEndPosition Point 15 | NewEndPosition Point 16 | } 17 | 18 | func (i *InputEdit) toTSInputEdit() *C.TSInputEdit { 19 | return &C.TSInputEdit{ 20 | start_byte: C.uint(i.StartByte), 21 | old_end_byte: C.uint(i.OldEndByte), 22 | new_end_byte: C.uint(i.NewEndByte), 23 | start_point: i.StartPosition.toTSPoint(), 24 | old_end_point: i.OldEndPosition.toTSPoint(), 25 | new_end_point: i.NewEndPosition.toTSPoint(), 26 | } 27 | } 28 | -------------------------------------------------------------------------------- /src/tree.h: -------------------------------------------------------------------------------- 1 | #ifndef TREE_SITTER_TREE_H_ 2 | #define TREE_SITTER_TREE_H_ 3 | 4 | #include "./subtree.h" 5 | 6 | #ifdef __cplusplus 7 | extern "C" { 8 | #endif 9 | 10 | typedef struct { 11 | const Subtree *child; 12 | const Subtree *parent; 13 | Length position; 14 | TSSymbol alias_symbol; 15 | } ParentCacheEntry; 16 | 17 | struct TSTree { 18 | Subtree root; 19 | const TSLanguage *language; 20 | TSRange *included_ranges; 21 | unsigned included_range_count; 22 | }; 23 | 24 | TSTree *ts_tree_new(Subtree root, 
const TSLanguage *language, const TSRange *included_ranges, unsigned included_range_count); 25 | TSNode ts_node_new(const TSTree *tree, const Subtree *subtree, Length position, TSSymbol alias); 26 | 27 | #ifdef __cplusplus 28 | } 29 | #endif 30 | 31 | #endif // TREE_SITTER_TREE_H_ 32 | -------------------------------------------------------------------------------- /.github/FUNDING.yml: -------------------------------------------------------------------------------- 1 | # These are supported funding model platforms 2 | 3 | github: tree-sitter 4 | patreon: # Replace with a single Patreon username 5 | open_collective: tree-sitter # Replace with a single Open Collective username 6 | ko_fi: amaanq 7 | tidelift: # Replace with a single Tidelift platform-name/package-name e.g., npm/babel 8 | community_bridge: # Replace with a single Community Bridge project-name e.g., cloud-foundry 9 | liberapay: # Replace with a single Liberapay username 10 | issuehunt: # Replace with a single IssueHunt username 11 | lfx_crowdfunding: # Replace with a single LFX Crowdfunding project-name e.g., cloud-foundry 12 | polar: # Replace with a single Polar username 13 | buy_me_a_coffee: # Replace with a single Buy Me a Coffee username 14 | thanks_dev: # Replace with a single thanks.dev username 15 | custom: # Replace with up to 4 custom sponsorship URLs e.g., ['link1', 'link2'] 16 | -------------------------------------------------------------------------------- /src/reduce_action.h: -------------------------------------------------------------------------------- 1 | #ifndef TREE_SITTER_REDUCE_ACTION_H_ 2 | #define TREE_SITTER_REDUCE_ACTION_H_ 3 | 4 | #ifdef __cplusplus 5 | extern "C" { 6 | #endif 7 | 8 | #include "./array.h" 9 | #include "tree_sitter/api.h" 10 | 11 | typedef struct { 12 | uint32_t count; 13 | TSSymbol symbol; 14 | int dynamic_precedence; 15 | unsigned short production_id; 16 | } ReduceAction; 17 | 18 | typedef Array(ReduceAction) ReduceActionSet; 19 | 20 | static inline void 
ts_reduce_action_set_add(ReduceActionSet *self, 21 | ReduceAction new_action) { 22 | for (uint32_t i = 0; i < self->size; i++) { 23 | ReduceAction action = self->contents[i]; 24 | if (action.symbol == new_action.symbol && action.count == new_action.count) 25 | return; 26 | } 27 | array_push(self, new_action); 28 | } 29 | 30 | #ifdef __cplusplus 31 | } 32 | #endif 33 | 34 | #endif // TREE_SITTER_REDUCE_ACTION_H_ 35 | -------------------------------------------------------------------------------- /language_test.go: -------------------------------------------------------------------------------- 1 | package tree_sitter_test 2 | 3 | import ( 4 | "testing" 5 | 6 | "github.com/stretchr/testify/assert" 7 | ) 8 | 9 | func TestSymbolMetadataChecks(t *testing.T) { 10 | language := getLanguage("rust") 11 | for id := range language.NodeKindCount() { 12 | name := language.NodeKindForId(uint16(id)) 13 | 14 | switch name { 15 | case "_type", "_expression", "_pattern", "_literal", "_literal_pattern", "_declaration_statement": 16 | assert.True(t, language.NodeKindIsSupertype(uint16(id))) 17 | 18 | case "_raw_string_literal_start", "_raw_string_literal_end", "_line_doc_comment", "_error_sentinel": 19 | assert.False(t, language.NodeKindIsSupertype(uint16(id))) 20 | 21 | case "enum_item", "struct_item", "type_item": 22 | assert.True(t, language.NodeKindIsNamed(uint16(id))) 23 | 24 | case "=>", "[", "]", "(", ")", "{", "}": 25 | assert.True(t, language.NodeKindIsVisible(uint16(id))) 26 | } 27 | } 28 | } 29 | -------------------------------------------------------------------------------- /go.mod: -------------------------------------------------------------------------------- 1 | module github.com/tree-sitter/go-tree-sitter 2 | 3 | go 1.23 4 | 5 | require ( 6 | github.com/mattn/go-pointer v0.0.1 7 | github.com/stretchr/testify v1.10.0 8 | github.com/tree-sitter/tree-sitter-c v0.23.4 9 | github.com/tree-sitter/tree-sitter-cpp v0.23.4 10 | 
github.com/tree-sitter/tree-sitter-embedded-template v0.23.2 11 | github.com/tree-sitter/tree-sitter-go v0.23.4 12 | github.com/tree-sitter/tree-sitter-html v0.23.2 13 | github.com/tree-sitter/tree-sitter-java v0.23.5 14 | github.com/tree-sitter/tree-sitter-javascript v0.23.1 15 | github.com/tree-sitter/tree-sitter-json v0.24.8 16 | github.com/tree-sitter/tree-sitter-php v0.23.11 17 | github.com/tree-sitter/tree-sitter-python v0.23.6 18 | github.com/tree-sitter/tree-sitter-ruby v0.23.1 19 | github.com/tree-sitter/tree-sitter-rust v0.23.2 20 | ) 21 | 22 | require ( 23 | github.com/davecgh/go-spew v1.1.1 // indirect 24 | github.com/pmezard/go-difflib v1.0.0 // indirect 25 | gopkg.in/yaml.v3 v3.0.1 // indirect 26 | ) 27 | -------------------------------------------------------------------------------- /src/get_changed_ranges.h: -------------------------------------------------------------------------------- 1 | #ifndef TREE_SITTER_GET_CHANGED_RANGES_H_ 2 | #define TREE_SITTER_GET_CHANGED_RANGES_H_ 3 | 4 | #ifdef __cplusplus 5 | extern "C" { 6 | #endif 7 | 8 | #include "./tree_cursor.h" 9 | #include "./subtree.h" 10 | 11 | typedef Array(TSRange) TSRangeArray; 12 | 13 | void ts_range_array_get_changed_ranges( 14 | const TSRange *old_ranges, unsigned old_range_count, 15 | const TSRange *new_ranges, unsigned new_range_count, 16 | TSRangeArray *differences 17 | ); 18 | 19 | bool ts_range_array_intersects( 20 | const TSRangeArray *self, unsigned start_index, 21 | uint32_t start_byte, uint32_t end_byte 22 | ); 23 | 24 | unsigned ts_subtree_get_changed_ranges( 25 | const Subtree *old_tree, const Subtree *new_tree, 26 | TreeCursor *cursor1, TreeCursor *cursor2, 27 | const TSLanguage *language, 28 | const TSRangeArray *included_range_differences, 29 | TSRange **ranges 30 | ); 31 | 32 | #ifdef __cplusplus 33 | } 34 | #endif 35 | 36 | #endif // TREE_SITTER_GET_CHANGED_RANGES_H_ 37 | -------------------------------------------------------------------------------- /src/alloc.h: 
-------------------------------------------------------------------------------- 1 | #ifndef TREE_SITTER_ALLOC_H_ 2 | #define TREE_SITTER_ALLOC_H_ 3 | 4 | #ifdef __cplusplus 5 | extern "C" { 6 | #endif 7 | 8 | #include 9 | #include 10 | #include 11 | 12 | #if defined(TREE_SITTER_HIDDEN_SYMBOLS) || defined(_WIN32) 13 | #define TS_PUBLIC 14 | #else 15 | #define TS_PUBLIC __attribute__((visibility("default"))) 16 | #endif 17 | 18 | TS_PUBLIC extern void *(*ts_current_malloc)(size_t size); 19 | TS_PUBLIC extern void *(*ts_current_calloc)(size_t count, size_t size); 20 | TS_PUBLIC extern void *(*ts_current_realloc)(void *ptr, size_t size); 21 | TS_PUBLIC extern void (*ts_current_free)(void *ptr); 22 | 23 | // Allow clients to override allocation functions 24 | #ifndef ts_malloc 25 | #define ts_malloc ts_current_malloc 26 | #endif 27 | #ifndef ts_calloc 28 | #define ts_calloc ts_current_calloc 29 | #endif 30 | #ifndef ts_realloc 31 | #define ts_realloc ts_current_realloc 32 | #endif 33 | #ifndef ts_free 34 | #define ts_free ts_current_free 35 | #endif 36 | 37 | #ifdef __cplusplus 38 | } 39 | #endif 40 | 41 | #endif // TREE_SITTER_ALLOC_H_ 42 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright (c) 2024 Amaan Qureshi 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 
14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /ranges.go: -------------------------------------------------------------------------------- 1 | package tree_sitter 2 | 3 | /* 4 | #cgo CFLAGS: -Iinclude -Isrc -std=c11 -D_POSIX_C_SOURCE=200112L -D_DEFAULT_SOURCE 5 | #include 6 | */ 7 | import "C" 8 | import "fmt" 9 | 10 | // A range of positions in a multi-line text document, both in terms of bytes 11 | // and of rows and columns. 12 | type Range struct { 13 | StartByte uint 14 | EndByte uint 15 | StartPoint Point 16 | EndPoint Point 17 | } 18 | 19 | // An error that occurred in [Parser.SetIncludedRanges]. 
20 | type IncludedRangesError struct { 21 | Index uint32 22 | } 23 | 24 | func (r *Range) ToTSRange() C.TSRange { 25 | return C.TSRange{ 26 | start_byte: C.uint32_t(r.StartByte), 27 | end_byte: C.uint32_t(r.EndByte), 28 | start_point: r.StartPoint.toTSPoint(), 29 | end_point: r.EndPoint.toTSPoint(), 30 | } 31 | } 32 | 33 | func (r *Range) FromTSRange(tr C.TSRange) { 34 | r.StartByte = uint(tr.start_byte) 35 | r.EndByte = uint(tr.end_byte) 36 | r.StartPoint.fromTSPoint(tr.start_point) 37 | r.EndPoint.fromTSPoint(tr.end_point) 38 | } 39 | 40 | func (i *IncludedRangesError) Error() string { 41 | return fmt.Sprintf("Incorrect range by index: %d", i.Index) 42 | } 43 | -------------------------------------------------------------------------------- /src/wasm_store.h: -------------------------------------------------------------------------------- 1 | #ifndef TREE_SITTER_WASM_H_ 2 | #define TREE_SITTER_WASM_H_ 3 | 4 | #ifdef __cplusplus 5 | extern "C" { 6 | #endif 7 | 8 | #include "tree_sitter/api.h" 9 | #include "./parser.h" 10 | 11 | bool ts_wasm_store_start(TSWasmStore *self, TSLexer *lexer, const TSLanguage *language); 12 | void ts_wasm_store_reset(TSWasmStore *self); 13 | bool ts_wasm_store_has_error(const TSWasmStore *self); 14 | 15 | bool ts_wasm_store_call_lex_main(TSWasmStore *self, TSStateId state); 16 | bool ts_wasm_store_call_lex_keyword(TSWasmStore *self, TSStateId state); 17 | 18 | uint32_t ts_wasm_store_call_scanner_create(TSWasmStore *self); 19 | void ts_wasm_store_call_scanner_destroy(TSWasmStore *self, uint32_t scanner_address); 20 | bool ts_wasm_store_call_scanner_scan(TSWasmStore *self, uint32_t scanner_address, uint32_t valid_tokens_ix); 21 | uint32_t ts_wasm_store_call_scanner_serialize(TSWasmStore *self, uint32_t scanner_address, char *buffer); 22 | void ts_wasm_store_call_scanner_deserialize(TSWasmStore *self, uint32_t scanner, const char *buffer, unsigned length); 23 | 24 | void ts_wasm_language_retain(const TSLanguage *self); 25 | void 
ts_wasm_language_release(const TSLanguage *self); 26 | 27 | #ifdef __cplusplus 28 | } 29 | #endif 30 | 31 | #endif // TREE_SITTER_WASM_H_ 32 | -------------------------------------------------------------------------------- /.github/workflows/ci.yml: -------------------------------------------------------------------------------- 1 | name: Build and Test 2 | 3 | on: 4 | push: 5 | branches: [master] 6 | pull_request: 7 | 8 | jobs: 9 | test: 10 | strategy: 11 | matrix: 12 | os: [ubuntu-latest, macos-latest, windows-latest] 13 | runs-on: ${{ matrix.os }} 14 | 15 | steps: 16 | - name: Checkout 17 | uses: actions/checkout@v4 18 | with: 19 | submodules: "recursive" 20 | 21 | - name: Set up Go 22 | uses: actions/setup-go@v5 23 | with: 24 | go-version: "1.23" 25 | 26 | - name: Build 27 | run: go build -v ./... 28 | 29 | - name: Test 30 | run: go test -v ./... 31 | 32 | nix: 33 | strategy: 34 | matrix: 35 | os: [ubuntu-latest, macos-latest] 36 | runs-on: ${{ matrix.os }} 37 | 38 | steps: 39 | - name: Checkout 40 | uses: actions/checkout@v4 41 | with: 42 | submodules: "recursive" 43 | 44 | - name: Set up Nix 45 | uses: DeterminateSystems/nix-installer-action@main 46 | 47 | - name: Set up Nix Cache 48 | uses: DeterminateSystems/magic-nix-cache-action@main 49 | with: 50 | use-flakehub: false 51 | 52 | - name: Build and test with Nix 53 | run: nix build 54 | -------------------------------------------------------------------------------- /src/unicode/README.md: -------------------------------------------------------------------------------- 1 | # ICU Parts 2 | 3 | This directory contains a small subset of files from the Unicode organization's [ICU repository](https://github.com/unicode-org/icu). 4 | 5 | ### License 6 | 7 | The license for these files is contained in the `LICENSE` file within this directory. 
8 | 9 | ### Contents 10 | 11 | * Source files taken from the [`icu4c/source/common/unicode`](https://github.com/unicode-org/icu/tree/552b01f61127d30d6589aa4bf99468224979b661/icu4c/source/common/unicode) directory: 12 | * `utf8.h` 13 | * `utf16.h` 14 | * `umachine.h` 15 | * Empty source files that are referenced by the above source files, but whose original contents in `libicu` are not needed: 16 | * `ptypes.h` 17 | * `urename.h` 18 | * `utf.h` 19 | * `ICU_SHA` - File containing the Git SHA of the commit in the `icu` repository from which the files were obtained. 20 | * `LICENSE` - The license file from the [`icu4c`](https://github.com/unicode-org/icu/tree/552b01f61127d30d6589aa4bf99468224979b661/icu4c) directory of the `icu` repository. 21 | * `README.md` - This text file. 22 | 23 | ### Updating ICU 24 | 25 | To incorporate changes from the upstream `icu` repository: 26 | 27 | * Update `ICU_SHA` with the new Git SHA. 28 | * Update `LICENSE` with the license text from the directory mentioned above. 29 | * Update `utf8.h`, `utf16.h`, and `umachine.h` with their new contents in the `icu` repository. 
30 | -------------------------------------------------------------------------------- /src/tree_cursor.h: -------------------------------------------------------------------------------- 1 | #ifndef TREE_SITTER_TREE_CURSOR_H_ 2 | #define TREE_SITTER_TREE_CURSOR_H_ 3 | 4 | #include "./subtree.h" 5 | 6 | typedef struct { 7 | const Subtree *subtree; 8 | Length position; 9 | uint32_t child_index; 10 | uint32_t structural_child_index; 11 | uint32_t descendant_index; 12 | } TreeCursorEntry; 13 | 14 | typedef struct { 15 | const TSTree *tree; 16 | Array(TreeCursorEntry) stack; 17 | TSSymbol root_alias_symbol; 18 | } TreeCursor; 19 | 20 | typedef enum { 21 | TreeCursorStepNone, 22 | TreeCursorStepHidden, 23 | TreeCursorStepVisible, 24 | } TreeCursorStep; 25 | 26 | void ts_tree_cursor_init(TreeCursor *self, TSNode node); 27 | void ts_tree_cursor_current_status( 28 | const TSTreeCursor *_self, 29 | TSFieldId *field_id, 30 | bool *has_later_siblings, 31 | bool *has_later_named_siblings, 32 | bool *can_have_later_siblings_with_this_field, 33 | TSSymbol *supertypes, 34 | unsigned *supertype_count 35 | ); 36 | 37 | TreeCursorStep ts_tree_cursor_goto_first_child_internal(TSTreeCursor *_self); 38 | TreeCursorStep ts_tree_cursor_goto_next_sibling_internal(TSTreeCursor *_self); 39 | 40 | static inline Subtree ts_tree_cursor_current_subtree(const TSTreeCursor *_self) { 41 | const TreeCursor *self = (const TreeCursor *)_self; 42 | TreeCursorEntry *last_entry = array_back(&self->stack); 43 | return *last_entry->subtree; 44 | } 45 | 46 | TSNode ts_tree_cursor_parent_node(const TSTreeCursor *_self); 47 | 48 | #endif // TREE_SITTER_TREE_CURSOR_H_ 49 | -------------------------------------------------------------------------------- /src/length.h: -------------------------------------------------------------------------------- 1 | #ifndef TREE_SITTER_LENGTH_H_ 2 | #define TREE_SITTER_LENGTH_H_ 3 | 4 | #include 5 | #include 6 | #include "./point.h" 7 | #include "tree_sitter/api.h" 8 | 9 | 
typedef struct { 10 | uint32_t bytes; 11 | TSPoint extent; 12 | } Length; 13 | 14 | static const Length LENGTH_UNDEFINED = {0, {0, 1}}; 15 | static const Length LENGTH_MAX = {UINT32_MAX, {UINT32_MAX, UINT32_MAX}}; 16 | 17 | static inline bool length_is_undefined(Length length) { 18 | return length.bytes == 0 && length.extent.column != 0; 19 | } 20 | 21 | static inline Length length_min(Length len1, Length len2) { 22 | return (len1.bytes < len2.bytes) ? len1 : len2; 23 | } 24 | 25 | static inline Length length_add(Length len1, Length len2) { 26 | Length result; 27 | result.bytes = len1.bytes + len2.bytes; 28 | result.extent = point_add(len1.extent, len2.extent); 29 | return result; 30 | } 31 | 32 | static inline Length length_sub(Length len1, Length len2) { 33 | Length result; 34 | result.bytes = (len1.bytes >= len2.bytes) ? len1.bytes - len2.bytes : 0; 35 | result.extent = point_sub(len1.extent, len2.extent); 36 | return result; 37 | } 38 | 39 | static inline Length length_zero(void) { 40 | Length result = {0, {0, 0}}; 41 | return result; 42 | } 43 | 44 | static inline Length length_saturating_sub(Length len1, Length len2) { 45 | if (len1.bytes > len2.bytes) { 46 | return length_sub(len1, len2); 47 | } else { 48 | return length_zero(); 49 | } 50 | } 51 | 52 | #endif 53 | -------------------------------------------------------------------------------- /src/lexer.h: -------------------------------------------------------------------------------- 1 | #ifndef TREE_SITTER_LEXER_H_ 2 | #define TREE_SITTER_LEXER_H_ 3 | 4 | #ifdef __cplusplus 5 | extern "C" { 6 | #endif 7 | 8 | #include "./length.h" 9 | #include "./subtree.h" 10 | #include "tree_sitter/api.h" 11 | #include "./parser.h" 12 | 13 | typedef struct { 14 | uint32_t value; 15 | bool valid; 16 | } ColumnData; 17 | 18 | typedef struct { 19 | TSLexer data; 20 | Length current_position; 21 | Length token_start_position; 22 | Length token_end_position; 23 | 24 | TSRange *included_ranges; 25 | const char *chunk; 26 | 
TSInput input; 27 | TSLogger logger; 28 | 29 | uint32_t included_range_count; 30 | uint32_t current_included_range_index; 31 | uint32_t chunk_start; 32 | uint32_t chunk_size; 33 | uint32_t lookahead_size; 34 | bool did_get_column; 35 | ColumnData column_data; 36 | 37 | char debug_buffer[TREE_SITTER_SERIALIZATION_BUFFER_SIZE]; 38 | } Lexer; 39 | 40 | void ts_lexer_init(Lexer *self); 41 | void ts_lexer_delete(Lexer *self); 42 | void ts_lexer_set_input(Lexer *self, TSInput input); 43 | void ts_lexer_reset(Lexer *self, Length position); 44 | void ts_lexer_start(Lexer *self); 45 | void ts_lexer_finish(Lexer *self, uint32_t *lookahead_end_byte); 46 | void ts_lexer_mark_end(Lexer *self); 47 | bool ts_lexer_set_included_ranges(Lexer *self, const TSRange *ranges, uint32_t count); 48 | TSRange *ts_lexer_included_ranges(const Lexer *self, uint32_t *count); 49 | 50 | #ifdef __cplusplus 51 | } 52 | #endif 53 | 54 | #endif // TREE_SITTER_LEXER_H_ 55 | -------------------------------------------------------------------------------- /src/point.h: -------------------------------------------------------------------------------- 1 | #ifndef TREE_SITTER_POINT_H_ 2 | #define TREE_SITTER_POINT_H_ 3 | 4 | #include "tree_sitter/api.h" 5 | 6 | #define POINT_ZERO ((TSPoint) {0, 0}) 7 | #define POINT_MAX ((TSPoint) {UINT32_MAX, UINT32_MAX}) 8 | 9 | static inline TSPoint point__new(unsigned row, unsigned column) { 10 | TSPoint result = {row, column}; 11 | return result; 12 | } 13 | 14 | static inline TSPoint point_add(TSPoint a, TSPoint b) { 15 | if (b.row > 0) 16 | return point__new(a.row + b.row, b.column); 17 | else 18 | return point__new(a.row, a.column + b.column); 19 | } 20 | 21 | static inline TSPoint point_sub(TSPoint a, TSPoint b) { 22 | if (a.row > b.row) 23 | return point__new(a.row - b.row, a.column); 24 | else 25 | return point__new(0, (a.column >= b.column) ? 
a.column - b.column : 0); 26 | } 27 | 28 | static inline bool point_lte(TSPoint a, TSPoint b) { 29 | return (a.row < b.row) || (a.row == b.row && a.column <= b.column); 30 | } 31 | 32 | static inline bool point_lt(TSPoint a, TSPoint b) { 33 | return (a.row < b.row) || (a.row == b.row && a.column < b.column); 34 | } 35 | 36 | static inline bool point_gt(TSPoint a, TSPoint b) { 37 | return (a.row > b.row) || (a.row == b.row && a.column > b.column); 38 | } 39 | 40 | static inline bool point_gte(TSPoint a, TSPoint b) { 41 | return (a.row > b.row) || (a.row == b.row && a.column >= b.column); 42 | } 43 | 44 | static inline bool point_eq(TSPoint a, TSPoint b) { 45 | return a.row == b.row && a.column == b.column; 46 | } 47 | 48 | #endif 49 | -------------------------------------------------------------------------------- /tree_cursor_test.go: -------------------------------------------------------------------------------- 1 | package tree_sitter_test 2 | 3 | import ( 4 | "fmt" 5 | 6 | . "github.com/tree-sitter/go-tree-sitter" 7 | tree_sitter_go "github.com/tree-sitter/tree-sitter-go/bindings/go" 8 | ) 9 | 10 | func ExampleTreeCursor() { 11 | parser := NewParser() 12 | defer parser.Close() 13 | 14 | language := NewLanguage(tree_sitter_go.Language()) 15 | 16 | parser.SetLanguage(language) 17 | 18 | tree := parser.Parse( 19 | []byte(` 20 | package main 21 | 22 | 23 | func main() { 24 | return 25 | } 26 | `), 27 | nil, 28 | ) 29 | defer tree.Close() 30 | 31 | cursor := tree.Walk() 32 | defer cursor.Close() 33 | 34 | fmt.Println(cursor.Node().Kind()) 35 | 36 | fmt.Println(cursor.GotoFirstChild()) 37 | fmt.Println(cursor.Node().Kind()) 38 | 39 | fmt.Println(cursor.GotoFirstChild()) 40 | fmt.Println(cursor.Node().Kind()) 41 | 42 | // Returns `false` because the `package` node has no children 43 | fmt.Println(cursor.GotoFirstChild()) 44 | 45 | fmt.Println(cursor.GotoNextSibling()) 46 | fmt.Println(cursor.Node().Kind()) 47 | 48 | fmt.Println(cursor.GotoParent()) 49 | 
fmt.Println(cursor.Node().Kind()) 50 | 51 | fmt.Println(cursor.GotoNextSibling()) 52 | fmt.Println(cursor.GotoNextSibling()) 53 | fmt.Println(cursor.Node().Kind()) 54 | 55 | // Output: 56 | // source_file 57 | // true 58 | // package_clause 59 | // true 60 | // package 61 | // false 62 | // true 63 | // package_identifier 64 | // true 65 | // package_clause 66 | // true 67 | // false 68 | // function_declaration 69 | } 70 | -------------------------------------------------------------------------------- /.github/workflows/copy.yml: -------------------------------------------------------------------------------- 1 | name: Copy and Sync Tree-Sitter Files 2 | 3 | on: 4 | schedule: 5 | - cron: "0 0 * * 0" # weekly 6 | workflow_dispatch: 7 | 8 | jobs: 9 | sync: 10 | runs-on: ubuntu-latest 11 | 12 | steps: 13 | - name: Checkout 14 | uses: actions/checkout@v4 15 | with: 16 | submodules: "recursive" 17 | 18 | - name: Run copy script 19 | run: ./copy.sh 20 | 21 | - name: Update submodule & Verify no changes 22 | run: | 23 | cd tree-sitter 24 | git fetch 25 | git checkout release-0.25 26 | cd .. 27 | git submodule update --remote 28 | git diff --exit-code || echo "Changes found" 29 | 30 | - name: Commit new changes and create PR 31 | if: ${{ failure() }} 32 | env: 33 | GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} 34 | run: | 35 | git config user.name "github-actions[bot]" 36 | git config user.email "github-actions[bot]@users.noreply.github.com" 37 | git branch -D auto-sync-tree-sitter || true 38 | git push origin --delete auto-sync-tree-sitter || true 39 | git checkout -b auto-sync-tree-sitter 40 | git add src include tree-sitter 41 | git commit -m "chore(auto-sync): update core tree-sitter library" 42 | gh pr create --title "chore(auto-sync): update core tree-sitter library" --body "This PR was automatically generated by the GitHub Actions workflow to update the core tree-sitter library." 
--base master --head auto-sync-tree-sitter 43 | -------------------------------------------------------------------------------- /src/atomic.h: -------------------------------------------------------------------------------- 1 | #ifndef TREE_SITTER_ATOMIC_H_ 2 | #define TREE_SITTER_ATOMIC_H_ 3 | 4 | #include 5 | #include 6 | #include 7 | 8 | #ifdef __TINYC__ 9 | 10 | static inline size_t atomic_load(const volatile size_t *p) { 11 | return *p; 12 | } 13 | 14 | static inline uint32_t atomic_inc(volatile uint32_t *p) { 15 | *p += 1; 16 | return *p; 17 | } 18 | 19 | static inline uint32_t atomic_dec(volatile uint32_t *p) { 20 | *p-= 1; 21 | return *p; 22 | } 23 | 24 | #elif defined(_WIN32) 25 | 26 | #include 27 | 28 | static inline size_t atomic_load(const volatile size_t *p) { 29 | return *p; 30 | } 31 | 32 | static inline uint32_t atomic_inc(volatile uint32_t *p) { 33 | return InterlockedIncrement((long volatile *)p); 34 | } 35 | 36 | static inline uint32_t atomic_dec(volatile uint32_t *p) { 37 | return InterlockedDecrement((long volatile *)p); 38 | } 39 | 40 | #else 41 | 42 | static inline size_t atomic_load(const volatile size_t *p) { 43 | #ifdef __ATOMIC_RELAXED 44 | return __atomic_load_n(p, __ATOMIC_RELAXED); 45 | #else 46 | return __sync_fetch_and_add((volatile size_t *)p, 0); 47 | #endif 48 | } 49 | 50 | static inline uint32_t atomic_inc(volatile uint32_t *p) { 51 | #ifdef __ATOMIC_RELAXED 52 | return __atomic_add_fetch(p, 1U, __ATOMIC_SEQ_CST); 53 | #else 54 | return __sync_add_and_fetch(p, 1U); 55 | #endif 56 | } 57 | 58 | static inline uint32_t atomic_dec(volatile uint32_t *p) { 59 | #ifdef __ATOMIC_RELAXED 60 | return __atomic_sub_fetch(p, 1U, __ATOMIC_SEQ_CST); 61 | #else 62 | return __sync_sub_and_fetch(p, 1U); 63 | #endif 64 | } 65 | 66 | #endif 67 | 68 | #endif // TREE_SITTER_ATOMIC_H_ 69 | -------------------------------------------------------------------------------- /src/alloc.c: 
-------------------------------------------------------------------------------- 1 | #include "alloc.h" 2 | #include "tree_sitter/api.h" 3 | #include 4 | 5 | static void *ts_malloc_default(size_t size) { 6 | void *result = malloc(size); 7 | if (size > 0 && !result) { 8 | fprintf(stderr, "tree-sitter failed to allocate %zu bytes", size); 9 | abort(); 10 | } 11 | return result; 12 | } 13 | 14 | static void *ts_calloc_default(size_t count, size_t size) { 15 | void *result = calloc(count, size); 16 | if (count > 0 && !result) { 17 | fprintf(stderr, "tree-sitter failed to allocate %zu bytes", count * size); 18 | abort(); 19 | } 20 | return result; 21 | } 22 | 23 | static void *ts_realloc_default(void *buffer, size_t size) { 24 | void *result = realloc(buffer, size); 25 | if (size > 0 && !result) { 26 | fprintf(stderr, "tree-sitter failed to reallocate %zu bytes", size); 27 | abort(); 28 | } 29 | return result; 30 | } 31 | 32 | // Allow clients to override allocation functions dynamically 33 | TS_PUBLIC void *(*ts_current_malloc)(size_t) = ts_malloc_default; 34 | TS_PUBLIC void *(*ts_current_calloc)(size_t, size_t) = ts_calloc_default; 35 | TS_PUBLIC void *(*ts_current_realloc)(void *, size_t) = ts_realloc_default; 36 | TS_PUBLIC void (*ts_current_free)(void *) = free; 37 | 38 | void ts_set_allocator( 39 | void *(*new_malloc)(size_t size), 40 | void *(*new_calloc)(size_t count, size_t size), 41 | void *(*new_realloc)(void *ptr, size_t size), 42 | void (*new_free)(void *ptr) 43 | ) { 44 | ts_current_malloc = new_malloc ? new_malloc : ts_malloc_default; 45 | ts_current_calloc = new_calloc ? new_calloc : ts_calloc_default; 46 | ts_current_realloc = new_realloc ? new_realloc : ts_realloc_default; 47 | ts_current_free = new_free ? 
new_free : free; 48 | } 49 | -------------------------------------------------------------------------------- /lookahead_iterator_test.go: -------------------------------------------------------------------------------- 1 | package tree_sitter_test 2 | 3 | import ( 4 | "testing" 5 | 6 | "github.com/stretchr/testify/assert" 7 | . "github.com/tree-sitter/go-tree-sitter" 8 | ) 9 | 10 | func TestLookaheadIterator(t *testing.T) { 11 | parser := NewParser() 12 | defer parser.Close() 13 | language := getLanguage("rust") 14 | parser.SetLanguage(language) 15 | 16 | tree := parser.Parse([]byte("struct Stuff {}"), nil) 17 | defer tree.Close() 18 | assert.NotNil(t, tree) 19 | 20 | cursor := tree.Walk() 21 | 22 | assert.True(t, cursor.GotoFirstChild()) // struct 23 | assert.True(t, cursor.GotoFirstChild()) // struct keyword 24 | 25 | nextState := cursor.Node().NextParseState() 26 | assert.NotEqual(t, 0, nextState) 27 | assert.Equal(t, nextState, language.NextState(cursor.Node().ParseState(), cursor.Node().GrammarId())) 28 | assert.True(t, uint(nextState) < uint(language.ParseStateCount())) 29 | assert.True(t, cursor.GotoNextSibling()) // type_identifier 30 | assert.Equal(t, nextState, cursor.Node().ParseState()) 31 | assert.Equal(t, cursor.Node().GrammarName(), "identifier") 32 | assert.NotEqual(t, cursor.Node().GrammarId(), cursor.Node().KindId()) 33 | 34 | expectedSymbols := []string{"//", "/*", "identifier", "line_comment", "block_comment"} 35 | lookahead := language.LookaheadIterator(nextState) 36 | defer lookahead.Close() 37 | assert.NotNil(t, lookahead) 38 | assert.Equal(t, lookahead.Language(), language) 39 | assert.Equal(t, lookahead.IterNames(), expectedSymbols) 40 | 41 | lookahead.ResetState(nextState) 42 | assert.Equal(t, lookahead.IterNames(), expectedSymbols) 43 | 44 | lookahead.Reset(language, nextState) 45 | var names []string 46 | symbols := lookahead.Iter() 47 | for _, s := range symbols { 48 | names = append(names, language.NodeKindForId(s)) 49 | } 50 | 
assert.Equal(t, names, expectedSymbols) 51 | } 52 | -------------------------------------------------------------------------------- /flake.nix: -------------------------------------------------------------------------------- 1 | { 2 | description = "Go bindings for the Tree-sitter parsing library"; 3 | 4 | inputs = { 5 | nixpkgs.url = "github:NixOS/nixpkgs/nixos-unstable"; 6 | 7 | self.submodules = true; 8 | }; 9 | 10 | outputs = 11 | inputs: 12 | let 13 | inherit (inputs.nixpkgs) lib; 14 | inherit (inputs) self; 15 | systems = [ 16 | "x86_64-linux" 17 | "aarch64-linux" 18 | "x86_64-darwin" 19 | "aarch64-darwin" 20 | ]; 21 | eachSystem = lib.genAttrs systems; 22 | pkgsFor = inputs.nixpkgs.legacyPackages; 23 | in 24 | { 25 | packages = eachSystem ( 26 | system: 27 | let 28 | pkgs = pkgsFor.${system}; 29 | inherit (pkgs) lib; 30 | in 31 | { 32 | default = pkgs.buildGoModule { 33 | pname = "go-tree-sitter"; 34 | version = "0.25.1"; 35 | 36 | src = self; 37 | 38 | vendorHash = "sha256-6rj6oNohxBQt0LhIaHh3fQKHbNCsLsBkuPYNquHEVzE="; 39 | proxyVendor = true; 40 | 41 | subPackages = [ "." 
]; 42 | 43 | meta = { 44 | description = "Go bindings for Tree-sitter parsing library"; 45 | homepage = "https://github.com/tree-sitter/go-tree-sitter"; 46 | license = lib.licenses.mit; 47 | maintainers = [ lib.maintainers.amaanq ]; 48 | }; 49 | }; 50 | } 51 | ); 52 | 53 | devShells = eachSystem ( 54 | system: 55 | let 56 | pkgs = pkgsFor.${system}; 57 | in 58 | { 59 | default = pkgs.mkShell { 60 | buildInputs = [ 61 | pkgs.go 62 | pkgs.gopls 63 | ]; 64 | }; 65 | } 66 | ); 67 | 68 | checks = eachSystem (system: { 69 | inherit (self.packages.${system}) default; 70 | }); 71 | }; 72 | } 73 | -------------------------------------------------------------------------------- /src/unicode.h: -------------------------------------------------------------------------------- 1 | #ifndef TREE_SITTER_UNICODE_H_ 2 | #define TREE_SITTER_UNICODE_H_ 3 | 4 | #ifdef __cplusplus 5 | extern "C" { 6 | #endif 7 | 8 | #include 9 | #include 10 | 11 | #define U_EXPORT 12 | #define U_EXPORT2 13 | #include "unicode/utf8.h" 14 | #include "unicode/utf16.h" 15 | #include "portable/endian.h" 16 | 17 | #define U16_NEXT_LE(s, i, length, c) UPRV_BLOCK_MACRO_BEGIN { \ 18 | (c)=le16toh((s)[(i)++]); \ 19 | if(U16_IS_LEAD(c)) { \ 20 | uint16_t __c2; \ 21 | if((i)!=(length) && U16_IS_TRAIL(__c2=(s)[(i)])) { \ 22 | ++(i); \ 23 | (c)=U16_GET_SUPPLEMENTARY((c), __c2); \ 24 | } \ 25 | } \ 26 | } UPRV_BLOCK_MACRO_END 27 | 28 | #define U16_NEXT_BE(s, i, length, c) UPRV_BLOCK_MACRO_BEGIN { \ 29 | (c)=be16toh((s)[(i)++]); \ 30 | if(U16_IS_LEAD(c)) { \ 31 | uint16_t __c2; \ 32 | if((i)!=(length) && U16_IS_TRAIL(__c2=(s)[(i)])) { \ 33 | ++(i); \ 34 | (c)=U16_GET_SUPPLEMENTARY((c), __c2); \ 35 | } \ 36 | } \ 37 | } UPRV_BLOCK_MACRO_END 38 | 39 | static const int32_t TS_DECODE_ERROR = U_SENTINEL; 40 | 41 | static inline uint32_t ts_decode_utf8( 42 | const uint8_t *string, 43 | uint32_t length, 44 | int32_t *code_point 45 | ) { 46 | uint32_t i = 0; 47 | U8_NEXT(string, i, length, *code_point); 48 | return i; 49 | } 50 
| 51 | static inline uint32_t ts_decode_utf16_le( 52 | const uint8_t *string, 53 | uint32_t length, 54 | int32_t *code_point 55 | ) { 56 | uint32_t i = 0; 57 | U16_NEXT_LE(((uint16_t *)string), i, length, *code_point); 58 | return i * 2; 59 | } 60 | 61 | static inline uint32_t ts_decode_utf16_be( 62 | const uint8_t *string, 63 | uint32_t length, 64 | int32_t *code_point 65 | ) { 66 | uint32_t i = 0; 67 | U16_NEXT_BE(((uint16_t *)string), i, length, *code_point); 68 | return i * 2; 69 | } 70 | 71 | #ifdef __cplusplus 72 | } 73 | #endif 74 | 75 | #endif // TREE_SITTER_UNICODE_H_ 76 | -------------------------------------------------------------------------------- /lookahead_iterator.go: -------------------------------------------------------------------------------- 1 | package tree_sitter 2 | 3 | /* 4 | #cgo CFLAGS: -Iinclude -Isrc -std=c11 -D_POSIX_C_SOURCE=200112L -D_DEFAULT_SOURCE 5 | #include 6 | */ 7 | import "C" 8 | 9 | import ( 10 | "unsafe" 11 | ) 12 | 13 | type LookaheadIterator struct { 14 | _inner *C.TSLookaheadIterator 15 | } 16 | 17 | func newLookaheadIterator(ptr *C.TSLookaheadIterator) *LookaheadIterator { 18 | return &LookaheadIterator{_inner: ptr} 19 | } 20 | 21 | func (l *LookaheadIterator) Close() { 22 | C.ts_lookahead_iterator_delete(l._inner) 23 | } 24 | 25 | func (l *LookaheadIterator) Language() *Language { 26 | return NewLanguage(unsafe.Pointer(C.ts_lookahead_iterator_language(l._inner))) 27 | } 28 | 29 | // Get the current symbol of the lookahead iterator. 30 | func (l *LookaheadIterator) Symbol() uint16 { 31 | return uint16(C.ts_lookahead_iterator_current_symbol(l._inner)) 32 | } 33 | 34 | // Get the current symbol name of the lookahead iterator. 35 | func (l *LookaheadIterator) SymbolName() string { 36 | return C.GoString(C.ts_lookahead_iterator_current_symbol_name(l._inner)) 37 | } 38 | 39 | // Reset the lookahead iterator. 40 | // 41 | // This returns `true` if the language was set successfully and `false` 42 | // otherwise. 
43 | func (l *LookaheadIterator) Reset(language *Language, state uint16) bool { 44 | return bool(C.ts_lookahead_iterator_reset(l._inner, language.Inner, C.TSStateId(state))) 45 | } 46 | 47 | // Reset the lookahead iterator to another state. 48 | // 49 | // This returns `true` if the iterator was reset to the given state and 50 | // `false` otherwise. 51 | func (l *LookaheadIterator) ResetState(state uint16) bool { 52 | return bool(C.ts_lookahead_iterator_reset_state(l._inner, C.TSStateId(state))) 53 | } 54 | 55 | // Iterate symbols. 56 | func (l *LookaheadIterator) Iter() []uint16 { 57 | var symbols []uint16 58 | for C.ts_lookahead_iterator_next(l._inner) { 59 | symbols = append(symbols, l.Symbol()) 60 | } 61 | return symbols 62 | } 63 | 64 | // Iterate symbol names. 65 | func (l *LookaheadIterator) IterNames() []string { 66 | var names []string 67 | for C.ts_lookahead_iterator_next(l._inner) { 68 | names = append(names, l.SymbolName()) 69 | } 70 | return names 71 | } 72 | -------------------------------------------------------------------------------- /src/reusable_node.h: -------------------------------------------------------------------------------- 1 | #include "./subtree.h" 2 | 3 | typedef struct { 4 | Subtree tree; 5 | uint32_t child_index; 6 | uint32_t byte_offset; 7 | } StackEntry; 8 | 9 | typedef struct { 10 | Array(StackEntry) stack; 11 | Subtree last_external_token; 12 | } ReusableNode; 13 | 14 | static inline ReusableNode reusable_node_new(void) { 15 | return (ReusableNode) {array_new(), NULL_SUBTREE}; 16 | } 17 | 18 | static inline void reusable_node_clear(ReusableNode *self) { 19 | array_clear(&self->stack); 20 | self->last_external_token = NULL_SUBTREE; 21 | } 22 | 23 | static inline Subtree reusable_node_tree(ReusableNode *self) { 24 | return self->stack.size > 0 25 | ? 
self->stack.contents[self->stack.size - 1].tree 26 | : NULL_SUBTREE; 27 | } 28 | 29 | static inline uint32_t reusable_node_byte_offset(ReusableNode *self) { 30 | return self->stack.size > 0 31 | ? self->stack.contents[self->stack.size - 1].byte_offset 32 | : UINT32_MAX; 33 | } 34 | 35 | static inline void reusable_node_delete(ReusableNode *self) { 36 | array_delete(&self->stack); 37 | } 38 | 39 | static inline void reusable_node_advance(ReusableNode *self) { 40 | StackEntry last_entry = *array_back(&self->stack); 41 | uint32_t byte_offset = last_entry.byte_offset + ts_subtree_total_bytes(last_entry.tree); 42 | if (ts_subtree_has_external_tokens(last_entry.tree)) { 43 | self->last_external_token = ts_subtree_last_external_token(last_entry.tree); 44 | } 45 | 46 | Subtree tree; 47 | uint32_t next_index; 48 | do { 49 | StackEntry popped_entry = array_pop(&self->stack); 50 | next_index = popped_entry.child_index + 1; 51 | if (self->stack.size == 0) return; 52 | tree = array_back(&self->stack)->tree; 53 | } while (ts_subtree_child_count(tree) <= next_index); 54 | 55 | array_push(&self->stack, ((StackEntry) { 56 | .tree = ts_subtree_children(tree)[next_index], 57 | .child_index = next_index, 58 | .byte_offset = byte_offset, 59 | })); 60 | } 61 | 62 | static inline bool reusable_node_descend(ReusableNode *self) { 63 | StackEntry last_entry = *array_back(&self->stack); 64 | if (ts_subtree_child_count(last_entry.tree) > 0) { 65 | array_push(&self->stack, ((StackEntry) { 66 | .tree = ts_subtree_children(last_entry.tree)[0], 67 | .child_index = 0, 68 | .byte_offset = last_entry.byte_offset, 69 | })); 70 | return true; 71 | } else { 72 | return false; 73 | } 74 | } 75 | 76 | static inline void reusable_node_advance_past_leaf(ReusableNode *self) { 77 | while (reusable_node_descend(self)) {} 78 | reusable_node_advance(self); 79 | } 80 | 81 | static inline void reusable_node_reset(ReusableNode *self, Subtree tree) { 82 | reusable_node_clear(self); 83 | array_push(&self->stack, 
((StackEntry) { 84 | .tree = tree, 85 | .child_index = 0, 86 | .byte_offset = 0, 87 | })); 88 | 89 | // Never reuse the root node, because it has a non-standard internal structure 90 | // due to transformations that are applied when it is accepted: adding the EOF 91 | // child and any extra children. 92 | if (!reusable_node_descend(self)) { 93 | reusable_node_clear(self); 94 | } 95 | } 96 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Go Tree-sitter 2 | 3 | [![CI][ci]](https://github.com/tree-sitter/go-tree-sitter/actions/workflows/ci.yml) 4 | [![Go version][go version]](https://github.com/tree-sitter/go-tree-sitter/blob/master/go.mod) 5 | [![Version][version]](https://github.com/tree-sitter/go-tree-sitter/tags) 6 | [![Docs][docs]](https://pkg.go.dev/github.com/tree-sitter/go-tree-sitter) 7 | 8 | This repository contains Go bindings for the [Tree-sitter](https://tree-sitter.github.io/tree-sitter/) parsing library. 9 | 10 | To use this in your Go project, run: 11 | 12 | ```sh 13 | go get github.com/tree-sitter/go-tree-sitter@latest 14 | ``` 15 | 16 | Example usage: 17 | 18 | ```go 19 | package main 20 | 21 | import ( 22 | "fmt" 23 | 24 | tree_sitter "github.com/tree-sitter/go-tree-sitter" 25 | tree_sitter_javascript "github.com/tree-sitter/tree-sitter-javascript/bindings/go" 26 | ) 27 | 28 | func main() { 29 | code := []byte("const foo = 1 + 2") 30 | 31 | parser := tree_sitter.NewParser() 32 | defer parser.Close() 33 | parser.SetLanguage(tree_sitter.NewLanguage(tree_sitter_javascript.Language())) 34 | 35 | tree := parser.Parse(code, nil) 36 | defer tree.Close() 37 | 38 | root := tree.RootNode() 39 | fmt.Println(root.ToSexp()) 40 | } 41 | ``` 42 | 43 | By default, none of the grammars are included in this package. 44 | This way, you can only bring in what you need, but it's at the slight cost of having to call `go get` n times. 
45 | 46 | In the example above, to fetch the JavaScript grammar, you can run the following: 47 | 48 | ```sh 49 | go get github.com/tree-sitter/tree-sitter-javascript@latest 50 | ``` 51 | 52 | Alternatively, you can load grammars at runtime from a shared library via [purego](https://github.com/ebitengine/purego). 53 | 54 | The example below shows how to load the JavaScript grammar from a shared library (`libtree-sitter-PARSER_NAME.so`) at runtime on Linux & macOS: 55 | 56 | For more information on other platforms, see the [purego documentation](https://github.com/ebitengine/purego#supported-platforms). 57 | 58 | ```go 59 | package main 60 | 61 | import ( 62 | tree_sitter "github.com/tree-sitter/go-tree-sitter" 63 | "github.com/ebitengine/purego" 64 | ) 65 | 66 | func main() { 67 | path := "/path/to/your/parser.so" 68 | lib, err := purego.Dlopen(path, purego.RTLD_NOW|purego.RTLD_GLOBAL) 69 | if err != nil { 70 | // handle error 71 | } 72 | 73 | var javascriptLanguage func() uintptr 74 | purego.RegisterLibFunc(&javascriptLanguage, lib, "tree_sitter_javascript") 75 | 76 | language := tree_sitter.NewLanguage(unsafe.Pointer(javascriptLanguage())) 77 | } 78 | ``` 79 | 80 | > [!NOTE] 81 | > Due to [bugs with `runtime.SetFinalizer` and CGO](https://groups.google.com/g/golang-nuts/c/LIWj6Gl--es), you must always call `Close` 82 | > on an object that allocates memory from C. This must be done for the `Parser`, `Tree`, `TreeCursor`, `Query`, `QueryCursor`, and `LookaheadIterator` objects. 83 | 84 | For more information, see the [documentation](https://pkg.go.dev/github.com/tree-sitter/go-tree-sitter). 
85 | 86 | [ci]: https://img.shields.io/github/actions/workflow/status/tree-sitter/go-tree-sitter/ci.yml?logo=github&label=CI 87 | [go version]: https://img.shields.io/github/go-mod/go-version/tree-sitter/go-tree-sitter 88 | [version]: https://img.shields.io/github/v/tag/tree-sitter/go-tree-sitter?label=version 89 | [docs]: https://pkg.go.dev/badge/github.com/tree-sitter/go-tree-sitter.svg?style=flat-square 90 | -------------------------------------------------------------------------------- /src/wasm/stdlib.c: -------------------------------------------------------------------------------- 1 | // This file implements a very simple allocator for external scanners running 2 | // in WASM. Allocation is just bumping a static pointer and growing the heap 3 | // as needed, and freeing is mostly a noop. But in the special case of freeing 4 | // the last-allocated pointer, we'll reuse that pointer again. 5 | 6 | #ifdef TREE_SITTER_FEATURE_WASM 7 | 8 | #include 9 | #include 10 | #include 11 | #include 12 | 13 | extern void tree_sitter_debug_message(const char *, size_t); 14 | 15 | #define PAGESIZE 0x10000 16 | #define MAX_HEAP_SIZE (4 * 1024 * 1024) 17 | 18 | typedef struct { 19 | size_t size; 20 | char data[0]; 21 | } Region; 22 | 23 | static Region *heap_end = NULL; 24 | static Region *heap_start = NULL; 25 | static Region *next = NULL; 26 | 27 | // Get the region metadata for the given heap pointer. 28 | static inline Region *region_for_ptr(void *ptr) { 29 | return ((Region *)ptr) - 1; 30 | } 31 | 32 | // Get the location of the next region after the given region, 33 | // if the given region had the given size. 
34 | static inline Region *region_after(Region *self, size_t len) { 35 | char *address = self->data + len; 36 | char *aligned = (char *)((uintptr_t)(address + 3) & ~0x3); 37 | return (Region *)aligned; 38 | } 39 | 40 | static void *get_heap_end() { 41 | return (void *)(__builtin_wasm_memory_size(0) * PAGESIZE); 42 | } 43 | 44 | static int grow_heap(size_t size) { 45 | size_t new_page_count = ((size - 1) / PAGESIZE) + 1; 46 | return __builtin_wasm_memory_grow(0, new_page_count) != SIZE_MAX; 47 | } 48 | 49 | // Clear out the heap, and move it to the given address. 50 | void reset_heap(void *new_heap_start) { 51 | heap_start = new_heap_start; 52 | next = new_heap_start; 53 | heap_end = get_heap_end(); 54 | } 55 | 56 | void *malloc(size_t size) { 57 | Region *region_end = region_after(next, size); 58 | 59 | if (region_end > heap_end) { 60 | if ((char *)region_end - (char *)heap_start > MAX_HEAP_SIZE) { 61 | return NULL; 62 | } 63 | if (!grow_heap(size)) return NULL; 64 | heap_end = get_heap_end(); 65 | } 66 | 67 | void *result = &next->data; 68 | next->size = size; 69 | next = region_end; 70 | 71 | return result; 72 | } 73 | 74 | void free(void *ptr) { 75 | if (ptr == NULL) return; 76 | 77 | Region *region = region_for_ptr(ptr); 78 | Region *region_end = region_after(region, region->size); 79 | 80 | // When freeing the last allocated pointer, re-use that 81 | // pointer for the next allocation. 82 | if (region_end == next) { 83 | next = region; 84 | } 85 | } 86 | 87 | void *calloc(size_t count, size_t size) { 88 | void *result = malloc(count * size); 89 | memset(result, 0, count * size); 90 | return result; 91 | } 92 | 93 | void *realloc(void *ptr, size_t new_size) { 94 | if (ptr == NULL) { 95 | return malloc(new_size); 96 | } 97 | 98 | Region *region = region_for_ptr(ptr); 99 | Region *region_end = region_after(region, region->size); 100 | 101 | // When reallocating the last allocated region, return 102 | // the same pointer, and skip copying the data. 
103 | if (region_end == next) { 104 | next = region; 105 | return malloc(new_size); 106 | } 107 | 108 | void *result = malloc(new_size); 109 | memcpy(result, &region->data, region->size); 110 | return result; 111 | } 112 | 113 | #endif 114 | -------------------------------------------------------------------------------- /go.sum: -------------------------------------------------------------------------------- 1 | github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= 2 | github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= 3 | github.com/mattn/go-pointer v0.0.1 h1:n+XhsuGeVO6MEAp7xyEukFINEa+Quek5psIR/ylA6o0= 4 | github.com/mattn/go-pointer v0.0.1/go.mod h1:2zXcozF6qYGgmsG+SeTZz3oAbFLdD3OWqnUbNvJZAlc= 5 | github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= 6 | github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= 7 | github.com/stretchr/testify v1.10.0 h1:Xv5erBjTwe/5IxqUQTdXv5kgmIvbHo3QQyRwhJsOfJA= 8 | github.com/stretchr/testify v1.10.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY= 9 | github.com/tree-sitter/tree-sitter-c v0.23.4 h1:nBPH3FV07DzAD7p0GfNvXM+Y7pNIoPenQWBpvM++t4c= 10 | github.com/tree-sitter/tree-sitter-c v0.23.4/go.mod h1:MkI5dOiIpeN94LNjeCp8ljXN/953JCwAby4bClMr6bw= 11 | github.com/tree-sitter/tree-sitter-cpp v0.23.4 h1:LaWZsiqQKvR65yHgKmnaqA+uz6tlDJTJFCyFIeZU/8w= 12 | github.com/tree-sitter/tree-sitter-cpp v0.23.4/go.mod h1:doqNW64BriC7WBCQ1klf0KmJpdEvfxyXtoEybnBo6v8= 13 | github.com/tree-sitter/tree-sitter-embedded-template v0.23.2 h1:nFkkH6Sbe56EXLmZBqHHcamTpmz3TId97I16EnGy4rg= 14 | github.com/tree-sitter/tree-sitter-embedded-template v0.23.2/go.mod h1:HNPOhN0qF3hWluYLdxWs5WbzP/iE4aaRVPMsdxuzIaQ= 15 | github.com/tree-sitter/tree-sitter-go v0.23.4 h1:yt5KMGnTHS+86pJmLIAZMWxukr8W7Ae1STPvQUuNROA= 16 | github.com/tree-sitter/tree-sitter-go v0.23.4/go.mod h1:Jrx8QqYN0v7npv1fJRH1AznddllYiCMUChtVjxPK040= 17 | 
github.com/tree-sitter/tree-sitter-html v0.23.2 h1:1UYDV+Yd05GGRhVnTcbP58GkKLSHHZwVaN+lBZV11Lc= 18 | github.com/tree-sitter/tree-sitter-html v0.23.2/go.mod h1:gpUv/dG3Xl/eebqgeYeFMt+JLOY9cgFinb/Nw08a9og= 19 | github.com/tree-sitter/tree-sitter-java v0.23.5 h1:J9YeMGMwXYlKSP3K4Us8CitC6hjtMjqpeOf2GGo6tig= 20 | github.com/tree-sitter/tree-sitter-java v0.23.5/go.mod h1:NRKlI8+EznxA7t1Yt3xtraPk1Wzqh3GAIC46wxvc320= 21 | github.com/tree-sitter/tree-sitter-javascript v0.23.1 h1:1fWupaRC0ArlHJ/QJzsfQ3Ibyopw7ZfQK4xXc40Zveo= 22 | github.com/tree-sitter/tree-sitter-javascript v0.23.1/go.mod h1:lmGD1EJdCA+v0S1u2fFgepMg/opzSg/4pgFym2FPGAs= 23 | github.com/tree-sitter/tree-sitter-json v0.24.8 h1:tV5rMkihgtiOe14a9LHfDY5kzTl5GNUYe6carZBn0fQ= 24 | github.com/tree-sitter/tree-sitter-json v0.24.8/go.mod h1:F351KK0KGvCaYbZ5zxwx/gWWvZhIDl0eMtn+1r+gQbo= 25 | github.com/tree-sitter/tree-sitter-php v0.23.11 h1:iHewsLNDmznh8kgGyfWfujsZxIz1YGbSd2ZTEM0ZiP8= 26 | github.com/tree-sitter/tree-sitter-php v0.23.11/go.mod h1:T/kbfi+UcCywQfUNAJnGTN/fMSUjnwPXA8k4yoIks74= 27 | github.com/tree-sitter/tree-sitter-python v0.23.6 h1:qHnWFR5WhtMQpxBZRwiaU5Hk/29vGju6CVtmvu5Haas= 28 | github.com/tree-sitter/tree-sitter-python v0.23.6/go.mod h1:cpdthSy/Yoa28aJFBscFHlGiU+cnSiSh1kuDVtI8YeM= 29 | github.com/tree-sitter/tree-sitter-ruby v0.23.1 h1:T/NKHUA+iVbHM440hFx+lzVOzS4dV6z8Qw8ai+72bYo= 30 | github.com/tree-sitter/tree-sitter-ruby v0.23.1/go.mod h1:kUS4kCCQloFcdX6sdpr8p6r2rogbM6ZjTox5ZOQy8cA= 31 | github.com/tree-sitter/tree-sitter-rust v0.23.2 h1:6AtoooCW5GqNrRpfnvl0iUhxTAZEovEmLKDbyHlfw90= 32 | github.com/tree-sitter/tree-sitter-rust v0.23.2/go.mod h1:hfeGWic9BAfgTrc7Xf6FaOAguCFJRo3RBbs7QJ6D7MI= 33 | gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405 h1:yhCVgyC4o1eVCa2tZl7eS0r+SDo693bJlVdllGtEeKM= 34 | gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= 35 | gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= 36 | 
gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= 37 | -------------------------------------------------------------------------------- /allocator.go: -------------------------------------------------------------------------------- 1 | package tree_sitter 2 | 3 | /* 4 | #cgo CFLAGS: -Iinclude -Isrc -std=c11 -D_POSIX_C_SOURCE=200112L -D_DEFAULT_SOURCE 5 | #include 6 | #include "allocator.h" 7 | */ 8 | import "C" 9 | 10 | import ( 11 | "sync/atomic" 12 | "unsafe" 13 | ) 14 | 15 | var ( 16 | malloc_fn atomic.Value 17 | calloc_fn atomic.Value 18 | realloc_fn atomic.Value 19 | free_fn atomic.Value 20 | ) 21 | 22 | func init() { 23 | malloc_fn.Store((func(C.size_t) unsafe.Pointer)(nil)) 24 | calloc_fn.Store((func(C.size_t, C.size_t) unsafe.Pointer)(nil)) 25 | realloc_fn.Store((func(unsafe.Pointer, C.size_t) unsafe.Pointer)(nil)) 26 | free_fn.Store((func(unsafe.Pointer))(nil)) 27 | } 28 | 29 | //export go_malloc 30 | func go_malloc(size C.size_t) unsafe.Pointer { 31 | if fn := malloc_fn.Load().(func(C.size_t) unsafe.Pointer); fn != nil { 32 | return fn(size) 33 | } 34 | return C.malloc(size) 35 | } 36 | 37 | //export go_calloc 38 | func go_calloc(num, size C.size_t) unsafe.Pointer { 39 | if fn := calloc_fn.Load().(func(C.size_t, C.size_t) unsafe.Pointer); fn != nil { 40 | return fn(num, size) 41 | } 42 | return C.calloc(num, size) 43 | } 44 | 45 | //export go_realloc 46 | func go_realloc(ptr unsafe.Pointer, size C.size_t) unsafe.Pointer { 47 | if fn := realloc_fn.Load().(func(unsafe.Pointer, C.size_t) unsafe.Pointer); fn != nil { 48 | return fn(ptr, size) 49 | } 50 | return C.realloc(ptr, size) 51 | } 52 | 53 | //export go_free 54 | func go_free(ptr unsafe.Pointer) { 55 | if fn := free_fn.Load().(func(unsafe.Pointer)); fn != nil { 56 | fn(ptr) 57 | return 58 | } 59 | C.free(ptr) 60 | } 61 | 62 | // Sets the memory allocation functions that the core library should use. 
63 | func SetAllocator( 64 | newMalloc func(size uint) unsafe.Pointer, 65 | newCalloc func(num, size uint) unsafe.Pointer, 66 | newRealloc func(ptr unsafe.Pointer, size uint) unsafe.Pointer, 67 | newFree func(ptr unsafe.Pointer), 68 | ) { 69 | if newMalloc == nil && newCalloc == nil && newRealloc == nil && newFree == nil { 70 | malloc_fn.Store((func(C.size_t) unsafe.Pointer)(nil)) 71 | calloc_fn.Store((func(C.size_t, C.size_t) unsafe.Pointer)(nil)) 72 | realloc_fn.Store((func(unsafe.Pointer, C.size_t) unsafe.Pointer)(nil)) 73 | free_fn.Store((func(unsafe.Pointer))(nil)) 74 | 75 | C.ts_set_allocator(nil, nil, nil, nil) 76 | return 77 | } 78 | 79 | if newMalloc != nil { 80 | malloc_fn.Store(func(size C.size_t) unsafe.Pointer { 81 | return newMalloc(uint(size)) 82 | }) 83 | } else { 84 | malloc_fn.Store(func(size C.size_t) unsafe.Pointer { 85 | return C.malloc(size) 86 | }) 87 | } 88 | 89 | if newCalloc != nil { 90 | calloc_fn.Store(func(num, size C.size_t) unsafe.Pointer { 91 | return newCalloc(uint(num), uint(size)) 92 | }) 93 | } else { 94 | calloc_fn.Store(func(num, size C.size_t) unsafe.Pointer { 95 | return C.calloc(num, size) 96 | }) 97 | } 98 | 99 | if newRealloc != nil { 100 | realloc_fn.Store(func(ptr unsafe.Pointer, size C.size_t) unsafe.Pointer { 101 | return newRealloc(ptr, uint(size)) 102 | }) 103 | } else { 104 | realloc_fn.Store(func(ptr unsafe.Pointer, size C.size_t) unsafe.Pointer { 105 | return C.realloc(ptr, size) 106 | }) 107 | } 108 | 109 | if newFree != nil { 110 | free_fn.Store(func(ptr unsafe.Pointer) { 111 | newFree(ptr) 112 | }) 113 | } else { 114 | free_fn.Store(func(ptr unsafe.Pointer) { 115 | C.free(ptr) 116 | }) 117 | } 118 | 119 | var cMalloc, cCalloc, cRealloc, cFree unsafe.Pointer 120 | if newMalloc != nil { 121 | cMalloc = unsafe.Pointer(C.c_malloc_fn) 122 | } 123 | if newCalloc != nil { 124 | cCalloc = unsafe.Pointer(C.c_calloc_fn) 125 | } 126 | if newRealloc != nil { 127 | cRealloc = unsafe.Pointer(C.c_realloc_fn) 128 | } 129 | if 
newFree != nil { 130 | cFree = unsafe.Pointer(C.c_free_fn) 131 | } 132 | 133 | C.ts_set_allocator( 134 | (*[0]byte)(cMalloc), 135 | (*[0]byte)(cCalloc), 136 | (*[0]byte)(cRealloc), 137 | (*[0]byte)(cFree), 138 | ) 139 | } 140 | -------------------------------------------------------------------------------- /src/clock.h: -------------------------------------------------------------------------------- 1 | #ifndef TREE_SITTER_CLOCK_H_ 2 | #define TREE_SITTER_CLOCK_H_ 3 | 4 | #include 5 | #include 6 | 7 | typedef uint64_t TSDuration; 8 | 9 | #ifdef _WIN32 10 | 11 | // Windows: 12 | // * Represent a time as a performance counter value. 13 | // * Represent a duration as a number of performance counter ticks. 14 | 15 | #include 16 | typedef uint64_t TSClock; 17 | 18 | static inline TSDuration duration_from_micros(uint64_t micros) { 19 | LARGE_INTEGER frequency; 20 | QueryPerformanceFrequency(&frequency); 21 | return micros * (uint64_t)frequency.QuadPart / 1000000; 22 | } 23 | 24 | static inline uint64_t duration_to_micros(TSDuration self) { 25 | LARGE_INTEGER frequency; 26 | QueryPerformanceFrequency(&frequency); 27 | return self * 1000000 / (uint64_t)frequency.QuadPart; 28 | } 29 | 30 | static inline TSClock clock_null(void) { 31 | return 0; 32 | } 33 | 34 | static inline TSClock clock_now(void) { 35 | LARGE_INTEGER result; 36 | QueryPerformanceCounter(&result); 37 | return (uint64_t)result.QuadPart; 38 | } 39 | 40 | static inline TSClock clock_after(TSClock base, TSDuration duration) { 41 | return base + duration; 42 | } 43 | 44 | static inline bool clock_is_null(TSClock self) { 45 | return !self; 46 | } 47 | 48 | static inline bool clock_is_gt(TSClock self, TSClock other) { 49 | return self > other; 50 | } 51 | 52 | #elif defined(CLOCK_MONOTONIC) 53 | 54 | // POSIX with monotonic clock support (Linux, macOS) 55 | // * Represent a time as a monotonic (seconds, nanoseconds) pair. 56 | // * Represent a duration as a number of microseconds. 
57 | // 58 | // On these platforms, parse timeouts will correspond accurately to 59 | // real time, regardless of what other processes are running. 60 | 61 | #include 62 | typedef struct timespec TSClock; 63 | 64 | static inline TSDuration duration_from_micros(uint64_t micros) { 65 | return micros; 66 | } 67 | 68 | static inline uint64_t duration_to_micros(TSDuration self) { 69 | return self; 70 | } 71 | 72 | static inline TSClock clock_now(void) { 73 | TSClock result; 74 | clock_gettime(CLOCK_MONOTONIC, &result); 75 | return result; 76 | } 77 | 78 | static inline TSClock clock_null(void) { 79 | return (TSClock) {0, 0}; 80 | } 81 | 82 | static inline TSClock clock_after(TSClock base, TSDuration duration) { 83 | TSClock result = base; 84 | result.tv_sec += duration / 1000000; 85 | result.tv_nsec += (duration % 1000000) * 1000; 86 | if (result.tv_nsec >= 1000000000) { 87 | result.tv_nsec -= 1000000000; 88 | ++(result.tv_sec); 89 | } 90 | return result; 91 | } 92 | 93 | static inline bool clock_is_null(TSClock self) { 94 | return !self.tv_sec && !self.tv_nsec; 95 | } 96 | 97 | static inline bool clock_is_gt(TSClock self, TSClock other) { 98 | if (self.tv_sec > other.tv_sec) return true; 99 | if (self.tv_sec < other.tv_sec) return false; 100 | return self.tv_nsec > other.tv_nsec; 101 | } 102 | 103 | #else 104 | 105 | // POSIX without monotonic clock support 106 | // * Represent a time as a process clock value. 107 | // * Represent a duration as a number of process clock ticks. 108 | // 109 | // On these platforms, parse timeouts may be affected by other processes, 110 | // which is not ideal, but is better than using a non-monotonic time API 111 | // like `gettimeofday`. 
112 | 113 | #include 114 | typedef uint64_t TSClock; 115 | 116 | static inline TSDuration duration_from_micros(uint64_t micros) { 117 | return micros * (uint64_t)CLOCKS_PER_SEC / 1000000; 118 | } 119 | 120 | static inline uint64_t duration_to_micros(TSDuration self) { 121 | return self * 1000000 / (uint64_t)CLOCKS_PER_SEC; 122 | } 123 | 124 | static inline TSClock clock_null(void) { 125 | return 0; 126 | } 127 | 128 | static inline TSClock clock_now(void) { 129 | return (uint64_t)clock(); 130 | } 131 | 132 | static inline TSClock clock_after(TSClock base, TSDuration duration) { 133 | return base + duration; 134 | } 135 | 136 | static inline bool clock_is_null(TSClock self) { 137 | return !self; 138 | } 139 | 140 | static inline bool clock_is_gt(TSClock self, TSClock other) { 141 | return self > other; 142 | } 143 | 144 | #endif 145 | 146 | #endif // TREE_SITTER_CLOCK_H_ 147 | -------------------------------------------------------------------------------- /edit_test.go: -------------------------------------------------------------------------------- 1 | package tree_sitter_test 2 | 3 | import ( 4 | "fmt" 5 | "math/rand" 6 | 7 | . "github.com/tree-sitter/go-tree-sitter" 8 | ) 9 | 10 | type testEdit struct { 11 | insertedText []byte 12 | position uint 13 | deletedLength uint 14 | } 15 | 16 | func performEdit(tree *Tree, input *[]byte, edit *testEdit) (InputEdit, error) { 17 | startByte := edit.position 18 | oldEndByte := edit.position + edit.deletedLength 19 | newEndByte := edit.position + uint(len(edit.insertedText)) 20 | 21 | startPosition, err := positionForOffset(*input, startByte) 22 | if err != nil { 23 | return InputEdit{}, err 24 | } 25 | 26 | oldEndPosition, err := positionForOffset(*input, oldEndByte) 27 | if err != nil { 28 | return InputEdit{}, err 29 | } 30 | 31 | newInput := make([]byte, 0, len(*input)-int(edit.deletedLength)+len(edit.insertedText)) 32 | newInput = append(newInput, (*input)[:startByte]...) 
33 | newInput = append(newInput, edit.insertedText...) 34 | newInput = append(newInput, (*input)[oldEndByte:]...) 35 | *input = newInput 36 | 37 | newEndPosition, err := positionForOffset(*input, newEndByte) 38 | if err != nil { 39 | return InputEdit{}, err 40 | } 41 | 42 | inputEdit := InputEdit{ 43 | StartByte: startByte, 44 | OldEndByte: oldEndByte, 45 | NewEndByte: newEndByte, 46 | StartPosition: startPosition, 47 | OldEndPosition: oldEndPosition, 48 | NewEndPosition: newEndPosition, 49 | } 50 | tree.Edit(&inputEdit) 51 | return inputEdit, nil 52 | } 53 | 54 | func positionForOffset(input []byte, offset uint) (Point, error) { 55 | if offset > uint(len(input)) { 56 | return Point{}, fmt.Errorf("failed to address an offset: %d", offset) 57 | } 58 | 59 | var result Point 60 | var last uint 61 | 62 | for i := uint(0); i < offset; i++ { 63 | if input[i] == '\n' { 64 | result.Row++ 65 | last = i 66 | } 67 | } 68 | 69 | if result.Row > 0 { 70 | result.Column = uint(offset - last - 1) 71 | } else { 72 | result.Column = uint(offset) 73 | } 74 | 75 | return result, nil 76 | } 77 | 78 | func invertEdit(input []byte, edit *testEdit) *testEdit { 79 | position := edit.position 80 | removedContent := input[position : position+edit.deletedLength] 81 | return &testEdit{ 82 | position: position, 83 | deletedLength: uint(len(edit.insertedText)), 84 | insertedText: removedContent, 85 | } 86 | } 87 | 88 | func getRandomEdit(rand *rand.Rand, input []byte) testEdit { 89 | choice := rand.Intn(10) 90 | if choice < 2 { 91 | // Insert text at end 92 | insertedText := randWords(rand, 3) 93 | return testEdit{ 94 | position: uint(len(input)), 95 | deletedLength: 0, 96 | insertedText: insertedText, 97 | } 98 | } else if choice < 5 { 99 | // Delete text from the end 100 | deletedLength := uint(rand.Intn(30)) 101 | if deletedLength > uint(len(input)) { 102 | deletedLength = uint(len(input)) 103 | } 104 | return testEdit{ 105 | position: uint(len(input)) - deletedLength, 106 | deletedLength: 
deletedLength, 107 | insertedText: []byte{}, 108 | } 109 | } else if choice < 8 { 110 | // Insert at a random position 111 | position := uint(rand.Intn(len(input))) 112 | wordCount := 1 + rand.Intn(3) 113 | insertedText := randWords(rand, wordCount) 114 | return testEdit{ 115 | position: position, 116 | deletedLength: 0, 117 | insertedText: insertedText, 118 | } 119 | } else { 120 | // Replace at random position 121 | position := uint(rand.Intn(len(input))) 122 | deletedLength := uint(rand.Intn(len(input) - int(position))) 123 | wordCount := 1 + rand.Intn(3) 124 | insertedText := randWords(rand, wordCount) 125 | return testEdit{ 126 | position: position, 127 | deletedLength: deletedLength, 128 | insertedText: insertedText, 129 | } 130 | } 131 | } 132 | 133 | var operators = []byte{'+', '-', '<', '>', '(', ')', '*', '/', '&', '|', '!', ',', '.', '%'} 134 | 135 | func randWords(rand *rand.Rand, maxCount int) []byte { 136 | var result []byte 137 | wordCount := rand.Intn(maxCount) 138 | for i := 0; i < wordCount; i++ { 139 | if i > 0 { 140 | if rand.Intn(5) == 0 { 141 | result = append(result, '\n') 142 | } else { 143 | result = append(result, ' ') 144 | } 145 | } 146 | if rand.Intn(3) == 0 { 147 | index := rand.Intn(len(operators)) 148 | result = append(result, operators[index]) 149 | } else { 150 | for j := 0; j < rand.Intn(8); j++ { 151 | result = append(result, byte(rand.Intn(26)+'a')) 152 | } 153 | } 154 | } 155 | return result 156 | } 157 | -------------------------------------------------------------------------------- /tree.go: -------------------------------------------------------------------------------- 1 | package tree_sitter 2 | 3 | /* 4 | #cgo CFLAGS: -Iinclude -Isrc -std=c11 -D_POSIX_C_SOURCE=200112L -D_DEFAULT_SOURCE 5 | #include 6 | */ 7 | import "C" 8 | 9 | import ( 10 | "unsafe" 11 | ) 12 | 13 | // A stateful object that this is used to produce a [Tree] based on some 14 | // source code. 
15 | type Tree struct { 16 | _inner *C.TSTree 17 | } 18 | 19 | // Create a new tree from a raw pointer. 20 | func newTree(inner *C.TSTree) *Tree { 21 | return &Tree{_inner: inner} 22 | } 23 | 24 | // Get the root node of the syntax tree. 25 | func (t *Tree) RootNode() *Node { 26 | return &Node{_inner: C.ts_tree_root_node(t._inner)} 27 | } 28 | 29 | // Get the root node of the syntax tree, but with its position shifted 30 | // forward by the given offset. 31 | func (t *Tree) RootNodeWithOffset(offsetBytes int, offsetExtent Point) *Node { 32 | return &Node{_inner: C.ts_tree_root_node_with_offset(t._inner, C.uint(offsetBytes), offsetExtent.toTSPoint())} 33 | } 34 | 35 | // Get the language that was used to parse the syntax tree. 36 | func (t *Tree) Language() *Language { 37 | return &Language{Inner: C.ts_tree_language(t._inner)} 38 | } 39 | 40 | // Edit the syntax tree to keep it in sync with source code that has been 41 | // edited. 42 | // 43 | // You must describe the edit both in terms of byte offsets and in terms of 44 | // row/column coordinates. 45 | func (t *Tree) Edit(edit *InputEdit) { 46 | C.ts_tree_edit(t._inner, edit.toTSInputEdit()) 47 | } 48 | 49 | // Create a new [TreeCursor] starting from the root of the tree. 50 | func (t *Tree) Walk() *TreeCursor { 51 | return t.RootNode().Walk() 52 | } 53 | 54 | // Compare this old edited syntax tree to a new syntax tree representing 55 | // the same document, returning a sequence of ranges whose syntactic 56 | // structure has changed. 57 | // 58 | // For this to work correctly, this syntax tree must have been edited such 59 | // that its ranges match up to the new tree. Generally, you'll want to 60 | // call this method right after calling one of the [Parser.parse] 61 | // functions. Call it on the old tree that was passed to parse, and 62 | // pass the new tree that was returned from `parse`. 
63 | // 64 | // The returned ranges indicate areas where the hierarchical structure of syntax 65 | // nodes (from root to leaf) has changed between the old and new trees. Characters 66 | // outside these ranges have identical ancestor nodes in both trees. 67 | // 68 | // Note that the returned ranges may be slightly larger than the exact changed areas, 69 | // but Tree-sitter attempts to make them as small as possible. 70 | func (t *Tree) ChangedRanges(other *Tree) []Range { 71 | var count C.uint 72 | ptr := C.ts_tree_get_changed_ranges(t._inner, other._inner, &count) 73 | ranges := make([]Range, int(count)) 74 | for i := uintptr(0); i < uintptr(count); i++ { 75 | val := *(*C.TSRange)(unsafe.Pointer(uintptr(unsafe.Pointer(ptr)) + i*unsafe.Sizeof(*ptr))) 76 | ranges[i] = Range{ 77 | StartPoint: Point{Row: uint(val.start_point.row), Column: uint(val.start_point.column)}, 78 | EndPoint: Point{Row: uint(val.end_point.row), Column: uint(val.end_point.column)}, 79 | StartByte: uint(val.start_byte), 80 | EndByte: uint(val.end_byte), 81 | } 82 | } 83 | go_free(unsafe.Pointer(ptr)) 84 | return ranges 85 | } 86 | 87 | // Get the included ranges that were used to parse the syntax tree. 88 | func (t *Tree) IncludedRanges() []Range { 89 | var count C.uint 90 | ptr := C.ts_tree_included_ranges(t._inner, &count) 91 | ranges := make([]Range, int(count)) 92 | for i := uintptr(0); i < uintptr(count); i++ { 93 | val := *(*C.TSRange)(unsafe.Pointer(uintptr(unsafe.Pointer(ptr)) + i*unsafe.Sizeof(*ptr))) 94 | ranges[i] = Range{ 95 | StartPoint: Point{Row: uint(val.start_point.row), Column: uint(val.start_point.column)}, 96 | EndPoint: Point{Row: uint(val.end_point.row), Column: uint(val.end_point.column)}, 97 | StartByte: uint(val.start_byte), 98 | EndByte: uint(val.end_byte), 99 | } 100 | } 101 | go_free(unsafe.Pointer(ptr)) 102 | return ranges 103 | } 104 | 105 | // Print a graph of the tree to the given file descriptor. 106 | // The graph is formatted in the DOT language. 
You may want to pipe this 107 | // graph directly to a `dot(1)` process in order to generate SVG 108 | // output. 109 | func (t *Tree) PrintDotGraph(file int) { 110 | C.ts_tree_print_dot_graph(t._inner, C.int(file)) 111 | } 112 | 113 | func (t *Tree) Close() { 114 | if t != nil { 115 | C.ts_tree_delete(t._inner) 116 | } 117 | } 118 | 119 | func (t *Tree) Clone() *Tree { 120 | return newTree(C.ts_tree_copy(t._inner)) 121 | } 122 | -------------------------------------------------------------------------------- /src/stack.h: -------------------------------------------------------------------------------- 1 | #ifndef TREE_SITTER_PARSE_STACK_H_ 2 | #define TREE_SITTER_PARSE_STACK_H_ 3 | 4 | #ifdef __cplusplus 5 | extern "C" { 6 | #endif 7 | 8 | #include "./array.h" 9 | #include "./subtree.h" 10 | #include 11 | 12 | typedef struct Stack Stack; 13 | 14 | typedef unsigned StackVersion; 15 | #define STACK_VERSION_NONE ((StackVersion)-1) 16 | 17 | typedef struct { 18 | SubtreeArray subtrees; 19 | StackVersion version; 20 | } StackSlice; 21 | typedef Array(StackSlice) StackSliceArray; 22 | 23 | typedef struct { 24 | Length position; 25 | unsigned depth; 26 | TSStateId state; 27 | } StackSummaryEntry; 28 | typedef Array(StackSummaryEntry) StackSummary; 29 | 30 | // Create a stack. 31 | Stack *ts_stack_new(SubtreePool *subtree_pool); 32 | 33 | // Release the memory reserved for a given stack. 34 | void ts_stack_delete(Stack *self); 35 | 36 | // Get the stack's current number of versions. 37 | uint32_t ts_stack_version_count(const Stack *self); 38 | 39 | // Get the state at the top of the given version of the stack. If the stack is 40 | // empty, this returns the initial state, 0. 41 | TSStateId ts_stack_state(const Stack *self, StackVersion version); 42 | 43 | // Get the last external token associated with a given version of the stack. 
44 | Subtree ts_stack_last_external_token(const Stack *self, StackVersion version); 45 | 46 | // Set the last external token associated with a given version of the stack. 47 | void ts_stack_set_last_external_token(Stack *self, StackVersion version, Subtree token); 48 | 49 | // Get the position of the given version of the stack within the document. 50 | Length ts_stack_position(const Stack *, StackVersion); 51 | 52 | // Push a tree and state onto the given version of the stack. 53 | // 54 | // This transfers ownership of the tree to the Stack. Callers that 55 | // need to retain ownership of the tree for their own purposes should 56 | // first retain the tree. 57 | void ts_stack_push(Stack *self, StackVersion version, Subtree subtree, bool pending, TSStateId state); 58 | 59 | // Pop the given number of entries from the given version of the stack. This 60 | // operation can increase the number of stack versions by revealing multiple 61 | // versions which had previously been merged. It returns an array that 62 | // specifies the index of each revealed version and the trees that were 63 | // removed from that version. 64 | StackSliceArray ts_stack_pop_count(Stack *self, StackVersion version, uint32_t count); 65 | 66 | // Remove an error at the top of the given version of the stack. 67 | SubtreeArray ts_stack_pop_error(Stack *self, StackVersion version); 68 | 69 | // Remove any pending trees from the top of the given version of the stack. 70 | StackSliceArray ts_stack_pop_pending(Stack *self, StackVersion version); 71 | 72 | // Remove all trees from the given version of the stack. 73 | StackSliceArray ts_stack_pop_all(Stack *self, StackVersion version); 74 | 75 | // Get the maximum number of tree nodes reachable from this version of the stack 76 | // since the last error was detected. 
77 | unsigned ts_stack_node_count_since_error(const Stack *self, StackVersion version); 78 | 79 | int ts_stack_dynamic_precedence(Stack *self, StackVersion version); 80 | 81 | bool ts_stack_has_advanced_since_error(const Stack *self, StackVersion version); 82 | 83 | // Compute a summary of all the parse states near the top of the given 84 | // version of the stack and store the summary for later retrieval. 85 | void ts_stack_record_summary(Stack *self, StackVersion version, unsigned max_depth); 86 | 87 | // Retrieve a summary of all the parse states near the top of the 88 | // given version of the stack. 89 | StackSummary *ts_stack_get_summary(Stack *self, StackVersion version); 90 | 91 | // Get the total cost of all errors on the given version of the stack. 92 | unsigned ts_stack_error_cost(const Stack *self, StackVersion version); 93 | 94 | // Merge the given two stack versions if possible, returning true 95 | // if they were successfully merged and false otherwise. 96 | bool ts_stack_merge(Stack *self, StackVersion version1, StackVersion version2); 97 | 98 | // Determine whether the given two stack versions can be merged. 
99 | bool ts_stack_can_merge(Stack *self, StackVersion version1, StackVersion version2); 100 | 101 | Subtree ts_stack_resume(Stack *self, StackVersion version); 102 | 103 | void ts_stack_pause(Stack *self, StackVersion version, Subtree lookahead); 104 | 105 | void ts_stack_halt(Stack *self, StackVersion version); 106 | 107 | bool ts_stack_is_active(const Stack *self, StackVersion version); 108 | 109 | bool ts_stack_is_paused(const Stack *self, StackVersion version); 110 | 111 | bool ts_stack_is_halted(const Stack *self, StackVersion version); 112 | 113 | void ts_stack_renumber_version(Stack *self, StackVersion v1, StackVersion v2); 114 | 115 | void ts_stack_swap_versions(Stack *, StackVersion v1, StackVersion v2); 116 | 117 | StackVersion ts_stack_copy_version(Stack *self, StackVersion version); 118 | 119 | // Remove the given version from the stack. 120 | void ts_stack_remove_version(Stack *self, StackVersion version); 121 | 122 | void ts_stack_clear(Stack *self); 123 | 124 | bool ts_stack_print_dot_graph(Stack *self, const TSLanguage *language, FILE *f); 125 | 126 | #ifdef __cplusplus 127 | } 128 | #endif 129 | 130 | #endif // TREE_SITTER_PARSE_STACK_H_ 131 | -------------------------------------------------------------------------------- /src/tree.c: -------------------------------------------------------------------------------- 1 | #include "tree_sitter/api.h" 2 | #include "./array.h" 3 | #include "./get_changed_ranges.h" 4 | #include "./length.h" 5 | #include "./subtree.h" 6 | #include "./tree_cursor.h" 7 | #include "./tree.h" 8 | 9 | TSTree *ts_tree_new( 10 | Subtree root, const TSLanguage *language, 11 | const TSRange *included_ranges, unsigned included_range_count 12 | ) { 13 | TSTree *result = ts_malloc(sizeof(TSTree)); 14 | result->root = root; 15 | result->language = ts_language_copy(language); 16 | result->included_ranges = ts_calloc(included_range_count, sizeof(TSRange)); 17 | memcpy(result->included_ranges, included_ranges, included_range_count * 
sizeof(TSRange)); 18 | result->included_range_count = included_range_count; 19 | return result; 20 | } 21 | 22 | TSTree *ts_tree_copy(const TSTree *self) { 23 | ts_subtree_retain(self->root); 24 | return ts_tree_new(self->root, self->language, self->included_ranges, self->included_range_count); 25 | } 26 | 27 | void ts_tree_delete(TSTree *self) { 28 | if (!self) return; 29 | 30 | SubtreePool pool = ts_subtree_pool_new(0); 31 | ts_subtree_release(&pool, self->root); 32 | ts_subtree_pool_delete(&pool); 33 | ts_language_delete(self->language); 34 | ts_free(self->included_ranges); 35 | ts_free(self); 36 | } 37 | 38 | TSNode ts_tree_root_node(const TSTree *self) { 39 | return ts_node_new(self, &self->root, ts_subtree_padding(self->root), 0); 40 | } 41 | 42 | TSNode ts_tree_root_node_with_offset( 43 | const TSTree *self, 44 | uint32_t offset_bytes, 45 | TSPoint offset_extent 46 | ) { 47 | Length offset = {offset_bytes, offset_extent}; 48 | return ts_node_new(self, &self->root, length_add(offset, ts_subtree_padding(self->root)), 0); 49 | } 50 | 51 | const TSLanguage *ts_tree_language(const TSTree *self) { 52 | return self->language; 53 | } 54 | 55 | void ts_tree_edit(TSTree *self, const TSInputEdit *edit) { 56 | for (unsigned i = 0; i < self->included_range_count; i++) { 57 | TSRange *range = &self->included_ranges[i]; 58 | if (range->end_byte >= edit->old_end_byte) { 59 | if (range->end_byte != UINT32_MAX) { 60 | range->end_byte = edit->new_end_byte + (range->end_byte - edit->old_end_byte); 61 | range->end_point = point_add( 62 | edit->new_end_point, 63 | point_sub(range->end_point, edit->old_end_point) 64 | ); 65 | if (range->end_byte < edit->new_end_byte) { 66 | range->end_byte = UINT32_MAX; 67 | range->end_point = POINT_MAX; 68 | } 69 | } 70 | } else if (range->end_byte > edit->start_byte) { 71 | range->end_byte = edit->start_byte; 72 | range->end_point = edit->start_point; 73 | } 74 | if (range->start_byte >= edit->old_end_byte) { 75 | range->start_byte = 
edit->new_end_byte + (range->start_byte - edit->old_end_byte); 76 | range->start_point = point_add( 77 | edit->new_end_point, 78 | point_sub(range->start_point, edit->old_end_point) 79 | ); 80 | if (range->start_byte < edit->new_end_byte) { 81 | range->start_byte = UINT32_MAX; 82 | range->start_point = POINT_MAX; 83 | } 84 | } else if (range->start_byte > edit->start_byte) { 85 | range->start_byte = edit->start_byte; 86 | range->start_point = edit->start_point; 87 | } 88 | } 89 | 90 | SubtreePool pool = ts_subtree_pool_new(0); 91 | self->root = ts_subtree_edit(self->root, edit, &pool); 92 | ts_subtree_pool_delete(&pool); 93 | } 94 | 95 | TSRange *ts_tree_included_ranges(const TSTree *self, uint32_t *length) { 96 | *length = self->included_range_count; 97 | TSRange *ranges = ts_calloc(self->included_range_count, sizeof(TSRange)); 98 | memcpy(ranges, self->included_ranges, self->included_range_count * sizeof(TSRange)); 99 | return ranges; 100 | } 101 | 102 | TSRange *ts_tree_get_changed_ranges(const TSTree *old_tree, const TSTree *new_tree, uint32_t *length) { 103 | TreeCursor cursor1 = {NULL, array_new(), 0}; 104 | TreeCursor cursor2 = {NULL, array_new(), 0}; 105 | ts_tree_cursor_init(&cursor1, ts_tree_root_node(old_tree)); 106 | ts_tree_cursor_init(&cursor2, ts_tree_root_node(new_tree)); 107 | 108 | TSRangeArray included_range_differences = array_new(); 109 | ts_range_array_get_changed_ranges( 110 | old_tree->included_ranges, old_tree->included_range_count, 111 | new_tree->included_ranges, new_tree->included_range_count, 112 | &included_range_differences 113 | ); 114 | 115 | TSRange *result; 116 | *length = ts_subtree_get_changed_ranges( 117 | &old_tree->root, &new_tree->root, &cursor1, &cursor2, 118 | old_tree->language, &included_range_differences, &result 119 | ); 120 | 121 | array_delete(&included_range_differences); 122 | array_delete(&cursor1.stack); 123 | array_delete(&cursor2.stack); 124 | return result; 125 | } 126 | 127 | #ifdef _WIN32 128 | 129 | 
#include 130 | #include 131 | 132 | int _ts_dup(HANDLE handle) { 133 | HANDLE dup_handle; 134 | if (!DuplicateHandle( 135 | GetCurrentProcess(), handle, 136 | GetCurrentProcess(), &dup_handle, 137 | 0, FALSE, DUPLICATE_SAME_ACCESS 138 | )) return -1; 139 | 140 | return _open_osfhandle((intptr_t)dup_handle, 0); 141 | } 142 | 143 | void ts_tree_print_dot_graph(const TSTree *self, int fd) { 144 | FILE *file = _fdopen(_ts_dup((HANDLE)_get_osfhandle(fd)), "a"); 145 | ts_subtree_print_dot_graph(self->root, self->language, file); 146 | fclose(file); 147 | } 148 | 149 | #elif !defined(__wasi__) // WASI doesn't support dup 150 | 151 | #include 152 | 153 | int _ts_dup(int file_descriptor) { 154 | return dup(file_descriptor); 155 | } 156 | 157 | void ts_tree_print_dot_graph(const TSTree *self, int file_descriptor) { 158 | FILE *file = fdopen(_ts_dup(file_descriptor), "a"); 159 | ts_subtree_print_dot_graph(self->root, self->language, file); 160 | fclose(file); 161 | } 162 | 163 | #else 164 | 165 | void ts_tree_print_dot_graph(const TSTree *self, int file_descriptor) { 166 | (void)self; 167 | (void)file_descriptor; 168 | } 169 | 170 | #endif 171 | -------------------------------------------------------------------------------- /language.go: -------------------------------------------------------------------------------- 1 | package tree_sitter 2 | 3 | /* 4 | #cgo CFLAGS: -Iinclude -Isrc -std=c11 -D_POSIX_C_SOURCE=200112L -D_DEFAULT_SOURCE 5 | #include 6 | */ 7 | import "C" 8 | 9 | import ( 10 | "fmt" 11 | "unsafe" 12 | ) 13 | 14 | const LANGUAGE_VERSION = C.TREE_SITTER_LANGUAGE_VERSION 15 | 16 | const MIN_COMPATIBLE_LANGUAGE_VERSION = C.TREE_SITTER_MIN_COMPATIBLE_LANGUAGE_VERSION 17 | 18 | // An opaque object that defines how to parse a particular language. The code 19 | // for each [Language] is generated by the Tree-sitter CLI. 
20 | type Language struct { 21 | Inner *C.TSLanguage 22 | } 23 | 24 | // An error that occurred when trying to assign an incompatible [TSLanguage] to 25 | // a [TSParser]. 26 | type LanguageError struct { 27 | version uint32 28 | } 29 | 30 | // The metadata associated with a language. 31 | // 32 | // Currently, this metadata can be used to check the [Semantic Version](https://semver.org/) 33 | // of the language. This version information should be used to signal if a given parser might 34 | // be incompatible with existing queries when upgrading between major versions, or minor versions 35 | // if it's in zerover. 36 | type LanguageMetadata struct { 37 | MajorVersion uint8 38 | MinorVersion uint8 39 | PatchVersion uint8 40 | } 41 | 42 | func NewLanguage(ptr unsafe.Pointer) *Language { 43 | return &Language{Inner: (*C.TSLanguage)(ptr)} 44 | } 45 | 46 | // Deprecated: Use [Language.AbiVersion] instead. 47 | // 48 | // Get the ABI version number that indicates which version of the 49 | // Tree-sitter CLI that was used to generate this [Language]. 50 | func (l *Language) Version() uint32 { 51 | return uint32(C.ts_language_version(l.Inner)) 52 | } 53 | 54 | // Get the ABI version number that indicates which version of the 55 | // Tree-sitter CLI that was used to generate this [Language]. 56 | func (l *Language) AbiVersion() uint32 { 57 | return uint32(C.ts_language_abi_version(l.Inner)) 58 | } 59 | 60 | // Get the metadata for this language. This information is generated by the 61 | // CLI, and relies on the language author providing the correct metadata in 62 | // the language's `tree-sitter.json` file. 
63 | func (l *Language) Metadata() *LanguageMetadata { 64 | ptr := C.ts_language_metadata(l.Inner) 65 | if ptr == nil { 66 | return nil 67 | } 68 | return &LanguageMetadata{ 69 | MajorVersion: uint8(ptr.major_version), 70 | MinorVersion: uint8(ptr.minor_version), 71 | PatchVersion: uint8(ptr.patch_version), 72 | } 73 | } 74 | 75 | // Get the number of distinct node types in this language. 76 | func (l *Language) NodeKindCount() uint32 { 77 | return uint32(C.ts_language_symbol_count(l.Inner)) 78 | } 79 | 80 | // Get the number of valid states in this language. 81 | func (l *Language) ParseStateCount() uint32 { 82 | return uint32(C.ts_language_state_count(l.Inner)) 83 | } 84 | 85 | // Get the name of the node kind for the given numerical id. 86 | func (l *Language) NodeKindForId(id uint16) string { 87 | return C.GoString(C.ts_language_symbol_name(l.Inner, C.TSSymbol(id))) 88 | } 89 | 90 | // Get the numeric id for the given node kind. 91 | func (l *Language) IdForNodeKind(kind string, named bool) uint16 { 92 | return uint16(C.ts_language_symbol_for_name(l.Inner, C.CString(kind), C.uint32_t(len(kind)), C.bool(named))) 93 | } 94 | 95 | // Check if the node type for the given numerical id is named (as opposed 96 | // to an anonymous node type). 97 | func (l *Language) NodeKindIsNamed(id uint16) bool { 98 | return C.ts_language_symbol_type(l.Inner, C.TSSymbol(id)) == C.TSSymbolTypeRegular 99 | } 100 | 101 | // Check if the node type for the given numerical id is visible (as opposed 102 | // to a hidden node type). 103 | func (l *Language) NodeKindIsVisible(id uint16) bool { 104 | return C.ts_language_symbol_type(l.Inner, C.TSSymbol(id)) <= C.TSSymbolTypeAnonymous 105 | } 106 | 107 | // Check if the node type for the given numerical id is a supertype. 
108 | func (l *Language) NodeKindIsSupertype(id uint16) bool { 109 | return C.ts_language_symbol_type(l.Inner, C.TSSymbol(id)) == C.TSSymbolTypeSupertype 110 | } 111 | 112 | // Get the number of distinct field names in this language. 113 | func (l *Language) FieldCount() uint32 { 114 | return uint32(C.ts_language_field_count(l.Inner)) 115 | } 116 | 117 | // Get the field names for the given numerical id. 118 | func (l *Language) FieldNameForId(id uint16) string { 119 | return C.GoString(C.ts_language_field_name_for_id(l.Inner, C.TSFieldId(id))) 120 | } 121 | 122 | // Get the numerical id for the given field name. 123 | func (l *Language) FieldIdForName(name string) uint16 { 124 | return uint16(C.ts_language_field_id_for_name(l.Inner, C.CString(name), C.uint32_t(len(name)))) 125 | } 126 | 127 | // Get the next parse state. Combine this with 128 | // [Language.LookaheadIterator] to 129 | // generate completion suggestions or valid symbols in error nodes. 130 | func (l *Language) NextState(state uint16, id uint16) uint16 { 131 | return uint16(C.ts_language_next_state(l.Inner, C.TSStateId(state), C.TSSymbol(id))) 132 | } 133 | 134 | // Create a new lookahead iterator for this language and parse state. 135 | // 136 | // This returns `nil` if state is invalid for this language. 137 | // 138 | // Iterating [LookaheadIterator] will yield valid symbols in the given 139 | // parse state. Newly created lookahead iterators will return the `ERROR` 140 | // symbol from [LookaheadIterator.Symbol]. 141 | // 142 | // Lookahead iterators can be useful to generate suggestions and improve 143 | // syntax error diagnostics. To get symbols valid in an ERROR node, use the 144 | // lookahead iterator on its first leaf node state. For `MISSING` nodes, a 145 | // lookahead iterator created on the previous non-extra leaf node may be 146 | // appropriate. 
147 | func (l *Language) LookaheadIterator(state uint16) *LookaheadIterator { 148 | ptr := C.ts_lookahead_iterator_new(l.Inner, C.TSStateId(state)) 149 | if ptr == nil { 150 | return nil 151 | } 152 | return newLookaheadIterator(ptr) 153 | } 154 | 155 | func (l *LanguageError) Error() string { 156 | return fmt.Sprintf("Incompatible language version %d. Expected minimum %d, maximum %d", l.version, C.TREE_SITTER_MIN_COMPATIBLE_LANGUAGE_VERSION, C.TREE_SITTER_LANGUAGE_VERSION) 157 | } 158 | -------------------------------------------------------------------------------- /tree_cursor.go: -------------------------------------------------------------------------------- 1 | package tree_sitter 2 | 3 | /* 4 | #cgo CFLAGS: -Iinclude -Isrc -std=c11 -D_POSIX_C_SOURCE=200112L -D_DEFAULT_SOURCE 5 | #include 6 | */ 7 | import "C" 8 | 9 | // A stateful object for walking a syntax [Tree] efficiently. 10 | type TreeCursor struct { 11 | _inner C.TSTreeCursor 12 | } 13 | 14 | func newTreeCursor(node Node) *TreeCursor { 15 | return &TreeCursor{_inner: C.ts_tree_cursor_new(node._inner)} 16 | } 17 | 18 | func (tc *TreeCursor) Close() { 19 | C.ts_tree_cursor_delete(&tc._inner) 20 | } 21 | 22 | func (tc *TreeCursor) Copy() *TreeCursor { 23 | return &TreeCursor{_inner: C.ts_tree_cursor_copy(&tc._inner)} 24 | } 25 | 26 | // Get the tree cursor's current [Node]. 27 | func (tc *TreeCursor) Node() *Node { 28 | return newNode(C.ts_tree_cursor_current_node(&tc._inner)) 29 | } 30 | 31 | // Get the numerical field id of this tree cursor's current node. 32 | // 33 | // See also [TreeCursor.FieldName]. 34 | func (tc *TreeCursor) FieldId() uint16 { 35 | return uint16(C.ts_tree_cursor_current_field_id(&tc._inner)) 36 | } 37 | 38 | // Get the field name of this tree cursor's current node. 
39 | func (tc *TreeCursor) FieldName() string { 40 | return C.GoString(C.ts_tree_cursor_current_field_name(&tc._inner)) 41 | } 42 | 43 | // Get the depth of the cursor's current node relative to the original 44 | // node that the cursor was constructed with. 45 | func (tc *TreeCursor) Depth() uint32 { 46 | return uint32(C.ts_tree_cursor_current_depth(&tc._inner)) 47 | } 48 | 49 | // Get the index of the cursor's current node out of all of the 50 | // descendants of the original node that the cursor was constructed with. 51 | func (tc *TreeCursor) DescendantIndex() uint32 { 52 | return uint32(C.ts_tree_cursor_current_descendant_index(&tc._inner)) 53 | } 54 | 55 | // Move this cursor to the first child of its current node. 56 | // 57 | // This returns `true` if the cursor successfully moved, and returns 58 | // `false` if there were no children. 59 | func (tc *TreeCursor) GotoFirstChild() bool { 60 | return bool(C.ts_tree_cursor_goto_first_child(&tc._inner)) 61 | } 62 | 63 | // Move this cursor to the last child of its current node. 64 | // 65 | // This returns `true` if the cursor successfully moved, and returns 66 | // `false` if there were no children. 67 | // 68 | // Note that this function may be slower than 69 | // [TreeCursor.GotoFirstChild] because it needs to 70 | // iterate through all the children to compute the child's position. 71 | func (tc *TreeCursor) GotoLastChild() bool { 72 | return bool(C.ts_tree_cursor_goto_last_child(&tc._inner)) 73 | } 74 | 75 | // Move this cursor to the parent of its current node. 76 | // 77 | // This returns `true` if the cursor successfully moved, and returns 78 | // `false` if there was no parent node (the cursor was already on the 79 | // root node). 80 | // 81 | // Note that the given node is considered the root of the cursor, 82 | // and the cursor cannot walk outside this node. 
83 | func (tc *TreeCursor) GotoParent() bool { 84 | return bool(C.ts_tree_cursor_goto_parent(&tc._inner)) 85 | } 86 | 87 | // Move this cursor to the next sibling of its current node. 88 | // 89 | // This returns `true` if the cursor successfully moved, and returns 90 | // `false` if there was no next sibling node. 91 | // 92 | // Note that the given node is considered the root of the cursor, 93 | // and the cursor cannot walk outside this node. 94 | func (tc *TreeCursor) GotoNextSibling() bool { 95 | return bool(C.ts_tree_cursor_goto_next_sibling(&tc._inner)) 96 | } 97 | 98 | // Move the cursor to the node that is the nth descendant of 99 | // the original node that the cursor was constructed with, where 100 | // zero represents the original node itself. 101 | func (tc *TreeCursor) GotoDescendant(descendantIndex uint32) { 102 | C.ts_tree_cursor_goto_descendant(&tc._inner, C.uint32_t(descendantIndex)) 103 | } 104 | 105 | // Move this cursor to the previous sibling of its current node. 106 | // 107 | // This returns `true` if the cursor successfully moved, and returns 108 | // `false` if there was no previous sibling node. 109 | // 110 | // Note, that this function may be slower than 111 | // [TreeCursor.GotoNextSibling] due to how node 112 | // positions are stored. In the worst case, this will need to iterate 113 | // through all the children upto the previous sibling node to recalculate 114 | // its position. Also note that the node the cursor was constructed with 115 | // is considered the root of the cursor, and the cursor cannot 116 | // walk outside this node. 117 | func (tc *TreeCursor) GotoPreviousSibling() bool { 118 | return bool(C.ts_tree_cursor_goto_previous_sibling(&tc._inner)) 119 | } 120 | 121 | // Move this cursor to the first child of its current node that extends 122 | // beyond the given byte offset. 123 | // 124 | // This returns the index of the child node if one was found, and returns 125 | // `nil` if no such child was found. 
126 | func (tc *TreeCursor) GotoFirstChildForByte(byteIndex uint32) *uint { 127 | res := C.ts_tree_cursor_goto_first_child_for_byte(&tc._inner, C.uint32_t(byteIndex)) 128 | if res < 0 { 129 | return nil 130 | } 131 | index := uint(res) 132 | return &index 133 | } 134 | 135 | // Move this cursor to the first child of its current node that extends 136 | // beyond the given byte offset. 137 | // 138 | // This returns the index of the child node if one was found, and returns 139 | // `nil` if no such child was found. 140 | func (tc *TreeCursor) GotoFirstChildForPoint(point Point) *uint { 141 | res := C.ts_tree_cursor_goto_first_child_for_point(&tc._inner, point.toTSPoint()) 142 | if res < 0 { 143 | return nil 144 | } 145 | index := uint(res) 146 | return &index 147 | } 148 | 149 | // Re-initialize this tree cursor to start at the original node that the 150 | // cursor was constructed with. 151 | func (tc *TreeCursor) Reset(node Node) { 152 | C.ts_tree_cursor_reset(&tc._inner, node._inner) 153 | } 154 | 155 | // Re-initialize a tree cursor to the same position as another cursor. 156 | // 157 | // Unlike [TreeCursor.Reset], this will not lose parent 158 | // information and allows reusing already created cursors. 159 | func (tc *TreeCursor) ResetTo(cursor *TreeCursor) { 160 | C.ts_tree_cursor_reset_to(&tc._inner, &cursor._inner) 161 | } 162 | -------------------------------------------------------------------------------- /src/portable/endian.h: -------------------------------------------------------------------------------- 1 | // "License": Public Domain 2 | // I, Mathias Panzenböck, place this file hereby into the public domain. Use it at your own risk for whatever you like. 3 | // In case there are jurisdictions that don't support putting things in the public domain you can also consider it to 4 | // be "dual licensed" under the BSD, MIT and Apache licenses, if you want to. This code is trivial anyway. 
Consider it 5 | // an example on how to get the endian conversion functions on different platforms. 6 | 7 | // updates from https://github.com/mikepb/endian.h/issues/4 8 | 9 | #ifndef ENDIAN_H 10 | #define ENDIAN_H 11 | 12 | #if (defined(_WIN16) || defined(_WIN32) || defined(_WIN64)) && !defined(__WINDOWS__) 13 | 14 | # define __WINDOWS__ 15 | 16 | #endif 17 | 18 | #if defined(HAVE_ENDIAN_H) || \ 19 | defined(__linux__) || \ 20 | defined(__GNU__) || \ 21 | defined(__OpenBSD__) || \ 22 | defined(__CYGWIN__) || \ 23 | defined(__MSYS__) || \ 24 | defined(__EMSCRIPTEN__) 25 | 26 | # include 27 | 28 | #elif defined(HAVE_SYS_ENDIAN_H) || \ 29 | defined(__FreeBSD__) || \ 30 | defined(__NetBSD__) || \ 31 | defined(__DragonFly__) 32 | 33 | # include 34 | 35 | #elif defined(__APPLE__) 36 | # define __BYTE_ORDER BYTE_ORDER 37 | # define __BIG_ENDIAN BIG_ENDIAN 38 | # define __LITTLE_ENDIAN LITTLE_ENDIAN 39 | # define __PDP_ENDIAN PDP_ENDIAN 40 | 41 | # if !defined(_POSIX_C_SOURCE) 42 | # include 43 | 44 | # define htobe16(x) OSSwapHostToBigInt16(x) 45 | # define htole16(x) OSSwapHostToLittleInt16(x) 46 | # define be16toh(x) OSSwapBigToHostInt16(x) 47 | # define le16toh(x) OSSwapLittleToHostInt16(x) 48 | 49 | # define htobe32(x) OSSwapHostToBigInt32(x) 50 | # define htole32(x) OSSwapHostToLittleInt32(x) 51 | # define be32toh(x) OSSwapBigToHostInt32(x) 52 | # define le32toh(x) OSSwapLittleToHostInt32(x) 53 | 54 | # define htobe64(x) OSSwapHostToBigInt64(x) 55 | # define htole64(x) OSSwapHostToLittleInt64(x) 56 | # define be64toh(x) OSSwapBigToHostInt64(x) 57 | # define le64toh(x) OSSwapLittleToHostInt64(x) 58 | # else 59 | # if BYTE_ORDER == LITTLE_ENDIAN 60 | # define htobe16(x) __builtin_bswap16(x) 61 | # define htole16(x) (x) 62 | # define be16toh(x) __builtin_bswap16(x) 63 | # define le16toh(x) (x) 64 | 65 | # define htobe32(x) __builtin_bswap32(x) 66 | # define htole32(x) (x) 67 | # define be32toh(x) __builtin_bswap32(x) 68 | # define le32toh(x) (x) 69 | 70 | # define 
htobe64(x) __builtin_bswap64(x) 71 | # define htole64(x) (x) 72 | # define be64toh(x) __builtin_bswap64(x) 73 | # define le64toh(x) (x) 74 | # elif BYTE_ORDER == BIG_ENDIAN 75 | # define htobe16(x) (x) 76 | # define htole16(x) __builtin_bswap16(x) 77 | # define be16toh(x) (x) 78 | # define le16toh(x) __builtin_bswap16(x) 79 | 80 | # define htobe32(x) (x) 81 | # define htole32(x) __builtin_bswap32(x) 82 | # define be32toh(x) (x) 83 | # define le32toh(x) __builtin_bswap32(x) 84 | 85 | # define htobe64(x) (x) 86 | # define htole64(x) __builtin_bswap64(x) 87 | # define be64toh(x) (x) 88 | # define le64toh(x) __builtin_bswap64(x) 89 | # else 90 | # error byte order not supported 91 | # endif 92 | # endif 93 | 94 | #elif defined(__WINDOWS__) 95 | 96 | # if defined(_MSC_VER) && !defined(__clang__) 97 | # include 98 | # define B_SWAP_16(x) _byteswap_ushort(x) 99 | # define B_SWAP_32(x) _byteswap_ulong(x) 100 | # define B_SWAP_64(x) _byteswap_uint64(x) 101 | # else 102 | # define B_SWAP_16(x) __builtin_bswap16(x) 103 | # define B_SWAP_32(x) __builtin_bswap32(x) 104 | # define B_SWAP_64(x) __builtin_bswap64(x) 105 | # endif 106 | 107 | # if defined(__MINGW32__) || defined(HAVE_SYS_PARAM_H) 108 | # include 109 | # endif 110 | 111 | # ifndef BIG_ENDIAN 112 | # ifdef __BIG_ENDIAN 113 | # define BIG_ENDIAN __BIG_ENDIAN 114 | # elif defined(__ORDER_BIG_ENDIAN__) 115 | # define BIG_ENDIAN __ORDER_BIG_ENDIAN__ 116 | # else 117 | # define BIG_ENDIAN 4321 118 | # endif 119 | # endif 120 | 121 | # ifndef LITTLE_ENDIAN 122 | # ifdef __LITTLE_ENDIAN 123 | # define LITTLE_ENDIAN __LITTLE_ENDIAN 124 | # elif defined(__ORDER_LITTLE_ENDIAN__) 125 | # define LITTLE_ENDIAN __ORDER_LITTLE_ENDIAN__ 126 | # else 127 | # define LITTLE_ENDIAN 1234 128 | # endif 129 | # endif 130 | 131 | # ifndef BYTE_ORDER 132 | # ifdef __BYTE_ORDER 133 | # define BYTE_ORDER __BYTE_ORDER 134 | # elif defined(__BYTE_ORDER__) 135 | # define BYTE_ORDER __BYTE_ORDER__ 136 | # else 137 | /* assume LE on Windows if 
nothing was defined */ 138 | # define BYTE_ORDER LITTLE_ENDIAN 139 | # endif 140 | # endif 141 | 142 | # if BYTE_ORDER == LITTLE_ENDIAN 143 | 144 | # define htobe16(x) B_SWAP_16(x) 145 | # define htole16(x) (x) 146 | # define be16toh(x) B_SWAP_16(x) 147 | # define le16toh(x) (x) 148 | 149 | # define htobe32(x) B_SWAP_32(x) 150 | # define htole32(x) (x) 151 | # define be32toh(x) B_SWAP_32(x) 152 | # define le32toh(x) (x) 153 | 154 | # define htobe64(x) B_SWAP_64(x) 155 | # define htole64(x) (x) 156 | # define be64toh(x) B_SWAP_64(x) 157 | # define le64toh(x) (x) 158 | 159 | # elif BYTE_ORDER == BIG_ENDIAN 160 | 161 | # define htobe16(x) (x) 162 | # define htole16(x) B_SWAP_16(x) 163 | # define be16toh(x) (x) 164 | # define le16toh(x) B_SWAP_16(x) 165 | 166 | # define htobe32(x) (x) 167 | # define htole32(x) B_SWAP_32(x) 168 | # define be32toh(x) (x) 169 | # define le32toh(x) B_SWAP_32(x) 170 | 171 | # define htobe64(x) (x) 172 | # define htole64(x) B_SWAP_64(x) 173 | # define be64toh(x) (x) 174 | # define le64toh(x) B_SWAP_64(x) 175 | 176 | # else 177 | 178 | # error byte order not supported 179 | 180 | # endif 181 | 182 | #elif defined(__QNXNTO__) 183 | 184 | # include 185 | 186 | # define __LITTLE_ENDIAN 1234 187 | # define __BIG_ENDIAN 4321 188 | # define __PDP_ENDIAN 3412 189 | 190 | # if defined(__BIGENDIAN__) 191 | 192 | # define __BYTE_ORDER __BIG_ENDIAN 193 | 194 | # define htobe16(x) (x) 195 | # define htobe32(x) (x) 196 | # define htobe64(x) (x) 197 | 198 | # define htole16(x) ENDIAN_SWAP16(x) 199 | # define htole32(x) ENDIAN_SWAP32(x) 200 | # define htole64(x) ENDIAN_SWAP64(x) 201 | 202 | # elif defined(__LITTLEENDIAN__) 203 | 204 | # define __BYTE_ORDER __LITTLE_ENDIAN 205 | 206 | # define htole16(x) (x) 207 | # define htole32(x) (x) 208 | # define htole64(x) (x) 209 | 210 | # define htobe16(x) ENDIAN_SWAP16(x) 211 | # define htobe32(x) ENDIAN_SWAP32(x) 212 | # define htobe64(x) ENDIAN_SWAP64(x) 213 | 214 | # else 215 | 216 | # error byte order not 
supported 217 | 218 | # endif 219 | 220 | # define be16toh(x) ENDIAN_BE16(x) 221 | # define be32toh(x) ENDIAN_BE32(x) 222 | # define be64toh(x) ENDIAN_BE64(x) 223 | # define le16toh(x) ENDIAN_LE16(x) 224 | # define le32toh(x) ENDIAN_LE32(x) 225 | # define le64toh(x) ENDIAN_LE64(x) 226 | 227 | #else 228 | 229 | # error platform not supported 230 | 231 | #endif 232 | 233 | #endif 234 | -------------------------------------------------------------------------------- /src/parser.h: -------------------------------------------------------------------------------- 1 | #ifndef TREE_SITTER_PARSER_H_ 2 | #define TREE_SITTER_PARSER_H_ 3 | 4 | #ifdef __cplusplus 5 | extern "C" { 6 | #endif 7 | 8 | #include 9 | #include 10 | #include 11 | 12 | #define ts_builtin_sym_error ((TSSymbol)-1) 13 | #define ts_builtin_sym_end 0 14 | #define TREE_SITTER_SERIALIZATION_BUFFER_SIZE 1024 15 | 16 | #ifndef TREE_SITTER_API_H_ 17 | typedef uint16_t TSStateId; 18 | typedef uint16_t TSSymbol; 19 | typedef uint16_t TSFieldId; 20 | typedef struct TSLanguage TSLanguage; 21 | typedef struct TSLanguageMetadata TSLanguageMetadata; 22 | typedef struct TSLanguageMetadata { 23 | uint8_t major_version; 24 | uint8_t minor_version; 25 | uint8_t patch_version; 26 | } TSLanguageMetadata; 27 | #endif 28 | 29 | typedef struct { 30 | TSFieldId field_id; 31 | uint8_t child_index; 32 | bool inherited; 33 | } TSFieldMapEntry; 34 | 35 | // Used to index the field and supertype maps. 
36 | typedef struct { 37 | uint16_t index; 38 | uint16_t length; 39 | } TSMapSlice; 40 | 41 | typedef struct { 42 | bool visible; 43 | bool named; 44 | bool supertype; 45 | } TSSymbolMetadata; 46 | 47 | typedef struct TSLexer TSLexer; 48 | 49 | struct TSLexer { 50 | int32_t lookahead; 51 | TSSymbol result_symbol; 52 | void (*advance)(TSLexer *, bool); 53 | void (*mark_end)(TSLexer *); 54 | uint32_t (*get_column)(TSLexer *); 55 | bool (*is_at_included_range_start)(const TSLexer *); 56 | bool (*eof)(const TSLexer *); 57 | void (*log)(const TSLexer *, const char *, ...); 58 | }; 59 | 60 | typedef enum { 61 | TSParseActionTypeShift, 62 | TSParseActionTypeReduce, 63 | TSParseActionTypeAccept, 64 | TSParseActionTypeRecover, 65 | } TSParseActionType; 66 | 67 | typedef union { 68 | struct { 69 | uint8_t type; 70 | TSStateId state; 71 | bool extra; 72 | bool repetition; 73 | } shift; 74 | struct { 75 | uint8_t type; 76 | uint8_t child_count; 77 | TSSymbol symbol; 78 | int16_t dynamic_precedence; 79 | uint16_t production_id; 80 | } reduce; 81 | uint8_t type; 82 | } TSParseAction; 83 | 84 | typedef struct { 85 | uint16_t lex_state; 86 | uint16_t external_lex_state; 87 | } TSLexMode; 88 | 89 | typedef struct { 90 | uint16_t lex_state; 91 | uint16_t external_lex_state; 92 | uint16_t reserved_word_set_id; 93 | } TSLexerMode; 94 | 95 | typedef union { 96 | TSParseAction action; 97 | struct { 98 | uint8_t count; 99 | bool reusable; 100 | } entry; 101 | } TSParseActionEntry; 102 | 103 | typedef struct { 104 | int32_t start; 105 | int32_t end; 106 | } TSCharacterRange; 107 | 108 | struct TSLanguage { 109 | uint32_t abi_version; 110 | uint32_t symbol_count; 111 | uint32_t alias_count; 112 | uint32_t token_count; 113 | uint32_t external_token_count; 114 | uint32_t state_count; 115 | uint32_t large_state_count; 116 | uint32_t production_id_count; 117 | uint32_t field_count; 118 | uint16_t max_alias_sequence_length; 119 | const uint16_t *parse_table; 120 | const uint16_t 
*small_parse_table; 121 | const uint32_t *small_parse_table_map; 122 | const TSParseActionEntry *parse_actions; 123 | const char * const *symbol_names; 124 | const char * const *field_names; 125 | const TSMapSlice *field_map_slices; 126 | const TSFieldMapEntry *field_map_entries; 127 | const TSSymbolMetadata *symbol_metadata; 128 | const TSSymbol *public_symbol_map; 129 | const uint16_t *alias_map; 130 | const TSSymbol *alias_sequences; 131 | const TSLexerMode *lex_modes; 132 | bool (*lex_fn)(TSLexer *, TSStateId); 133 | bool (*keyword_lex_fn)(TSLexer *, TSStateId); 134 | TSSymbol keyword_capture_token; 135 | struct { 136 | const bool *states; 137 | const TSSymbol *symbol_map; 138 | void *(*create)(void); 139 | void (*destroy)(void *); 140 | bool (*scan)(void *, TSLexer *, const bool *symbol_whitelist); 141 | unsigned (*serialize)(void *, char *); 142 | void (*deserialize)(void *, const char *, unsigned); 143 | } external_scanner; 144 | const TSStateId *primary_state_ids; 145 | const char *name; 146 | const TSSymbol *reserved_words; 147 | uint16_t max_reserved_word_set_size; 148 | uint32_t supertype_count; 149 | const TSSymbol *supertype_symbols; 150 | const TSMapSlice *supertype_map_slices; 151 | const TSSymbol *supertype_map_entries; 152 | TSLanguageMetadata metadata; 153 | }; 154 | 155 | static inline bool set_contains(const TSCharacterRange *ranges, uint32_t len, int32_t lookahead) { 156 | uint32_t index = 0; 157 | uint32_t size = len - index; 158 | while (size > 1) { 159 | uint32_t half_size = size / 2; 160 | uint32_t mid_index = index + half_size; 161 | const TSCharacterRange *range = &ranges[mid_index]; 162 | if (lookahead >= range->start && lookahead <= range->end) { 163 | return true; 164 | } else if (lookahead > range->end) { 165 | index = mid_index; 166 | } 167 | size -= half_size; 168 | } 169 | const TSCharacterRange *range = &ranges[index]; 170 | return (lookahead >= range->start && lookahead <= range->end); 171 | } 172 | 173 | /* 174 | * Lexer Macros 
175 | */ 176 | 177 | #ifdef _MSC_VER 178 | #define UNUSED __pragma(warning(suppress : 4101)) 179 | #else 180 | #define UNUSED __attribute__((unused)) 181 | #endif 182 | 183 | #define START_LEXER() \ 184 | bool result = false; \ 185 | bool skip = false; \ 186 | UNUSED \ 187 | bool eof = false; \ 188 | int32_t lookahead; \ 189 | goto start; \ 190 | next_state: \ 191 | lexer->advance(lexer, skip); \ 192 | start: \ 193 | skip = false; \ 194 | lookahead = lexer->lookahead; 195 | 196 | #define ADVANCE(state_value) \ 197 | { \ 198 | state = state_value; \ 199 | goto next_state; \ 200 | } 201 | 202 | #define ADVANCE_MAP(...) \ 203 | { \ 204 | static const uint16_t map[] = { __VA_ARGS__ }; \ 205 | for (uint32_t i = 0; i < sizeof(map) / sizeof(map[0]); i += 2) { \ 206 | if (map[i] == lookahead) { \ 207 | state = map[i + 1]; \ 208 | goto next_state; \ 209 | } \ 210 | } \ 211 | } 212 | 213 | #define SKIP(state_value) \ 214 | { \ 215 | skip = true; \ 216 | state = state_value; \ 217 | goto next_state; \ 218 | } 219 | 220 | #define ACCEPT_TOKEN(symbol_value) \ 221 | result = true; \ 222 | lexer->result_symbol = symbol_value; \ 223 | lexer->mark_end(lexer); 224 | 225 | #define END_STATE() return result; 226 | 227 | /* 228 | * Parse Table Macros 229 | */ 230 | 231 | #define SMALL_STATE(id) ((id) - LARGE_STATE_COUNT) 232 | 233 | #define STATE(id) id 234 | 235 | #define ACTIONS(id) id 236 | 237 | #define SHIFT(state_value) \ 238 | {{ \ 239 | .shift = { \ 240 | .type = TSParseActionTypeShift, \ 241 | .state = (state_value) \ 242 | } \ 243 | }} 244 | 245 | #define SHIFT_REPEAT(state_value) \ 246 | {{ \ 247 | .shift = { \ 248 | .type = TSParseActionTypeShift, \ 249 | .state = (state_value), \ 250 | .repetition = true \ 251 | } \ 252 | }} 253 | 254 | #define SHIFT_EXTRA() \ 255 | {{ \ 256 | .shift = { \ 257 | .type = TSParseActionTypeShift, \ 258 | .extra = true \ 259 | } \ 260 | }} 261 | 262 | #define REDUCE(symbol_name, children, precedence, prod_id) \ 263 | {{ \ 264 | .reduce = { \ 
265 | .type = TSParseActionTypeReduce, \ 266 | .symbol = symbol_name, \ 267 | .child_count = children, \ 268 | .dynamic_precedence = precedence, \ 269 | .production_id = prod_id \ 270 | }, \ 271 | }} 272 | 273 | #define RECOVER() \ 274 | {{ \ 275 | .type = TSParseActionTypeRecover \ 276 | }} 277 | 278 | #define ACCEPT_INPUT() \ 279 | {{ \ 280 | .type = TSParseActionTypeAccept \ 281 | }} 282 | 283 | #ifdef __cplusplus 284 | } 285 | #endif 286 | 287 | #endif // TREE_SITTER_PARSER_H_ 288 | -------------------------------------------------------------------------------- /src/language.h: -------------------------------------------------------------------------------- 1 | #ifndef TREE_SITTER_LANGUAGE_H_ 2 | #define TREE_SITTER_LANGUAGE_H_ 3 | 4 | #ifdef __cplusplus 5 | extern "C" { 6 | #endif 7 | 8 | #include "./subtree.h" 9 | #include "./parser.h" 10 | 11 | #define ts_builtin_sym_error_repeat (ts_builtin_sym_error - 1) 12 | 13 | #define LANGUAGE_VERSION_WITH_RESERVED_WORDS 15 14 | #define LANGUAGE_VERSION_WITH_PRIMARY_STATES 14 15 | 16 | typedef struct { 17 | const TSParseAction *actions; 18 | uint32_t action_count; 19 | bool is_reusable; 20 | } TableEntry; 21 | 22 | typedef struct { 23 | const TSLanguage *language; 24 | const uint16_t *data; 25 | const uint16_t *group_end; 26 | TSStateId state; 27 | uint16_t table_value; 28 | uint16_t section_index; 29 | uint16_t group_count; 30 | bool is_small_state; 31 | 32 | const TSParseAction *actions; 33 | TSSymbol symbol; 34 | TSStateId next_state; 35 | uint16_t action_count; 36 | } LookaheadIterator; 37 | 38 | void ts_language_table_entry(const TSLanguage *self, TSStateId state, TSSymbol symbol, TableEntry *result); 39 | TSLexerMode ts_language_lex_mode_for_state(const TSLanguage *self, TSStateId state); 40 | bool ts_language_is_reserved_word(const TSLanguage *self, TSStateId state, TSSymbol symbol); 41 | TSSymbolMetadata ts_language_symbol_metadata(const TSLanguage *self, TSSymbol symbol); 42 | TSSymbol 
ts_language_public_symbol(const TSLanguage *self, TSSymbol symbol); 43 | 44 | static inline const TSParseAction *ts_language_actions( 45 | const TSLanguage *self, 46 | TSStateId state, 47 | TSSymbol symbol, 48 | uint32_t *count 49 | ) { 50 | TableEntry entry; 51 | ts_language_table_entry(self, state, symbol, &entry); 52 | *count = entry.action_count; 53 | return entry.actions; 54 | } 55 | 56 | static inline bool ts_language_has_reduce_action( 57 | const TSLanguage *self, 58 | TSStateId state, 59 | TSSymbol symbol 60 | ) { 61 | TableEntry entry; 62 | ts_language_table_entry(self, state, symbol, &entry); 63 | return entry.action_count > 0 && entry.actions[0].type == TSParseActionTypeReduce; 64 | } 65 | 66 | // Lookup the table value for a given symbol and state. 67 | // 68 | // For non-terminal symbols, the table value represents a successor state. 69 | // For terminal symbols, it represents an index in the actions table. 70 | // For 'large' parse states, this is a direct lookup. For 'small' parse 71 | // states, this requires searching through the symbol groups to find 72 | // the given symbol. 
73 | static inline uint16_t ts_language_lookup( 74 | const TSLanguage *self, 75 | TSStateId state, 76 | TSSymbol symbol 77 | ) { 78 | if (state >= self->large_state_count) { 79 | uint32_t index = self->small_parse_table_map[state - self->large_state_count]; 80 | const uint16_t *data = &self->small_parse_table[index]; 81 | uint16_t group_count = *(data++); 82 | for (unsigned i = 0; i < group_count; i++) { 83 | uint16_t section_value = *(data++); 84 | uint16_t symbol_count = *(data++); 85 | for (unsigned j = 0; j < symbol_count; j++) { 86 | if (*(data++) == symbol) return section_value; 87 | } 88 | } 89 | return 0; 90 | } else { 91 | return self->parse_table[state * self->symbol_count + symbol]; 92 | } 93 | } 94 | 95 | static inline bool ts_language_has_actions( 96 | const TSLanguage *self, 97 | TSStateId state, 98 | TSSymbol symbol 99 | ) { 100 | return ts_language_lookup(self, state, symbol) != 0; 101 | } 102 | 103 | // Iterate over all of the symbols that are valid in the given state. 104 | // 105 | // For 'large' parse states, this just requires iterating through 106 | // all possible symbols and checking the parse table for each one. 107 | // For 'small' parse states, this exploits the structure of the 108 | // table to only visit the valid symbols. 
109 | static inline LookaheadIterator ts_language_lookaheads( 110 | const TSLanguage *self, 111 | TSStateId state 112 | ) { 113 | bool is_small_state = state >= self->large_state_count; 114 | const uint16_t *data; 115 | const uint16_t *group_end = NULL; 116 | uint16_t group_count = 0; 117 | if (is_small_state) { 118 | uint32_t index = self->small_parse_table_map[state - self->large_state_count]; 119 | data = &self->small_parse_table[index]; 120 | group_end = data + 1; 121 | group_count = *data; 122 | } else { 123 | data = &self->parse_table[state * self->symbol_count] - 1; 124 | } 125 | return (LookaheadIterator) { 126 | .language = self, 127 | .data = data, 128 | .group_end = group_end, 129 | .group_count = group_count, 130 | .is_small_state = is_small_state, 131 | .symbol = UINT16_MAX, 132 | .next_state = 0, 133 | }; 134 | } 135 | 136 | static inline bool ts_lookahead_iterator__next(LookaheadIterator *self) { 137 | // For small parse states, valid symbols are listed explicitly, 138 | // grouped by their value. There's no need to look up the actions 139 | // again until moving to the next group. 140 | if (self->is_small_state) { 141 | self->data++; 142 | if (self->data == self->group_end) { 143 | if (self->group_count == 0) return false; 144 | self->group_count--; 145 | self->table_value = *(self->data++); 146 | unsigned symbol_count = *(self->data++); 147 | self->group_end = self->data + symbol_count; 148 | self->symbol = *self->data; 149 | } else { 150 | self->symbol = *self->data; 151 | return true; 152 | } 153 | } 154 | 155 | // For large parse states, iterate through every symbol until one 156 | // is found that has valid actions. 
157 | else { 158 | do { 159 | self->data++; 160 | self->symbol++; 161 | if (self->symbol >= self->language->symbol_count) return false; 162 | self->table_value = *self->data; 163 | } while (!self->table_value); 164 | } 165 | 166 | // Depending on if the symbols is terminal or non-terminal, the table value either 167 | // represents a list of actions or a successor state. 168 | if (self->symbol < self->language->token_count) { 169 | const TSParseActionEntry *entry = &self->language->parse_actions[self->table_value]; 170 | self->action_count = entry->entry.count; 171 | self->actions = (const TSParseAction *)(entry + 1); 172 | self->next_state = 0; 173 | } else { 174 | self->action_count = 0; 175 | self->next_state = self->table_value; 176 | } 177 | return true; 178 | } 179 | 180 | // Whether the state is a "primary state". If this returns false, it indicates that there exists 181 | // another state that behaves identically to this one with respect to query analysis. 182 | static inline bool ts_language_state_is_primary( 183 | const TSLanguage *self, 184 | TSStateId state 185 | ) { 186 | if (self->abi_version >= LANGUAGE_VERSION_WITH_PRIMARY_STATES) { 187 | return state == self->primary_state_ids[state]; 188 | } else { 189 | return true; 190 | } 191 | } 192 | 193 | static inline const bool *ts_language_enabled_external_tokens( 194 | const TSLanguage *self, 195 | unsigned external_scanner_state 196 | ) { 197 | if (external_scanner_state == 0) { 198 | return NULL; 199 | } else { 200 | return self->external_scanner.states + self->external_token_count * external_scanner_state; 201 | } 202 | } 203 | 204 | static inline const TSSymbol *ts_language_alias_sequence( 205 | const TSLanguage *self, 206 | uint32_t production_id 207 | ) { 208 | return production_id ? 
209 | &self->alias_sequences[production_id * self->max_alias_sequence_length] : 210 | NULL; 211 | } 212 | 213 | static inline TSSymbol ts_language_alias_at( 214 | const TSLanguage *self, 215 | uint32_t production_id, 216 | uint32_t child_index 217 | ) { 218 | return production_id ? 219 | self->alias_sequences[production_id * self->max_alias_sequence_length + child_index] : 220 | 0; 221 | } 222 | 223 | static inline void ts_language_field_map( 224 | const TSLanguage *self, 225 | uint32_t production_id, 226 | const TSFieldMapEntry **start, 227 | const TSFieldMapEntry **end 228 | ) { 229 | if (self->field_count == 0) { 230 | *start = NULL; 231 | *end = NULL; 232 | return; 233 | } 234 | 235 | TSMapSlice slice = self->field_map_slices[production_id]; 236 | *start = &self->field_map_entries[slice.index]; 237 | *end = &self->field_map_entries[slice.index] + slice.length; 238 | } 239 | 240 | static inline void ts_language_aliases_for_symbol( 241 | const TSLanguage *self, 242 | TSSymbol original_symbol, 243 | const TSSymbol **start, 244 | const TSSymbol **end 245 | ) { 246 | *start = &self->public_symbol_map[original_symbol]; 247 | *end = *start + 1; 248 | 249 | unsigned idx = 0; 250 | for (;;) { 251 | TSSymbol symbol = self->alias_map[idx++]; 252 | if (symbol == 0 || symbol > original_symbol) break; 253 | uint16_t count = self->alias_map[idx++]; 254 | if (symbol == original_symbol) { 255 | *start = &self->alias_map[idx]; 256 | *end = &self->alias_map[idx + count]; 257 | break; 258 | } 259 | idx += count; 260 | } 261 | } 262 | 263 | static inline void ts_language_write_symbol_as_dot_string( 264 | const TSLanguage *self, 265 | FILE *f, 266 | TSSymbol symbol 267 | ) { 268 | const char *name = ts_language_symbol_name(self, symbol); 269 | for (const char *chr = name; *chr; chr++) { 270 | switch (*chr) { 271 | case '"': 272 | case '\\': 273 | fputc('\\', f); 274 | fputc(*chr, f); 275 | break; 276 | case '\n': 277 | fputs("\\n", f); 278 | break; 279 | case '\t': 280 | 
fputs("\\t", f); 281 | break; 282 | default: 283 | fputc(*chr, f); 284 | break; 285 | } 286 | } 287 | } 288 | 289 | #ifdef __cplusplus 290 | } 291 | #endif 292 | 293 | #endif // TREE_SITTER_LANGUAGE_H_ 294 | -------------------------------------------------------------------------------- /src/language.c: -------------------------------------------------------------------------------- 1 | #include "./language.h" 2 | #include "./wasm_store.h" 3 | #include "tree_sitter/api.h" 4 | #include 5 | 6 | const TSLanguage *ts_language_copy(const TSLanguage *self) { 7 | if (self && ts_language_is_wasm(self)) { 8 | ts_wasm_language_retain(self); 9 | } 10 | return self; 11 | } 12 | 13 | void ts_language_delete(const TSLanguage *self) { 14 | if (self && ts_language_is_wasm(self)) { 15 | ts_wasm_language_release(self); 16 | } 17 | } 18 | 19 | uint32_t ts_language_symbol_count(const TSLanguage *self) { 20 | return self->symbol_count + self->alias_count; 21 | } 22 | 23 | uint32_t ts_language_state_count(const TSLanguage *self) { 24 | return self->state_count; 25 | } 26 | 27 | const TSSymbol *ts_language_supertypes(const TSLanguage *self, uint32_t *length) { 28 | if (self->abi_version >= LANGUAGE_VERSION_WITH_RESERVED_WORDS) { 29 | *length = self->supertype_count; 30 | return self->supertype_symbols; 31 | } else { 32 | *length = 0; 33 | return NULL; 34 | } 35 | } 36 | 37 | const TSSymbol *ts_language_subtypes( 38 | const TSLanguage *self, 39 | TSSymbol supertype, 40 | uint32_t *length 41 | ) { 42 | if (self->abi_version < LANGUAGE_VERSION_WITH_RESERVED_WORDS || !ts_language_symbol_metadata(self, supertype).supertype) { 43 | *length = 0; 44 | return NULL; 45 | } 46 | 47 | TSMapSlice slice = self->supertype_map_slices[supertype]; 48 | *length = slice.length; 49 | return &self->supertype_map_entries[slice.index]; 50 | } 51 | 52 | uint32_t ts_language_version(const TSLanguage *self) { 53 | return self->abi_version; 54 | } 55 | 56 | uint32_t ts_language_abi_version(const TSLanguage *self) 
{ 57 | return self->abi_version; 58 | } 59 | 60 | const TSLanguageMetadata *ts_language_metadata(const TSLanguage *self) { 61 | return self->abi_version >= LANGUAGE_VERSION_WITH_RESERVED_WORDS ? &self->metadata : NULL; 62 | } 63 | 64 | const char *ts_language_name(const TSLanguage *self) { 65 | return self->abi_version >= LANGUAGE_VERSION_WITH_RESERVED_WORDS ? self->name : NULL; 66 | } 67 | 68 | uint32_t ts_language_field_count(const TSLanguage *self) { 69 | return self->field_count; 70 | } 71 | 72 | void ts_language_table_entry( 73 | const TSLanguage *self, 74 | TSStateId state, 75 | TSSymbol symbol, 76 | TableEntry *result 77 | ) { 78 | if (symbol == ts_builtin_sym_error || symbol == ts_builtin_sym_error_repeat) { 79 | result->action_count = 0; 80 | result->is_reusable = false; 81 | result->actions = NULL; 82 | } else { 83 | ts_assert(symbol < self->token_count); 84 | uint32_t action_index = ts_language_lookup(self, state, symbol); 85 | const TSParseActionEntry *entry = &self->parse_actions[action_index]; 86 | result->action_count = entry->entry.count; 87 | result->is_reusable = entry->entry.reusable; 88 | result->actions = (const TSParseAction *)(entry + 1); 89 | } 90 | } 91 | 92 | TSLexerMode ts_language_lex_mode_for_state( 93 | const TSLanguage *self, 94 | TSStateId state 95 | ) { 96 | if (self->abi_version < 15) { 97 | TSLexMode mode = ((const TSLexMode *)self->lex_modes)[state]; 98 | return (TSLexerMode) { 99 | .lex_state = mode.lex_state, 100 | .external_lex_state = mode.external_lex_state, 101 | .reserved_word_set_id = 0, 102 | }; 103 | } else { 104 | return self->lex_modes[state]; 105 | } 106 | } 107 | 108 | bool ts_language_is_reserved_word( 109 | const TSLanguage *self, 110 | TSStateId state, 111 | TSSymbol symbol 112 | ) { 113 | TSLexerMode lex_mode = ts_language_lex_mode_for_state(self, state); 114 | if (lex_mode.reserved_word_set_id > 0) { 115 | unsigned start = lex_mode.reserved_word_set_id * self->max_reserved_word_set_size; 116 | unsigned end = 
start + self->max_reserved_word_set_size; 117 | for (unsigned i = start; i < end; i++) { 118 | if (self->reserved_words[i] == symbol) return true; 119 | if (self->reserved_words[i] == 0) break; 120 | } 121 | } 122 | return false; 123 | } 124 | 125 | TSSymbolMetadata ts_language_symbol_metadata( 126 | const TSLanguage *self, 127 | TSSymbol symbol 128 | ) { 129 | if (symbol == ts_builtin_sym_error) { 130 | return (TSSymbolMetadata) {.visible = true, .named = true}; 131 | } else if (symbol == ts_builtin_sym_error_repeat) { 132 | return (TSSymbolMetadata) {.visible = false, .named = false}; 133 | } else { 134 | return self->symbol_metadata[symbol]; 135 | } 136 | } 137 | 138 | TSSymbol ts_language_public_symbol( 139 | const TSLanguage *self, 140 | TSSymbol symbol 141 | ) { 142 | if (symbol == ts_builtin_sym_error) return symbol; 143 | return self->public_symbol_map[symbol]; 144 | } 145 | 146 | TSStateId ts_language_next_state( 147 | const TSLanguage *self, 148 | TSStateId state, 149 | TSSymbol symbol 150 | ) { 151 | if (symbol == ts_builtin_sym_error || symbol == ts_builtin_sym_error_repeat) { 152 | return 0; 153 | } else if (symbol < self->token_count) { 154 | uint32_t count; 155 | const TSParseAction *actions = ts_language_actions(self, state, symbol, &count); 156 | if (count > 0) { 157 | TSParseAction action = actions[count - 1]; 158 | if (action.type == TSParseActionTypeShift) { 159 | return action.shift.extra ? 
state : action.shift.state; 160 | } 161 | } 162 | return 0; 163 | } else { 164 | return ts_language_lookup(self, state, symbol); 165 | } 166 | } 167 | 168 | const char *ts_language_symbol_name( 169 | const TSLanguage *self, 170 | TSSymbol symbol 171 | ) { 172 | if (symbol == ts_builtin_sym_error) { 173 | return "ERROR"; 174 | } else if (symbol == ts_builtin_sym_error_repeat) { 175 | return "_ERROR"; 176 | } else if (symbol < ts_language_symbol_count(self)) { 177 | return self->symbol_names[symbol]; 178 | } else { 179 | return NULL; 180 | } 181 | } 182 | 183 | TSSymbol ts_language_symbol_for_name( 184 | const TSLanguage *self, 185 | const char *string, 186 | uint32_t length, 187 | bool is_named 188 | ) { 189 | if (!strncmp(string, "ERROR", length)) return ts_builtin_sym_error; 190 | uint16_t count = (uint16_t)ts_language_symbol_count(self); 191 | for (TSSymbol i = 0; i < count; i++) { 192 | TSSymbolMetadata metadata = ts_language_symbol_metadata(self, i); 193 | if ((!metadata.visible && !metadata.supertype) || metadata.named != is_named) continue; 194 | const char *symbol_name = self->symbol_names[i]; 195 | if (!strncmp(symbol_name, string, length) && !symbol_name[length]) { 196 | return self->public_symbol_map[i]; 197 | } 198 | } 199 | return 0; 200 | } 201 | 202 | TSSymbolType ts_language_symbol_type( 203 | const TSLanguage *self, 204 | TSSymbol symbol 205 | ) { 206 | TSSymbolMetadata metadata = ts_language_symbol_metadata(self, symbol); 207 | if (metadata.named && metadata.visible) { 208 | return TSSymbolTypeRegular; 209 | } else if (metadata.visible) { 210 | return TSSymbolTypeAnonymous; 211 | } else if (metadata.supertype) { 212 | return TSSymbolTypeSupertype; 213 | } else { 214 | return TSSymbolTypeAuxiliary; 215 | } 216 | } 217 | 218 | const char *ts_language_field_name_for_id( 219 | const TSLanguage *self, 220 | TSFieldId id 221 | ) { 222 | uint32_t count = ts_language_field_count(self); 223 | if (count && id <= count) { 224 | return self->field_names[id]; 
225 | } else { 226 | return NULL; 227 | } 228 | } 229 | 230 | TSFieldId ts_language_field_id_for_name( 231 | const TSLanguage *self, 232 | const char *name, 233 | uint32_t name_length 234 | ) { 235 | uint16_t count = (uint16_t)ts_language_field_count(self); 236 | for (TSSymbol i = 1; i < count + 1; i++) { 237 | switch (strncmp(name, self->field_names[i], name_length)) { 238 | case 0: 239 | if (self->field_names[i][name_length] == 0) return i; 240 | break; 241 | case -1: 242 | return 0; 243 | default: 244 | break; 245 | } 246 | } 247 | return 0; 248 | } 249 | 250 | TSLookaheadIterator *ts_lookahead_iterator_new(const TSLanguage *self, TSStateId state) { 251 | if (state >= self->state_count) return NULL; 252 | LookaheadIterator *iterator = ts_malloc(sizeof(LookaheadIterator)); 253 | *iterator = ts_language_lookaheads(self, state); 254 | return (TSLookaheadIterator *)iterator; 255 | } 256 | 257 | void ts_lookahead_iterator_delete(TSLookaheadIterator *self) { 258 | ts_free(self); 259 | } 260 | 261 | bool ts_lookahead_iterator_reset_state(TSLookaheadIterator * self, TSStateId state) { 262 | LookaheadIterator *iterator = (LookaheadIterator *)self; 263 | if (state >= iterator->language->state_count) return false; 264 | *iterator = ts_language_lookaheads(iterator->language, state); 265 | return true; 266 | } 267 | 268 | const TSLanguage *ts_lookahead_iterator_language(const TSLookaheadIterator *self) { 269 | const LookaheadIterator *iterator = (const LookaheadIterator *)self; 270 | return iterator->language; 271 | } 272 | 273 | bool ts_lookahead_iterator_reset(TSLookaheadIterator *self, const TSLanguage *language, TSStateId state) { 274 | if (state >= language->state_count) return false; 275 | LookaheadIterator *iterator = (LookaheadIterator *)self; 276 | *iterator = ts_language_lookaheads(language, state); 277 | return true; 278 | } 279 | 280 | bool ts_lookahead_iterator_next(TSLookaheadIterator *self) { 281 | LookaheadIterator *iterator = (LookaheadIterator *)self; 282 | 
return ts_lookahead_iterator__next(iterator); 283 | } 284 | 285 | TSSymbol ts_lookahead_iterator_current_symbol(const TSLookaheadIterator *self) { 286 | const LookaheadIterator *iterator = (const LookaheadIterator *)self; 287 | return iterator->symbol; 288 | } 289 | 290 | const char *ts_lookahead_iterator_current_symbol_name(const TSLookaheadIterator *self) { 291 | const LookaheadIterator *iterator = (const LookaheadIterator *)self; 292 | return ts_language_symbol_name(iterator->language, iterator->symbol); 293 | } 294 | -------------------------------------------------------------------------------- /src/array.h: -------------------------------------------------------------------------------- 1 | #ifndef TREE_SITTER_ARRAY_H_ 2 | #define TREE_SITTER_ARRAY_H_ 3 | 4 | #ifdef __cplusplus 5 | extern "C" { 6 | #endif 7 | 8 | #include "./alloc.h" 9 | #include "./ts_assert.h" 10 | 11 | #include 12 | #include 13 | #include 14 | #include 15 | 16 | #ifdef _MSC_VER 17 | #pragma warning(push) 18 | #pragma warning(disable : 4101) 19 | #elif defined(__GNUC__) || defined(__clang__) 20 | #pragma GCC diagnostic push 21 | #pragma GCC diagnostic ignored "-Wunused-variable" 22 | #endif 23 | 24 | #define Array(T) \ 25 | struct { \ 26 | T *contents; \ 27 | uint32_t size; \ 28 | uint32_t capacity; \ 29 | } 30 | 31 | /// Initialize an array. 32 | #define array_init(self) \ 33 | ((self)->size = 0, (self)->capacity = 0, (self)->contents = NULL) 34 | 35 | /// Create an empty array. 36 | #define array_new() \ 37 | { NULL, 0, 0 } 38 | 39 | /// Get a pointer to the element at a given `index` in the array. 40 | #define array_get(self, _index) \ 41 | (ts_assert((uint32_t)(_index) < (self)->size), &(self)->contents[_index]) 42 | 43 | /// Get a pointer to the first element in the array. 44 | #define array_front(self) array_get(self, 0) 45 | 46 | /// Get a pointer to the last element in the array. 
/// On an empty array the computed index wraps around, so the bounds
/// assertion inside `array_get` fails.
#define array_back(self) array_get(self, (self)->size - 1)

/// Clear the array, setting its size to zero. The capacity and the backing
/// buffer are left untouched, so no memory is released.
#define array_clear(self) ((self)->size = 0)

/// Reserve `new_capacity` elements of space in the array. If `new_capacity` is
/// not greater than the array's current capacity, this has no effect.
#define array_reserve(self, new_capacity) \
  _array__reserve((Array *)(self), array_elem_size(self), new_capacity)

/// Free the array's backing buffer and reset the array to the empty state.
/// Anything the stored elements themselves refer to is not freed.
#define array_delete(self) _array__delete((Array *)(self))

/// Push a new `element` onto the end of the array, growing the buffer first
/// if needed. `self` is expanded more than once, so it must not have side
/// effects.
#define array_push(self, element) \
  (_array__grow((Array *)(self), 1, array_elem_size(self)), \
   (self)->contents[(self)->size++] = (element))

/// Increase the array's size by `count` elements.
/// New elements are zero-initialized. A `count` of zero is a no-op.
/// `count` is expanded more than once, so it must not have side effects.
#define array_grow_by(self, count) \
  do { \
    if ((count) == 0) break; \
    _array__grow((Array *)(self), count, array_elem_size(self)); \
    memset((self)->contents + (self)->size, 0, (count) * array_elem_size(self)); \
    (self)->size += (count); \
  } while (0)

/// Append all elements from the array `other` to the end of `self`.
#define array_push_all(self, other) \
  array_extend((self), (other)->size, (other)->contents)

/// Append `count` elements to the end of the array, reading their values from the
/// `contents` pointer. Implemented as a splice at index `size` that removes
/// nothing.
#define array_extend(self, count, contents) \
  _array__splice( \
    (Array *)(self), array_elem_size(self), (self)->size, \
    0, count, contents \
  )

/// Remove `old_count` elements from the array starting at the given `index`.
/// At the same index, insert `new_count` new elements, reading their values
/// from the `new_contents` pointer.
#define array_splice(self, _index, old_count, new_count, new_contents) \
  _array__splice( \
    (Array *)(self), array_elem_size(self), _index, \
    old_count, new_count, new_contents \
  )

/// Insert one `element` into the array at the given `index`.
/// The address of `element` is taken, so it must be an lvalue.
#define array_insert(self, _index, element) \
  _array__splice((Array *)(self), array_elem_size(self), _index, 0, 1, &(element))

/// Remove one element from the array at the given `index`.
#define array_erase(self, _index) \
  _array__erase((Array *)(self), array_elem_size(self), _index)

/// Pop the last element off the array, returning the element by value.
/// No emptiness check is performed: the size is decremented unconditionally.
#define array_pop(self) ((self)->contents[--(self)->size])

/// Assign the contents of one array to another, reallocating if necessary.
#define array_assign(self, other) \
  _array__assign((Array *)(self), (const Array *)(other), array_elem_size(self))

/// Swap one array with another by exchanging their headers.
#define array_swap(self, other) \
  _array__swap((Array *)(self), (Array *)(other))

/// Get the size in bytes of a single element of the array.
#define array_elem_size(self) (sizeof *(self)->contents)

/// Search a sorted array for a given `needle` value, using the given `compare`
/// callback to determine the order.
///
/// If an existing element is found to be equal to `needle`, then the `index`
/// out-parameter is set to the existing value's index, and the `exists`
/// out-parameter is set to true. Otherwise, `index` is set to an index where
/// `needle` should be inserted in order to preserve the sorting, and `exists`
/// is set to false.
// The empty fourth argument is the field suffix: elements are compared whole.
#define array_search_sorted_with(self, compare, needle, _index, _exists) \
  _array__search_sorted(self, 0, compare, , needle, _index, _exists)

/// Search a sorted array for a given `needle` value, using integer comparisons
/// of a given struct field (specified with a leading dot) to determine the order.
///
/// See also `array_search_sorted_with`.
#define array_search_sorted_by(self, field, needle, _index, _exists) \
  _array__search_sorted(self, 0, _compare_int, field, needle, _index, _exists)

/// Insert a given `value` into a sorted array, using the given `compare`
/// callback to determine the order.
///
/// Nothing is inserted when an equal element already exists. The address of
/// `value` is taken, so it must be an lvalue.
#define array_insert_sorted_with(self, compare, value) \
  do { \
    unsigned _index, _exists; \
    array_search_sorted_with(self, compare, &(value), &_index, &_exists); \
    if (!_exists) array_insert(self, _index, value); \
  } while (0)

/// Insert a given `value` into a sorted array, using integer comparisons of
/// a given struct field (specified with a leading dot) to determine the order.
///
/// Nothing is inserted when an element with an equal field already exists.
///
/// See also `array_search_sorted_by`.
#define array_insert_sorted_by(self, field, value) \
  do { \
    unsigned _index, _exists; \
    array_search_sorted_by(self, field, (value) field, &_index, &_exists); \
    if (!_exists) array_insert(self, _index, value); \
  } while (0)

// Private

// Untyped array (`contents` is a `void *`) that the `_array__*` helpers
// below operate on.
typedef Array(void) Array;

/// This is not what you're looking for, see `array_delete`.
///
/// Frees the backing buffer, if there is one, and resets the array to the
/// empty state. An array without a buffer is left untouched.
static inline void _array__delete(Array *self) {
  if (self->contents) {
    ts_free(self->contents);
    self->contents = NULL;
    self->size = 0;
    self->capacity = 0;
  }
}

/// This is not what you're looking for, see `array_erase`.
173 | static inline void _array__erase(Array *self, size_t element_size, 174 | uint32_t index) { 175 | ts_assert(index < self->size); 176 | char *contents = (char *)self->contents; 177 | memmove(contents + index * element_size, contents + (index + 1) * element_size, 178 | (self->size - index - 1) * element_size); 179 | self->size--; 180 | } 181 | 182 | /// This is not what you're looking for, see `array_reserve`. 183 | static inline void _array__reserve(Array *self, size_t element_size, uint32_t new_capacity) { 184 | if (new_capacity > self->capacity) { 185 | if (self->contents) { 186 | self->contents = ts_realloc(self->contents, new_capacity * element_size); 187 | } else { 188 | self->contents = ts_malloc(new_capacity * element_size); 189 | } 190 | self->capacity = new_capacity; 191 | } 192 | } 193 | 194 | /// This is not what you're looking for, see `array_assign`. 195 | static inline void _array__assign(Array *self, const Array *other, size_t element_size) { 196 | _array__reserve(self, element_size, other->size); 197 | self->size = other->size; 198 | memcpy(self->contents, other->contents, self->size * element_size); 199 | } 200 | 201 | /// This is not what you're looking for, see `array_swap`. 202 | static inline void _array__swap(Array *self, Array *other) { 203 | Array swap = *other; 204 | *other = *self; 205 | *self = swap; 206 | } 207 | 208 | /// This is not what you're looking for, see `array_push` or `array_grow_by`. 209 | static inline void _array__grow(Array *self, uint32_t count, size_t element_size) { 210 | uint32_t new_size = self->size + count; 211 | if (new_size > self->capacity) { 212 | uint32_t new_capacity = self->capacity * 2; 213 | if (new_capacity < 8) new_capacity = 8; 214 | if (new_capacity < new_size) new_capacity = new_size; 215 | _array__reserve(self, element_size, new_capacity); 216 | } 217 | } 218 | 219 | /// This is not what you're looking for, see `array_splice`. 
220 | static inline void _array__splice(Array *self, size_t element_size, 221 | uint32_t index, uint32_t old_count, 222 | uint32_t new_count, const void *elements) { 223 | uint32_t new_size = self->size + new_count - old_count; 224 | uint32_t old_end = index + old_count; 225 | uint32_t new_end = index + new_count; 226 | ts_assert(old_end <= self->size); 227 | 228 | _array__reserve(self, element_size, new_size); 229 | 230 | char *contents = (char *)self->contents; 231 | if (self->size > old_end) { 232 | memmove( 233 | contents + new_end * element_size, 234 | contents + old_end * element_size, 235 | (self->size - old_end) * element_size 236 | ); 237 | } 238 | if (new_count > 0) { 239 | if (elements) { 240 | memcpy( 241 | (contents + index * element_size), 242 | elements, 243 | new_count * element_size 244 | ); 245 | } else { 246 | memset( 247 | (contents + index * element_size), 248 | 0, 249 | new_count * element_size 250 | ); 251 | } 252 | } 253 | self->size += new_count - old_count; 254 | } 255 | 256 | /// A binary search routine, based on Rust's `std::slice::binary_search_by`. 257 | /// This is not what you're looking for, see `array_search_sorted_with` or `array_search_sorted_by`. 
258 | #define _array__search_sorted(self, start, compare, suffix, needle, _index, _exists) \ 259 | do { \ 260 | *(_index) = start; \ 261 | *(_exists) = false; \ 262 | uint32_t size = (self)->size - *(_index); \ 263 | if (size == 0) break; \ 264 | int comparison; \ 265 | while (size > 1) { \ 266 | uint32_t half_size = size / 2; \ 267 | uint32_t mid_index = *(_index) + half_size; \ 268 | comparison = compare(&((self)->contents[mid_index] suffix), (needle)); \ 269 | if (comparison <= 0) *(_index) = mid_index; \ 270 | size -= half_size; \ 271 | } \ 272 | comparison = compare(&((self)->contents[*(_index)] suffix), (needle)); \ 273 | if (comparison == 0) *(_exists) = true; \ 274 | else if (comparison < 0) *(_index) += 1; \ 275 | } while (0) 276 | 277 | /// Helper macro for the `_sorted_by` routines below. This takes the left (existing) 278 | /// parameter by reference in order to work with the generic sorting function above. 279 | #define _compare_int(a, b) ((int)*(a) - (int)(b)) 280 | 281 | #ifdef _MSC_VER 282 | #pragma warning(pop) 283 | #elif defined(__GNUC__) || defined(__clang__) 284 | #pragma GCC diagnostic pop 285 | #endif 286 | 287 | #ifdef __cplusplus 288 | } 289 | #endif 290 | 291 | #endif // TREE_SITTER_ARRAY_H_ 292 | -------------------------------------------------------------------------------- /node.go: -------------------------------------------------------------------------------- 1 | package tree_sitter 2 | 3 | /* 4 | #cgo CFLAGS: -Iinclude -Isrc -std=c11 -D_POSIX_C_SOURCE=200112L -D_DEFAULT_SOURCE 5 | #include 6 | */ 7 | import "C" 8 | import "unsafe" 9 | 10 | // A single node within a syntax [Tree]. 11 | // Note that this is a C-compatible struct 12 | type Node struct { 13 | _inner C.TSNode 14 | } 15 | 16 | func newNode(node C.TSNode) *Node { 17 | if node.id == nil { 18 | return nil 19 | } 20 | return &Node{_inner: node} 21 | } 22 | 23 | // Get a numeric id for this node that is unique. 
24 | // 25 | // Within a given syntax tree, no two nodes have the same id. However, if 26 | // a new tree is created based on an older tree, and a node from the old 27 | // tree is reused in the process, then that node will have the same id in 28 | // both trees. 29 | func (n *Node) Id() uintptr { 30 | return uintptr(n._inner.id) 31 | } 32 | 33 | // Get this node's type as a numerical id. 34 | func (n *Node) KindId() uint16 { 35 | return uint16(C.ts_node_symbol(n._inner)) 36 | } 37 | 38 | // Get the node's type as a numerical id as it appears in the grammar 39 | // ignoring aliases. 40 | func (n *Node) GrammarId() uint16 { 41 | return uint16(C.ts_node_grammar_symbol(n._inner)) 42 | } 43 | 44 | // Get this node's type as a string. 45 | func (n *Node) Kind() string { 46 | return C.GoString(C.ts_node_type(n._inner)) 47 | } 48 | 49 | // Get this node's symbol name as it appears in the grammar ignoring 50 | // aliases as a string. 51 | func (n *Node) GrammarName() string { 52 | return C.GoString(C.ts_node_grammar_type(n._inner)) 53 | } 54 | 55 | // Get the [Language] that was used to parse this node's syntax tree. 56 | func (n *Node) Language() *Language { 57 | return &Language{Inner: C.ts_node_language(n._inner)} 58 | } 59 | 60 | // Check if this node is *named*. 61 | // 62 | // Named nodes correspond to named rules in the grammar, whereas 63 | // *anonymous* nodes correspond to string literals in the grammar. 64 | func (n *Node) IsNamed() bool { 65 | return bool(C.ts_node_is_named(n._inner)) 66 | } 67 | 68 | // Check if this node is *extra*. 69 | // 70 | // Extra nodes represent things like comments, which are not required in the 71 | // grammar, but can appear anywhere. 72 | func (n *Node) IsExtra() bool { 73 | return bool(C.ts_node_is_extra(n._inner)) 74 | } 75 | 76 | // Check if this node has been edited. 
77 | func (n *Node) HasChanges() bool { 78 | return bool(C.ts_node_has_changes(n._inner)) 79 | } 80 | 81 | // Check if this node represents a syntax error or contains any syntax 82 | // errors anywhere within it. 83 | func (n *Node) HasError() bool { 84 | return bool(C.ts_node_has_error(n._inner)) 85 | } 86 | 87 | // Check if this node represents a syntax error. 88 | // 89 | // Syntax errors represent parts of the code that could not be incorporated 90 | // into a valid syntax tree. 91 | func (n *Node) IsError() bool { 92 | return bool(C.ts_node_is_error(n._inner)) 93 | } 94 | 95 | // Get this node's parse state. 96 | func (n *Node) ParseState() uint16 { 97 | return uint16(C.ts_node_parse_state(n._inner)) 98 | } 99 | 100 | // Get the parse state after this node. 101 | func (n *Node) NextParseState() uint16 { 102 | return uint16(C.ts_node_next_parse_state(n._inner)) 103 | } 104 | 105 | // Check if this node is *missing*. 106 | // 107 | // Missing nodes are inserted by the parser in order to recover from 108 | // certain kinds of syntax errors. 109 | func (n *Node) IsMissing() bool { 110 | return bool(C.ts_node_is_missing(n._inner)) 111 | } 112 | 113 | // Get the byte offsets where this node starts. 114 | func (n *Node) StartByte() uint { 115 | return uint(C.ts_node_start_byte(n._inner)) 116 | } 117 | 118 | // Get the byte offsets where this node end. 119 | func (n *Node) EndByte() uint { 120 | return uint(C.ts_node_end_byte(n._inner)) 121 | } 122 | 123 | // Get the byte range of source code that this node represents. 124 | func (n *Node) ByteRange() (uint, uint) { 125 | return n.StartByte(), n.EndByte() 126 | } 127 | 128 | // Get the range of source code that this node represents, both in terms of 129 | // raw bytes and of row/column coordinates. 
130 | func (n *Node) Range() Range { 131 | return Range{ 132 | StartByte: n.StartByte(), 133 | EndByte: n.EndByte(), 134 | StartPoint: n.StartPosition(), 135 | EndPoint: n.EndPosition(), 136 | } 137 | } 138 | 139 | // Get this node's start position in terms of rows and columns. 140 | func (n *Node) StartPosition() Point { 141 | p := Point{} 142 | p.fromTSPoint(C.ts_node_start_point(n._inner)) 143 | return p 144 | } 145 | 146 | // Get this node's end position in terms of rows and columns. 147 | func (n *Node) EndPosition() Point { 148 | p := Point{} 149 | p.fromTSPoint(C.ts_node_end_point(n._inner)) 150 | return p 151 | } 152 | 153 | // Get the node's child at the given index, where zero represents the first 154 | // child. 155 | // 156 | // This method is fairly fast, but its cost is technically log(i), so if 157 | // you might be iterating over a long list of children, you should use 158 | // [Node.Children] instead. 159 | func (n *Node) Child(i uint) *Node { 160 | return newNode(C.ts_node_child(n._inner, C.uint(i))) 161 | } 162 | 163 | // Get this node's number of children. 164 | func (n *Node) ChildCount() uint { 165 | return uint(C.ts_node_child_count(n._inner)) 166 | } 167 | 168 | // Get this node's *named* child at the given index. 169 | // 170 | // See also [Node.IsNamed]. 171 | // This method is fairly fast, but its cost is technically log(i), so if 172 | // you might be iterating over a long list of children, you should use 173 | // [Node.NamedChildren] instead. 174 | func (n *Node) NamedChild(i uint) *Node { 175 | return newNode(C.ts_node_named_child(n._inner, C.uint(i))) 176 | } 177 | 178 | // Get this node's number of *named* children. 179 | // 180 | // See also [Node.IsNamed]. 181 | func (n *Node) NamedChildCount() uint { 182 | return uint(C.ts_node_named_child_count(n._inner)) 183 | } 184 | 185 | // Get the first child with the given field name. 
186 | // 187 | // If multiple children may have the same field name, access them using 188 | // [Node.ChildrenByFieldName] 189 | func (n *Node) ChildByFieldName(fieldName string) *Node { 190 | cFieldName := C.CString(fieldName) 191 | defer go_free(unsafe.Pointer(cFieldName)) 192 | return newNode(C.ts_node_child_by_field_name(n._inner, cFieldName, C.uint32_t(len(fieldName)))) 193 | } 194 | 195 | // Get this node's child with the given numerical field id. 196 | // 197 | // See also [Node.ChildByFieldName]. You can 198 | // convert a field name to an id using [Language.FieldIdForName]. 199 | func (n *Node) ChildByFieldId(fieldId uint16) *Node { 200 | return newNode(C.ts_node_child_by_field_id(n._inner, C.uint16_t(fieldId))) 201 | } 202 | 203 | // Get the field name of this node's child at the given index. 204 | func (n *Node) FieldNameForChild(childIndex uint32) string { 205 | ptr := C.ts_node_field_name_for_child(n._inner, C.uint32_t(childIndex)) 206 | if ptr == nil { 207 | return "" 208 | } 209 | return C.GoString(ptr) 210 | } 211 | 212 | // Get the field name of this node's named child at the given index. 213 | func (n *Node) FieldNameForNamedChild(namedChildIndex uint32) string { 214 | ptr := C.ts_node_field_name_for_named_child(n._inner, C.uint32_t(namedChildIndex)) 215 | if ptr == nil { 216 | return "" 217 | } 218 | return C.GoString(ptr) 219 | } 220 | 221 | // Iterate over this node's children. 222 | // 223 | // A [TreeCursor] is used to retrieve the children efficiently. Obtain 224 | // a [TreeCursor] by calling [Tree.Walk] or [Node.Walk]. To avoid 225 | // unnecessary allocations, you should reuse the same cursor for 226 | // subsequent calls to this method. 227 | // 228 | // If you're walking the tree recursively, you may want to use the 229 | // [TreeCursor] APIs directly instead. 
230 | func (n *Node) Children(cursor *TreeCursor) []Node { 231 | cursor.Reset(*n) 232 | cursor.GotoFirstChild() 233 | childCount := n.ChildCount() 234 | result := make([]Node, 0, childCount) 235 | for i := 0; i < int(childCount); i++ { 236 | result = append(result, *cursor.Node()) 237 | cursor.GotoNextSibling() 238 | } 239 | return result 240 | } 241 | 242 | // Iterate over this node's named children. 243 | // 244 | // See also [Node.Children]. 245 | func (n *Node) NamedChildren(cursor *TreeCursor) []Node { 246 | cursor.Reset(*n) 247 | cursor.GotoFirstChild() 248 | namedChildCount := n.NamedChildCount() 249 | result := make([]Node, 0, namedChildCount) 250 | for i := 0; i < int(namedChildCount); i++ { 251 | for !cursor.Node().IsNamed() { 252 | if !cursor.GotoNextSibling() { 253 | break 254 | } 255 | } 256 | result = append(result, *cursor.Node()) 257 | cursor.GotoNextSibling() 258 | } 259 | return result 260 | } 261 | 262 | // Iterate over this node's children with a given field name. 263 | // 264 | // See also [Node.Children]. 265 | func (n *Node) ChildrenByFieldName(fieldName string, cursor *TreeCursor) []Node { 266 | fieldId := n.Language().FieldIdForName(fieldName) 267 | done := fieldId == 0 268 | if !done { 269 | cursor.Reset(*n) 270 | cursor.GotoFirstChild() 271 | } 272 | result := make([]Node, 0) 273 | for !done { 274 | for cursor.FieldId() != fieldId { 275 | if !cursor.GotoNextSibling() { 276 | return result 277 | } 278 | } 279 | result = append(result, *cursor.Node()) 280 | if !cursor.GotoNextSibling() { 281 | done = true 282 | } 283 | } 284 | return result 285 | } 286 | 287 | // Get this node's immediate parent. 288 | // Prefer [Node.ChildWithDescendant] 289 | // for iterating over this node's ancestors. 290 | func (n *Node) Parent() *Node { 291 | return newNode(C.ts_node_parent(n._inner)) 292 | } 293 | 294 | // Get the node that contains `descendant`. 295 | // Note that this can return `descendant` itself. 
296 | func (n *Node) ChildWithDescendant(descendant *Node) *Node { 297 | return newNode(C.ts_node_child_with_descendant(n._inner, descendant._inner)) 298 | } 299 | 300 | // Get this node's next sibling. 301 | func (n *Node) NextSibling() *Node { 302 | return newNode(C.ts_node_next_sibling(n._inner)) 303 | } 304 | 305 | // Get this node's previous sibling. 306 | func (n *Node) PrevSibling() *Node { 307 | return newNode(C.ts_node_prev_sibling(n._inner)) 308 | } 309 | 310 | // Get this node's next named sibling. 311 | func (n *Node) NextNamedSibling() *Node { 312 | return newNode(C.ts_node_next_named_sibling(n._inner)) 313 | } 314 | 315 | // Get this node's previous named sibling. 316 | func (n *Node) PrevNamedSibling() *Node { 317 | return newNode(C.ts_node_prev_named_sibling(n._inner)) 318 | } 319 | 320 | // Get the node's first child that contains or starts after the given byte offset. 321 | func (n *Node) FirstChildForByte(byteOffset uint) *Node { 322 | return newNode(C.ts_node_first_child_for_byte(n._inner, C.uint(byteOffset))) 323 | } 324 | 325 | // Get the node's first named child that contains or starts after the given byte offset. 326 | func (n *Node) FirstNamedChildForByte(byteOffset uint) *Node { 327 | return newNode(C.ts_node_first_named_child_for_byte(n._inner, C.uint(byteOffset))) 328 | } 329 | 330 | // Get the node's number of descendants, including one for the node itself. 331 | func (n *Node) DescendantCount() uint { 332 | return uint(C.ts_node_descendant_count(n._inner)) 333 | } 334 | 335 | // Get the smallest node within this node that spans the given range. 336 | func (n *Node) DescendantForByteRange(start, end uint) *Node { 337 | return newNode(C.ts_node_descendant_for_byte_range(n._inner, C.uint(start), C.uint(end))) 338 | } 339 | 340 | // Get the smallest named node within this node that spans the given range. 
341 | func (n *Node) NamedDescendantForByteRange(start, end uint) *Node { 342 | return newNode(C.ts_node_named_descendant_for_byte_range(n._inner, C.uint(start), C.uint(end))) 343 | } 344 | 345 | // Get the smallest node within this node that spans the given range. 346 | func (n *Node) DescendantForPointRange(start, end Point) *Node { 347 | return newNode(C.ts_node_descendant_for_point_range(n._inner, start.toTSPoint(), end.toTSPoint())) 348 | } 349 | 350 | // Get the smallest named node within this node that spans the given range. 351 | func (n *Node) NamedDescendantForPointRange(start, end Point) *Node { 352 | return newNode(C.ts_node_named_descendant_for_point_range(n._inner, start.toTSPoint(), end.toTSPoint())) 353 | } 354 | 355 | func (n *Node) ToSexp() string { 356 | cString := C.ts_node_string(n._inner) 357 | result := C.GoString(cString) 358 | go_free(unsafe.Pointer(cString)) 359 | return result 360 | } 361 | 362 | func (n *Node) Utf8Text(source []byte) string { 363 | return string(source[n.StartByte():n.EndByte()]) 364 | } 365 | 366 | func (n *Node) Utf16Text(source []uint16) []uint16 { 367 | return source[n.StartByte():n.EndByte()] 368 | } 369 | 370 | // Create a new [TreeCursor] starting from this node. 371 | // 372 | // Note that the given node is considered the root of the cursor, 373 | // and the cursor cannot walk outside this node. 374 | func (n *Node) Walk() *TreeCursor { 375 | return newTreeCursor(*n) 376 | } 377 | 378 | // Edit this node to keep it in-sync with source code that has been edited. 379 | // 380 | // This function is only rarely needed. When you edit a syntax tree with 381 | // the [Tree.Edit] method, all of the nodes that you retrieve from 382 | // the tree afterward will already reflect the edit. You only need to 383 | // use [Node.Edit] when you have a specific [Node] instance that 384 | // you want to keep and continue to use after an edit. 
385 | func (n *Node) Edit(edit *InputEdit) { 386 | C.ts_node_edit(&n._inner, edit.toTSInputEdit()) 387 | } 388 | 389 | // Check if two nodes are identical. 390 | func (n *Node) Equals(other Node) bool { 391 | return bool(C.ts_node_eq(n._inner, other._inner)) 392 | } 393 | -------------------------------------------------------------------------------- /src/subtree.h: -------------------------------------------------------------------------------- 1 | #ifndef TREE_SITTER_SUBTREE_H_ 2 | #define TREE_SITTER_SUBTREE_H_ 3 | 4 | #ifdef __cplusplus 5 | extern "C" { 6 | #endif 7 | 8 | #include 9 | #include 10 | #include 11 | #include "./length.h" 12 | #include "./array.h" 13 | #include "./error_costs.h" 14 | #include "./host.h" 15 | #include "tree_sitter/api.h" 16 | #include "./parser.h" 17 | 18 | #define TS_TREE_STATE_NONE USHRT_MAX 19 | #define NULL_SUBTREE ((Subtree) {.ptr = NULL}) 20 | 21 | // The serialized state of an external scanner. 22 | // 23 | // Every time an external token subtree is created after a call to an 24 | // external scanner, the scanner's `serialize` function is called to 25 | // retrieve a serialized copy of its state. The bytes are then copied 26 | // onto the subtree itself so that the scanner's state can later be 27 | // restored using its `deserialize` function. 28 | // 29 | // Small byte arrays are stored inline, and long ones are allocated 30 | // separately on the heap. 31 | typedef struct { 32 | union { 33 | char *long_data; 34 | char short_data[24]; 35 | }; 36 | uint32_t length; 37 | } ExternalScannerState; 38 | 39 | // A compact representation of a subtree. 40 | // 41 | // This representation is used for small leaf nodes that are not 42 | // errors, and were not created by an external scanner. 43 | // 44 | // The idea behind the layout of this struct is that the `is_inline` 45 | // bit will fall exactly into the same location as the least significant 46 | // bit of the pointer in `Subtree` or `MutableSubtree`, respectively. 
// Because of alignment, for any valid pointer this will be 0, giving
// us the opportunity to make use of this bit to signify whether to use
// the pointer or the inline struct.
typedef struct SubtreeInlineData SubtreeInlineData;

// One-bit flags shared by every layout of `SubtreeInlineData` below.
#define SUBTREE_BITS      \
  bool visible : 1;       \
  bool named : 1;         \
  bool extra : 1;         \
  bool has_changes : 1;   \
  bool is_missing : 1;    \
  bool is_keyword : 1;

// Padding, lookahead and size fields shared by every layout of
// `SubtreeInlineData` below. `padding_rows` and `lookahead_bytes` are
// packed into four bits each.
#define SUBTREE_SIZE           \
  uint8_t padding_columns;     \
  uint8_t padding_rows : 4;    \
  uint8_t lookahead_bytes : 4; \
  uint8_t padding_bytes;       \
  uint8_t size_bytes;

// The same set of fields is declared in a different order for each
// endianness / pointer-size configuration; only the position of
// `is_inline` (and the `unused` filler bit) relative to the other
// fields changes.
#if TS_BIG_ENDIAN
#if TS_PTR_SIZE == 32

// Big-endian, TS_PTR_SIZE == 32: `is_inline` is the last flag bit,
// followed by the size fields.
struct SubtreeInlineData {
  uint16_t parse_state;
  uint8_t symbol;
  SUBTREE_BITS
  bool unused : 1;
  bool is_inline : 1;
  SUBTREE_SIZE
};

#else

// Big-endian, any other pointer size: the size fields come first and
// `is_inline` is the final bit of the struct.
struct SubtreeInlineData {
  SUBTREE_SIZE
  uint16_t parse_state;
  uint8_t symbol;
  SUBTREE_BITS
  bool unused : 1;
  bool is_inline : 1;
};

#endif
#else

// Not big-endian: `is_inline` is the very first bit of the struct.
struct SubtreeInlineData {
  bool is_inline : 1;
  SUBTREE_BITS
  uint8_t symbol;
  uint16_t parse_state;
  SUBTREE_SIZE
};

#endif

// The helper macros are only needed to declare the layouts above.
#undef SUBTREE_BITS
#undef SUBTREE_SIZE

// A heap-allocated representation of a subtree.
//
// This representation is used for parent nodes, external tokens,
// errors, and other leaf nodes whose data is too large to fit into
// the inline representation.
typedef struct {
  // Declared volatile; presumably updated atomically by the
  // retain/release functions (not visible here) - TODO confirm.
  volatile uint32_t ref_count;
  Length padding;
  Length size;
  uint32_t lookahead_bytes;
  uint32_t error_cost;
  uint32_t child_count;
  TSSymbol symbol;
  TSStateId parse_state;

  // One-bit flags. The first group below also exists on the inline
  // representation; the `fragile_*`, `has_external_*` and
  // `depends_on_column` flags exist only on heap subtrees.
  bool visible : 1;
  bool named : 1;
  bool extra : 1;
  bool fragile_left : 1;
  bool fragile_right : 1;
  bool has_changes : 1;
  bool has_external_tokens : 1;
  bool has_external_scanner_state_change : 1;
  bool depends_on_column: 1;
  bool is_missing : 1;
  bool is_keyword : 1;

  // Payload whose active member depends on the kind of subtree, as
  // noted on each member.
  union {
    // Non-terminal subtrees (`child_count > 0`)
    struct {
      uint32_t visible_child_count;
      uint32_t named_child_count;
      uint32_t visible_descendant_count;
      int32_t dynamic_precedence;
      uint16_t repeat_depth;
      uint16_t production_id;
      struct {
        TSSymbol symbol;
        TSStateId parse_state;
      } first_leaf;
    };

    // External terminal subtrees (`child_count == 0 && has_external_tokens`)
    ExternalScannerState external_scanner_state;

    // Error terminal subtrees (`child_count == 0 && symbol == ts_builtin_sym_error`)
    int32_t lookahead_char;
  };
} SubtreeHeapData;

// The fundamental building block of a syntax tree.
//
// Either an inline value (`data`, when `data.is_inline` is set) or a
// read-only pointer to heap data (`ptr`).
typedef union {
  SubtreeInlineData data;
  const SubtreeHeapData *ptr;
} Subtree;

// Like Subtree, but mutable.
163 | typedef union { 164 | SubtreeInlineData data; 165 | SubtreeHeapData *ptr; 166 | } MutableSubtree; 167 | 168 | typedef Array(Subtree) SubtreeArray; 169 | typedef Array(MutableSubtree) MutableSubtreeArray; 170 | 171 | typedef struct { 172 | MutableSubtreeArray free_trees; 173 | MutableSubtreeArray tree_stack; 174 | } SubtreePool; 175 | 176 | void ts_external_scanner_state_init(ExternalScannerState *self, const char *data, unsigned length); 177 | const char *ts_external_scanner_state_data(const ExternalScannerState *self); 178 | bool ts_external_scanner_state_eq(const ExternalScannerState *self, const char *buffer, unsigned length); 179 | void ts_external_scanner_state_delete(ExternalScannerState *self); 180 | 181 | void ts_subtree_array_copy(SubtreeArray self, SubtreeArray *dest); 182 | void ts_subtree_array_clear(SubtreePool *pool, SubtreeArray *self); 183 | void ts_subtree_array_delete(SubtreePool *pool, SubtreeArray *self); 184 | void ts_subtree_array_remove_trailing_extras(SubtreeArray *self, SubtreeArray *destination); 185 | void ts_subtree_array_reverse(SubtreeArray *self); 186 | 187 | SubtreePool ts_subtree_pool_new(uint32_t capacity); 188 | void ts_subtree_pool_delete(SubtreePool *self); 189 | 190 | Subtree ts_subtree_new_leaf( 191 | SubtreePool *pool, TSSymbol symbol, Length padding, Length size, 192 | uint32_t lookahead_bytes, TSStateId parse_state, 193 | bool has_external_tokens, bool depends_on_column, 194 | bool is_keyword, const TSLanguage *language 195 | ); 196 | Subtree ts_subtree_new_error( 197 | SubtreePool *pool, int32_t lookahead_char, Length padding, Length size, 198 | uint32_t bytes_scanned, TSStateId parse_state, const TSLanguage *language 199 | ); 200 | MutableSubtree ts_subtree_new_node( 201 | TSSymbol symbol, 202 | SubtreeArray *chiildren, 203 | unsigned production_id, 204 | const TSLanguage *language 205 | ); 206 | Subtree ts_subtree_new_error_node( 207 | SubtreeArray *children, 208 | bool extra, 209 | const TSLanguage * language 210 
| ); 211 | Subtree ts_subtree_new_missing_leaf( 212 | SubtreePool *pool, 213 | TSSymbol symbol, 214 | Length padding, 215 | uint32_t lookahead_bytes, 216 | const TSLanguage *language 217 | ); 218 | MutableSubtree ts_subtree_make_mut(SubtreePool *pool, Subtree self); 219 | void ts_subtree_retain(Subtree self); 220 | void ts_subtree_release(SubtreePool *pool, Subtree self); 221 | int ts_subtree_compare(Subtree left, Subtree right, SubtreePool *pool); 222 | void ts_subtree_set_symbol(MutableSubtree *self, TSSymbol symbol, const TSLanguage *language); 223 | void ts_subtree_compress(MutableSubtree self, unsigned count, const TSLanguage *language, MutableSubtreeArray *stack); 224 | void ts_subtree_summarize_children(MutableSubtree self, const TSLanguage *language); 225 | Subtree ts_subtree_edit(Subtree self, const TSInputEdit *edit, SubtreePool *pool); 226 | char *ts_subtree_string(Subtree self, TSSymbol alias_symbol, bool alias_is_named, const TSLanguage *language, bool include_all); 227 | void ts_subtree_print_dot_graph(Subtree self, const TSLanguage *language, FILE *f); 228 | Subtree ts_subtree_last_external_token(Subtree tree); 229 | const ExternalScannerState *ts_subtree_external_scanner_state(Subtree self); 230 | bool ts_subtree_external_scanner_state_eq(Subtree self, Subtree other); 231 | 232 | #define SUBTREE_GET(self, name) ((self).data.is_inline ? 
(self).data.name : (self).ptr->name) 233 | 234 | static inline TSSymbol ts_subtree_symbol(Subtree self) { return SUBTREE_GET(self, symbol); } 235 | static inline bool ts_subtree_visible(Subtree self) { return SUBTREE_GET(self, visible); } 236 | static inline bool ts_subtree_named(Subtree self) { return SUBTREE_GET(self, named); } 237 | static inline bool ts_subtree_extra(Subtree self) { return SUBTREE_GET(self, extra); } 238 | static inline bool ts_subtree_has_changes(Subtree self) { return SUBTREE_GET(self, has_changes); } 239 | static inline bool ts_subtree_missing(Subtree self) { return SUBTREE_GET(self, is_missing); } 240 | static inline bool ts_subtree_is_keyword(Subtree self) { return SUBTREE_GET(self, is_keyword); } 241 | static inline TSStateId ts_subtree_parse_state(Subtree self) { return SUBTREE_GET(self, parse_state); } 242 | static inline uint32_t ts_subtree_lookahead_bytes(Subtree self) { return SUBTREE_GET(self, lookahead_bytes); } 243 | 244 | #undef SUBTREE_GET 245 | 246 | // Get the size needed to store a heap-allocated subtree with the given 247 | // number of children. 248 | static inline size_t ts_subtree_alloc_size(uint32_t child_count) { 249 | return child_count * sizeof(Subtree) + sizeof(SubtreeHeapData); 250 | } 251 | 252 | // Get a subtree's children, which are allocated immediately before the 253 | // tree's own heap data. 254 | #define ts_subtree_children(self) \ 255 | ((self).data.is_inline ? 
NULL : (Subtree *)((self).ptr) - (self).ptr->child_count) 256 | 257 | static inline void ts_subtree_set_extra(MutableSubtree *self, bool is_extra) { 258 | if (self->data.is_inline) { 259 | self->data.extra = is_extra; 260 | } else { 261 | self->ptr->extra = is_extra; 262 | } 263 | } 264 | 265 | static inline TSSymbol ts_subtree_leaf_symbol(Subtree self) { 266 | if (self.data.is_inline) return self.data.symbol; 267 | if (self.ptr->child_count == 0) return self.ptr->symbol; 268 | return self.ptr->first_leaf.symbol; 269 | } 270 | 271 | static inline TSStateId ts_subtree_leaf_parse_state(Subtree self) { 272 | if (self.data.is_inline) return self.data.parse_state; 273 | if (self.ptr->child_count == 0) return self.ptr->parse_state; 274 | return self.ptr->first_leaf.parse_state; 275 | } 276 | 277 | static inline Length ts_subtree_padding(Subtree self) { 278 | if (self.data.is_inline) { 279 | Length result = {self.data.padding_bytes, {self.data.padding_rows, self.data.padding_columns}}; 280 | return result; 281 | } else { 282 | return self.ptr->padding; 283 | } 284 | } 285 | 286 | static inline Length ts_subtree_size(Subtree self) { 287 | if (self.data.is_inline) { 288 | Length result = {self.data.size_bytes, {0, self.data.size_bytes}}; 289 | return result; 290 | } else { 291 | return self.ptr->size; 292 | } 293 | } 294 | 295 | static inline Length ts_subtree_total_size(Subtree self) { 296 | return length_add(ts_subtree_padding(self), ts_subtree_size(self)); 297 | } 298 | 299 | static inline uint32_t ts_subtree_total_bytes(Subtree self) { 300 | return ts_subtree_total_size(self).bytes; 301 | } 302 | 303 | static inline uint32_t ts_subtree_child_count(Subtree self) { 304 | return self.data.is_inline ? 0 : self.ptr->child_count; 305 | } 306 | 307 | static inline uint32_t ts_subtree_repeat_depth(Subtree self) { 308 | return self.data.is_inline ? 
0 : self.ptr->repeat_depth; 309 | } 310 | 311 | static inline uint32_t ts_subtree_is_repetition(Subtree self) { 312 | return self.data.is_inline 313 | ? 0 314 | : !self.ptr->named && !self.ptr->visible && self.ptr->child_count != 0; 315 | } 316 | 317 | static inline uint32_t ts_subtree_visible_descendant_count(Subtree self) { 318 | return (self.data.is_inline || self.ptr->child_count == 0) 319 | ? 0 320 | : self.ptr->visible_descendant_count; 321 | } 322 | 323 | static inline uint32_t ts_subtree_visible_child_count(Subtree self) { 324 | if (ts_subtree_child_count(self) > 0) { 325 | return self.ptr->visible_child_count; 326 | } else { 327 | return 0; 328 | } 329 | } 330 | 331 | static inline uint32_t ts_subtree_error_cost(Subtree self) { 332 | if (ts_subtree_missing(self)) { 333 | return ERROR_COST_PER_MISSING_TREE + ERROR_COST_PER_RECOVERY; 334 | } else { 335 | return self.data.is_inline ? 0 : self.ptr->error_cost; 336 | } 337 | } 338 | 339 | static inline int32_t ts_subtree_dynamic_precedence(Subtree self) { 340 | return (self.data.is_inline || self.ptr->child_count == 0) ? 0 : self.ptr->dynamic_precedence; 341 | } 342 | 343 | static inline uint16_t ts_subtree_production_id(Subtree self) { 344 | if (ts_subtree_child_count(self) > 0) { 345 | return self.ptr->production_id; 346 | } else { 347 | return 0; 348 | } 349 | } 350 | 351 | static inline bool ts_subtree_fragile_left(Subtree self) { 352 | return self.data.is_inline ? false : self.ptr->fragile_left; 353 | } 354 | 355 | static inline bool ts_subtree_fragile_right(Subtree self) { 356 | return self.data.is_inline ? false : self.ptr->fragile_right; 357 | } 358 | 359 | static inline bool ts_subtree_has_external_tokens(Subtree self) { 360 | return self.data.is_inline ? false : self.ptr->has_external_tokens; 361 | } 362 | 363 | static inline bool ts_subtree_has_external_scanner_state_change(Subtree self) { 364 | return self.data.is_inline ? 
false : self.ptr->has_external_scanner_state_change; 365 | } 366 | 367 | static inline bool ts_subtree_depends_on_column(Subtree self) { 368 | return self.data.is_inline ? false : self.ptr->depends_on_column; 369 | } 370 | 371 | static inline bool ts_subtree_is_fragile(Subtree self) { 372 | return self.data.is_inline ? false : (self.ptr->fragile_left || self.ptr->fragile_right); 373 | } 374 | 375 | static inline bool ts_subtree_is_error(Subtree self) { 376 | return ts_subtree_symbol(self) == ts_builtin_sym_error; 377 | } 378 | 379 | static inline bool ts_subtree_is_eof(Subtree self) { 380 | return ts_subtree_symbol(self) == ts_builtin_sym_end; 381 | } 382 | 383 | static inline Subtree ts_subtree_from_mut(MutableSubtree self) { 384 | Subtree result; 385 | result.data = self.data; 386 | return result; 387 | } 388 | 389 | static inline MutableSubtree ts_subtree_to_mut_unsafe(Subtree self) { 390 | MutableSubtree result; 391 | result.data = self.data; 392 | return result; 393 | } 394 | 395 | #ifdef __cplusplus 396 | } 397 | #endif 398 | 399 | #endif // TREE_SITTER_SUBTREE_H_ 400 | -------------------------------------------------------------------------------- /src/unicode/umachine.h: -------------------------------------------------------------------------------- 1 | // © 2016 and later: Unicode, Inc. and others. 2 | // License & terms of use: http://www.unicode.org/copyright.html 3 | /* 4 | ****************************************************************************** 5 | * 6 | * Copyright (C) 1999-2015, International Business Machines 7 | * Corporation and others. All Rights Reserved. 8 | * 9 | ****************************************************************************** 10 | * file name: umachine.h 11 | * encoding: UTF-8 12 | * tab size: 8 (not used) 13 | * indentation:4 14 | * 15 | * created on: 1999sep13 16 | * created by: Markus W. Scherer 17 | * 18 | * This file defines basic types and constants for ICU to be 19 | * platform-independent. 
umachine.h and utf.h are included into 20 | * utypes.h to provide all the general definitions for ICU. 21 | * All of these definitions used to be in utypes.h before 22 | * the UTF-handling macros made this unmaintainable. 23 | */ 24 | 25 | #ifndef __UMACHINE_H__ 26 | #define __UMACHINE_H__ 27 | 28 | 29 | /** 30 | * \file 31 | * \brief Basic types and constants for UTF 32 | * 33 | * <h2> Basic types and constants for UTF </h2> 34 | * This file defines basic types and constants for utf.h to be 35 | * platform-independent. umachine.h and utf.h are included into 36 | * utypes.h to provide all the general definitions for ICU. 37 | * All of these definitions used to be in utypes.h before 38 | * the UTF-handling macros made this unmaintainable. 39 | * 40 | */ 41 | /*==========================================================================*/ 42 | /* Include platform-dependent definitions */ 43 | /* which are contained in the platform-specific file platform.h */ 44 | /*==========================================================================*/ 45 | 46 | #include "unicode/ptypes.h" /* platform.h is included in ptypes.h */ 47 | 48 | /* 49 | * ANSI C headers: 50 | * stddef.h defines wchar_t 51 | */ 52 | #include <stddef.h> 53 | 54 | /*==========================================================================*/ 55 | /* For C wrappers, we use the symbol U_STABLE. */ 56 | /* This works properly if the includer is C or C++. */ 57 | /* Functions are declared U_STABLE return-type U_EXPORT2 function-name()... */ 58 | /*==========================================================================*/ 59 | 60 | /** 61 | * \def U_CFUNC 62 | * This is used in a declaration of a library private ICU C function. 63 | * @stable ICU 2.4 64 | */ 65 | 66 | /** 67 | * \def U_CDECL_BEGIN 68 | * This is used to begin a declaration of a library private ICU C API. 
69 | * @stable ICU 2.4 70 | */ 71 | 72 | /** 73 | * \def U_CDECL_END 74 | * This is used to end a declaration of a library private ICU C API 75 | * @stable ICU 2.4 76 | */ 77 | 78 | #ifdef __cplusplus 79 | # define U_CFUNC extern "C" 80 | # define U_CDECL_BEGIN extern "C" { 81 | # define U_CDECL_END } 82 | #else 83 | # define U_CFUNC extern 84 | # define U_CDECL_BEGIN 85 | # define U_CDECL_END 86 | #endif 87 | 88 | #ifndef U_ATTRIBUTE_DEPRECATED 89 | /** 90 | * \def U_ATTRIBUTE_DEPRECATED 91 | * This is used for GCC specific attributes 92 | * @internal 93 | */ 94 | #if U_GCC_MAJOR_MINOR >= 302 95 | # define U_ATTRIBUTE_DEPRECATED __attribute__ ((deprecated)) 96 | /** 97 | * \def U_ATTRIBUTE_DEPRECATED 98 | * This is used for Visual C++ specific attributes 99 | * @internal 100 | */ 101 | #elif defined(_MSC_VER) && (_MSC_VER >= 1400) 102 | # define U_ATTRIBUTE_DEPRECATED __declspec(deprecated) 103 | #else 104 | # define U_ATTRIBUTE_DEPRECATED 105 | #endif 106 | #endif 107 | 108 | /** This is used to declare a function as a public ICU C API @stable ICU 2.0*/ 109 | #define U_CAPI U_CFUNC U_EXPORT 110 | /** This is used to declare a function as a stable public ICU C API*/ 111 | #define U_STABLE U_CAPI 112 | /** This is used to declare a function as a draft public ICU C API */ 113 | #define U_DRAFT U_CAPI 114 | /** This is used to declare a function as a deprecated public ICU C API */ 115 | #define U_DEPRECATED U_CAPI U_ATTRIBUTE_DEPRECATED 116 | /** This is used to declare a function as an obsolete public ICU C API */ 117 | #define U_OBSOLETE U_CAPI 118 | /** This is used to declare a function as an internal ICU C API */ 119 | #define U_INTERNAL U_CAPI 120 | 121 | /** 122 | * \def U_OVERRIDE 123 | * Defined to the C++11 "override" keyword if available. 124 | * Denotes a class or member which is an override of the base class. 125 | * May result in an error if it applied to something not an override. 
126 | * @internal 127 | */ 128 | #ifndef U_OVERRIDE 129 | #define U_OVERRIDE override 130 | #endif 131 | 132 | /** 133 | * \def U_FINAL 134 | * Defined to the C++11 "final" keyword if available. 135 | * Denotes a class or member which may not be overridden in subclasses. 136 | * May result in an error if subclasses attempt to override. 137 | * @internal 138 | */ 139 | #if !defined(U_FINAL) || defined(U_IN_DOXYGEN) 140 | #define U_FINAL final 141 | #endif 142 | 143 | // Before ICU 65, function-like, multi-statement ICU macros were just defined as 144 | // series of statements wrapped in { } blocks and the caller could choose to 145 | // either treat them as if they were actual functions and end the invocation 146 | // with a trailing ; creating an empty statement after the block or else omit 147 | // this trailing ; using the knowledge that the macro would expand to { }. 148 | // 149 | // But doing so doesn't work well with macros that look like functions and 150 | // compiler warnings about empty statements (ICU-20601) and ICU 65 therefore 151 | // switches to the standard solution of wrapping such macros in do { } while. 152 | // 153 | // This will however break existing code that depends on being able to invoke 154 | // these macros without a trailing ; so to be able to remain compatible with 155 | // such code the wrapper is itself defined as macros so that it's possible to 156 | // build ICU 65 and later with the old macro behaviour, like this: 157 | // 158 | // CPPFLAGS='-DUPRV_BLOCK_MACRO_BEGIN="" -DUPRV_BLOCK_MACRO_END=""' 159 | // runConfigureICU ... 160 | 161 | /** 162 | * \def UPRV_BLOCK_MACRO_BEGIN 163 | * Defined as the "do" keyword by default. 164 | * @internal 165 | */ 166 | #ifndef UPRV_BLOCK_MACRO_BEGIN 167 | #define UPRV_BLOCK_MACRO_BEGIN do 168 | #endif 169 | 170 | /** 171 | * \def UPRV_BLOCK_MACRO_END 172 | * Defined as "while (FALSE)" by default. 
173 | * @internal 174 | */ 175 | #ifndef UPRV_BLOCK_MACRO_END 176 | #define UPRV_BLOCK_MACRO_END while (FALSE) 177 | #endif 178 | 179 | /*==========================================================================*/ 180 | /* limits for int32_t etc., like in POSIX inttypes.h */ 181 | /*==========================================================================*/ 182 | 183 | #ifndef INT8_MIN 184 | /** The smallest value an 8 bit signed integer can hold @stable ICU 2.0 */ 185 | # define INT8_MIN ((int8_t)(-128)) 186 | #endif 187 | #ifndef INT16_MIN 188 | /** The smallest value a 16 bit signed integer can hold @stable ICU 2.0 */ 189 | # define INT16_MIN ((int16_t)(-32767-1)) 190 | #endif 191 | #ifndef INT32_MIN 192 | /** The smallest value a 32 bit signed integer can hold @stable ICU 2.0 */ 193 | # define INT32_MIN ((int32_t)(-2147483647-1)) 194 | #endif 195 | 196 | #ifndef INT8_MAX 197 | /** The largest value an 8 bit signed integer can hold @stable ICU 2.0 */ 198 | # define INT8_MAX ((int8_t)(127)) 199 | #endif 200 | #ifndef INT16_MAX 201 | /** The largest value a 16 bit signed integer can hold @stable ICU 2.0 */ 202 | # define INT16_MAX ((int16_t)(32767)) 203 | #endif 204 | #ifndef INT32_MAX 205 | /** The largest value a 32 bit signed integer can hold @stable ICU 2.0 */ 206 | # define INT32_MAX ((int32_t)(2147483647)) 207 | #endif 208 | 209 | #ifndef UINT8_MAX 210 | /** The largest value an 8 bit unsigned integer can hold @stable ICU 2.0 */ 211 | # define UINT8_MAX ((uint8_t)(255U)) 212 | #endif 213 | #ifndef UINT16_MAX 214 | /** The largest value a 16 bit unsigned integer can hold @stable ICU 2.0 */ 215 | # define UINT16_MAX ((uint16_t)(65535U)) 216 | #endif 217 | #ifndef UINT32_MAX 218 | /** The largest value a 32 bit unsigned integer can hold @stable ICU 2.0 */ 219 | # define UINT32_MAX ((uint32_t)(4294967295U)) 220 | #endif 221 | 222 | #if defined(U_INT64_T_UNAVAILABLE) 223 | # error int64_t is required for decimal format and rule-based number format. 
224 | #else 225 | # ifndef INT64_C 226 | /** 227 | * Provides a platform independent way to specify a signed 64-bit integer constant. 228 | * note: may be wrong for some 64 bit platforms - ensure your compiler provides INT64_C 229 | * @stable ICU 2.8 230 | */ 231 | # define INT64_C(c) c ## LL 232 | # endif 233 | # ifndef UINT64_C 234 | /** 235 | * Provides a platform independent way to specify an unsigned 64-bit integer constant. 236 | * note: may be wrong for some 64 bit platforms - ensure your compiler provides UINT64_C 237 | * @stable ICU 2.8 238 | */ 239 | # define UINT64_C(c) c ## ULL 240 | # endif 241 | # ifndef U_INT64_MIN 242 | /** The smallest value a 64 bit signed integer can hold @stable ICU 2.8 */ 243 | # define U_INT64_MIN ((int64_t)(INT64_C(-9223372036854775807)-1)) 244 | # endif 245 | # ifndef U_INT64_MAX 246 | /** The largest value a 64 bit signed integer can hold @stable ICU 2.8 */ 247 | # define U_INT64_MAX ((int64_t)(INT64_C(9223372036854775807))) 248 | # endif 249 | # ifndef U_UINT64_MAX 250 | /** The largest value a 64 bit unsigned integer can hold @stable ICU 2.8 */ 251 | # define U_UINT64_MAX ((uint64_t)(UINT64_C(18446744073709551615))) 252 | # endif 253 | #endif 254 | 255 | /*==========================================================================*/ 256 | /* Boolean data type */ 257 | /*==========================================================================*/ 258 | 259 | /** The ICU boolean type @stable ICU 2.0 */ 260 | typedef int8_t UBool; 261 | 262 | #ifndef TRUE 263 | /** The TRUE value of a UBool @stable ICU 2.0 */ 264 | # define TRUE 1 265 | #endif 266 | #ifndef FALSE 267 | /** The FALSE value of a UBool @stable ICU 2.0 */ 268 | # define FALSE 0 269 | #endif 270 | 271 | 272 | /*==========================================================================*/ 273 | /* Unicode data types */ 274 | /*==========================================================================*/ 275 | 276 | /* wchar_t-related definitions 
-------------------------------------------- */ 277 | 278 | /* 279 | * \def U_WCHAR_IS_UTF16 280 | * Defined if wchar_t uses UTF-16. 281 | * 282 | * @stable ICU 2.0 283 | */ 284 | /* 285 | * \def U_WCHAR_IS_UTF32 286 | * Defined if wchar_t uses UTF-32. 287 | * 288 | * @stable ICU 2.0 289 | */ 290 | #if !defined(U_WCHAR_IS_UTF16) && !defined(U_WCHAR_IS_UTF32) 291 | # ifdef __STDC_ISO_10646__ 292 | # if (U_SIZEOF_WCHAR_T==2) 293 | # define U_WCHAR_IS_UTF16 294 | # elif (U_SIZEOF_WCHAR_T==4) 295 | # define U_WCHAR_IS_UTF32 296 | # endif 297 | # elif defined __UCS2__ 298 | # if (U_PF_OS390 <= U_PLATFORM && U_PLATFORM <= U_PF_OS400) && (U_SIZEOF_WCHAR_T==2) 299 | # define U_WCHAR_IS_UTF16 300 | # endif 301 | # elif defined(__UCS4__) || (U_PLATFORM == U_PF_OS400 && defined(__UTF32__)) 302 | # if (U_SIZEOF_WCHAR_T==4) 303 | # define U_WCHAR_IS_UTF32 304 | # endif 305 | # elif U_PLATFORM_IS_DARWIN_BASED || (U_SIZEOF_WCHAR_T==4 && U_PLATFORM_IS_LINUX_BASED) 306 | # define U_WCHAR_IS_UTF32 307 | # elif U_PLATFORM_HAS_WIN32_API 308 | # define U_WCHAR_IS_UTF16 309 | # endif 310 | #endif 311 | 312 | /* UChar and UChar32 definitions -------------------------------------------- */ 313 | 314 | /** Number of bytes in a UChar. @stable ICU 2.0 */ 315 | #define U_SIZEOF_UCHAR 2 316 | 317 | /** 318 | * \def U_CHAR16_IS_TYPEDEF 319 | * If 1, then char16_t is a typedef and not a real type (yet) 320 | * @internal 321 | */ 322 | #if (U_PLATFORM == U_PF_AIX) && defined(__cplusplus) &&(U_CPLUSPLUS_VERSION < 11) 323 | // for AIX, uchar.h needs to be included 324 | # include <uchar.h> 325 | # define U_CHAR16_IS_TYPEDEF 1 326 | #elif defined(_MSC_VER) && (_MSC_VER < 1900) 327 | // Versions of Visual Studio/MSVC below 2015 do not support char16_t as a real type, 328 | // and instead use a typedef. 
https://msdn.microsoft.com/library/bb531344.aspx 329 | # define U_CHAR16_IS_TYPEDEF 1 330 | #else 331 | # define U_CHAR16_IS_TYPEDEF 0 332 | #endif 333 | 334 | 335 | /** 336 | * \var UChar 337 | * 338 | * The base type for UTF-16 code units and pointers. 339 | * Unsigned 16-bit integer. 340 | * Starting with ICU 59, C++ API uses char16_t directly, while C API continues to use UChar. 341 | * 342 | * UChar is configurable by defining the macro UCHAR_TYPE 343 | * on the preprocessor or compiler command line: 344 | * -DUCHAR_TYPE=uint16_t or -DUCHAR_TYPE=wchar_t (if U_SIZEOF_WCHAR_T==2) etc. 345 | * (The UCHAR_TYPE can also be \#defined earlier in this file, for outside the ICU library code.) 346 | * This is for transitional use from application code that uses uint16_t or wchar_t for UTF-16. 347 | * 348 | * The default is UChar=char16_t. 349 | * 350 | * C++11 defines char16_t as bit-compatible with uint16_t, but as a distinct type. 351 | * 352 | * In C, char16_t is a simple typedef of uint_least16_t. 353 | * ICU requires uint_least16_t=uint16_t for data memory mapping. 354 | * On macOS, char16_t is not available because the uchar.h standard header is missing. 355 | * 356 | * @stable ICU 4.4 357 | */ 358 | 359 | #if 1 360 | // #if 1 is normal. UChar defaults to char16_t in C++. 361 | // For configuration testing of UChar=uint16_t temporarily change this to #if 0. 362 | // The intltest Makefile #defines UCHAR_TYPE=char16_t, 363 | // so we only #define it to uint16_t if it is undefined so far. 364 | #elif !defined(UCHAR_TYPE) 365 | # define UCHAR_TYPE uint16_t 366 | #endif 367 | 368 | #if defined(U_COMBINED_IMPLEMENTATION) || defined(U_COMMON_IMPLEMENTATION) || \ 369 | defined(U_I18N_IMPLEMENTATION) || defined(U_IO_IMPLEMENTATION) 370 | // Inside the ICU library code, never configurable. 
371 | typedef char16_t UChar; 372 | #elif defined(UCHAR_TYPE) 373 | typedef UCHAR_TYPE UChar; 374 | #elif defined(__cplusplus) 375 | typedef char16_t UChar; 376 | #else 377 | typedef uint16_t UChar; 378 | #endif 379 | 380 | /** 381 | * \var OldUChar 382 | * Default ICU 58 definition of UChar. 383 | * A base type for UTF-16 code units and pointers. 384 | * Unsigned 16-bit integer. 385 | * 386 | * Define OldUChar to be wchar_t if that is 16 bits wide. 387 | * If wchar_t is not 16 bits wide, then define UChar to be uint16_t. 388 | * 389 | * This makes the definition of OldUChar platform-dependent 390 | * but allows direct string type compatibility with platforms with 391 | * 16-bit wchar_t types. 392 | * 393 | * This is how UChar was defined in ICU 58, for transition convenience. 394 | * Exception: ICU 58 UChar was defined to UCHAR_TYPE if that macro was defined. 395 | * The current UChar responds to UCHAR_TYPE but OldUChar does not. 396 | * 397 | * @stable ICU 59 398 | */ 399 | #if U_SIZEOF_WCHAR_T==2 400 | typedef wchar_t OldUChar; 401 | #elif defined(__CHAR16_TYPE__) 402 | typedef __CHAR16_TYPE__ OldUChar; 403 | #else 404 | typedef uint16_t OldUChar; 405 | #endif 406 | 407 | /** 408 | * Define UChar32 as a type for single Unicode code points. 409 | * UChar32 is a signed 32-bit integer (same as int32_t). 410 | * 411 | * The Unicode code point range is 0..0x10ffff. 412 | * All other values (negative or >=0x110000) are illegal as Unicode code points. 413 | * They may be used as sentinel values to indicate "done", "error" 414 | * or similar non-code point conditions. 415 | * 416 | * Before ICU 2.4 (Jitterbug 2146), UChar32 was defined 417 | * to be wchar_t if that is 32 bits wide (wchar_t may be signed or unsigned) 418 | * or else to be uint32_t. 419 | * That is, the definition of UChar32 was platform-dependent. 
420 | * 421 | * @see U_SENTINEL 422 | * @stable ICU 2.4 423 | */ 424 | typedef int32_t UChar32; 425 | 426 | /** 427 | * This value is intended for sentinel values for APIs that 428 | * (take or) return single code points (UChar32). 429 | * It is outside of the Unicode code point range 0..0x10ffff. 430 | * 431 | * For example, a "done" or "error" value in a new API 432 | * could be indicated with U_SENTINEL. 433 | * 434 | * ICU APIs designed before ICU 2.4 usually define service-specific "done" 435 | * values, mostly 0xffff. 436 | * Those may need to be distinguished from 437 | * actual U+ffff text contents by calling functions like 438 | * CharacterIterator::hasNext() or UnicodeString::length(). 439 | * 440 | * @return -1 441 | * @see UChar32 442 | * @stable ICU 2.4 443 | */ 444 | #define U_SENTINEL (-1) 445 | 446 | #include "unicode/urename.h" 447 | 448 | #endif 449 | -------------------------------------------------------------------------------- /src/lexer.c: -------------------------------------------------------------------------------- 1 | #include "./length.h" 2 | #include "./lexer.h" 3 | #include "./unicode.h" 4 | 5 | #include "tree_sitter/api.h" 6 | 7 | #include 8 | #include 9 | 10 | #define LOG(message, character) \ 11 | if (self->logger.log) { \ 12 | snprintf( \ 13 | self->debug_buffer, \ 14 | TREE_SITTER_SERIALIZATION_BUFFER_SIZE, \ 15 | 32 <= character && character < 127 ? 
\ 16 | message " character:'%c'" : \ 17 | message " character:%d", \ 18 | character \ 19 | ); \ 20 | self->logger.log( \ 21 | self->logger.payload, \ 22 | TSLogTypeLex, \ 23 | self->debug_buffer \ 24 | ); \ 25 | } 26 | 27 | static const int32_t BYTE_ORDER_MARK = 0xFEFF; 28 | 29 | static const TSRange DEFAULT_RANGE = { 30 | .start_point = { 31 | .row = 0, 32 | .column = 0, 33 | }, 34 | .end_point = { 35 | .row = UINT32_MAX, 36 | .column = UINT32_MAX, 37 | }, 38 | .start_byte = 0, 39 | .end_byte = UINT32_MAX 40 | }; 41 | 42 | /** 43 | * Sets the column data to the given value and marks it valid. 44 | * @param self The lexer state. 45 | * @param val The new value of the column data. 46 | */ 47 | static void ts_lexer__set_column_data(Lexer *self, uint32_t val) { 48 | self->column_data.valid = true; 49 | self->column_data.value = val; 50 | } 51 | 52 | /** 53 | * Increments the value of the column data; no-op if invalid. 54 | * @param self The lexer state. 55 | */ 56 | static void ts_lexer__increment_column_data(Lexer *self) { 57 | if (self->column_data.valid) { 58 | self->column_data.value++; 59 | } 60 | } 61 | 62 | /** 63 | * Marks the column data as invalid. 64 | * @param self The lexer state. 65 | */ 66 | static void ts_lexer__invalidate_column_data(Lexer *self) { 67 | self->column_data.valid = false; 68 | self->column_data.value = 0; 69 | } 70 | 71 | // Check if the lexer has reached EOF. This state is stored 72 | // by setting the lexer's `current_included_range_index` such that 73 | // it has consumed all of its available ranges. 74 | static bool ts_lexer__eof(const TSLexer *_self) { 75 | Lexer *self = (Lexer *)_self; 76 | return self->current_included_range_index == self->included_range_count; 77 | } 78 | 79 | // Clear the currently stored chunk of source code, because the lexer's 80 | // position has changed. 
static void ts_lexer__clear_chunk(Lexer *self) {
  self->chunk = NULL;
  self->chunk_size = 0;
  self->chunk_start = 0;
}

// Call the lexer's input callback to obtain a new chunk of source code
// for the current position.
//
// The chunk is recorded as starting at the current byte offset. If the
// callback reports a size of zero, the lexer is put into the EOF state
// (range index == range count) and the chunk pointer is cleared.
static void ts_lexer__get_chunk(Lexer *self) {
  self->chunk_start = self->current_position.bytes;
  self->chunk = self->input.read(
    self->input.payload,
    self->current_position.bytes,
    self->current_position.extent,
    &self->chunk_size
  );
  if (!self->chunk_size) {
    self->current_included_range_index = self->included_range_count;
    self->chunk = NULL;
  }
}

// Decode the next unicode character in the current chunk of source code.
// This assumes that the lexer has already retrieved a chunk of source
// code that spans the current position.
//
// On return, `data.lookahead` holds the decoded value and `lookahead_size`
// the number of bytes it occupies.
static void ts_lexer__get_lookahead(Lexer *self) {
  uint32_t position_in_chunk = self->current_position.bytes - self->chunk_start;
  uint32_t size = self->chunk_size - position_in_chunk;

  // Nothing left in the chunk: report a NUL lookahead of size 1.
  if (size == 0) {
    self->lookahead_size = 1;
    self->data.lookahead = '\0';
    return;
  }

  // Pick the decoder from the input encoding; any encoding other than the
  // three built-in ones falls through to the caller-supplied `decode`.
  const uint8_t *chunk = (const uint8_t *)self->chunk + position_in_chunk;
  DecodeFunction decode =
    self->input.encoding == TSInputEncodingUTF8    ? ts_decode_utf8     :
    self->input.encoding == TSInputEncodingUTF16LE ? ts_decode_utf16_le :
    self->input.encoding == TSInputEncodingUTF16BE ? ts_decode_utf16_be : self->input.decode;

  self->lookahead_size = decode(chunk, size, &self->data.lookahead);

  // If this chunk ended in the middle of a multi-byte character,
  // try again with a fresh chunk.
  // NOTE(review): the `size < 4` bound presumably reflects the longest
  // encoded character the built-in decoders produce - confirm.
  if (self->data.lookahead == TS_DECODE_ERROR && size < 4) {
    ts_lexer__get_chunk(self);
    chunk = (const uint8_t *)self->chunk;
    size = self->chunk_size;
    self->lookahead_size = decode(chunk, size, &self->data.lookahead);
  }

  // A character that still fails to decode is consumed one byte at a time.
  if (self->data.lookahead == TS_DECODE_ERROR) {
    self->lookahead_size = 1;
  }
}

// Move the lexer to `position`, snapping forward to the start of the next
// included range when the position falls between ranges, or to the EOF
// state when it lies beyond all of them.
static void ts_lexer_goto(Lexer *self, Length position) {
  // The cached column is only meaningful for the byte offset it was
  // computed at.
  if (position.bytes != self->current_position.bytes) {
    ts_lexer__invalidate_column_data(self);
  }

  self->current_position = position;

  // Move to the first valid position at or after the given position.
  bool found_included_range = false;
  for (unsigned i = 0; i < self->included_range_count; i++) {
    TSRange *included_range = &self->included_ranges[i];
    // Skip empty ranges and ranges that end at or before the position.
    if (
      included_range->end_byte > self->current_position.bytes &&
      included_range->end_byte > included_range->start_byte
    ) {
      if (included_range->start_byte >= self->current_position.bytes) {
        self->current_position = (Length) {
          .bytes = included_range->start_byte,
          .extent = included_range->start_point,
        };
      }

      self->current_included_range_index = i;
      found_included_range = true;
      break;
    }
  }

  if (found_included_range) {
    // If the current position is outside of the current chunk of text,
    // then clear out the current chunk of text.
    if (self->chunk && (
      self->current_position.bytes < self->chunk_start ||
      self->current_position.bytes >= self->chunk_start + self->chunk_size
    )) {
      ts_lexer__clear_chunk(self);
    }

    // A lookahead size of 0 marks the lookahead as not yet decoded.
    self->lookahead_size = 0;
    self->data.lookahead = '\0';
  }

  // If the given position is beyond any of included ranges, move to the EOF
  // state - past the end of the included ranges.
  else {
    self->current_included_range_index = self->included_range_count;
    // NOTE(review): indexes `included_range_count - 1`, so this assumes at
    // least one included range is configured - confirm against callers.
    TSRange *last_included_range = &self->included_ranges[self->included_range_count - 1];
    self->current_position = (Length) {
      .bytes = last_included_range->end_byte,
      .extent = last_included_range->end_point,
    };
    ts_lexer__clear_chunk(self);
    self->lookahead_size = 1;
    self->data.lookahead = '\0';
  }
}

/**
 * Actually advances the lexer. Does not log anything.
 * @param self The lexer state.
 * @param skip Whether to mark the consumed codepoint as whitespace.
 */
static void ts_lexer__do_advance(Lexer *self, bool skip) {
  if (self->lookahead_size) {
    if (self->data.lookahead == '\n') {
      self->current_position.extent.row++;
      self->current_position.extent.column = 0;
      ts_lexer__set_column_data(self, 0);
    } else {
      bool is_bom = self->current_position.bytes == 0 &&
        self->data.lookahead == BYTE_ORDER_MARK;
      if (!is_bom) ts_lexer__increment_column_data(self);
      self->current_position.extent.column += self->lookahead_size;
    }
    self->current_position.bytes += self->lookahead_size;
  }

  const TSRange *current_range = &self->included_ranges[self->current_included_range_index];
  while (
    self->current_position.bytes >= current_range->end_byte ||
    current_range->end_byte == current_range->start_byte
  ) {
    if (self->current_included_range_index < self->included_range_count) {
      self->current_included_range_index++;
    }
    if (self->current_included_range_index < self->included_range_count) {
      current_range++;
      self->current_position = (Length) {
        current_range->start_byte,
        current_range->start_point,
      };
    } else {
      current_range = NULL;
      break;
    }
  }

  if (skip) self->token_start_position = self->current_position;

  if
(current_range) { 238 | if ( 239 | self->current_position.bytes < self->chunk_start || 240 | self->current_position.bytes >= self->chunk_start + self->chunk_size 241 | ) { 242 | ts_lexer__get_chunk(self); 243 | } 244 | ts_lexer__get_lookahead(self); 245 | } else { 246 | ts_lexer__clear_chunk(self); 247 | self->data.lookahead = '\0'; 248 | self->lookahead_size = 1; 249 | } 250 | } 251 | 252 | // Advance to the next character in the source code, retrieving a new 253 | // chunk of source code if needed. 254 | static void ts_lexer__advance(TSLexer *_self, bool skip) { 255 | Lexer *self = (Lexer *)_self; 256 | if (!self->chunk) return; 257 | 258 | if (skip) { 259 | LOG("skip", self->data.lookahead) 260 | } else { 261 | LOG("consume", self->data.lookahead) 262 | } 263 | 264 | ts_lexer__do_advance(self, skip); 265 | } 266 | 267 | // Mark that a token match has completed. This can be called multiple 268 | // times if a longer match is found later. 269 | static void ts_lexer__mark_end(TSLexer *_self) { 270 | Lexer *self = (Lexer *)_self; 271 | if (!ts_lexer__eof(&self->data)) { 272 | // If the lexer is right at the beginning of included range, 273 | // then the token should be considered to end at the *end* of the 274 | // previous included range, rather than here. 
275 | TSRange *current_included_range = &self->included_ranges[ 276 | self->current_included_range_index 277 | ]; 278 | if ( 279 | self->current_included_range_index > 0 && 280 | self->current_position.bytes == current_included_range->start_byte 281 | ) { 282 | TSRange *previous_included_range = current_included_range - 1; 283 | self->token_end_position = (Length) { 284 | previous_included_range->end_byte, 285 | previous_included_range->end_point, 286 | }; 287 | return; 288 | } 289 | } 290 | self->token_end_position = self->current_position; 291 | } 292 | 293 | static uint32_t ts_lexer__get_column(TSLexer *_self) { 294 | Lexer *self = (Lexer *)_self; 295 | 296 | self->did_get_column = true; 297 | 298 | if (!self->column_data.valid) { 299 | // Record current position 300 | uint32_t goal_byte = self->current_position.bytes; 301 | 302 | // Back up to the beginning of the line 303 | Length start_of_col = { 304 | self->current_position.bytes - self->current_position.extent.column, 305 | {self->current_position.extent.row, 0}, 306 | }; 307 | ts_lexer_goto(self, start_of_col); 308 | ts_lexer__set_column_data(self, 0); 309 | ts_lexer__get_chunk(self); 310 | 311 | if (!ts_lexer__eof(_self)) { 312 | ts_lexer__get_lookahead(self); 313 | 314 | // Advance to the recorded position 315 | while (self->current_position.bytes < goal_byte && !ts_lexer__eof(_self) && self->chunk) { 316 | ts_lexer__do_advance(self, false); 317 | if (ts_lexer__eof(_self)) break; 318 | } 319 | } 320 | } 321 | 322 | return self->column_data.value; 323 | } 324 | 325 | // Is the lexer at a boundary between two disjoint included ranges of 326 | // source code? This is exposed as an API because some languages' external 327 | // scanners need to perform custom actions at these boundaries. 
328 | static bool ts_lexer__is_at_included_range_start(const TSLexer *_self) { 329 | const Lexer *self = (const Lexer *)_self; 330 | if (self->current_included_range_index < self->included_range_count) { 331 | TSRange *current_range = &self->included_ranges[self->current_included_range_index]; 332 | return self->current_position.bytes == current_range->start_byte; 333 | } else { 334 | return false; 335 | } 336 | } 337 | 338 | static void ts_lexer__log(const TSLexer *_self, const char *fmt, ...) { 339 | Lexer *self = (Lexer *)_self; 340 | va_list args; 341 | va_start(args, fmt); 342 | if (self->logger.log) { 343 | vsnprintf(self->debug_buffer, TREE_SITTER_SERIALIZATION_BUFFER_SIZE, fmt, args); 344 | self->logger.log(self->logger.payload, TSLogTypeLex, self->debug_buffer); 345 | } 346 | va_end(args); 347 | } 348 | 349 | void ts_lexer_init(Lexer *self) { 350 | *self = (Lexer) { 351 | .data = { 352 | // The lexer's methods are stored as struct fields so that generated 353 | // parsers can call them without needing to be linked against this 354 | // library. 
355 | .advance = ts_lexer__advance, 356 | .mark_end = ts_lexer__mark_end, 357 | .get_column = ts_lexer__get_column, 358 | .is_at_included_range_start = ts_lexer__is_at_included_range_start, 359 | .eof = ts_lexer__eof, 360 | .log = ts_lexer__log, 361 | .lookahead = 0, 362 | .result_symbol = 0, 363 | }, 364 | .chunk = NULL, 365 | .chunk_size = 0, 366 | .chunk_start = 0, 367 | .current_position = {0, {0, 0}}, 368 | .logger = { 369 | .payload = NULL, 370 | .log = NULL 371 | }, 372 | .included_ranges = NULL, 373 | .included_range_count = 0, 374 | .current_included_range_index = 0, 375 | .did_get_column = false, 376 | .column_data = { 377 | .valid = false, 378 | .value = 0 379 | } 380 | }; 381 | ts_lexer_set_included_ranges(self, NULL, 0); 382 | } 383 | 384 | void ts_lexer_delete(Lexer *self) { 385 | ts_free(self->included_ranges); 386 | } 387 | 388 | void ts_lexer_set_input(Lexer *self, TSInput input) { 389 | self->input = input; 390 | ts_lexer__clear_chunk(self); 391 | ts_lexer_goto(self, self->current_position); 392 | } 393 | 394 | // Move the lexer to the given position. This doesn't do any work 395 | // if the parser is already at the given position. 
396 | void ts_lexer_reset(Lexer *self, Length position) { 397 | if (position.bytes != self->current_position.bytes) { 398 | ts_lexer_goto(self, position); 399 | } 400 | } 401 | 402 | void ts_lexer_start(Lexer *self) { 403 | self->token_start_position = self->current_position; 404 | self->token_end_position = LENGTH_UNDEFINED; 405 | self->data.result_symbol = 0; 406 | self->did_get_column = false; 407 | if (!ts_lexer__eof(&self->data)) { 408 | if (!self->chunk_size) ts_lexer__get_chunk(self); 409 | if (!self->lookahead_size) ts_lexer__get_lookahead(self); 410 | if (self->current_position.bytes == 0) { 411 | if (self->data.lookahead == BYTE_ORDER_MARK) { 412 | ts_lexer__advance(&self->data, true); 413 | } 414 | ts_lexer__set_column_data(self, 0); 415 | } 416 | } 417 | } 418 | 419 | void ts_lexer_finish(Lexer *self, uint32_t *lookahead_end_byte) { 420 | if (length_is_undefined(self->token_end_position)) { 421 | ts_lexer__mark_end(&self->data); 422 | } 423 | 424 | // If the token ended at an included range boundary, then its end position 425 | // will have been reset to the end of the preceding range. Reset the start 426 | // position to match. 427 | if (self->token_end_position.bytes < self->token_start_position.bytes) { 428 | self->token_start_position = self->token_end_position; 429 | } 430 | 431 | uint32_t current_lookahead_end_byte = self->current_position.bytes + 1; 432 | 433 | // In order to determine that a byte sequence is invalid UTF8 or UTF16, 434 | // the character decoding algorithm may have looked at the following byte. 435 | // Therefore, the next byte *after* the current (invalid) character 436 | // affects the interpretation of the current character. 
437 | if (self->data.lookahead == TS_DECODE_ERROR) { 438 | current_lookahead_end_byte += 4; // the maximum number of bytes read to identify an invalid code point 439 | } 440 | 441 | if (current_lookahead_end_byte > *lookahead_end_byte) { 442 | *lookahead_end_byte = current_lookahead_end_byte; 443 | } 444 | } 445 | 446 | void ts_lexer_mark_end(Lexer *self) { 447 | ts_lexer__mark_end(&self->data); 448 | } 449 | 450 | bool ts_lexer_set_included_ranges( 451 | Lexer *self, 452 | const TSRange *ranges, 453 | uint32_t count 454 | ) { 455 | if (count == 0 || !ranges) { 456 | ranges = &DEFAULT_RANGE; 457 | count = 1; 458 | } else { 459 | uint32_t previous_byte = 0; 460 | for (unsigned i = 0; i < count; i++) { 461 | const TSRange *range = &ranges[i]; 462 | if ( 463 | range->start_byte < previous_byte || 464 | range->end_byte < range->start_byte 465 | ) return false; 466 | previous_byte = range->end_byte; 467 | } 468 | } 469 | 470 | size_t size = count * sizeof(TSRange); 471 | self->included_ranges = ts_realloc(self->included_ranges, size); 472 | memcpy(self->included_ranges, ranges, size); 473 | self->included_range_count = count; 474 | ts_lexer_goto(self, self->current_position); 475 | return true; 476 | } 477 | 478 | TSRange *ts_lexer_included_ranges(const Lexer *self, uint32_t *count) { 479 | *count = self->included_range_count; 480 | return self->included_ranges; 481 | } 482 | 483 | #undef LOG 484 | -------------------------------------------------------------------------------- /src/get_changed_ranges.c: -------------------------------------------------------------------------------- 1 | #include "./get_changed_ranges.h" 2 | #include "./subtree.h" 3 | #include "./language.h" 4 | #include "./error_costs.h" 5 | #include "./tree_cursor.h" 6 | #include "./ts_assert.h" 7 | 8 | // #define DEBUG_GET_CHANGED_RANGES 9 | 10 | static void ts_range_array_add( 11 | TSRangeArray *self, 12 | Length start, 13 | Length end 14 | ) { 15 | if (self->size > 0) { 16 | TSRange *last_range = 
array_back(self); 17 | if (start.bytes <= last_range->end_byte) { 18 | last_range->end_byte = end.bytes; 19 | last_range->end_point = end.extent; 20 | return; 21 | } 22 | } 23 | 24 | if (start.bytes < end.bytes) { 25 | TSRange range = { start.extent, end.extent, start.bytes, end.bytes }; 26 | array_push(self, range); 27 | } 28 | } 29 | 30 | bool ts_range_array_intersects( 31 | const TSRangeArray *self, 32 | unsigned start_index, 33 | uint32_t start_byte, 34 | uint32_t end_byte 35 | ) { 36 | for (unsigned i = start_index; i < self->size; i++) { 37 | TSRange *range = &self->contents[i]; 38 | if (range->end_byte > start_byte) { 39 | if (range->start_byte >= end_byte) break; 40 | return true; 41 | } 42 | } 43 | return false; 44 | } 45 | 46 | void ts_range_array_get_changed_ranges( 47 | const TSRange *old_ranges, unsigned old_range_count, 48 | const TSRange *new_ranges, unsigned new_range_count, 49 | TSRangeArray *differences 50 | ) { 51 | unsigned new_index = 0; 52 | unsigned old_index = 0; 53 | Length current_position = length_zero(); 54 | bool in_old_range = false; 55 | bool in_new_range = false; 56 | 57 | while (old_index < old_range_count || new_index < new_range_count) { 58 | const TSRange *old_range = &old_ranges[old_index]; 59 | const TSRange *new_range = &new_ranges[new_index]; 60 | 61 | Length next_old_position; 62 | if (in_old_range) { 63 | next_old_position = (Length) {old_range->end_byte, old_range->end_point}; 64 | } else if (old_index < old_range_count) { 65 | next_old_position = (Length) {old_range->start_byte, old_range->start_point}; 66 | } else { 67 | next_old_position = LENGTH_MAX; 68 | } 69 | 70 | Length next_new_position; 71 | if (in_new_range) { 72 | next_new_position = (Length) {new_range->end_byte, new_range->end_point}; 73 | } else if (new_index < new_range_count) { 74 | next_new_position = (Length) {new_range->start_byte, new_range->start_point}; 75 | } else { 76 | next_new_position = LENGTH_MAX; 77 | } 78 | 79 | if (next_old_position.bytes < 
next_new_position.bytes) { 80 | if (in_old_range != in_new_range) { 81 | ts_range_array_add(differences, current_position, next_old_position); 82 | } 83 | if (in_old_range) old_index++; 84 | current_position = next_old_position; 85 | in_old_range = !in_old_range; 86 | } else if (next_new_position.bytes < next_old_position.bytes) { 87 | if (in_old_range != in_new_range) { 88 | ts_range_array_add(differences, current_position, next_new_position); 89 | } 90 | if (in_new_range) new_index++; 91 | current_position = next_new_position; 92 | in_new_range = !in_new_range; 93 | } else { 94 | if (in_old_range != in_new_range) { 95 | ts_range_array_add(differences, current_position, next_new_position); 96 | } 97 | if (in_old_range) old_index++; 98 | if (in_new_range) new_index++; 99 | in_old_range = !in_old_range; 100 | in_new_range = !in_new_range; 101 | current_position = next_new_position; 102 | } 103 | } 104 | } 105 | 106 | typedef struct { 107 | TreeCursor cursor; 108 | const TSLanguage *language; 109 | unsigned visible_depth; 110 | bool in_padding; 111 | } Iterator; 112 | 113 | static Iterator iterator_new( 114 | TreeCursor *cursor, 115 | const Subtree *tree, 116 | const TSLanguage *language 117 | ) { 118 | array_clear(&cursor->stack); 119 | array_push(&cursor->stack, ((TreeCursorEntry) { 120 | .subtree = tree, 121 | .position = length_zero(), 122 | .child_index = 0, 123 | .structural_child_index = 0, 124 | })); 125 | return (Iterator) { 126 | .cursor = *cursor, 127 | .language = language, 128 | .visible_depth = 1, 129 | .in_padding = false, 130 | }; 131 | } 132 | 133 | static bool iterator_done(Iterator *self) { 134 | return self->cursor.stack.size == 0; 135 | } 136 | 137 | static Length iterator_start_position(Iterator *self) { 138 | TreeCursorEntry entry = *array_back(&self->cursor.stack); 139 | if (self->in_padding) { 140 | return entry.position; 141 | } else { 142 | return length_add(entry.position, ts_subtree_padding(*entry.subtree)); 143 | } 144 | } 145 | 146 | 
static Length iterator_end_position(Iterator *self) { 147 | TreeCursorEntry entry = *array_back(&self->cursor.stack); 148 | Length result = length_add(entry.position, ts_subtree_padding(*entry.subtree)); 149 | if (self->in_padding) { 150 | return result; 151 | } else { 152 | return length_add(result, ts_subtree_size(*entry.subtree)); 153 | } 154 | } 155 | 156 | static bool iterator_tree_is_visible(const Iterator *self) { 157 | TreeCursorEntry entry = *array_back(&self->cursor.stack); 158 | if (ts_subtree_visible(*entry.subtree)) return true; 159 | if (self->cursor.stack.size > 1) { 160 | Subtree parent = *self->cursor.stack.contents[self->cursor.stack.size - 2].subtree; 161 | return ts_language_alias_at( 162 | self->language, 163 | parent.ptr->production_id, 164 | entry.structural_child_index 165 | ) != 0; 166 | } 167 | return false; 168 | } 169 | 170 | static void iterator_get_visible_state( 171 | const Iterator *self, 172 | Subtree *tree, 173 | TSSymbol *alias_symbol, 174 | uint32_t *start_byte 175 | ) { 176 | uint32_t i = self->cursor.stack.size - 1; 177 | 178 | if (self->in_padding) { 179 | if (i == 0) return; 180 | i--; 181 | } 182 | 183 | for (; i + 1 > 0; i--) { 184 | TreeCursorEntry entry = self->cursor.stack.contents[i]; 185 | 186 | if (i > 0) { 187 | const Subtree *parent = self->cursor.stack.contents[i - 1].subtree; 188 | *alias_symbol = ts_language_alias_at( 189 | self->language, 190 | parent->ptr->production_id, 191 | entry.structural_child_index 192 | ); 193 | } 194 | 195 | if (ts_subtree_visible(*entry.subtree) || *alias_symbol) { 196 | *tree = *entry.subtree; 197 | *start_byte = entry.position.bytes; 198 | break; 199 | } 200 | } 201 | } 202 | 203 | static void iterator_ascend(Iterator *self) { 204 | if (iterator_done(self)) return; 205 | if (iterator_tree_is_visible(self) && !self->in_padding) self->visible_depth--; 206 | if (array_back(&self->cursor.stack)->child_index > 0) self->in_padding = false; 207 | self->cursor.stack.size--; 208 | } 209 | 
210 | static bool iterator_descend(Iterator *self, uint32_t goal_position) { 211 | if (self->in_padding) return false; 212 | 213 | bool did_descend = false; 214 | do { 215 | did_descend = false; 216 | TreeCursorEntry entry = *array_back(&self->cursor.stack); 217 | Length position = entry.position; 218 | uint32_t structural_child_index = 0; 219 | for (uint32_t i = 0, n = ts_subtree_child_count(*entry.subtree); i < n; i++) { 220 | const Subtree *child = &ts_subtree_children(*entry.subtree)[i]; 221 | Length child_left = length_add(position, ts_subtree_padding(*child)); 222 | Length child_right = length_add(child_left, ts_subtree_size(*child)); 223 | 224 | if (child_right.bytes > goal_position) { 225 | array_push(&self->cursor.stack, ((TreeCursorEntry) { 226 | .subtree = child, 227 | .position = position, 228 | .child_index = i, 229 | .structural_child_index = structural_child_index, 230 | })); 231 | 232 | if (iterator_tree_is_visible(self)) { 233 | if (child_left.bytes > goal_position) { 234 | self->in_padding = true; 235 | } else { 236 | self->visible_depth++; 237 | } 238 | return true; 239 | } 240 | 241 | did_descend = true; 242 | break; 243 | } 244 | 245 | position = child_right; 246 | if (!ts_subtree_extra(*child)) structural_child_index++; 247 | } 248 | } while (did_descend); 249 | 250 | return false; 251 | } 252 | 253 | static void iterator_advance(Iterator *self) { 254 | if (self->in_padding) { 255 | self->in_padding = false; 256 | if (iterator_tree_is_visible(self)) { 257 | self->visible_depth++; 258 | } else { 259 | iterator_descend(self, 0); 260 | } 261 | return; 262 | } 263 | 264 | for (;;) { 265 | if (iterator_tree_is_visible(self)) self->visible_depth--; 266 | TreeCursorEntry entry = array_pop(&self->cursor.stack); 267 | if (iterator_done(self)) return; 268 | 269 | const Subtree *parent = array_back(&self->cursor.stack)->subtree; 270 | uint32_t child_index = entry.child_index + 1; 271 | if (ts_subtree_child_count(*parent) > child_index) { 272 | Length 
position = length_add(entry.position, ts_subtree_total_size(*entry.subtree)); 273 | uint32_t structural_child_index = entry.structural_child_index; 274 | if (!ts_subtree_extra(*entry.subtree)) structural_child_index++; 275 | const Subtree *next_child = &ts_subtree_children(*parent)[child_index]; 276 | 277 | array_push(&self->cursor.stack, ((TreeCursorEntry) { 278 | .subtree = next_child, 279 | .position = position, 280 | .child_index = child_index, 281 | .structural_child_index = structural_child_index, 282 | })); 283 | 284 | if (iterator_tree_is_visible(self)) { 285 | if (ts_subtree_padding(*next_child).bytes > 0) { 286 | self->in_padding = true; 287 | } else { 288 | self->visible_depth++; 289 | } 290 | } else { 291 | iterator_descend(self, 0); 292 | } 293 | break; 294 | } 295 | } 296 | } 297 | 298 | typedef enum { 299 | IteratorDiffers, 300 | IteratorMayDiffer, 301 | IteratorMatches, 302 | } IteratorComparison; 303 | 304 | static IteratorComparison iterator_compare( 305 | const Iterator *old_iter, 306 | const Iterator *new_iter 307 | ) { 308 | Subtree old_tree = NULL_SUBTREE; 309 | Subtree new_tree = NULL_SUBTREE; 310 | uint32_t old_start = 0; 311 | uint32_t new_start = 0; 312 | TSSymbol old_alias_symbol = 0; 313 | TSSymbol new_alias_symbol = 0; 314 | iterator_get_visible_state(old_iter, &old_tree, &old_alias_symbol, &old_start); 315 | iterator_get_visible_state(new_iter, &new_tree, &new_alias_symbol, &new_start); 316 | 317 | if (!old_tree.ptr && !new_tree.ptr) return IteratorMatches; 318 | if (!old_tree.ptr || !new_tree.ptr) return IteratorDiffers; 319 | 320 | if ( 321 | old_alias_symbol == new_alias_symbol && 322 | ts_subtree_symbol(old_tree) == ts_subtree_symbol(new_tree) 323 | ) { 324 | if (old_start == new_start && 325 | !ts_subtree_has_changes(old_tree) && 326 | ts_subtree_symbol(old_tree) != ts_builtin_sym_error && 327 | ts_subtree_size(old_tree).bytes == ts_subtree_size(new_tree).bytes && 328 | ts_subtree_parse_state(old_tree) != TS_TREE_STATE_NONE && 329 
| ts_subtree_parse_state(new_tree) != TS_TREE_STATE_NONE && 330 | (ts_subtree_parse_state(old_tree) == ERROR_STATE) == 331 | (ts_subtree_parse_state(new_tree) == ERROR_STATE)) { 332 | return IteratorMatches; 333 | } else { 334 | return IteratorMayDiffer; 335 | } 336 | } 337 | 338 | return IteratorDiffers; 339 | } 340 | 341 | #ifdef DEBUG_GET_CHANGED_RANGES 342 | static inline void iterator_print_state(Iterator *self) { 343 | TreeCursorEntry entry = *array_back(&self->cursor.stack); 344 | TSPoint start = iterator_start_position(self).extent; 345 | TSPoint end = iterator_end_position(self).extent; 346 | const char *name = ts_language_symbol_name(self->language, ts_subtree_symbol(*entry.subtree)); 347 | printf( 348 | "(%-25s %s\t depth:%u [%u, %u] - [%u, %u])", 349 | name, self->in_padding ? "(p)" : " ", 350 | self->visible_depth, 351 | start.row + 1, start.column, 352 | end.row + 1, end.column 353 | ); 354 | } 355 | #endif 356 | 357 | unsigned ts_subtree_get_changed_ranges( 358 | const Subtree *old_tree, const Subtree *new_tree, 359 | TreeCursor *cursor1, TreeCursor *cursor2, 360 | const TSLanguage *language, 361 | const TSRangeArray *included_range_differences, 362 | TSRange **ranges 363 | ) { 364 | TSRangeArray results = array_new(); 365 | 366 | Iterator old_iter = iterator_new(cursor1, old_tree, language); 367 | Iterator new_iter = iterator_new(cursor2, new_tree, language); 368 | 369 | unsigned included_range_difference_index = 0; 370 | 371 | Length position = iterator_start_position(&old_iter); 372 | Length next_position = iterator_start_position(&new_iter); 373 | if (position.bytes < next_position.bytes) { 374 | ts_range_array_add(&results, position, next_position); 375 | position = next_position; 376 | } else if (position.bytes > next_position.bytes) { 377 | ts_range_array_add(&results, next_position, position); 378 | next_position = position; 379 | } 380 | 381 | do { 382 | #ifdef DEBUG_GET_CHANGED_RANGES 383 | printf("At [%-2u, %-2u] Compare ", 
position.extent.row + 1, position.extent.column); 384 | iterator_print_state(&old_iter); 385 | printf("\tvs\t"); 386 | iterator_print_state(&new_iter); 387 | puts(""); 388 | #endif 389 | 390 | // Compare the old and new subtrees. 391 | IteratorComparison comparison = iterator_compare(&old_iter, &new_iter); 392 | 393 | // Even if the two subtrees appear to be identical, they could differ 394 | // internally if they contain a range of text that was previously 395 | // excluded from the parse, and is now included, or vice-versa. 396 | if (comparison == IteratorMatches && ts_range_array_intersects( 397 | included_range_differences, 398 | included_range_difference_index, 399 | position.bytes, 400 | iterator_end_position(&old_iter).bytes 401 | )) { 402 | comparison = IteratorMayDiffer; 403 | } 404 | 405 | bool is_changed = false; 406 | switch (comparison) { 407 | // If the subtrees are definitely identical, move to the end 408 | // of both subtrees. 409 | case IteratorMatches: 410 | next_position = iterator_end_position(&old_iter); 411 | break; 412 | 413 | // If the subtrees might differ internally, descend into both 414 | // subtrees, finding the first child that spans the current position. 415 | case IteratorMayDiffer: 416 | if (iterator_descend(&old_iter, position.bytes)) { 417 | if (!iterator_descend(&new_iter, position.bytes)) { 418 | is_changed = true; 419 | next_position = iterator_end_position(&old_iter); 420 | } 421 | } else if (iterator_descend(&new_iter, position.bytes)) { 422 | is_changed = true; 423 | next_position = iterator_end_position(&new_iter); 424 | } else { 425 | next_position = length_min( 426 | iterator_end_position(&old_iter), 427 | iterator_end_position(&new_iter) 428 | ); 429 | } 430 | break; 431 | 432 | // If the subtrees are different, record a change and then move 433 | // to the end of both subtrees. 
434 | case IteratorDiffers: 435 | is_changed = true; 436 | next_position = length_min( 437 | iterator_end_position(&old_iter), 438 | iterator_end_position(&new_iter) 439 | ); 440 | break; 441 | } 442 | 443 | // Ensure that both iterators are caught up to the current position. 444 | while ( 445 | !iterator_done(&old_iter) && 446 | iterator_end_position(&old_iter).bytes <= next_position.bytes 447 | ) iterator_advance(&old_iter); 448 | while ( 449 | !iterator_done(&new_iter) && 450 | iterator_end_position(&new_iter).bytes <= next_position.bytes 451 | ) iterator_advance(&new_iter); 452 | 453 | // Ensure that both iterators are at the same depth in the tree. 454 | while (old_iter.visible_depth > new_iter.visible_depth) { 455 | iterator_ascend(&old_iter); 456 | } 457 | while (new_iter.visible_depth > old_iter.visible_depth) { 458 | iterator_ascend(&new_iter); 459 | } 460 | 461 | if (is_changed) { 462 | #ifdef DEBUG_GET_CHANGED_RANGES 463 | printf( 464 | " change: [[%u, %u] - [%u, %u]]\n", 465 | position.extent.row + 1, position.extent.column, 466 | next_position.extent.row + 1, next_position.extent.column 467 | ); 468 | #endif 469 | 470 | ts_range_array_add(&results, position, next_position); 471 | } 472 | 473 | position = next_position; 474 | 475 | // Keep track of the current position in the included range differences 476 | // array in order to avoid scanning the entire array on each iteration. 
477 | while (included_range_difference_index < included_range_differences->size) { 478 | const TSRange *range = &included_range_differences->contents[ 479 | included_range_difference_index 480 | ]; 481 | if (range->end_byte <= position.bytes) { 482 | included_range_difference_index++; 483 | } else { 484 | break; 485 | } 486 | } 487 | } while (!iterator_done(&old_iter) && !iterator_done(&new_iter)); 488 | 489 | Length old_size = ts_subtree_total_size(*old_tree); 490 | Length new_size = ts_subtree_total_size(*new_tree); 491 | if (old_size.bytes < new_size.bytes) { 492 | ts_range_array_add(&results, old_size, new_size); 493 | } else if (new_size.bytes < old_size.bytes) { 494 | ts_range_array_add(&results, new_size, old_size); 495 | } 496 | 497 | *cursor1 = old_iter.cursor; 498 | *cursor2 = new_iter.cursor; 499 | *ranges = results.contents; 500 | return results.size; 501 | } 502 | --------------------------------------------------------------------------------